Libraries needed!

In [2]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
import os
from IPython.display import display, Image

31 Discriminative patterns using SMARTSminer on active Cluster 0-11 vs inactive Cluster 0

In [14]:
def visualize_smarts_from_excel(excel_file, sheet_name=0, smarts_column=1, output_dir='ClustDP_struct'):
    """Draw every SMARTS pattern found in one Excel column and save the images.

    Each non-empty string cell of the selected column is parsed with
    ``Chem.MolFromSmarts``. Every pattern that parses is drawn to
    ``pattern_<n>.png`` inside ``output_dir`` (``n`` is the 1-based position of
    the cell among the column's data rows, so skipped cells leave gaps in the
    numbering), and all parsed patterns are combined into
    ``all_patterns_grid.png`` with at most 4 patterns per row. A summary and
    any per-row errors are printed.

    Args:
        excel_file: Path of the workbook, handed to ``pd.read_excel``.
        sheet_name: Sheet selector, handed to ``pd.read_excel``.
        smarts_column: 1-based position of the column holding the SMARTS
            strings (1 is the first column of the sheet).
        output_dir: Directory that receives the images; created if missing.

    Returns:
        None. A workbook that cannot be read, or a ``smarts_column`` outside
        the sheet's columns, is reported with ``print`` and ends the call
        before anything is written.
    """
    # Read Excel file
    try:
        df = pd.read_excel(excel_file, sheet_name=sheet_name)
    except Exception as e:
        print(f"Error reading Excel file: {e}")
        return

    # Validate the 1-based column position explicitly: without this check a
    # value of 0 (or a negative one) would silently wrap around to a column
    # counted from the end, and a too-large value would raise a bare IndexError.
    n_columns = len(df.columns)
    if not 1 <= smarts_column <= n_columns:
        print(f"Error: smarts_column={smarts_column} is out of range; "
              f"the sheet has {n_columns} column(s)")
        return
    col_name = df.columns[smarts_column - 1]

    # Create the output directory only once the input is known to be usable,
    # so a failed call does not leave an empty directory behind.
    os.makedirs(output_dir, exist_ok=True)

    mols = []    # parsed patterns, in sheet order, for the grid image
    labels = []  # legend shown under each pattern in the grid
    errors = []  # human-readable messages, printed at the end

    # Process each SMARTS pattern
    for idx, smarts in enumerate(df[col_name], 1):
        try:
            # Skip empty or non-string values
            if pd.isna(smarts) or not isinstance(smarts, str):
                continue

            # Convert SMARTS to molecule
            mol = Chem.MolFromSmarts(smarts)

            if mol is None:
                errors.append(f"Row {idx}: Invalid SMARTS pattern - {smarts}")
                continue

            # Add molecule and label to lists
            mols.append(mol)
            labels.append(f"Pattern {idx}")

            # Save individual molecule image
            img = Draw.MolToImage(mol)
            img.save(os.path.join(output_dir, f"pattern_{idx}.png"))

        except Exception as e:
            # Keep going: one bad row must not abort the remaining patterns.
            errors.append(f"Row {idx}: Error processing SMARTS - {e}")

    # Generate and save grid image if molecules were found
    if mols:
        try:
            # Maximum 4 molecules per row; fewer when there are fewer patterns
            cols = min(4, len(mols))

            # returnPNG=False is requested so the result can be written with .save()
            img = Draw.MolsToGridImage(
                mols,
                molsPerRow=cols,
                subImgSize=(300, 300),
                legends=labels,
                returnPNG=False
            )
            # Save grid image
            img.save(os.path.join(output_dir, "all_patterns_grid.png"))
            print("\nGrid image saved as 'all_patterns_grid.png'")

        except Exception as e:
            print(f"Error generating grid image: {e}")

    # Print summary
    print(f"\nProcessed {len(mols)} SMARTS patterns")
    print(f"Individual images saved in: {output_dir}")

    # Print any errors that occurred
    if errors:
        print("\nErrors encountered:")
        for error in errors:
            print(error)

# Example usage: draw the patterns stored in the first column of the workbook
if __name__ == "__main__":
    # Path to the workbook that holds the SMARTS patterns; replace with your own
    excel_file = "Clustered_DP.xlsx"

    # sheet_name=0 selects the first sheet; smarts_column=1 selects column A
    visualize_smarts_from_excel(excel_file, sheet_name=0, smarts_column=1, output_dir='ClustDP_struct')


Grid image saved as 'all_patterns_grid.png'

Processed 31 SMARTS patterns
Individual images saved in: ClustDP_struct
