In [1]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
from rdkit.Chem.Draw import rdMolDraw2D
import os
from PIL import Image
import numpy as np

# Folder (relative to the current working directory) that receives every
# generated PNG. It is created as soon as this code runs; exist_ok=True makes
# re-running harmless when the folder is already there.
output_dir = "molecular_structures"
os.makedirs(output_dir, exist_ok=True)

# Molecules to draw, organised as
#     {group title: {functional-group label: (SMILES, compound name)}}
# The group title becomes the figure title (and, sanitised, the file name);
# the label, compound name and SMILES are all printed above each panel, so the
# SMILES must really encode the named compound.
molecular_groups = {
    "Oxygen-Containing Functional Groups": {
        "Alcohol": ("CCO", "Ethanol"),
        "Phenol": ("Oc1ccccc1", "Phenol"),
        # Diethyl ether has an ethyl group on both sides of the oxygen
        # ("CCOC" would be ethyl methyl ether).
        "Ether": ("CCOCC", "Diethyl ether"),
        "Ester": ("CC(=O)OC", "Methyl acetate"),
        "Aldehyde": ("CC=O", "Acetaldehyde"),
        "Ketone": ("CC(=O)C", "Acetone"),
        "Carboxyl": ("CC(=O)O", "Acetic acid"),
    },

    "Nitrogen-Containing Functional Groups": {
        "Primary Amine": ("CCN", "Ethylamine"),
        "Secondary Amine": ("CCNCC", "Diethylamine"),
        "Tertiary Amine": ("CCN(CC)CC", "Triethylamine"),
        "Amide": ("CC(=O)N", "Acetamide"),
        "Nitro": ("CC[N+](=O)[O-]", "Nitroethane"),
    },

    "Halogen-Containing Functional Groups": {
        "Chloro": ("CCCl", "Chloroethane"),
        "Bromo": ("CCBr", "Bromoethane"),
        "Fluoro": ("CCF", "Fluoroethane"),
        "Iodo": ("CCI", "Iodoethane"),
    },

    "Sulfur-Containing Functional Groups": {
        "Thiol": ("CCS", "Ethanethiol"),
        # Diethyl sulfide has an ethyl group on both sides of the sulfur
        # ("CCSC" would be ethyl methyl sulfide).
        "Sulfide": ("CCSCC", "Diethyl sulfide"),
        "Sulfate": ("COS(=O)(=O)O", "Methyl sulfate"),
        "Sulfone": ("CS(=O)(=O)C", "Dimethyl sulfone"),
    },

    "Aromatic and Heterocyclic Systems": {
        "Aromatic (Benzene)": ("c1ccccc1", "Benzene"),
        "Heterocycle (Pyridine)": ("n1ccccc1", "Pyridine"),
        "Fused Rings (Naphthalene)": ("c1ccc2ccccc2c1", "Naphthalene"),
        # Spiro[4.5]decane: a five- and a six-membered ring sharing one carbon
        # (10 carbons). The previous SMILES encoded two six-membered rings,
        # i.e. spiro[5.5]undecane.
        "Spiro Center": ("C1CCC2(C1)CCCCC2", "Spiro[4.5]decane"),
    },

    "Aliphatic and Complex Ring Systems": {
        "Bridged (Norbornane)": ("C1CC2CCC1C2", "Norbornane"),
        "Macrocycle": ("C1CCCCCCCCCCC1", "Cyclododecane"),
        "Linear Chain": ("CCCCCCCC", "Octane"),
        "Branched Chain": ("CC(C)CC(C)C", "2,4-Dimethylpentane"),
    },
}

def draw_molecule_group(group_name, molecules_dict, filename):
    """Render a labelled grid of molecules and save it as a single image.

    Args:
        group_name: Title placed above the grid.
        molecules_dict: Mapping of ``label -> (smiles, compound_name)``. A
            falsy ``compound_name`` drops that line from the panel title.
        filename: Name of the image file, written inside ``output_dir``.

    Every panel shows the RDKit 2D depiction of one molecule, titled with its
    label, compound name and SMILES. A SMILES that RDKit cannot parse, or any
    error raised while rendering a panel, is shown as text in that panel (and
    printed in full) so a single bad entry does not abort the whole figure.
    """
    # Imported once here instead of on every loop iteration.
    from io import BytesIO

    n_molecules = len(molecules_dict)

    # Grid size: 2x2 up to 4 molecules, 2x3 up to 6, otherwise three columns
    # with as many rows as needed (at least 3). The previous fixed 3x3 grid
    # made subplot creation fail for a tenth molecule.
    if n_molecules <= 4:
        rows, cols = 2, 2
    elif n_molecules <= 6:
        rows, cols = 2, 3
    else:
        cols = 3
        rows = max(3, -(-n_molecules // cols))  # ceiling division

    # Work on an explicit figure handle rather than pyplot's "current figure"
    # so that exactly this figure is saved and closed.
    fig = plt.figure(figsize=(cols * 4, rows * 3.5), facecolor='white')
    try:
        fig.suptitle(group_name, fontsize=16, fontweight='bold', y=0.98)

        for idx, (label, (smiles, compound_name)) in enumerate(molecules_dict.items()):
            ax = fig.add_subplot(rows, cols, idx + 1)
            ax.set_facecolor('white')

            try:
                mol = Chem.MolFromSmiles(smiles)

                if mol is None:
                    # MolFromSmiles signals an unparsable SMILES with None.
                    ax.text(0.5, 0.5, f"Error parsing:\n{smiles}",
                            ha='center', va='center', fontsize=10)
                    ax.axis('off')
                    continue

                AllChem.Compute2DCoords(mol)

                # Rasterise the molecule to PNG bytes with RDKit's Cairo drawer.
                drawer = rdMolDraw2D.MolDraw2DCairo(350, 300)
                options = drawer.drawOptions()
                options.addAtomIndices = False
                options.addStereoAnnotation = True
                options.padding = 0.1
                drawer.DrawMolecule(mol)
                drawer.FinishDrawing()

                # Decode the PNG bytes so matplotlib can display them.
                img_pil = Image.open(BytesIO(drawer.GetDrawingText()))
                ax.imshow(img_pil)
                ax.axis('off')

                # Panel title: label, optional compound name, then the SMILES.
                title = f"{label}"
                if compound_name:
                    title += f"\n({compound_name})"
                title += f"\n[{smiles}]"
                ax.set_title(title, fontsize=10, pad=5)

            except Exception as e:
                # Best effort per panel: keep going, but print the full
                # message because the in-figure text is truncated.
                print(f"Warning: could not render {label!r} [{smiles}]: {e}")
                ax.text(0.5, 0.5, f"Error:\n{str(e)[:30]}",
                        ha='center', va='center', fontsize=10)
                ax.axis('off')

        # Fill the unused grid cells with invisible axes.
        for idx in range(n_molecules, rows * cols):
            fig.add_subplot(rows, cols, idx + 1).axis('off')

        fig.tight_layout()

        filepath = os.path.join(output_dir, filename)
        fig.savefig(filepath, dpi=150, bbox_inches='tight',
                    facecolor='white', edgecolor='none')
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)

    print(f"Saved: {filepath}")

def create_summary_figure():
    """Save a 2x4 overview figure with one example molecule per category.

    The figure is written to ``00_overview.png`` inside ``output_dir``. Each
    panel shows the RDKit depiction titled with the category and compound
    name. If a SMILES cannot be parsed or rendering fails, the panel shows the
    category name as plain text instead of being left blank.
    """
    # Imported once here instead of on every loop iteration.
    from io import BytesIO

    # category -> (SMILES, display name)
    examples = {
        "Oxygen": ("CCO", "Alcohol"),
        "Nitrogen": ("CCN", "Amine"),
        "Halogen": ("CCCl", "Chloro"),
        "Sulfur": ("CCS", "Thiol"),
        "Aromatic": ("c1ccccc1", "Benzene"),
        "Heterocycle": ("n1ccccc1", "Pyridine"),
        "Macrocycle": ("C1CCCCCCCCCCC1", "Cyclododecane"),
        "Linear": ("CCCCCC", "Hexane"),
    }

    # Work on an explicit figure handle rather than pyplot's "current figure"
    # so that exactly this figure is saved and closed.
    fig = plt.figure(figsize=(16, 8), facecolor='white')
    try:
        fig.suptitle("Molecular Structure Categories - Overview",
                     fontsize=18, fontweight='bold')

        for idx, (category, (smiles, name)) in enumerate(examples.items()):
            ax = fig.add_subplot(2, 4, idx + 1)
            ax.set_facecolor('white')

            try:
                mol = Chem.MolFromSmiles(smiles)
                if mol is None:
                    # Unparsable SMILES: label the panel rather than leaving
                    # it silently empty.
                    print(f"Warning: could not parse SMILES for {category}: {smiles}")
                    ax.text(0.5, 0.5, category, ha='center', va='center', fontsize=12)
                else:
                    AllChem.Compute2DCoords(mol)

                    # Rasterise to PNG bytes, then decode for matplotlib.
                    drawer = rdMolDraw2D.MolDraw2DCairo(300, 250)
                    drawer.DrawMolecule(mol)
                    drawer.FinishDrawing()
                    img_pil = Image.open(BytesIO(drawer.GetDrawingText()))

                    ax.imshow(img_pil)
                    ax.set_title(f"{category}\n({name})", fontsize=12, fontweight='bold')

            except Exception as e:
                # Best effort per panel. `Exception` (not a bare except) lets
                # KeyboardInterrupt / SystemExit propagate.
                print(f"Warning: could not render {category} [{smiles}]: {e}")
                ax.text(0.5, 0.5, category, ha='center', va='center', fontsize=12)

            ax.axis('off')

        fig.tight_layout()
        filepath = os.path.join(output_dir, "00_overview.png")
        fig.savefig(filepath, dpi=150, bbox_inches='tight', facecolor='white')
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)

    print(f"Saved: {filepath}")

# Generate all visualizations
def main():
    """Generate the overview figure plus one figure per molecular group.

    Progress is printed to stdout. The final count reports the images written
    by this run, not the number of entries that happen to be in the output
    folder (which may hold files from earlier runs).
    """
    print(f"Creating molecular structure visualizations in '{output_dir}' folder...")
    print("-" * 50)

    files_created = 0

    # Overview figure first (its "00_" prefix sorts it ahead of the groups).
    create_summary_figure()
    files_created += 1

    # One figure per group.
    for group_name, molecules in molecular_groups.items():
        # Derive a file name from the title: spaces and hyphens -> underscores.
        safe_name = group_name.replace(" ", "_").replace("-", "_")
        filename = f"{safe_name}.png"

        print(f"Processing: {group_name}")
        draw_molecule_group(group_name, molecules, filename)
        files_created += 1

    print("-" * 50)
    print(f"All visualizations saved to '{output_dir}' folder")
    print(f"Total files created: {files_created}")

# Run the whole pipeline only when this code is executed directly; importing
# it as a module does not trigger figure generation.
if __name__ == "__main__":
    main()

Creating molecular structure visualizations in 'molecular_structures' folder...
--------------------------------------------------
Saved: molecular_structures\00_overview.png
Processing: Oxygen-Containing Functional Groups
Saved: molecular_structures\Oxygen_Containing_Functional_Groups.png
Processing: Nitrogen-Containing Functional Groups
Saved: molecular_structures\Nitrogen_Containing_Functional_Groups.png
Processing: Halogen-Containing Functional Groups
Saved: molecular_structures\Halogen_Containing_Functional_Groups.png
Processing: Sulfur-Containing Functional Groups
Saved: molecular_structures\Sulfur_Containing_Functional_Groups.png
Processing: Aromatic and Heterocyclic Systems
Saved: molecular_structures\Aromatic_and_Heterocyclic_Systems.png
Processing: Aliphatic and Complex Ring Systems
Saved: molecular_structures\Aliphatic_and_Complex_Ring_Systems.png
--------------------------------------------------
All visualizations saved to 'molecular_structures' folder
Total files created: