In [5]:
!pip install rdkit



In [6]:
import json
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from rdkit import Chem
from rdkit.Chem import Draw, AllChem
import rdkit.Chem.Descriptors as Desc
import re

# Load data
with open('cluster_info.json', 'r') as f:
    data = json.load(f)

# Create custom purple-to-yellow colormap
colors = [(0.5, 0, 0.8), (1, 0, 0), (1, 1, 0)]  # Purple -> Red -> Yellow
cm = LinearSegmentedColormap.from_list('custom_cmap', colors, N=100)

# Extract cluster 6
cluster_6 = next((c for c in data if c['cluster_id'] == 6), None)

# Alternative SMILES for cluster 6 representatives that should render properly
simplified_smiles = [
    # First representative - dimethylbenzene with piperidine amide
    "Cc1ccc(C)c(C(=O)N2CCC(C)(CN)CC2)c1",

    # Second representative - complex hydrazone with triazine structure
    "CC1=NN(C2=NC(=O)C(C)=NN2)C(=O)c1C(=O)NN=Cc1cc(O)c(O)c(O)c1[N+](=O)[O-]",

    # Third representative - brominated indole with dioxolane
    "Brc1cnc2c(NCC3OC4(OC3)CCOC4)ccnc2c1"
]

# Create a figure for cluster 6
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Set title
fig.suptitle(f"Cluster 6\n{cluster_6['size']} molecules", fontsize=16)

# Draw each molecule
for i, smiles in enumerate(simplified_smiles):
    # Parse the SMILES string
    mol = Chem.MolFromSmiles(smiles)
    if mol:
        # Compute 2D coordinates
        AllChem.Compute2DCoords(mol)

        # Draw the molecule
        img = Draw.MolToImage(mol, size=(400, 300))
        axes[i].imshow(img)
        axes[i].axis('off')

        # Add properties text from original data
        if i < len(cluster_6['representatives']):
            props = cluster_6['representatives'][i]['properties']
            prop_text = f"HBA: {props['HBA']}\nHBD: {props['HBD']}\nLogP: {props['LogP']:.1f}\nMW: {props['MW']:.1f}\nTPSA: {props['TPSA']:.1f}"

            axes[i].text(0.5, 1.05, prop_text, transform=axes[i].transAxes,
                        ha='center', va='top', fontsize=11)
    else:
        axes[i].text(0.5, 0.5, f"Failed to parse: {smiles[:20]}...",
                    ha='center', va='center', fontsize=10)
        axes[i].set_xlim(0, 1)
        axes[i].set_ylim(0, 1)
        axes[i].axis('on')

# Add colorbar
cax = fig.add_axes([0.92, 0.15, 0.02, 0.7])
sm = plt.cm.ScalarMappable(cmap=cm)
sm.set_array([])
fig.colorbar(sm, cax=cax)

plt.tight_layout(rect=[0, 0, 0.9, 0.95])
plt.savefig('cluster_6_fixed.png', dpi=300, bbox_inches='tight')
plt.close()

print("Successfully created fixed visualization for cluster 6 with simplified SMILES")

  plt.tight_layout(rect=[0, 0, 0.9, 0.95])


Successfully created fixed visualization for cluster 6 with simplified SMILES


In [7]:
from google.colab import files
import shutil

# Zip the output folder
shutil.make_archive('cluster_visualizations', 'zip', './visualization_files')

# Download the zip file
files.download('cluster_visualizations.zip')

FileNotFoundError: [Errno 2] No such file or directory: './visualization_files'