In [1]:
import pandas as pd
import numpy as np
from rdkit import Chem
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import seaborn as sns
import pymol
from pymol import cmd
from pymol.cgo import COLOR, ALPHA, SPHERE

In [2]:
df = pd.read_csv('model_data.csv')
df = pd.concat([df[df['Comment'] == 'inactive'].sample(20), df[df['Comment'] == 'active'].sample(20)])

# Extract file paths
active_sdf_files = [f"diffdock_chembl_output/{row['Molecule ChEMBL ID']}/{row['Filepath']}" for index, row in df.iterrows() if row['Comment'] == 'active']
inactive_sdf_files = [f"diffdock_chembl_output/{row['Molecule ChEMBL ID']}/{row['Filepath']}" for index, row in df.iterrows() if row['Comment'] == 'inactive']

# Function to extract coordinates from an SDF file
def extract_coordinates(sdf_file):
    mol = Chem.SDMolSupplier(sdf_file)[0]
    conf = mol.GetConformer()
    coords = np.array([list(conf.GetAtomPosition(i)) for i in range(mol.GetNumAtoms())])
    return coords


In [3]:
# Extract coordinates
active_coords = [extract_coordinates(f) for f in active_sdf_files]
inactive_coords = [extract_coordinates(f) for f in inactive_sdf_files]
active_coords_flat = np.vstack(active_coords)
inactive_coords_flat = np.vstack(inactive_coords)

# Function to create heatmap data
def create_heatmap_data(coords):
    heatmap, xedges, yedges = np.histogram2d(coords[:, 0], coords[:, 1], bins=50)
    return heatmap, xedges, yedges

In [4]:
# Create heatmaps
active_heatmap, xedges, yedges = create_heatmap_data(active_coords_flat)
inactive_heatmap, xedges, yedges = create_heatmap_data(inactive_coords_flat)

def create_pymol_cgo_heatmap(heatmap, xedges, yedges, color):
    cgo = []
    max_val = np.max(heatmap)
    for i in range(len(xedges)-1):
        for j in range(len(yedges)-1):
            if heatmap[i, j] > 0:
                alpha = heatmap[i, j] / max_val * 0.5  # Adjust alpha for transparency
                cgo.extend([
                    COLOR, color[0], color[1], color[2],
                    ALPHA, alpha,
                    SPHERE, xedges[i], yedges[j], 0, 0.5  # Adjust sphere size as needed
                ])
    return cgo

In [6]:
active_cgo = create_pymol_cgo_heatmap(active_heatmap, xedges, yedges, [1.0, 0.0, 0.0])  # Red color
inactive_cgo = create_pymol_cgo_heatmap(inactive_heatmap, xedges, yedges, [0.0, 0.0, 1.0])  # Blue color


In [7]:
cmd.load("GLP1.pdb")

In [8]:
cmd.load_cgo(active_cgo, "active_heatmap")
cmd.load_cgo(inactive_cgo, "inactive_heatmap")

In [9]:
cmd.set('cgo_transparency', 0.8)  # Set transparency level
cmd.set('transparency', 0.2, "GLP1")
cmd.show("surface", "GLP1")  # Show surface for GLP1
cmd.color("gray70", "GLP1")  # Color GLP1 surface

In [13]:
print(f"Active CGO length: {len(active_cgo)}")
print(f"Inactive CGO length: {len(inactive_cgo)}")

Active CGO length: 4246
Inactive CGO length: 4059


In [14]:
cmd.show("spheres", "active_heatmap")
cmd.show("spheres", "inactive_heatmap")

In [15]:
cmd.zoom("GLP1")
cmd.orient()

In [16]:
cmd.ray(1920, 1080)

In [17]:
cmd.png("GLP1_heatmap_visualization.png")

1