In [None]:
from conphar.Pharmacophores import parse_json_pharmacophore, show_pharmacophoric_descriptors, save_pharmacophore_to_pymol, save_pharmacophore_to_json, compute_concensus_pharmacophore, get_ligand_receptor_pharmacophore
import pandas as pd
import os
from pymol import cmd
import seaborn as sns
import matplotlib.pyplot as plt

ConPhar tools imported successfully


In [None]:
# Prepared receptor structure shared by every ligand pose (update with your actual receptor file).
receptor_path = "../Data/target/AF-Q13705-ACVR2B_-_prepared.pdb"

# Folder holding one SDF pose file per ligand.
sdf_dir = "../Data/other/SDF poses/"
sdf_files = [entry for entry in os.listdir(sdf_dir) if entry.endswith(".sdf")]

# Build one ligand-receptor pharmacophore per pose; outputs are named after the SDF file stem.
for file in sdf_files:
    ligand_path = f"{sdf_dir}{file}"
    output_name = os.path.splitext(file)[0]
    out_path = f"../Results/pharmacophores/{output_name}"
    get_ligand_receptor_pharmacophore(
        receptor=receptor_path,
        ligand=ligand_path,
        out=out_path,
    )

Unnamed: 0,name,cluster,x,y,z,radius,color,weight,balance
0,Aromatic,1,-8.931787,1.623796,-12.175643,0.5,purple,9.0,0.435484
1,Aromatic,2,-4.9081,1.006633,-11.95435,0.5,purple,1.0,0.005376
2,Aromatic,3,-3.029,1.471333,-11.617167,0.5,purple,1.0,0.010753
3,Aromatic,4,-1.298168,0.960476,-12.081137,0.5,purple,10.0,0.543011
4,Aromatic,5,-1.4605,4.074667,-8.658333,0.5,purple,1.0,0.005376
5,HydrogenAcceptor,1,-13.524081,4.713,-10.94723,0.761888,orange,7.0,0.098667
6,HydrogenAcceptor,2,-12.3372,1.8871,-11.1059,0.5,orange,1.0,0.002667
7,HydrogenAcceptor,3,-5.432158,-1.635832,-13.858287,1.014912,orange,11.0,0.269333
8,HydrogenAcceptor,4,-6.959533,1.8506,-10.951567,0.762564,orange,3.0,0.008
9,HydrogenAcceptor,5,-9.1088,2.5681,-9.3453,0.5,orange,1.0,0.002667


In [None]:
# Cluster whose per-ligand pharmacophore JSON files are merged into one table.
cluster = 'cluster_7'
cluster_dir = f'../Results/pharmacophores/{cluster}'

# Collect the per-ligand tables first and concatenate once at the end
# (growing a DataFrame with concat inside the loop is quadratic).
p4_frames = []
# sorted() keeps the row order reproducible; os.listdir order is arbitrary.
for file in sorted(os.listdir(cluster_dir)):
    # Only files that really end in .json (not e.g. "x.json.bak").
    if not file.endswith('.json'):
        continue
    try:
        p4, lig, rec = parse_json_pharmacophore(f"{cluster_dir}/{file}")
        # Tag every feature with the ligand it came from (file name without extension).
        p4['ligand'] = file[:-len('.json')]
    except Exception as err:
        # Still best-effort, but report what was skipped instead of hiding it.
        print(f"Skipping {file}: {err!r}")
        continue
    p4_frames.append(p4)

# pd.concat raises on an empty list, so fall back to an empty table as before.
p4_table = pd.concat(p4_frames, ignore_index=True) if p4_frames else pd.DataFrame()

In [None]:
# Swap two colour names and discard the ionic feature types.
color_aliases = {'navy': 'blue', 'white': 'yellow'}
ionic_features = ['NegativeIon', 'PositiveIon']

p4_table = (
    p4_table
    .assign(color=p4_table['color'].replace(color_aliases))
    .loc[lambda frame: ~frame['name'].isin(ionic_features)]
)
p4_table

In [None]:
# Write the merged feature table as both a PyMOL session and a JSON file.
consensus_dir = f'../Results/pharmacophores/{cluster}/consensus'
os.makedirs(consensus_dir, exist_ok=True)

for writer, extension in (
    (save_pharmacophore_to_pymol, 'pse'),
    (save_pharmacophore_to_json, 'json'),
):
    writer(p4_table, out_file=f'{consensus_dir}/all_features_{cluster}.{extension}')

In [None]:
# Options passed to the consensus computation; per-descriptor data goes to the consensus folder.
consensus_options = dict(
    save_data_per_descriptor=True,
    out_folder=f'../Results/pharmacophores/{cluster}/consensus',
    cmap_plots="viridis",
    h_dist=0.2,
)
concensus, links = compute_concensus_pharmacophore(p4_table, **consensus_options)

In [None]:
# Store cluster ids as integers.
concensus = concensus.astype({'cluster': int})

In [None]:
# Make sure both output folders exist before writing: to_csv does not create
# missing parent directories, and the sub_models folder is not created anywhere else.
sub_models_dir = '../Results/pharmacophores/sub_models'
consensus_dir = f'../Results/pharmacophores/{cluster}/consensus'
os.makedirs(sub_models_dir, exist_ok=True)
os.makedirs(consensus_dir, exist_ok=True)

# Consensus table as CSV, PyMOL session and JSON.
concensus.to_csv(f'{sub_models_dir}/consensus_weights_{cluster}.csv', index=False)
save_pharmacophore_to_pymol(
    concensus,
    out_file=f'{consensus_dir}/{cluster}_concensus.pse',
    select='concensus',
)
save_pharmacophore_to_json(concensus, out_file=f'{consensus_dir}/{cluster}_concensus.json')

In [None]:
# Highest weight observed for each feature type.
max_weight = concensus.groupby('name')['weight'].max()
print(max_weight)

# NOTE: weighting_list is an alias of concensus (not a copy), so the two columns
# added below also appear on concensus.
weighting_list = concensus

# frequency = weight relative to the best cluster of the same feature type.
weighting_list['frequency'] = weighting_list['weight'] / max_weight[weighting_list['name']].values

# Min-max scale the frequency to the range 0..1.
min_freq = weighting_list['frequency'].min()
max_freq = weighting_list['frequency'].max()
freq_range = max_freq - min_freq
if freq_range == 0:
    # All frequencies are identical, so min-max scaling would be 0/0 = NaN.
    # Treat every cluster as fully frequent (1.0); missing frequencies stay NaN.
    weighting_list['normalized_frequency'] = (weighting_list['frequency'] - min_freq) + 1.0
else:
    weighting_list['normalized_frequency'] = (weighting_list['frequency'] - min_freq) / freq_range
weighting_list

In [None]:
# Show, for each feature type, the four clusters with the highest normalized_frequency.
top_per_name = weighting_list.groupby('name')['normalized_frequency'].nlargest(4)
# Drop the 'name' level so that only the original row labels remain.
top_indices = top_per_name.reset_index(level=0, drop=True).index
top_clusters = weighting_list.loc[top_indices].sort_values(by='name')
top_clusters

In [None]:
# Remove the frequency and normalized_frequency helper columns.
# errors='ignore' keeps the cell re-runnable: without it a second run raises
# KeyError because the columns are already gone.
concensus = concensus.drop(columns=['frequency', 'normalized_frequency'], errors='ignore')
concensus

In [None]:
from pymol import cmd

# Feature types to export; one PyMOL session is written per type.
feature_names = [
    "Aromatic",
    "Hydrophobic",
    "HydrogenAcceptor",
    "HydrogenDonor",
    "PositiveIon",
    "NegativeIon",
]

# Rows of the weighting table split by feature type.
subsets = {
    feature: weighting_list[weighting_list['name'] == feature]
    for feature in feature_names
}

for subset_name, subset_data in subsets.items():
    # Skip feature types with no rows: there would be no atoms to colour and no
    # colour to read (the original used a stale or undefined `color` here).
    if subset_data.empty:
        print(f"No {subset_name} features found; skipping PyMOL export.")
        continue

    cmd.reinitialize()  # Reset PyMOL session

    # One pseudoatom per cluster; the B-factor carries the normalized frequency.
    for index, row in subset_data.iterrows():
        name = row['cluster']
        atom_name = f"{name}_{index}"
        cmd.pseudoatom(
            object=atom_name,
            pos=[row['x'], row['y'], row['z']],
            vdw=row['radius'],
            b=row['normalized_frequency'],
        )
        cmd.label(atom_name, f'"{name}"')

    # Colour ramp from white to the subset's colour, driven by the B-factor.
    # The last row's colour is used, as in the original loop.
    color = subset_data['color'].iloc[-1]
    cmd.spectrum("b", palette=f"white {color}", selection="*")

    # Group everything and save the PyMOL session
    cmd.group(subset_name, '*')
    cmd.center('all')
    cmd.show('spheres')
    cmd.save(f'../Results/pharmacophores/{cluster}/consensus/{subset_name}_clusters_by_weight.pse')

In [16]:
# For each feature type, print the three highest-weighted clusters (name, weight, cluster id).
# NOTE(review): the original cell read from `data`, which no cell in this notebook
# defines, so it fails on a fresh kernel. The consensus table has the same
# name/weight/cluster columns and is presumably what was meant - confirm.
for name in concensus['name'].unique():
    subset = concensus[concensus['name'] == name]
    top_weights = subset.nlargest(3, 'weight')
    for index, row in top_weights.iterrows():
        print(f"{row['name']}, Weight: {row['weight']}, Cluster: {row['cluster']}")


Aromatic, Weight: 4.0, Cluster: 4
Aromatic, Weight: 2.0, Cluster: 1
Aromatic, Weight: 2.0, Cluster: 3
HydrogenAcceptor, Weight: 6.0, Cluster: 1
HydrogenAcceptor, Weight: 4.0, Cluster: 2
HydrogenAcceptor, Weight: 3.0, Cluster: 3
HydrogenDonor, Weight: 3.0, Cluster: 4
HydrogenDonor, Weight: 2.0, Cluster: 3
HydrogenDonor, Weight: 1.0, Cluster: 1
Hydrophobic, Weight: 4.0, Cluster: 11
Hydrophobic, Weight: 3.0, Cluster: 1
Hydrophobic, Weight: 3.0, Cluster: 2


In [None]:
# Define filters as a dictionary: feature name → list of clusters
filters = {
    'Aromatic': [1,12], #purple
    'Hydrophobic': [10], #green
    'HydrogenAcceptor': [9,6], #orange
    'HydrogenDonor': [2] #yellow
}

# Build the set of valid (name, cluster) pairs.
# The comprehension keeps its loop variables local. The original loop reused the
# name `cluster`, which overwrote the global cluster label with an integer and
# corrupted the output path below.
valid_pairs = {
    (feature_name, cluster_id)
    for feature_name, cluster_ids in filters.items()
    for cluster_id in cluster_ids
}

# Keep the consensus rows whose (name, cluster) pair was selected.
is_selected = [
    pair in valid_pairs
    for pair in zip(concensus['name'], concensus['cluster'])
]
submodel_df = concensus[is_selected]

# Create the target folder first; it is not created anywhere else in the notebook.
submodel_dir = f'../Results/pharmacophores/{cluster}/consensus/{cluster}'
os.makedirs(submodel_dir, exist_ok=True)
save_pharmacophore_to_json(submodel_df, out_file=f'{submodel_dir}/Submodel_4.json')