In [3]:
import os
import numpy as np
import pandas as pd

## Generate an SDF file of the selected batches

In [4]:
# Batch to export; rows are matched against the 'label' column below.
selected_batch = 0

# Load the full compound table from disk.
compound_df = pd.read_csv('T107_rep2_50_compound_df.csv')

# Keep only the rows whose 'label' equals the selected batch, then preview them.
in_selected_batch = compound_df['label'].eq(selected_batch)
select_df = compound_df.loc[in_selected_batch]
select_df.head()

Unnamed: 0,T107,chembl_id,canonical_smiles,x,y,pred_T107,label,batch_label_color,batch_label,label_color
1800,3.753245,CHEMBL3664821,O=C(CC1CC1)N[C@@H]2CC[C@@H](CCN3CCC(CC3)c4cccc...,-39.53796,-6.644773,3.695881,0,#1f77b4,2,#393b79
1852,3.662531,CHEMBL3664872,O=C(N[C@@H]1CC[C@@H](CCN2CCC(CC2)c3cccc4OCOc34...,-39.001064,-1.747338,3.654126,0,#1f77b4,2,#393b79
2306,3.787004,CHEMBL3697951,CC(=O)N[C@@H]1CC[C@@H](CCN2CCN(CC2)c3cccc4OCOc...,-39.745975,2.005099,3.765032,0,#1f77b4,2,#393b79
2307,3.673449,CHEMBL3697952,COCCC(=O)N[C@@H]1CC[C@@H](CCN2CCN(CC2)c3cccc4O...,-37.736317,2.43564,3.635353,0,#1f77b4,2,#393b79
2311,3.780144,CHEMBL3697956,O=C(CC1CCCO1)N[C@@H]2CC[C@@H](CCN3CCN(CC3)c4cc...,-40.69907,0.664541,3.700971,0,#1f77b4,2,#393b79


In [6]:
from rdkit import Chem
from rdkit.Chem import PandasTools

def df2sdf(df, output_sdf_name, 
           smiles_field = 'canonical_smiles', id_field = 'chembl_id', 
           selected_batch = None, label_field = 'label'):
    '''
    Pack a pd.DataFrame of compounds into an SDF file.

    Parameters
    ----------
    df : pd.DataFrame
        Table with one compound per row. It is not modified; the molecule
        column is added to an internal copy.
    output_sdf_name : str
        Path of the SDF file to write.
    smiles_field : str
        Column holding the SMILES strings used to build the molecules.
    id_field : str
        Column passed to WriteSDF as ``idName`` (the per-record identifier).
    selected_batch : optional
        If not None, only rows whose ``label_field`` equals this value are
        written.
    label_field : str
        Column compared against ``selected_batch``.

    Returns
    -------
    None
    '''
    if selected_batch is not None:
        df = df.loc[df[label_field] == selected_batch]

    # Work on a private copy: AddMoleculeColumnToFrame assigns a new column,
    # which would otherwise mutate the caller's frame or trigger a
    # SettingWithCopyWarning on the filtered slice.
    df = df.copy()
    PandasTools.AddMoleculeColumnToFrame(df, smiles_field, 'ROMol')

    # SMILES that RDKit cannot parse leave a missing value in 'ROMol';
    # such rows cannot be written, so drop them and report how many.
    parsed = df['ROMol'].notna()
    if not parsed.all():
        print('df2sdf: skipping {} row(s) with unparsable SMILES'.format(int((~parsed).sum())))
        df = df.loc[parsed]

    PandasTools.WriteSDF(df, output_sdf_name, idName=id_field, properties=list(df.columns))

    return

In [8]:
# Reuse `selected_batch` (set when select_df was built) instead of repeating
# the literal 0, so the file name and the filter cannot drift apart from the
# batch that was actually selected.
df2sdf(select_df, 'T107_rep2_50_batch{}.sdf'.format(selected_batch), selected_batch = selected_batch)

## Building pharmacophore models using [Align-it](http://silicos-it.be.s3-website-eu-west-1.amazonaws.com/software/align-it/1.0.4/align-it.html)

### prepare 3D coordinates for ligands

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
raw_sdf_file = 'T107_rep2_50_batch0.sdf'
# SDMolSupplier yields None for records it cannot parse; drop those so that
# later per-molecule calls do not receive None.
ms = [mol for mol in Chem.SDMolSupplier(raw_sdf_file) if mol is not None]

In [None]:
n_conf = 5          # number of embedding rounds, i.e. conformers written per molecule
random_seed = 42    # base seed so the generated conformers are reproducible

# Hydrogens only need to be added once; every embedding round below replaces
# the molecule's conformer, so the same H-added molecules can be reused.
ms_addH = [Chem.AddHs(m) for m in ms if m is not None]

w = Chem.SDWriter('T107_rep2_50_batch0_rdkit_conf.sdf')
try:
    for i in range(n_conf):
        for mol_idx, m in enumerate(ms_addH):
            # A different seed per round gives a different conformer per round.
            conf_id = AllChem.EmbedMolecule(m, randomSeed=random_seed + i)
            if conf_id < 0:
                # Embedding failed: the molecule has no 3D coordinates, so
                # writing it would put a coordinate-less record in the file.
                print('embedding failed for molecule {} (round {}); skipped'.format(mol_idx, i))
                continue
            AllChem.MMFFOptimizeMoleculeConfs(m)
            w.write(m)
finally:
    # Close the writer so all records are flushed to disk even on error.
    w.close()

### from prepared 3D ligands to representative pharmacophores

In [None]:
from align_it_utils import proceed_pharmacophore
import os

In [None]:
home_dir = './data/'
result_dir = home_dir + 'Label13_rdkit_phars/'
# An absolute path is preferred, but it is derived from home_dir rather than
# hard-coded to one user's machine so the notebook runs elsewhere too.
# NOTE(review): assumes the notebook is executed from the directory that
# contains ./data (code_utils_v1 in the original hard-coded path) - confirm.
sdf_file = os.path.abspath(os.path.join(home_dir, 'Label13_rdkit_conf.sdf'))
output_name = 'Cluster13_'

In [None]:
# Call the project helper imported from align_it_utils with the directories,
# input SDF path and output name configured above.
# NOTE(review): presumably this runs Align-it on sdf_file and writes its
# results under result_dir using output_name as a prefix - confirm in
# align_it_utils, including whether result_dir must already exist.
proceed_pharmacophore(home_dir, sdf_file, result_dir, output_name)

You can also build pharmacophore models using the [TeachOpenCADD platform](https://github.com/volkamerlab/TeachOpenCADD), or analyze the selected ligands with other informatics tools, e.g. [DataWarrior](http://www.openmolecules.org/datawarrior/) or [Schrodinger](https://www.schrodinger.com/drug-discovery).