# RDKit features

[RDKit](https://github.com/rdkit/rdkit) and [AMPL](https://github.com/ATOMScience-org/AMPL) provide a variety of descriptor options. For demonstration purposes, this tutorial uses RDKit descriptors as features. [RDKit](https://github.com/rdkit/rdkit) is an open-source toolkit for cheminformatics: a collection of cheminformatics and machine-learning software written in C++ and Python. Let us see how to calculate descriptors using RDKit.

In [1]:
import pandas as pd

# Locations used by the tutorial: the output directory and the curated
# input dataset that is read in the next cell.
odir = 'dataset'
dataset_file = 'dataset/curated_kcna5_ic50.csv'

In [2]:
# Load the curated dataset CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer=dataset_file)

In [3]:
# Calculate descriptors using RDKit

from rdkit.Chem import AllChem
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors

def RDkit_descriptors(smiles):
    """Compute every available RDKit descriptor for each SMILES string.

    Args:
        smiles: Iterable of SMILES strings (for example a DataFrame column).

    Returns:
        A ``(Mol_descriptors, desc_names)`` pair. ``Mol_descriptors`` is a
        list with one sequence of descriptor values per input, in input
        order. ``desc_names`` holds the descriptor names in the same order
        as the values. An input that is not a string (such as the NaN of a
        missing CSV cell) or that RDKit cannot parse produces a row filled
        with NaN, so the result stays row-aligned with the input.
    """
    # The set of descriptors (and therefore the row width) is whatever the
    # installed RDKit version registers in Descriptors._descList.
    calc = MoleculeDescriptors.MolecularDescriptorCalculator(
        [x[0] for x in Descriptors._descList]
    )
    desc_names = calc.GetDescriptorNames()
    missing_row = (float('nan'),) * len(desc_names)

    Mol_descriptors = []
    for smi in smiles:
        # MolFromSmiles returns None on a parse failure; non-string entries
        # are never handed to RDKit at all.
        mol = Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
        if mol is None:
            Mol_descriptors.append(missing_row)
            continue
        # Add explicit hydrogens before computing the descriptors.
        mol = Chem.AddHs(mol)
        Mol_descriptors.append(calc.CalcDescriptors(mol))
    return Mol_descriptors, desc_names

# Compute descriptors for every molecule in the SMILES column
Mol_descriptors, desc_names = RDkit_descriptors(df['base_rdkit_smiles'])

In [4]:
# Assemble the descriptor rows into a table with one column per descriptor,
# then display it as the cell output.
df_with_descriptors = pd.DataFrame(data=Mol_descriptors, columns=desc_names)
df_with_descriptors

Unnamed: 0,MaxEStateIndex,MinEStateIndex,MaxAbsEStateIndex,MinAbsEStateIndex,qed,MolWt,HeavyAtomMolWt,ExactMolWt,NumValenceElectrons,NumRadicalElectrons,...,fr_sulfide,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_unbrch_alkane,fr_urea
0,14.101871,-6.232636,14.101871,0.935669,0.770404,403.504,378.304,403.156577,150,0,...,0,1,0,0,0,0,0,0,0,1
1,14.336747,-6.469670,14.336747,0.909616,0.802325,403.891,385.747,403.075740,140,0,...,0,1,0,0,0,0,0,0,0,0
2,9.304449,-5.128857,9.304449,0.958213,0.820421,339.479,310.247,339.219829,134,0,...,0,0,0,0,0,0,0,0,0,0
3,15.212871,-7.160522,15.212871,2.962826,0.445226,456.633,413.289,456.321515,184,0,...,0,0,0,0,0,0,0,0,0,0
4,15.051705,-4.380136,15.051705,0.079079,0.568548,486.378,464.202,485.118544,170,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
797,16.231143,-5.759972,16.231143,0.815520,0.368934,542.724,500.388,542.325691,212,0,...,0,0,0,0,0,0,0,0,0,0
798,13.632325,-4.014188,13.632325,0.381029,0.601473,379.508,350.276,379.237211,148,0,...,0,0,0,0,0,0,0,0,0,1
799,14.376682,-6.507586,14.376682,0.793149,0.412245,479.602,450.370,479.187877,178,0,...,0,1,0,0,0,0,0,0,0,1
800,9.390684,-4.591510,9.390684,0.868797,0.925392,299.374,278.206,299.163377,116,0,...,0,0,0,0,0,0,0,0,0,0
