# RDKit features

[RDKit](https://github.com/rdkit/rdkit) and [AMPL](https://github.com/ATOMScience-org/AMPL) provide a variety of descriptor options. For demonstration purposes, we use RDKit features in this tutorial. [RDKit](https://github.com/rdkit/rdkit) is an open-source toolkit for cheminformatics: a collection of cheminformatics and machine-learning software written in C++ and Python. Let us see how to calculate descriptors using RDKit.

In [3]:
import pandas as pd

# Paths used in this notebook: a directory name and the dataset CSV
odir = 'dataset'
dataset_file = 'dataset/SLC6A3_Ki_curated.csv'

In [4]:
# Load the CSV at dataset_file into a DataFrame
df = pd.read_csv(filepath_or_buffer=dataset_file)

In [5]:
# Calculate descriptors using RDKit

from rdkit.Chem import AllChem
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors

def RDkit_descriptors(smiles):
    """Compute every descriptor listed in ``Descriptors._descList`` per SMILES.

    Args:
        smiles: Iterable of SMILES strings (for example a pandas Series).

    Returns:
        A ``(Mol_descriptors, desc_names)`` pair. ``Mol_descriptors`` is a
        list holding one tuple of descriptor values per input SMILES, in
        input order. ``desc_names`` is the sequence of descriptor names
        reported by the calculator, in the same order as the values inside
        each tuple. A SMILES that RDKit cannot parse produces a tuple of NaN
        values, so the rows stay aligned with the input.
    """
    # The number of descriptors depends on the installed RDKit version, so
    # the calculator is built from whatever ``_descList`` currently contains.
    calc = MoleculeDescriptors.MolecularDescriptorCalculator(
        [entry[0] for entry in Descriptors._descList]
    )
    desc_names = calc.GetDescriptorNames()
    missing_row = (float('nan'),) * len(desc_names)

    Mol_descriptors = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # MolFromSmiles returns None when parsing fails; passing None to
            # AddHs would raise and abort the whole batch.
            Mol_descriptors.append(missing_row)
            continue
        # Add explicit hydrogens before computing the descriptors.
        mol = Chem.AddHs(mol)
        Mol_descriptors.append(calc.CalcDescriptors(mol))
    return Mol_descriptors, desc_names

# Compute descriptors for every SMILES in the 'base_rdkit_smiles' column
Mol_descriptors, desc_names = RDkit_descriptors(df['base_rdkit_smiles'])

In [6]:
# Tabulate the descriptors: one row per molecule, one column per descriptor name
df_with_descriptors = pd.DataFrame(data=Mol_descriptors, columns=desc_names)
df_with_descriptors

Unnamed: 0,MaxAbsEStateIndex,MaxEStateIndex,MinAbsEStateIndex,MinEStateIndex,qed,MolWt,HeavyAtomMolWt,ExactMolWt,NumValenceElectrons,NumRadicalElectrons,...,fr_sulfide,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_unbrch_alkane,fr_urea
0,13.756128,13.756128,0.704900,-4.183304,0.596433,241.334,222.182,241.146664,94,0,...,0,0,0,1,0,0,0,0,0,0
1,14.693317,14.693317,1.071496,-5.437594,0.275746,491.607,457.335,491.258420,190,0,...,0,0,0,1,0,0,0,0,0,1
2,13.731577,13.731577,0.411616,-4.565351,0.436897,409.267,389.107,409.053877,116,0,...,0,0,0,1,0,0,0,0,0,0
3,9.974184,9.974184,1.177788,-5.260689,0.721043,337.463,310.247,337.204179,132,0,...,0,0,0,1,0,0,0,0,0,0
4,9.648369,9.648369,1.221126,-4.833874,0.718002,296.410,272.218,296.177630,116,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1814,8.169259,8.169259,0.072714,-0.489920,0.589075,199.278,190.206,199.045570,68,0,...,0,0,0,0,0,0,0,0,0,0
1815,9.055687,9.055687,0.153327,-4.551428,0.702770,299.483,274.283,299.170771,112,0,...,0,0,0,0,0,0,0,1,0,0
1816,7.971842,7.971842,0.116682,-0.318034,0.449064,332.500,324.436,331.957032,98,0,...,0,0,0,0,0,2,0,0,0,0
1817,8.612392,8.612392,0.327778,-0.679488,0.433366,255.320,242.216,255.104799,94,0,...,0,0,0,0,0,0,0,0,0,0
