# Calculating Molecular Descriptors

#### Imports

In [1]:
import pandas as pd
from rdkit import Chem

#### Data

In [2]:
df_x = pd.read_csv('SMILES.csv')
mol = Chem.MolFromSmiles(df_x['SMILES'][0])

#### Topology Descriptors

In [4]:
from smdt import topology
topology_descriptors = topology.GetTopology(df_x.head())
topology_descriptors.to_csv('Topology_Descriptors.csv',encoding='utf-8')
topology_descriptors.head()

Unnamed: 0,AW,Arto,BertzCT,DZ,GMTI,Geto,Hato,Ipc,J,MZM1,...,Thara,Tigdi,Tsch,W,Xu,ZM1,ZM2,diametert,petitjeant,radiust
0,3.077,2.0,2.579,31.333,2.911,1.791,1.592,2.83,3.267,6.306,...,34.3,3.053,953.0,240.0,12.857,62,70,6.0,0.5,3.0
1,2.655,2.0,2.466,26.0,2.696,1.801,1.61,2.44,3.274,5.194,...,26.8,2.837,595.0,146.0,10.735,52,59,5.0,0.4,3.0
2,2.655,2.0,2.108,27.0,2.693,1.801,1.61,2.446,2.512,5.194,...,26.8,2.837,593.0,146.0,10.722,52,59,5.0,0.4,3.0
3,2.764,2.0,2.466,26.0,2.717,1.801,1.61,2.434,3.152,5.194,...,26.5,2.803,619.0,152.0,10.871,52,58,6.0,0.5,3.0
4,2.655,2.0,2.471,25.5,2.696,1.801,1.61,2.44,3.274,5.194,...,26.8,2.837,595.0,146.0,10.735,52,59,5.0,0.4,3.0


#### Constitutional Descriptors

In [5]:
from smdt import constitution
constitution_descriptors = constitution.GetConstitutional(df_x.head())
constitution_descriptors.to_csv('Constitutional_Descriptors.csv',encoding='utf-8')
constitution_descriptors.head()

Unnamed: 0,AWeight,PC1,PC2,PC3,PC4,PC5,PC6,Weight,naccr,naro,...,nhyd,nnitro,noxy,nphos,nring,nrot,nsb,nsulph,nta,ntb
0,15.348,13,18,21,23,26,24,199.529,4,6,...,3,2,4,0,1,2,5,4,16,0
1,13.461,11,15,18,19,18,12,148.073,3,6,...,6,0,4,0,1,1,4,4,17,0
2,13.824,11,15,18,20,20,14,152.061,5,0,...,12,0,5,0,1,1,11,5,23,0
3,13.461,11,15,17,17,20,16,148.073,3,6,...,6,0,4,0,1,1,4,4,17,0
4,13.28,11,15,18,19,18,12,146.081,3,6,...,7,1,3,0,1,1,4,3,18,0


#### Basak Descriptors

-- NOT WORKING --

In [9]:
from smdt import basak
basak.GetBasak(mol)

TypeError: '<' not supported between instances of 'list' and 'int'

#### Burden Eigenvalue Descriptors

In [10]:
from smdt import bcut
burden_descriptors = bcut.GetBurden(df_x.head())
burden_descriptors.to_csv('Burden_Descriptors.csv',encoding='utf-8')
burden_descriptors.head()

Unnamed: 0,bcute1,bcute10,bcute11,bcute12,bcute13,bcute14,bcute15,bcute16,bcute2,bcute3,...,bcutp15,bcutp16,bcutp2,bcutp3,bcutp4,bcutp5,bcutp6,bcutp7,bcutp8,bcutp9
0,3.91,1.036,0.89,0.348,0.235,0.0,0.0,0.0,3.319,3.072,...,0.0,0.0,2.938,2.776,2.144,2.132,1.283,0.84,0.531,1.969
1,3.887,1.05,0.823,0.438,0.0,0.0,0.0,0.0,3.215,2.914,...,0.158,0.0,2.963,2.709,2.265,1.605,1.316,1.19,0.923,1.94
2,3.783,1.197,0.901,0.509,0.077,0.025,0.0,0.0,3.249,3.068,...,0.305,0.281,3.035,2.719,2.243,1.678,1.649,1.205,1.194,1.907
3,3.885,1.055,0.811,0.444,0.0,0.0,0.0,0.0,3.243,2.859,...,0.216,0.0,2.977,2.686,2.272,1.597,1.342,1.223,0.856,1.939
4,3.894,1.052,0.904,0.527,0.162,0.0,0.0,0.0,3.216,2.97,...,0.198,0.0,2.964,2.759,2.318,1.825,1.4,1.216,0.95,1.951


#### CATS2D Descriptors

In [11]:
from smdt import cats2d
cats2d_descriptors = cats2d.CATS2D(df_x.head())
cats2d_descriptors.to_csv('CATS2D_Descriptors.csv',encoding='utf-8')
cats2d_descriptors.head()

Unnamed: 0,CATS_AA0,CATS_AA1,CATS_AA2,CATS_AA3,CATS_AA4,CATS_AA5,CATS_AA6,CATS_AA7,CATS_AA8,CATS_AA9,...,CATS_PP0,CATS_PP1,CATS_PP2,CATS_PP3,CATS_PP4,CATS_PP5,CATS_PP6,CATS_PP7,CATS_PP8,CATS_PP9
0,0.286,0.19,0.095,0.0,0.048,0.19,0.19,0.0,0.0,0.0,...,0.667,0.0,0.0,0.0,0.333,0.0,0.0,0.0,0.0,0.0
1,0.4,0.0,0.1,0.1,0.2,0.2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.333,0.0,0.067,0.2,0.2,0.2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.4,0.0,0.1,0.1,0.0,0.2,0.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.5,0.0,0.167,0.0,0.0,0.333,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Charge Descriptors

In [12]:
from smdt import charge
charge_descriptors = charge.GetCharge(df_x.head())
charge_descriptors.to_csv('Charge_Descriptors.csv',encoding='utf-8')
charge_descriptors.head()

Unnamed: 0,LDI,Mac,Mnc,Mpc,QCmax,QCmin,QCss,QHmax,QHmin,QHss,...,QOss,Qass,Qmax,Qmin,Rnc,Rpc,SPP,Tac,Tnc,Tpc
0,0.226,0.142,-0.19,0.114,0.294,-0.03,0.187,0.077,0.064,0.015,...,0.267,0.483,0.294,-0.258,0.227,0.258,0.552,2.279,-1.14,1.14
1,0.312,0.218,-0.264,0.185,0.339,-0.057,0.186,0.297,0.062,0.272,...,0.797,1.255,0.339,-0.504,0.273,0.183,0.843,3.698,-1.849,1.849
2,0.289,0.164,-0.377,0.105,0.157,0.027,0.063,0.213,0.035,0.209,...,0.711,0.983,0.213,-0.394,0.209,0.113,0.607,3.767,-1.884,1.884
3,0.32,0.211,-0.256,0.179,0.335,-0.044,0.168,0.297,0.063,0.273,...,0.797,1.238,0.335,-0.504,0.281,0.187,0.839,3.587,-1.794,1.794
4,0.288,0.194,-0.249,0.159,0.337,-0.058,0.154,0.297,0.062,0.235,...,0.545,1.09,0.337,-0.506,0.29,0.193,0.843,3.491,-1.746,1.746


#### Connectivity Descriptors

In [13]:
from smdt import connectivity
connectivity_descriptors = connectivity.GetConnectivity(df_x.head())
connectivity_descriptors.to_csv('Connectivity_Descriptors.csv',encoding='utf-8')
connectivity_descriptors.head()

Unnamed: 0,Chi0,Chi1,Chi10,Chi2,Chi3,Chi3c,Chi3ch,Chi4,Chi4c,Chi4ch,...,Chiv8,Chiv9,dchi0,dchi1,dchi2,dchi3,dchi4,knotp,knotpv,mChi1
0,10.008,6.02,0.0,5.702,4.098,1.205,0.0,2.926,0.0,0.0,...,0.024,0.0,3.115,2.537,3.145,2.441,1.984,1.294,0.305,0.463
1,8.431,5.126,0.0,4.675,3.626,0.898,0.0,2.564,0.0,0.0,...,0.01,0.0,2.949,2.257,2.697,2.381,1.86,1.228,0.199,0.466
2,8.431,5.147,0.0,4.581,3.645,0.883,0.0,2.68,0.0,0.0,...,0.01,0.0,2.51,1.693,1.976,1.915,1.597,1.084,0.285,0.468
3,8.431,5.109,0.0,4.772,3.582,0.971,0.0,2.195,0.0,0.0,...,0.005,0.0,2.949,2.246,2.763,2.359,1.538,1.155,0.158,0.464
4,8.431,5.126,0.0,4.675,3.626,0.898,0.0,2.564,0.0,0.0,...,0.011,0.0,2.819,2.192,2.632,2.313,1.824,1.228,0.234,0.466


#### Estate Descriptors

In [14]:
from smdt import estate
estate_descriptors = estate.GetEstate(df_x.head())
estate_descriptors.to_csv('Estate_Descriptors.csv',encoding='utf-8')
estate_descriptors.head()

Unnamed: 0,S1,S10,S11,S12,S13,S14,S15,S16,S17,S18,...,Smin71,Smin72,Smin73,Smin74,Smin75,Smin76,Smin77,Smin78,Smin8,Smin9
0,0.0,0.0,0.0,3.051,0.0,0.0,0.0,0.0,-0.928,0.0,...,0,0,0,0,0,0,0,0,0.0,0
1,0.0,0.0,0.0,3.762,0.0,0.0,0.0,-1.265,-1.324,0.0,...,0,0,0,0,0,0,0,0,0.0,0
2,0.0,0.0,0.0,0.0,-4.099,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,-0.407,0
3,0.0,0.0,0.0,3.313,0.0,0.0,0.0,-1.139,-0.805,0.0,...,0,0,0,0,0,0,0,0,0.0,0
4,0.0,0.0,0.0,4.092,0.0,0.0,0.0,-1.14,-0.38,0.0,...,0,0,0,0,0,0,0,0,0.0,0


#### Geary Auto-Correlation Descriptors

In [18]:
from smdt import geary
gearyauto_descriptors = geary.GetGearyAuto(df_x.head())
gearyauto_descriptors.to_csv('Geary_Descriptors.csv',encoding='utf-8')
gearyauto_descriptors.head()

Unnamed: 0,GATSe1,GATSe2,GATSe3,GATSe4,GATSe5,GATSe6,GATSe7,GATSe8,GATSm1,GATSm2,...,GATSp7,GATSp8,GATSv1,GATSv2,GATSv3,GATSv4,GATSv5,GATSv6,GATSv7,GATSv8
0,0.393,0.817,1.352,1.543,1.342,0.028,0,0,0.556,0.825,...,0,0,0.233,0.697,1.247,1.736,1.241,0.752,0,0
1,0.714,0.786,1.179,1.375,0.982,0.0,0,0,0.714,0.786,...,0,0,0.714,0.786,1.179,1.375,0.982,0.0,0,0
2,1.0,1.1,0.978,1.1,0.458,0.0,0,0,1.0,1.1,...,0,0,1.0,1.1,0.978,1.1,0.458,0.0,0,0
3,0.714,0.786,1.122,1.719,1.179,0.0,0,0,0.714,0.786,...,0,0,0.714,0.786,1.122,1.719,1.179,0.0,0,0
4,0.69,0.7,1.091,1.585,1.172,0.0,0,0,0.692,0.702,...,0,0,0.702,0.726,1.11,1.519,1.138,0.0,0,0


#### Ghosecrippen Descriptor

-- NOT WORKING --

In [20]:
from smdt import ghosecrippen
ghosecrippen.GhoseCrippenFingerprint(mol)

NotADirectoryError: [Errno 20] Not a directory: '/home/rahulavd/anaconda3/envs/my-rdkit-env/lib/python3.6/site-packages/smdt-1.0-py3.6.egg/smdt'

#### Kappa Descriptors

In [25]:
from smdt import kappa
kappa_descriptors = kappa.GetKappa(df_x.head())
kappa_descriptors.to_csv('Kappa_Descriptors.csv',encoding='utf-8')
kappa_descriptors.head()

Unnamed: 0,kappa1,kappa2,kappa3,kappam1,kappam2,kappam3,phi
0,11.077,4.481,2.721,9.398,3.359,1.909,2.428299
1,9.091,3.6,1.975,7.398,2.494,1.236,1.677328
2,9.091,3.6,1.975,8.893,3.465,1.882,2.801295
3,9.091,3.6,2.215,7.398,2.494,1.403,1.677328
4,9.091,3.6,1.975,7.398,2.494,1.236,1.677328


#### MOE-type Descriptors

In [5]:
from smdt import moe
moe_descriptors = moe.GetMOE(df_x.head())
moe_descriptors.to_csv('MOE_Descriptors',encoding='utf-8')
moe_descriptors.head()

Unnamed: 0,EstateVSA0,EstateVSA1,EstateVSA10,EstateVSA2,EstateVSA3,EstateVSA4,EstateVSA5,EstateVSA6,EstateVSA7,EstateVSA8,...,slogPVSA10,slogPVSA11,slogPVSA2,slogPVSA3,slogPVSA4,slogPVSA5,slogPVSA6,slogPVSA7,slogPVSA8,slogPVSA9
0,15.534,10.71,0.0,0.0,18.199,0.0,0.0,0.0,0.0,11.601,...,0.0,11.601,0.0,20.229,0.0,18.199,5.023,0.0,0.0,11.375
1,17.468,5.563,0.0,0.0,0.0,18.199,0.0,0.0,0.0,15.32,...,11.499,0.0,0.0,0.0,10.358,18.199,0.0,0.0,0.0,0.0
2,31.209,6.421,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.163,...,0.0,0.0,4.737,0.0,6.421,0.0,0.0,0.0,0.0,0.0
3,11.719,11.313,0.0,0.0,12.133,6.066,0.0,0.0,0.0,15.32,...,11.499,0.0,0.0,0.0,10.358,18.199,0.0,0.0,0.0,0.0
4,5.969,17.0,0.0,0.0,0.0,18.199,0.0,0.0,0.0,15.947,...,5.75,0.0,0.0,0.0,10.358,18.199,0.0,0.0,0.0,5.687


#### Moran Descriptors

In [5]:
from smdt import moran
moran_descriptors = moran.GetMoranAuto(df_x.head())
moran_descriptors.to_csv('Moran_Descriptors',encoding='utf-8')
moran_descriptors.head()

Unnamed: 0,MATSe1,MATSe2,MATSe3,MATSe4,MATSe5,MATSe6,MATSe7,MATSe8,MATSm1,MATSm2,...,MATSp7,MATSp8,MATSv1,MATSv2,MATSv3,MATSv4,MATSv5,MATSv6,MATSv7,MATSv8
0,0.424,0.117,-0.499,-0.643,-0.494,1.386,0,0,0.011,-0.087,...,0,0,0.56,0.189,-0.423,-0.828,-0.285,0.765,0,0
1,-0.0,0.021,-0.293,-0.293,0.375,0.0,0,0,-0.0,0.021,...,0,0,-0.0,0.021,-0.293,-0.293,0.375,0.0,0,0
2,-0.167,-0.242,-0.071,-0.157,0.65,0.0,0,0,-0.167,-0.242,...,0,0,-0.167,-0.242,-0.071,-0.157,0.65,0.0,0,0
3,0.0,0.021,-0.242,-0.804,0.1,1.75,0,0,-0.0,0.021,...,0,0,-0.0,0.021,-0.242,-0.804,0.1,1.75,0,0
4,-0.0,0.102,-0.29,-0.526,0.644,0.0,0,0,-0.0,0.1,...,0,0,-0.0,0.08,-0.293,-0.46,0.572,0.0,0,0


#### Moreau Broto Descriptors

In [6]:
from smdt import moreaubroto
moreaubroto_descriptors = moreaubroto.GetMoreauBrotoAuto(df_x.head())
moreaubroto_descriptors.to_csv('Moreau_Broto_Descriptors.csv',encoding='utf-8')
moreaubroto_descriptors.head()

Unnamed: 0,ATSe1,ATSe2,ATSe3,ATSe4,ATSe5,ATSe6,ATSe7,ATSe8,ATSm1,ATSm2,...,ATSp7,ATSp8,ATSv1,ATSv2,ATSv3,ATSv4,ATSv5,ATSv6,ATSv7,ATSv8
0,2.818,3.136,3.147,2.991,2.634,2.434,0.0,0.0,2.918,3.275,...,0.0,0.0,2.381,2.664,2.608,2.196,1.642,1.122,0.0,0.0
1,2.588,2.93,2.981,2.696,1.971,0.0,0.0,0.0,2.59,2.932,...,0.0,0.0,2.307,2.512,2.386,1.809,0.935,0.0,0.0,0.0
2,2.636,2.981,3.04,2.724,2.03,0.0,0.0,0.0,2.639,2.984,...,0.0,0.0,2.205,2.386,2.291,1.768,0.832,0.0,0.0,0.0
3,2.588,2.93,2.911,2.424,2.141,1.509,0.0,0.0,2.59,2.932,...,0.0,0.0,2.307,2.512,2.338,1.72,1.118,0.421,0.0,0.0
4,2.576,2.912,2.944,2.653,1.971,0.0,0.0,0.0,2.577,2.914,...,0.0,0.0,2.325,2.542,2.443,1.868,0.935,0.0,0.0,0.0


## Total Descriptors

In [7]:
25+30+48+150+25+44+235+32+7+60+32+32

720