In [1]:
import pandas as pd

import rdkit
from rdkit import Chem
from rdkit.Chem import Descriptors

In [2]:
# Load the cleaned tab-separated table and index it by its '#' column,
# then display the frame as the cell output.
df = pd.read_csv('temp_cleaned_data.tsv', sep='\t').set_index('#')
df

Unnamed: 0_level_0,Name,name_smiles,Wavelength,Epsilon,Quantum Yield
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Benzene,C1=CC=CC=C1,254.75,210,0.0530
2,Toluene,CC1=CC=CC=C1,261.75,2860,0.1700
3,o-Xylene,CC1=CC=CC=C1C,263.00,254,0.1700
4,m-Xylene,CC1=CC(=CC=C1)C,265.00,284,0.1300
5,p-Xylene,CC1=CC=C(C=C1)C,275.00,770,0.2200
6,Mesitylene,CC1=CC(=CC(=C1)C)C,265.00,190,0.0880
7,Durene,CC1=CC(=C(C=C1C)C)C,278.00,214,0.3000
8,Pentamethylbenzene,CC1=CC(=C(C(=C1C)C)C)C,271.00,629,0.0750
9,Hexamethylbenzene,CC1=C(C(=C(C(=C1C)C)C)C)C,271.00,214,0.0100
10,Phenol,C1=CC=C(C=C1)O,270.75,2340,0.0750


In [170]:
# Build the RDKit molecule for the row labelled 1. The frame is indexed by the
# '#' column, so this is a label lookup rather than a positional one.
smiles_1 = df.loc[1, 'name_smiles']
mol = Chem.MolFromSmiles(smiles_1)
# MolFromSmiles reports an unparsable SMILES by returning None instead of
# raising; stop here so the descriptor cells below do not fail obscurely.
if mol is None:
    raise ValueError(f"RDKit could not parse the SMILES for row 1: {smiles_1!r}")

In [100]:
# Number of valence electrons of `mol`.
# NOTE(review): the saved output (132) cannot belong to row 1 (benzene,
# C1=CC=CC=C1). The execution counts show `mol` was assigned (In [170]) after
# this cell last ran (In [100]); re-run top to bottom to refresh the outputs.
Descriptors.NumValenceElectrons(mol)

132

In [101]:
# Number of rotatable bonds in `mol`.
Descriptors.NumRotatableBonds(mol)

4

In [102]:
# Number of heteroatoms in `mol`.
Descriptors.NumHeteroatoms(mol)

5

In [103]:
# Largest partial charge found on any atom of `mol`.
Descriptors.MaxPartialCharge(mol)

0.3468726875857757

In [104]:
# Smallest (most negative) partial charge found on any atom of `mol`.
Descriptors.MinPartialCharge(mol)

-0.4221133952155775

In [105]:
# Ipc graph descriptor of `mol`.
# NOTE(review): no cell imports rdkit.Chem.GraphDescriptors explicitly; this
# presumably works because another rdkit import loads the submodule — confirm.
Chem.GraphDescriptors.Ipc(mol)

1298432.7719897337

In [106]:
# Chi0n graph descriptor of `mol`.
# NOTE(review): relies on rdkit.Chem.GraphDescriptors being reachable as an
# attribute of `Chem` without an explicit import in this notebook — confirm.
Chem.GraphDescriptors.Chi0n(mol)

15.191153083317705

In [108]:
import rdkit.Chem.rdMolDescriptors as rdDr

In [109]:
# Property calculator; used in the next cell for its property names and
# the matching computed values.
properties = rdDr.Properties() 

In [110]:
# Print every property name next to the value computed for `mol`.
for prop_name, prop_value in zip(properties.GetPropertyNames(),
                                 properties.ComputeProperties(mol)):
    print(prop_name, prop_value)

exactmw 347.16337691200005
lipinskiHBA 5.0
lipinskiHBD 0.0
NumRotatableBonds 4.0
NumHBD 0.0
NumHBA 5.0
NumHeteroatoms 5.0
NumAmideBonds 0.0
FractionCSP3 0.23809523809523808
NumRings 4.0
NumAromaticRings 4.0
NumAliphaticRings 0.0
NumSaturatedRings 0.0
NumHeterocycles 2.0
NumAromaticHeterocycles 2.0
NumSaturatedHeterocycles 0.0
NumAliphaticHeterocycles 0.0
NumSpiroAtoms 0.0
NumBridgeheadAtoms 0.0
NumAtomStereoCenters 0.0
NumUnspecifiedAtomStereoCenters 0.0
labuteASA 151.0414894438268
tpsa 51.27
CrippenClogP 4.1929000000000025
CrippenMR 105.75300000000003


In [111]:
import rdkit.Chem.Fingerprints as figpr

In [112]:
# Hashed atom-pair fingerprint of `mol`.
# NOTE(review): `a` is not read by any later cell visible in this notebook.
a = rdDr.GetHashedAtomPairFingerprint(mol)


In [113]:
# Connectivity invariants for `mol`; the saved output is a flat list of integers.
rdDr.GetConnectivityInvariants(mol)

[2246728737,
 2245384272,
 848128881,
 2245384272,
 2246728737,
 3217380708,
 3218693969,
 3217380708,
 3217380708,
 3218693969,
 3218693969,
 3218693969,
 3217380708,
 3217380708,
 864942730,
 3189457552,
 3217380708,
 2041434490,
 3217380708,
 3218693969,
 3218693969,
 3218693969,
 3218693969,
 3217380708,
 2092489639,
 2246728737]

In [114]:
# Build a chemical-feature factory from the 'BaseFeatures.fdef' file located
# under RDKit's data directory (RDConfig.RDDataDir).
import os
from rdkit.Chem import ChemicalFeatures
from rdkit import RDConfig
fdefName = os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef')
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

In [119]:
# Features that the factory detects in `mol`.
feats = factory.GetFeaturesForMol(mol)

In [120]:
# How many features were detected.
len(feats)

11

In [121]:
# NOTE(review): this cell is an exact duplicate of the earlier In [114] cell.
# It rebuilds the same factory from the same 'BaseFeatures.fdef' path, so it
# can be deleted without changing any later result.
import os
from rdkit.Chem import ChemicalFeatures
from rdkit import RDConfig
fdefName = os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef')
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

In [126]:
# Show each detected feature's type followed by the atom indices it covers
# (feature.GetFamily() can be printed instead if the family is wanted).
for feature in feats:
    print(feature.GetType())
    print(feature.GetAtomIds())

SingleAtomAcceptor
(14,)
SingleAtomAcceptor
(17,)
Imidazole
(18, 17, 16, 24, 23)
Arom5
(16, 17, 18, 23, 24)
Arom6
(5, 6, 7, 8, 9, 10)
Arom6
(7, 8, 11, 12, 13, 15)
Arom6
(18, 19, 20, 21, 22, 23)
ThreeWayAttach
(8,)
ThreeWayAttach
(12,)
RH6_6
(5, 6, 7, 8, 9, 10)
RH6_6
(18, 19, 20, 21, 22, 23)


In [128]:
# Show the feature definitions held by the factory; the saved output maps
# 'Family.Type' names to their pattern strings.
factory.GetFeatureDefs()

{'Donor.SingleAtomDonor': '[$([N&!H0&v3,N&!H0&+1&v4,n&H1&+0,$([$([Nv3](-C)(-C)-C)]),$([$(n[n;H1]),$(nc[n;H1])])]),$([O,S;H1;+0])]',
 'Acceptor.SingleAtomAcceptor': '[$([O;H1;v2]),$([O;H0;v2;!$(O=N-*),$([O;-;!$(*-N=O)]),$([o;+0])]),$([n;+0;!X3;!$([n;H1](cc)cc),$([$([N;H0]#[C&v4])]),$([N&v3;H0;$(Nc)])]),$([F;$(F-[#6]);!$(FC[F,Cl,Br,I])])]',
 'NegIonizable.AcidicGroup': '[C,S](=[O,S,P])-[O;H1,H0&-1]',
 'PosIonizable.BasicGroup': '[$([$([N;H2&+0][$([C;!$(C=*)])])]),$([$([N;H1&+0]([$([C;!$(C=*)])])[$([C;!$(C=*)])])]),$([$([N;H0&+0]([$([C;!$(C=*)])])([$([C;!$(C=*)])])[$([C;!$(C=*)])])]);!$(N[a])]',
 'PosIonizable.PosN': '[#7;+;!$([N+]-[O-])]',
 'PosIonizable.Imidazole': 'c1ncnc1',
 'PosIonizable.Guanidine': 'NC(=N)N',
 'ZnBinder.ZnBinder1': '[S;D1]-[#6]',
 'ZnBinder.ZnBinder2': '[#6]-C(=O)-C-[S;D1]',
 'ZnBinder.ZnBinder3': '[#6]-C(=O)-C-C-[S;D1]',
 'ZnBinder.ZnBinder4': '[#6]-C(=O)-N-[O;D1]',
 'ZnBinder.ZnBinder5': '[#6]-C(=O)-[O;D1]',
 'ZnBinder.ZnBinder6': '[#6]-P(=O)(-O)-[C,O,N]-[C,H]',
 

In [132]:
# Display the factory object.
# NOTE(review): the saved output (11) is not a factory repr; it matches the
# earlier len(feats) result, so it looks stale — re-run to refresh.
factory

11

In [97]:
# Path to 'FunctionalGroups.txt' under RDKit's data directory.
# NOTE(review): `fName` is not read by any later cell visible in this notebook.
fName=os.path.join(RDConfig.RDDataDir,'FunctionalGroups.txt')

In [98]:
from rdkit.Chem import FragmentCatalog

In [133]:
from rdkit.Chem import AllChem


In [199]:
# Morgan fingerprint of `mol`; the second argument (2) is presumably the
# radius — confirm against the RDKit API docs.
fp1 = AllChem.GetMorganFingerprint(mol,2)

In [194]:
# Length reported by the fingerprint object (saved output: 4294967295).
fp1.GetLength()

4294967295

In [192]:
# Size of fp1's binary serialization. len() works directly on the result of
# ToBinary() (the fp2 cell below already does this), so the intermediate
# list() copy is dropped; the value is unchanged.
len(fp1.ToBinary())

40

In [195]:
# `mol2` is only assigned in a cell further down the notebook, so on a fresh
# top-to-bottom run it would not exist yet. Build it here (same row, label 150)
# so this cell does not depend on out-of-order execution.
mol2 = Chem.MolFromSmiles(df.name_smiles[150])
# Morgan fingerprint of the second molecule, same settings as fp1.
fp2 = AllChem.GetMorganFingerprint(mol2,2)

In [197]:
# Size of fp2's binary serialization.
len(fp2.ToBinary())

208

In [183]:
# Morgan fingerprint of `mol` as a 1024-bit vector (nBits=1024).
# NOTE(review): this rebinds `fp1`, which earlier held the result of
# GetMorganFingerprint — a different kind of object under the same name.
fp1 = AllChem.GetMorganFingerprintAsBitVect(mol,2,nBits=1024)

In [187]:
# Indices of the bits that are set in the 1024-bit fingerprint. The cell was
# saved as the incomplete expression `fp1.` (a syntax error); the recorded
# output type (rdkit.rdBase._vecti) is what GetOnBits() returns.
fp1.GetOnBits()

<rdkit.rdBase._vecti at 0x11c0b5d50>

In [172]:
# Check the concrete type of the bit-vector fingerprint.
type(fp1)

rdkit.DataStructs.cDataStructs.ExplicitBitVect

In [181]:
# Elements of fp1's binary serialization as a list.
# NOTE(review): the saved output is the single integer 19, not a list, so it
# looks like it came from an earlier version of this cell — re-run to refresh.
list(fp1.ToBinary())

19

In [175]:
# Build the RDKit molecule for the row labelled 150 (label lookup on the '#'
# index, not a position).
smiles_150 = df.loc[150, 'name_smiles']
mol2 = Chem.MolFromSmiles(smiles_150)
# MolFromSmiles reports an unparsable SMILES by returning None instead of
# raising; stop here so the fingerprint cells do not fail obscurely.
if mol2 is None:
    raise ValueError(f"RDKit could not parse the SMILES for row 150: {smiles_150!r}")

In [182]:
# 1024-bit Morgan fingerprint of `mol2`; the result is displayed, not stored.
AllChem.GetMorganFingerprintAsBitVect(mol2,2,nBits=1024)

<rdkit.DataStructs.cDataStructs.ExplicitBitVect at 0x11c0248f0>

In [179]:
# Scratch arithmetic: 2 raised to the 8th power.
2**8

256