# Validate our ECIF calculations #

Compare the ECIF::LD descriptors of the same complex, calculated once with our script and once with the reference ECIF example code (`ECIF.ecif`).

In [73]:
import pandas as pd
from ECIF.ecif import *

# Input pair: protein and ligand files of one example complex
Protein = "ECIF/Example_Structures/1a0q_protein.pdb"
Ligand = "ECIF/Example_Structures/1a0q_ligandCD1.sdf"

# Parse both structures with the reference loaders.
# NOTE(review): neither frame is read again in the cells visible here — confirm before removing.
df_ligand = LoadSDFasDF(Ligand)
df_protein = LoadPDBasDF(Protein)

# Reference values produced by their code: ECIF and the RDKit ligand descriptors
their_ecif = GetECIF(Protein, Ligand, distance_cutoff=6.0)
their_ld = GetRDKitDescriptors(Ligand)

# Wrap each result in a one-row frame, then place them side by side (joined on the index)
ecif_part = pd.DataFrame([their_ecif], columns=PossibleECIF)
ld_part = pd.DataFrame([their_ld], columns=LigandDescriptors)
their_ecif_ld = ecif_part.join(ld_part)

# Read the descriptors written by our script, skipping the 'Name' column.
our_ecif_ld = pd.read_csv(
    'our_example_descriptors.csv',
    sep=',',
    usecols=lambda column: column != 'Name',
)

RDKit ERROR: [15:46:36] Explicit valence for atom # 17 C, 6, is greater than permitted
Traceback (most recent call last):
  File "/home/Luis/.conda/envs/ecif/lib/python3.7/site-packages/rdkit/ML/Descriptors/MoleculeDescriptors.py", line 88, in CalcDescriptors
    res[i] = fn(mol)
  File "/home/Luis/.conda/envs/ecif/lib/python3.7/site-packages/rdkit/Chem/QED.py", line 286, in qed
    qedProperties = properties(mol)
  File "/home/Luis/.conda/envs/ecif/lib/python3.7/site-packages/rdkit/Chem/QED.py", line 249, in properties
    mol = Chem.RemoveHs(mol)
rdkit.Chem.rdchem.AtomValenceException: Explicit valence for atom # 17 C, 6, is greater than permitted


In [74]:
# Preview the descriptors calculated with their code (up to the first five rows)
their_ecif_ld.head(n=5)

Unnamed: 0,C;4;1;3;0;0-Br;1;1;0;0;0,C;4;1;3;0;0-C;3;3;0;1;1,C;4;1;3;0;0-C;4;1;1;0;0,C;4;1;3;0;0-C;4;1;2;0;0,C;4;1;3;0;0-C;4;1;3;0;0,C;4;1;3;0;0-C;4;2;0;0;0,C;4;1;3;0;0-C;4;2;1;0;0,C;4;1;3;0;0-C;4;2;1;0;1,C;4;1;3;0;0-C;4;2;1;1;1,C;4;1;3;0;0-C;4;2;2;0;0,...,fr_quatN,fr_sulfide,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_urea
0,0,0,0,0,3,0,0,0,15,8,...,0,0,0,0,0,0,0,0,0,0


In [75]:
# Preview the descriptors loaded from our script's CSV (up to the first five rows)
our_ecif_ld.head(n=5)

Unnamed: 0,C;4;1;3;0;0-Br;1;1;0;0;0,C;4;1;3;0;0-C;3;3;0;1;1,C;4;1;3;0;0-C;4;1;1;0;0,C;4;1;3;0;0-C;4;1;2;0;0,C;4;1;3;0;0-C;4;1;3;0;0,C;4;1;3;0;0-C;4;2;0;0;0,C;4;1;3;0;0-C;4;2;1;0;0,C;4;1;3;0;0-C;4;2;1;0;1,C;4;1;3;0;0-C;4;2;1;1;1,C;4;1;3;0;0-C;4;2;2;0;0,...,fr_quatN,fr_sulfide,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_urea
0,0,0,0,0,3,0,0,0,15,8,...,0,0,0,0,0,0,0,0,0,0


Comparing the two sets of descriptors, we find that not all values are exactly equal.

In [76]:
# Strict whole-frame check: True only if both frames match exactly
our_ecif_ld.equals(other=their_ecif_ld)

False

Let's see which values differ.

In [77]:
# Element-wise exact comparison of our values against theirs (boolean frame)
eq = our_ecif_ld.eq(their_ecif_ld)

# Columns in which every entry is unequal; display our values for them
mismatched_cols = eq.columns[(~eq).all()]
our_ecif_ld.loc[:, mismatched_cols]

Unnamed: 0,Chi0n,HallKierAlpha,Kappa1,SMR_VSA7,SlogP_VSA6,TPSA,EState_VSA1,EState_VSA9
0,30.843917,0.15,6.100099,30.212094,30.212094,106.61,104.550921,27.456255


In [78]:
# Same mismatching columns as above, this time showing their values
cols_that_differ = eq.columns[(~eq).all()]
their_ecif_ld.loc[:, cols_that_differ]

Unnamed: 0,Chi0n,HallKierAlpha,Kappa1,SMR_VSA7,SlogP_VSA6,TPSA,EState_VSA1,EState_VSA9
0,30.843917,0.15,6.100099,30.212094,30.212094,106.61,104.550921,27.456255


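Let's see how much they actually differ. As the output below shows, the values differ only in their last one or two significant digits (floating-point rounding), so the difference is negligible.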

In [79]:
# Print both values of row 0 at full precision for every mismatching column
for col in eq.columns[(~eq).all()]:
    ours = our_ecif_ld.loc[0, col]
    theirs = their_ecif_ld.loc[0, col]
    # The trailing empty item plus print's newline yields the blank separator line
    print(col, f'Ours  : {ours}', f'Theirs: {theirs}', '', sep='\n')

Chi0n
Ours  : 30.843916933783092
Theirs: 30.843916933783095

HallKierAlpha
Ours  : 0.1499999999999999
Theirs: 0.14999999999999997

Kappa1
Ours  : 6.100098621183212
Theirs: 6.1000986211832116

SMR_VSA7
Ours  : 30.212093538316477
Theirs: 30.212093538316473

SlogP_VSA6
Ours  : 30.212093538316477
Theirs: 30.212093538316473

TPSA
Ours  : 106.61
Theirs: 106.61000000000001

EState_VSA1
Ours  : 104.55092087165296
Theirs: 104.55092087165295

EState_VSA9
Ours  : 27.45625464556237
Theirs: 27.456254645562375

