# KinSim tutorial

## Imports

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from pathlib import Path

import pandas as pd

from kinsim_structure.auxiliary import KlifsMoleculeLoader, PdbChainLoader
from kinsim_structure.encoding import Fingerprint, PhysicoChemicalFeatures, SpatialFeatures
from kinsim_structure.encoding import PharmacophoreSizeFeatures, SideChainOrientationFeature, ExposureFeature

In [4]:
# Set pandas' 'display.max_rows' option to 100 for the table outputs in this notebook.
pd.set_option('display.max_rows', 100)

## IO paths

In [5]:
# Absolute, machine-specific locations of the data directory and the project checkout;
# adjust these when running the notebook on another machine.
path_to_data = Path('/').joinpath('home', 'dominique', 'Documents', 'data', 'kinsim', '20190724_full')
path_to_kinsim = Path('/').joinpath('home', 'dominique', 'Documents', 'projects', 'kinsim_structure')
path_to_results = path_to_kinsim.joinpath('results')

# Preprocessed KLIFS metadata table (CSV) inside the data directory.
metadata_path = path_to_data.joinpath('preprocessed', 'klifs_metadata_preprocessed.csv')

## Load metadata

In [6]:
# Read the preprocessed KLIFS metadata CSV (metadata_path) into a DataFrame.
# NOTE(review): the loaded table carries an 'Unnamed: 0' column (visible in the outputs
# below) — presumably an index that was saved with the CSV; consider index_col=0. TODO confirm.
klifs_metadata = pd.read_csv(metadata_path)

In [7]:
# Preview the first rows of the metadata table.
klifs_metadata.head()

Unnamed: 0.1,Unnamed: 0,index,kinase,family,groups,pdb_id,chain,alternate_model,species,ligand_orthosteric_name,...,dfg,ac_helix,rmsd1,rmsd2,qualityscore,pocket,resolution,missing_residues,missing_atoms,full_ifp
0,0,2886,AAK1,NAK,Other,4wsq,B,A,Human,K-252A,...,in,in,0.777,2.125,8.6,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.95,0,14,0000000000000010000001000000000000000000000000...
1,1,10043,AAK1,NAK,Other,5l4q,A,A,Human,"~{N}-[5-(4-cyanophenyl)-1~{H}-pyrrolo[2,3-b]py...",...,in,in,0.78,2.137,9.7,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.97,0,3,0000000000000010000000000000000000000000000000...
2,2,7046,AAK1,NAK,Other,5te0,A,-,Human,methyl (3Z)-3-{[(4-{methyl[(4-methylpiperazin-...,...,in,in,0.776,2.12,8.8,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.9,0,12,1000101000000010000001000000000000000000000000...
3,3,843,ABL1,Abl,TK,2f4j,A,-,Human,CYCLOPROPANECARBOXYLIC ACID {4-[4-(4-METHYL-PI...,...,in,in,0.779,2.128,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.91,0,0,0000000000000010000001000000000000000000000000...
4,4,815,ABL1,Abl,TK,2g1t,A,-,Human,-,...,in,out,0.825,2.154,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.8,0,0,


## Load example molecule

In [8]:
# Pick one metadata row by position as the running example for this tutorial;
# position 250 is the AurA structure 6c83, chain B (see the output below).
klifs_metadata_entry = klifs_metadata.iloc[250]
klifs_metadata_entry

Unnamed: 0                                                                 254
index                                                                     9595
kinase                                                                    AurA
family                                                                     Aur
groups                                                                   Other
pdb_id                                                                    6c83
chain                                                                        B
alternate_model                                                              -
species                                                                  Human
ligand_orthosteric_name           PHOSPHOMETHYLPHOSPHONIC ACID ADENYLATE ESTER
ligand_orthosteric_pdb_id                                                  ACP
ligand_allosteric_name                                                       -
ligand_allosteric_pdb_id                            

### By metadata entry

In [9]:
# Create a molecule loader for the example structure from its metadata entry.
klifs_molecule_loader = KlifsMoleculeLoader(klifs_metadata_entry=klifs_metadata_entry)

In [10]:
# Keep the molecule loaded via the metadata entry; it is compared with the
# mol2-based load further below.
molecule1 = klifs_molecule_loader.molecule

### By mol2 file

In [11]:
# Location of the pocket mol2 file of the example structure (AurA, 6c83, chain B).
# Derived from path_to_data so the data root is defined in one place only; str() keeps
# the value a plain string, exactly as the previous hard-coded literal was.
mol2_path = str(
    path_to_data / 'raw' / 'KLIFS_download' / 'HUMAN' / 'AurA' / '6c83_chainB' / 'pocket.mol2'
)

In [12]:
# Create a molecule loader directly from the mol2 file path
# (rebinds klifs_molecule_loader from the metadata-based loader).
klifs_molecule_loader = KlifsMoleculeLoader(mol2_path=mol2_path)

In [13]:
# Molecule obtained via the mol2 path; it should match molecule1.
molecule2 = klifs_molecule_loader.molecule

## Load example chain

In [14]:
# Create a chain loader for the same metadata entry.
pdb_chain_loader = PdbChainLoader(klifs_metadata_entry=klifs_metadata_entry)



In [15]:
# Keep the loaded chain (a Bio.PDB.Chain.Chain, see the type check below).
chain = pdb_chain_loader.chain

In [42]:
# Check which class represents the loaded chain.
type(chain)

Bio.PDB.Chain.Chain

### Compare both molecule loading methods

In [16]:
# Check that both loading routes yield the same molecule data.
# `all(df1 == df2)` iterates over the *column labels* of the boolean frame, so it is
# True for any frame with truthy labels and never inspects a single value.
# DataFrame.equals compares shape, labels, dtypes and values (NaNs in the same position
# count as equal) and returns one bool.
# Assumes molecule.df is a pandas DataFrame — TODO confirm.
molecule1.df.equals(molecule2.df)

True

In [17]:
# Use the metadata-based molecule for all feature calculations from here on.
molecule = molecule1

## Fingerprint

### From molecule and chain

In [58]:
# Build a fingerprint for the example structure from the loaded molecule and chain.
fp = Fingerprint()
fp.from_molecule(molecule, chain)

In [19]:
# Identifier of the structure the fingerprint was built for.
fp.molecule_code

'HUMAN/AurA_6c83_chainB'

In [41]:
# Dimensions of the feature table: 85 rows and 12 feature columns for this structure.
fp.features.shape

(85, 12)

In [20]:
# Combined feature table (size, hbd, hba, charge, aromatic, aliphatic, sco, exposure and
# the four distance columns); sco is NaN for some rows.
fp.features

Unnamed: 0,size,hbd,hba,charge,aromatic,aliphatic,sco,exposure,distance_to_centroid,distance_to_hinge_region,distance_to_dfg_region,distance_to_front_pocket
1,3.0,3.0,0.0,1.0,0.0,0.0,24.34,0.1,18.24,13.39,20.79,14.26
2,1.0,0.0,0.0,0.0,0.0,1.0,34.38,0.26087,15.95,11.83,17.89,11.44
3,2.0,0.0,0.0,0.0,0.0,1.0,28.0,0.75,13.37,9.66,16.38,8.29
4,1.0,0.0,0.0,0.0,0.0,0.0,,0.724138,13.09,10.76,14.94,7.22
5,2.0,1.0,0.0,1.0,0.0,0.0,,0.352941,14.79,13.14,14.93,9.4
6,1.0,0.0,0.0,0.0,0.0,0.0,,0.647059,14.5,14.33,13.65,9.47
7,2.0,1.0,0.0,1.0,0.0,0.0,,0.117647,16.33,16.78,13.83,12.4
8,3.0,0.0,0.0,0.0,1.0,0.0,,0.62963,14.41,15.23,10.72,12.08
9,1.0,0.0,0.0,0.0,0.0,0.0,,0.714286,13.34,12.8,10.73,10.72
10,2.0,1.0,1.0,0.0,0.0,0.0,52.15,0.333333,12.51,10.61,11.87,8.95


### From metadata entry

In [21]:
# PDB ID of the example entry.
klifs_metadata_entry.pdb_id

'6c83'

In [22]:
# Populate the same Fingerprint object again, this time directly from the metadata entry.
fp.from_metadata_entry(klifs_metadata_entry)



In [23]:
# The displayed rows are identical to the from_molecule result above.
fp.features

Unnamed: 0,size,hbd,hba,charge,aromatic,aliphatic,sco,exposure,distance_to_centroid,distance_to_hinge_region,distance_to_dfg_region,distance_to_front_pocket
1,3.0,3.0,0.0,1.0,0.0,0.0,24.34,0.1,18.24,13.39,20.79,14.26
2,1.0,0.0,0.0,0.0,0.0,1.0,34.38,0.26087,15.95,11.83,17.89,11.44
3,2.0,0.0,0.0,0.0,0.0,1.0,28.0,0.75,13.37,9.66,16.38,8.29
4,1.0,0.0,0.0,0.0,0.0,0.0,,0.724138,13.09,10.76,14.94,7.22
5,2.0,1.0,0.0,1.0,0.0,0.0,,0.352941,14.79,13.14,14.93,9.4
6,1.0,0.0,0.0,0.0,0.0,0.0,,0.647059,14.5,14.33,13.65,9.47
7,2.0,1.0,0.0,1.0,0.0,0.0,,0.117647,16.33,16.78,13.83,12.4
8,3.0,0.0,0.0,0.0,1.0,0.0,,0.62963,14.41,15.23,10.72,12.08
9,1.0,0.0,0.0,0.0,0.0,0.0,,0.714286,13.34,12.8,10.73,10.72
10,2.0,1.0,1.0,0.0,0.0,0.0,52.15,0.333333,12.51,10.61,11.87,8.95


## Pharmacophore and size features

In [24]:
# Compute the pharmacophore and size features from the molecule alone (no chain passed).
ps = PharmacophoreSizeFeatures()
ps.from_molecule(molecule)

In [25]:
# One row per klifs_id with the columns size, hbd, hba, charge, aromatic and aliphatic.
ps.features

Unnamed: 0_level_0,size,hbd,hba,charge,aromatic,aliphatic
klifs_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,3,3,0,1,0,0
2,1,0,0,0,0,1
3,2,0,0,0,0,1
4,1,0,0,0,0,0
5,2,1,0,1,0,0
6,1,0,0,0,0,0
7,2,1,0,1,0,0
8,3,0,0,0,1,0
9,1,0,0,0,0,0
10,2,1,1,0,0,0


## Exposure

In [26]:
# Compute the exposure feature from the molecule and its chain.
ex = ExposureFeature()
ex.from_molecule(molecule, chain)

In [27]:
# Default output: a single exposure value per klifs_id.
ex.features

Unnamed: 0_level_0,exposure
klifs_id,Unnamed: 1_level_1
1,0.1
2,0.26087
3,0.75
4,0.724138
5,0.352941
6,0.647059
7,0.117647
8,0.62963
9,0.714286
10,0.333333


In [28]:
# Recompute with verbose=True to get additional columns next to the final exposure.
ex.from_molecule(molecule, chain, verbose=True)

In [29]:
# The verbose table adds res_id and the ca_*/cb_* columns next to exposure.
# NOTE(review): in the displayed rows, exposure equals cb_exposure where that is available
# and ca_exposure where cb_exposure is NaN — presumably a fallback; confirm in ExposureFeature.
ex.features

Unnamed: 0_level_0,res_id,ca_up,ca_down,ca_angle_Ca-Cb_Ca-pCb,ca_exposure,cb_up,cb_down,cb_angle_Ca-Cb_Ca-pCb,cb_exposure,exposure
klifs_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,137,0.0,20.0,0.496784,0.0,2.0,18.0,0.0,0.1,0.1
2,138,9.0,14.0,0.519227,0.391304,6.0,17.0,0.0,0.26087,0.26087
3,139,14.0,14.0,0.811814,0.5,21.0,7.0,0.0,0.75,0.75
4,140,13.0,16.0,0.664675,0.448276,21.0,8.0,0.0,0.724138,0.724138
5,141,6.0,11.0,,0.352941,,,,,0.352941
6,142,4.0,13.0,0.905029,0.235294,11.0,6.0,0.0,0.647059,0.647059
7,143,2.0,15.0,,0.117647,,,,,0.117647
8,144,17.0,10.0,,0.62963,,,,,0.62963
9,145,17.0,11.0,1.20961,0.607143,20.0,8.0,0.0,0.714286,0.714286
10,146,8.0,22.0,0.787148,0.266667,10.0,20.0,0.0,0.333333,0.333333


## Side chain orientation

In [31]:
# Compute the side chain orientation (sco) feature from the molecule and its chain.
sco = SideChainOrientationFeature()
sco.from_molecule(molecule, chain)

In [32]:
# Default output: one sco value per klifs_id; missing values stay NaN.
sco.features

Unnamed: 0_level_0,sco
klifs_id,Unnamed: 1_level_1
1,24.34
2,34.38
3,28.0
4,
5,
6,
7,
8,
9,
10,52.15


In [34]:
# Recompute with fill_missing=True so that the NaN entries above receive a value.
sco.from_molecule(molecule, chain, fill_missing=True)

In [35]:
# klifs_id 4-9, NaN in the default output, now hold values (0.0, 99.14 or 87.46).
sco.features

Unnamed: 0_level_0,sco
klifs_id,Unnamed: 1_level_1
1,24.34
2,34.38
3,28.0
4,0.0
5,99.14
6,0.0
7,99.14
8,87.46
9,0.0
10,52.15


In [36]:
# Same calculation, additionally requesting the verbose table.
sco.from_molecule(molecule, chain, fill_missing=True, verbose=True)

In [37]:
# Verbose table: residue_id, residue_name and the ca, cb and com vectors next to sco.
# In the displayed rows the filled sco values belong to residues without a cb vector
# (GLY 0.0, LYS 99.14, PHE 87.46).
sco.features

Unnamed: 0,klifs_id,residue_id,residue_name,ca,cb,com,sco
0,1,137,ARG,"<Vector -7.59, 21.20, 17.33>","<Vector -7.83, 21.63, 15.88>","<Vector -6.91, 21.41, 17.31>",24.34
1,2,138,PRO,"<Vector -4.75, 18.62, 17.40>","<Vector -5.08, 17.12, 17.44>","<Vector -5.41, 18.29, 16.94>",34.38
2,3,139,LEU,"<Vector -1.87, 19.95, 15.32>","<Vector -0.94, 21.09, 15.75>","<Vector -1.45, 20.87, 15.48>",28.0
3,4,140,GLY,"<Vector -0.11, 16.57, 15.27>",,"<Vector -0.53, 16.36, 16.14>",0.0
4,5,141,LYS,"<Vector -0.10, 13.06, 16.67>",,"<Vector 0.37, 12.66, 15.96>",99.14
5,6,142,GLY,"<Vector 3.05, 10.92, 16.51>",,"<Vector 2.48, 10.43, 17.11>",0.0
6,7,143,LYS,"<Vector 4.23, 7.96, 18.56>",,"<Vector 4.19, 8.29, 19.30>",99.14
7,8,144,PHE,"<Vector 5.92, 9.49, 21.62>",,"<Vector 5.34, 9.93, 21.90>",87.46
8,9,145,GLY,"<Vector 3.16, 12.10, 21.98>",,"<Vector 2.94, 12.09, 21.04>",0.0
9,10,146,ASN,"<Vector 1.03, 14.81, 20.37>","<Vector -0.39, 14.24, 20.32>","<Vector 0.27, 14.62, 21.36>",52.15


## Spatial features

In [38]:
# Compute the spatial features from the molecule alone (no chain passed).
space = SpatialFeatures()
space.from_molecule(molecule)

In [39]:
# One row per klifs_id with distance_to_centroid, distance_to_hinge_region,
# distance_to_dfg_region and distance_to_front_pocket.
space.features

Unnamed: 0_level_0,distance_to_centroid,distance_to_hinge_region,distance_to_dfg_region,distance_to_front_pocket
klifs_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,18.24,13.39,20.79,14.26
2,15.95,11.83,17.89,11.44
3,13.37,9.66,16.38,8.29
4,13.09,10.76,14.94,7.22
5,14.79,13.14,14.93,9.4
6,14.5,14.33,13.65,9.47
7,16.33,16.78,13.83,12.4
8,14.41,15.23,10.72,12.08
9,13.34,12.8,10.73,10.72
10,12.51,10.61,11.87,8.95


In [40]:
# x/y/z coordinates of the centroid, hinge_region, dfg_region and front_pocket points —
# presumably the reference points the distance columns are measured against; TODO confirm.
space.reference_points

Unnamed: 0,centroid,hinge_region,dfg_region,front_pocket
x,0.716746,1.720233,6.708967,-0.0073
y,20.991932,22.141433,19.7478,15.3157
z,36.71663,42.152067,33.560533,40.5391


## Thoughts on normalization of fingerprint

In [66]:
# Show the combined feature table again as the basis for the normalization discussion;
# in the displayed rows the columns sit on different scales (exposure within 0-1, sco in
# the tens, distances roughly 7-21).
fp.features

Unnamed: 0,size,hbd,hba,charge,aromatic,aliphatic,sco,exposure,distance_to_centroid,distance_to_hinge_region,distance_to_dfg_region,distance_to_front_pocket
1,3.0,3.0,0.0,1.0,0.0,0.0,24.34,0.1,18.24,13.39,20.79,14.26
2,1.0,0.0,0.0,0.0,0.0,1.0,34.38,0.26087,15.95,11.83,17.89,11.44
3,2.0,0.0,0.0,0.0,0.0,1.0,28.0,0.75,13.37,9.66,16.38,8.29
4,1.0,0.0,0.0,0.0,0.0,0.0,,0.724138,13.09,10.76,14.94,7.22
5,2.0,1.0,0.0,1.0,0.0,0.0,,0.352941,14.79,13.14,14.93,9.4
6,1.0,0.0,0.0,0.0,0.0,0.0,,0.647059,14.5,14.33,13.65,9.47
7,2.0,1.0,0.0,1.0,0.0,0.0,,0.117647,16.33,16.78,13.83,12.4
8,3.0,0.0,0.0,0.0,1.0,0.0,,0.62963,14.41,15.23,10.72,12.08
9,1.0,0.0,0.0,0.0,0.0,0.0,,0.714286,13.34,12.8,10.73,10.72
10,2.0,1.0,1.0,0.0,0.0,0.0,52.15,0.333333,12.51,10.61,11.87,8.95
