# Fingerprint distance

## Imports

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from pathlib import Path
import sys

import pandas as pd

sys.path.append('../..')
from kinsim_structure.auxiliary import KlifsMoleculeLoader
from kinsim_structure.encoding import Fingerprint
from kinsim_structure.similarity import FeatureDistances, FingerprintDistance

_ColormakerRegistry()

## IO paths

In [4]:
# Repository root, expressed relative to the current working directory (two levels up).
path_to_kinsim = Path('..', '..')
# Example data shipped with the repository.
path_to_data = path_to_kinsim.joinpath('examples', 'data')
# No results location is configured here.
path_to_results = None

## Load KLIFS metadata

In [5]:
# Read the postprocessed KLIFS metadata table; the first CSV column becomes the index.
klifs_metadata = pd.read_csv(
    path_to_data / 'postprocessed' / 'klifs_metadata_postprocessed.csv',
    index_col=0,
)

In [6]:
# Sanity check: (rows, columns) of the loaded metadata table.
klifs_metadata.shape

(3878, 23)

In [7]:
# Preview the first rows of the metadata table.
klifs_metadata.head()

Unnamed: 0,metadata_index,kinase,family,groups,pdb_id,chain,alternate_model,species,ligand_orthosteric_name,ligand_orthosteric_pdb_id,...,ac_helix,rmsd1,rmsd2,qualityscore,pocket,resolution,missing_residues,missing_atoms,full_ifp,code
0,2886,AAK1,NAK,Other,4wsq,B,A,Human,K-252A,KSA,...,in,0.777,2.125,8.6,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.95,0,14,0000000000000010000001000000000000000000000000...,HUMAN/AAK1/4wsq_chainB_altA
1,10043,AAK1,NAK,Other,5l4q,A,A,Human,"~{N}-[5-(4-cyanophenyl)-1~{H}-pyrrolo[2,3-b]py...",LKB,...,in,0.78,2.137,9.7,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.97,0,3,0000000000000010000000000000000000000000000000...,HUMAN/AAK1/5l4q_chainA_altA
2,7046,AAK1,NAK,Other,5te0,A,-,Human,methyl (3Z)-3-{[(4-{methyl[(4-methylpiperazin-...,XIN,...,in,0.776,2.12,8.8,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.9,0,12,1000101000000010000001000000000000000000000000...,HUMAN/AAK1/5te0_chainA
3,843,ABL1,Abl,TK,2f4j,A,-,Human,CYCLOPROPANECARBOXYLIC ACID {4-[4-(4-METHYL-PI...,VX6,...,in,0.779,2.128,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.91,0,0,0000000000000010000001000000000000000000000000...,HUMAN/ABL1/2f4j_chainA
4,815,ABL1,Abl,TK,2g1t,A,-,Human,-,-,...,out,0.825,2.154,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.8,0,0,,HUMAN/ABL1/2g1t_chainA


## Load two fingerprints

In [8]:
# First fingerprint: built from the metadata row at positional index 100
# (.iloc selects by position, not by index label).
fp1 = Fingerprint()
fp1.from_metadata_entry(klifs_metadata_entry=klifs_metadata.iloc[100])

In [9]:
# Second fingerprint: built from the metadata row at positional index 101,
# i.e. the row directly after the one used for fp1.
fp2 = Fingerprint()
fp2.from_metadata_entry(klifs_metadata_entry=klifs_metadata.iloc[101])

In [10]:
# Inspect the normalized fingerprint of the second structure; the output below
# is a dict keyed by feature type (e.g. 'physicochemical', 'distances').
fp2.fingerprint_normalized

{'physicochemical':     size       hbd  hba  charge  aromatic  aliphatic       sco  exposure
 1    1.0  1.000000  0.0     1.0       0.0        0.0  0.502833  0.142857
 2    0.0  0.000000  0.0     0.5       0.0        0.0       NaN  0.190476
 3    0.5  0.000000  0.0     0.5       0.0        1.0  0.334056  0.814815
 4    0.0  0.000000  0.0     0.5       0.0        0.0       NaN  0.423077
 5    0.5  0.333333  0.5     0.5       1.0        0.0       NaN  0.142857
 ..   ...       ...  ...     ...       ...        ...       ...       ...
 81   0.5  0.000000  1.0     0.0       0.0        0.0  0.649444  0.400000
 82   1.0  0.000000  0.0     0.5       1.0        0.0  0.444556  0.613636
 83   0.0  0.000000  0.0     0.5       0.0        0.0       NaN  0.444444
 84   0.5  0.000000  0.0     0.5       0.0        1.0  0.706722  0.612903
 85   0.0  0.000000  0.0     0.5       0.0        1.0       NaN  0.742857
 
 [85 rows x 8 columns],
 'distances':     distance_to_centroid  distance_to_hinge_region  d

## Get similarity

### Feature distances

In [11]:
# Container for the per-feature distances of a fingerprint pair; filled in the next cell.
feature_distances = FeatureDistances()

In [12]:
# Populate feature_distances from the fingerprint pair (fp1, fp2). Its `.data`
# table, displayed in later cells, holds one distance per feature.
feature_distances.from_fingerprints(fp1, fp2)

### Fingerprint distance

In [13]:
# Object used below both to inspect the weighting helpers and to hold the final distance.
fingerprint_distance = FingerprintDistance()

In [15]:
# Inspect the `weights` column produced when one weight is given per feature type.
# NOTE: underscore-prefixed helper (private by convention), called directly for inspection.
fingerprint_distance._add_weight_per_feature_type(
    feature_distances.data,
    {
        'physicochemical': 0.5,
        'distances': 0.5,
        'moments': 0,
    },
)

Unnamed: 0,feature_type,feature_name,distance,bit_coverage,bit_number,weights
0,physicochemical,size,0.0,0.94,80,0.0625
1,physicochemical,hbd,0.0,0.94,80,0.0625
2,physicochemical,hba,0.0,0.94,80,0.0625
3,physicochemical,charge,0.0,0.94,80,0.0625
4,physicochemical,aromatic,0.0,0.94,80,0.0625
5,physicochemical,aliphatic,0.0,0.94,80,0.0625
6,physicochemical,sco,0.004035,0.76,65,0.0625
7,physicochemical,exposure,0.004225,0.94,80,0.0625
8,distances,distance_to_centroid,0.001501,0.94,80,0.125
9,distances,distance_to_hinge_region,0.000939,0.94,80,0.125


In [16]:
# Same inspection with None passed as the weights argument; the output below
# shows the `weights` column the helper assigns in that case.
# NOTE: underscore-prefixed helper (private by convention), called directly for inspection.
fingerprint_distance._add_weight_per_feature(feature_distances.data, None)

Unnamed: 0,feature_type,feature_name,distance,bit_coverage,bit_number,weights
0,physicochemical,size,0.0,0.94,80,0.0625
1,physicochemical,hbd,0.0,0.94,80,0.0625
2,physicochemical,hba,0.0,0.94,80,0.0625
3,physicochemical,charge,0.0,0.94,80,0.0625
4,physicochemical,aromatic,0.0,0.94,80,0.0625
5,physicochemical,aliphatic,0.0,0.94,80,0.0625
6,physicochemical,sco,0.004035,0.76,65,0.0625
7,physicochemical,exposure,0.004225,0.94,80,0.0625
8,distances,distance_to_centroid,0.001501,0.94,80,0.125
9,distances,distance_to_hinge_region,0.000939,0.94,80,0.125


In [17]:
# Inspect the `weights` column produced when an explicit list of weights is
# given per feature type (here: all weight on the 'distances' features).
fingerprint_distance._add_weight_per_feature(
    feature_distances.data,
    {
        'physicochemical': [0] * 8,
        'distances': [0.25] * 4,
        'moments': [0] * 3,
    },
)

Unnamed: 0,feature_type,feature_name,distance,bit_coverage,bit_number,weights
0,physicochemical,size,0.0,0.94,80,0.0
1,physicochemical,hbd,0.0,0.94,80,0.0
2,physicochemical,hba,0.0,0.94,80,0.0
3,physicochemical,charge,0.0,0.94,80,0.0
4,physicochemical,aromatic,0.0,0.94,80,0.0
5,physicochemical,aliphatic,0.0,0.94,80,0.0
6,physicochemical,sco,0.004035,0.76,65,0.0
7,physicochemical,exposure,0.004225,0.94,80,0.0
8,distances,distance_to_centroid,0.001501,0.94,80,0.25
9,distances,distance_to_hinge_region,0.000939,0.94,80,0.25


In [18]:
# Attach a `weights` column to the feature distances using explicit per-feature
# weights, and keep the result under a descriptive name (instead of a throwaway
# single-letter variable) so it can be identified later in the kernel namespace.
# NOTE: underscore-prefixed helper (private by convention), called directly for inspection.
feature_distances_weighted = fingerprint_distance._add_weight_column(
    feature_distances.data,
    {
        'physicochemical': [0] * 8,
        'distances': [0.25] * 4,
        'moments': [0] * 3,
    },
)
feature_distances_weighted

Unnamed: 0,feature_type,feature_name,distance,bit_coverage,bit_number,weights
0,physicochemical,size,0.0,0.94,80,0.0
1,physicochemical,hbd,0.0,0.94,80,0.0
2,physicochemical,hba,0.0,0.94,80,0.0
3,physicochemical,charge,0.0,0.94,80,0.0
4,physicochemical,aromatic,0.0,0.94,80,0.0
5,physicochemical,aliphatic,0.0,0.94,80,0.0
6,physicochemical,sco,0.004035,0.76,65,0.0
7,physicochemical,exposure,0.004225,0.94,80,0.0
8,distances,distance_to_centroid,0.001501,0.94,80,0.25
9,distances,distance_to_hinge_region,0.000939,0.94,80,0.25


In [19]:
# Derive the fingerprint distance from the feature distances.
# None is passed as the second argument; presumably this selects default
# feature weights (TODO confirm against FingerprintDistance.from_feature_distances).
fingerprint_distance.from_feature_distances(feature_distances, None)

In [20]:
# Resulting fingerprint distance (a single float, see output below).
fingerprint_distance.data

0.0015635017064432074

In [21]:
# Codes identifying the two structures that were compared.
fingerprint_distance.molecule_codes

['HUMAN/ALK_6e0r_altA_chainA', 'HUMAN/ALK_6ebw_altA_chainA']