# Ligand-based validation

Check whether we can retrieve the on-/off-targets of a selected ligand (as reported by Karaman et al.) based solely on the kinases that are structurally most similar to the respective main target of that ligand.

1. Select the ligands to investigate: Erlotinib and Imatinib (target ligands).
2. Get the main target of each ligand: EGFR for Erlotinib and ABL1 for Imatinib (target kinases).
3. Find the top 20 and top 30 kinases most similar to each target kinase.
4. Save these top kinases in KinMap format for visualization on the KinMap website.

## Imports

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from pathlib import Path

import pandas as pd

In [21]:
pd.set_option('display.max_rows', 300)

## IO paths

In [5]:
# Data directory; hard-coded absolute path on the author's machine.
# Used further down to read 'preprocessed/klifs_metadata_unfiltered.csv'.
PATH_TO_DATA = Path('/') / 'home' / 'dominique' / 'Documents' / 'data' / 'kinsim' / '20190724_full'
# kinsim_structure project directory; hard-coded absolute path on the author's machine.
PATH_TO_KINSIM = Path('/') / 'home' / 'dominique' / 'Documents' / 'projects' / 'kinsim_structure'
# Directory from which `load_best_kinase_pairs` reads the score CSV files.
path_to_similarities = PATH_TO_KINSIM / 'results' / 'similarity'
# Parent directory of the per-experiment output folders (e.g. 'fp2_ballester').
path_to_validation = PATH_TO_KINSIM / 'results' / 'validation' / 'lb_on_off_targets'

## Functions

In [6]:
def load_best_kinase_pairs(filename, directory=None):
    """
    Load a table of best kinase-pair scores from a CSV file.

    Parameters
    ----------
    filename : str or pathlib.Path
        Name of the CSV file, relative to `directory`.
    directory : str or pathlib.Path, optional
        Folder that contains the file. If omitted, the module-level
        `path_to_similarities` is used.

    Returns
    -------
    pandas.DataFrame
        Content of the CSV file, with the first CSV column used as row index.
    """

    if directory is None:
        directory = path_to_similarities

    # Join directory and file name exactly once. Joining the directory a
    # second time only resolves to the right file when the directory is an
    # absolute path; with a relative directory it would be duplicated.
    path_to_best_kinase_pairs = Path(directory) / filename

    best_kinase_pairs = pd.read_csv(path_to_best_kinase_pairs, index_col=0)

    # Print the table dimensions as a quick sanity check.
    print(best_kinase_pairs.shape)

    return best_kinase_pairs

In [7]:
def top_n_kinases(best_kinase_pairs, kinase_name, top_n):
    """
    Return the `top_n` highest-scoring entries of one kinase's score column.

    Parameters
    ----------
    best_kinase_pairs : pandas.DataFrame
        Score table; `kinase_name` must be one of its columns.
    kinase_name : str
        Column to rank.
    top_n : int
        Number of leading entries to keep after sorting.

    Returns
    -------
    pandas.Series
        Scores sorted in descending order and cut to the first `top_n`
        entries; the index carries the row labels of the input table.
    """

    scores = best_kinase_pairs[kinase_name]
    ranked_scores = scores.sort_values(ascending=False)

    return ranked_scores[:top_n]

In [8]:
def kinmap_file(best_kinase_pairs, kinase_name, top_n, ligand_name, output_dir=None):
    """
    Write the `top_n` highest-scoring kinases for `kinase_name` to a CSV file
    with the columns 'xName', 'size', 'shape' and 'fill', and return the table.

    Parameters
    ----------
    best_kinase_pairs : pandas.DataFrame
        Score table; `kinase_name` must be one of its columns.
    kinase_name : str
        Column to rank; also part of the output file name.
    top_n : int
        Number of top-ranked entries to keep; also part of the output file name.
    ligand_name : str
        Ligand label, used only in the output file name.
    output_dir : str or pathlib.Path, optional
        Folder to write the CSV file to. If omitted, the module-level
        `path_to_results` is used, which must then be defined before the call.

    Returns
    -------
    pandas.DataFrame
        One row per selected kinase with the columns 'xName' (row label from
        the score table), 'size' (score multiplied by 40.0), 'shape'
        (constant 'pentagon') and 'fill' (constant 'CornflowerBlue').
    """

    if output_dir is None:
        # Backward-compatible fallback for callers that rely on the global.
        output_dir = path_to_results
    output_dir = Path(output_dir)

    # Create the target folder up front so that writing does not fail on a
    # fresh results tree.
    output_dir.mkdir(parents=True, exist_ok=True)

    top_kinases_by_kinase = best_kinase_pairs[kinase_name].sort_values(ascending=False)[:top_n]

    kinmap = pd.DataFrame(
        {
            'xName': top_kinases_by_kinase.index.tolist(),
            # Scale the scores by a fixed factor of 40 for the 'size' column.
            'size': top_kinases_by_kinase.values * 40.0,
        }
    )
    kinmap['shape'] = 'pentagon'
    kinmap['fill'] = 'CornflowerBlue'

    kinmap.to_csv(output_dir / f'kinmap_{kinase_name}_{ligand_name}_top{top_n}.csv')

    return kinmap

In [9]:
def main(filename, path_to_results, target_kinase, target_ligand):
    """
    Load one score table and write the top-20 and top-30 KinMap files for
    a target kinase.

    Parameters
    ----------
    filename : str
        Name of the score CSV file, passed to `load_best_kinase_pairs`.
    path_to_results : pathlib.Path
        NOTE(review): accepted but not referenced in this function body;
        `kinmap_file` is called without it and resolves the output folder
        on its own.
    target_kinase : str
        Column of the score table to rank.
    target_ligand : str
        Ligand label forwarded to `kinmap_file`.
    """

    scores = load_best_kinase_pairs(filename)

    # One KinMap file per cutoff.
    for n_top in (20, 30):
        kinmap_file(scores, target_kinase, n_top, target_ligand)

## Main

In [10]:
# Experiment 1: erlotinib / EGFR with scores from 'best_scores_type2_ballester.csv'

filename = 'best_scores_type2_ballester.csv'
# NOTE: this name has to be bound at module level before `main` runs; as called
# from `main`, `kinmap_file` takes the output folder from the module-level
# `path_to_results`, not from an argument.
path_to_results = path_to_validation / 'fp2_ballester'
target_kinase = 'EGFR'
target_ligand = 'erlotinib'

# Prints the shape of the loaded table and writes the top-20 and top-30 files.
main(filename, path_to_results, target_kinase, target_ligand)

(253, 253)


In [11]:
# Experiment 2: erlotinib / EGFR with scores from 'best_scores_type2_normballester.csv'

filename = 'best_scores_type2_normballester.csv'
# NOTE: this name has to be bound at module level before `main` runs; as called
# from `main`, `kinmap_file` takes the output folder from the module-level
# `path_to_results`, not from an argument.
path_to_results = path_to_validation / 'fp2_normballester'
target_kinase = 'EGFR'
target_ligand = 'erlotinib'

# Prints the shape of the loaded table and writes the top-20 and top-30 files.
main(filename, path_to_results, target_kinase, target_ligand)

(253, 253)


In [13]:
# Experiment 3: imatinib / ABL1 with scores from 'best_scores_type2_ballester.csv'
# (same output folder as Experiment 1; file names differ by kinase and ligand)

filename = 'best_scores_type2_ballester.csv'
# NOTE: this name has to be bound at module level before `main` runs; as called
# from `main`, `kinmap_file` takes the output folder from the module-level
# `path_to_results`, not from an argument.
path_to_results = path_to_validation / 'fp2_ballester'
target_kinase = 'ABL1'
target_ligand = 'imatinib'

# Prints the shape of the loaded table and writes the top-20 and top-30 files.
main(filename, path_to_results, target_kinase, target_ligand)

(253, 253)


In [12]:
# Experiment 4: imatinib / ABL1 with scores from 'best_scores_type2_normballester.csv'
# (same output folder as Experiment 2; file names differ by kinase and ligand)

filename = 'best_scores_type2_normballester.csv'
# NOTE: this name has to be bound at module level before `main` runs; as called
# from `main`, `kinmap_file` takes the output folder from the module-level
# `path_to_results`, not from an argument.
path_to_results = path_to_validation / 'fp2_normballester'
target_kinase = 'ABL1'
target_ligand = 'imatinib'

# Prints the shape of the loaded table and writes the top-20 and top-30 files.
main(filename, path_to_results, target_kinase, target_ligand)

(253, 253)


## Manual investigations

In [15]:
# Reload the score table used in Experiments 2 and 4 for interactive inspection.
a = load_best_kinase_pairs('best_scores_type2_normballester.csv')

(253, 253)


In [25]:
# Rank all entries of the 'ABL1' column from highest to lowest score.
# reset_index moves the kinase names from the index into a regular column,
# so the new integer index is the 0-based rank.
b = a['ABL1'].sort_values(ascending=False).reset_index()

In [33]:
b

Unnamed: 0,kinase1,ABL1
0,ABL1,0.954562
1,ABL2,0.950548
2,BTK,0.877833
3,BRK,0.876332
4,ACK,0.869351
5,SYK,0.867508
6,EGFR,0.867334
7,FAK,0.862538
8,MET,0.858672
9,ALK,0.856875


In [35]:
# Look up where PDGFRb lands in the ABL1 ranking (row label = 0-based rank).
b[b.kinase1 == 'PDGFRb']

Unnamed: 0,kinase1,ABL1
156,PDGFRb,0.778


In [36]:
# Load the postprocessed KLIFS metadata table from the project's data folder.
klifs_metadata = pd.read_csv(PATH_TO_KINSIM / 'data' / 'postprocessed' / 'klifs_metadata_postprocessed.csv')

In [37]:
klifs_metadata.shape

(3878, 24)

In [40]:
# Show the postprocessed metadata rows whose kinase is DDR1, DDR2, KIT, FMS,
# PDGFRa or PDGFRb (the same six names are bound to `imatinib` further down).
klifs_metadata[klifs_metadata.kinase.isin('DDR1 DDR2 KIT FMS PDGFRa PDGFRb'.split())]

Unnamed: 0.1,Unnamed: 0,metadata_index,kinase,family,groups,pdb_id,chain,alternate_model,species,ligand_orthosteric_name,...,ac_helix,rmsd1,rmsd2,qualityscore,pocket,resolution,missing_residues,missing_atoms,full_ifp,code
2420,2420,283,KIT,PDGFR,TK,1pkg,A,-,Human,ADENOSINE-5'-DIPHOSPHATE,...,in,0.776,2.104,8.0,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGMNIVNLLGALVITE...,2.9,0,0,0000000000000010000001000000100000000010000000...,HUMAN/KIT/1pkg_chainA
2809,2809,977,PDGFRb,PDGFR,TK,3lcd,A,A,Human,"N~3~-(2,6-dichlorobenzyl)-5-(4-{[(2R)-2-(pyrro...",...,in,0.785,2.102,9.4,KTLGAGAFGKVVEVAVKMLALMSELKIMSHLGENIVNLLGALVITE...,2.5,0,6,1000000000000010000001000000000000000000000000...,HUMAN/PDGFRb/3lcd_chainA_altA


In [51]:
# Rebind `klifs_metadata` to the unfiltered, preprocessed metadata table;
# this replaces the postprocessed table loaded earlier under the same name.
klifs_metadata = pd.read_csv(PATH_TO_DATA / 'preprocessed' / 'klifs_metadata_unfiltered.csv')

In [52]:
klifs_metadata.shape

(10136, 22)

In [55]:
# Kinase names to look up in the metadata.
# NOTE(review): presumably the imatinib targets besides ABL1 reported by
# Karaman et al. - confirm against the publication.
imatinib = 'DDR1 DDR2 KIT FMS PDGFRa PDGFRb'.split()

In [68]:
# Show the unfiltered metadata rows whose kinase is in the `imatinib` list.
klifs_metadata[klifs_metadata.kinase.isin(imatinib)]

Unnamed: 0.1,Unnamed: 0,kinase,family,groups,pdb_id,chain,alternate_model,species,ligand_orthosteric_name,ligand_orthosteric_pdb_id,...,dfg,ac_helix,rmsd1,rmsd2,qualityscore,pocket,resolution,missing_residues,missing_atoms,full_ifp
276,276,KIT,PDGFR,TK,4u0i,A,B,Human,"3-(imidazo[1,2-b]pyridazin-3-ylethynyl)-4-meth...",0LI,...,out,out,0.919,2.321,7.6,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGMNIVNLLGALVITE...,2.0,0,0,0000000000000010000000000000000000000000000000...
283,283,KIT,PDGFR,TK,1pkg,A,-,Human,ADENOSINE-5'-DIPHOSPHATE,ADP,...,in,in,0.776,2.104,8.0,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGMNIVNLLGALVITE...,2.9,0,0,0000000000000010000001000000100000000010000000...
286,286,KIT,PDGFR,TK,1t46,A,A,Human,4-(4-METHYL-PIPERAZIN-1-YLMETHYL)-N-[4-METHYL-...,STI,...,out,out,0.907,2.309,7.6,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGMNIVNLLGALVITE...,1.6,0,0,0000000000000010000000000000000000000000000000...
292,292,KIT,PDGFR,TK,1t45,A,B,Human,-,-,...,out-like,out,1.008,2.398,7.2,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGMNIVNLLGALVITE...,1.9,0,0,
311,311,KIT,PDGFR,TK,4u0i,A,A,Human,"3-(imidazo[1,2-b]pyridazin-3-ylethynyl)-4-meth...",0LI,...,out,out,0.919,2.321,7.6,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGMNIVNLLGALVITE...,2.0,0,0,0000000000000010000000000000000000000000000000...
314,314,KIT,PDGFR,TK,1t46,A,B,Human,4-(4-METHYL-PIPERAZIN-1-YLMETHYL)-N-[4-METHYL-...,STI,...,out,out,0.907,2.309,7.6,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGMNIVNLLGALVITE...,1.6,0,0,0000000000000010000000000000000000000000000000...
316,316,KIT,PDGFR,TK,1pkg,B,-,Human,ADENOSINE-5'-DIPHOSPHATE,ADP,...,in,in,0.777,2.106,8.0,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGMNIVNLLGALVITE...,2.9,0,0,0000000000000010000001000000100000010000000000...
328,328,KIT,PDGFR,TK,4hvs,A,-,Human,"5-(1H-pyrrolo[2,3-b]pyridin-3-ylmethyl)-N-[4-(...",647,...,out,out,0.958,2.341,7.2,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGENIVNLLGALVITE...,1.9,0,0,0000000000000010000000000000000000000000000000...
329,329,KIT,PDGFR,TK,3g0e,A,A,Human,N-[2-(diethylamino)ethyl]-5-[(Z)-(5-fluoro-2-o...,B49,...,out-like,out,0.954,2.366,7.2,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGMNIVNLLGALVITE...,1.6,0,0,0000000000000010000000000000000000000000000000...
337,337,KIT,PDGFR,TK,3g0f,B,-,Human,N-[2-(diethylamino)ethyl]-5-[(Z)-(5-fluoro-2-o...,B49,...,out,out,0.934,2.28,8.0,KTLGAGAFGKVVEVAVKMLALMSELKVLSYLGMNIVNLLGALVITE...,2.6,0,0,0000000000000010000000000000000000000000000000...


In [60]:
# Same row filter as above, reduced to the columns kinase, family, groups,
# pdb_id and dfg.
klifs_metadata[klifs_metadata.kinase.isin(imatinib)]['kinase family groups pdb_id dfg'.split()]

Unnamed: 0,kinase,family,groups,pdb_id,dfg
276,KIT,PDGFR,TK,4u0i,out
283,KIT,PDGFR,TK,1pkg,in
286,KIT,PDGFR,TK,1t46,out
292,KIT,PDGFR,TK,1t45,out-like
311,KIT,PDGFR,TK,4u0i,out
314,KIT,PDGFR,TK,1t46,out
316,KIT,PDGFR,TK,1pkg,in
328,KIT,PDGFR,TK,4hvs,out
329,KIT,PDGFR,TK,3g0e,out-like
337,KIT,PDGFR,TK,3g0f,out
