In [11]:
# data preprocessing
import pandas as pd
# Load the raw Ki table, keep only rows whose species is "HUMAN" and whose
# "ki Val" is at most 10000, and reduce it to the three columns used later.
df = pd.read_csv('KiDatabase.csv')
keep = (df["species"] == "HUMAN") & (df["ki Val"] <= 10000)
# Select rows and columns in one .loc call and rename without inplace=True, so
# `df` is a fresh frame rather than a mutated slice of the raw table (avoids
# SettingWithCopyWarning here and when columns are added to `df` later).
# The leading space in " Ligand Name" is part of the column name being selected.
df = (
    df.loc[keep, ["Name", " Ligand Name", "ki Val"]]
    .rename(columns={"Name": "protein", " Ligand Name": "drug", "ki Val": "Ki"})
)
df.to_csv("KiDatabase_filtered.csv",index=False)



In [12]:
# Protein name to ENSP
import requests
def query_uniport_to_ENSP(protein_name, timeout=30):
    """Search UniProtKB for a protein name and return its STRING id for taxon 9606.

    The entries returned by the search are scanned in order; the first
    cross-reference whose database is "STRING" and whose id starts with
    "9606." is returned.

    Parameters
    ----------
    protein_name : str
        Free-text name used as the UniProt search query.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 30).

    Returns
    -------
    str or None
        An identifier such as "9606.ENSP00000276198", or None when the
        request fails (the error is printed) or no matching cross-reference
        is found.
    """
    url = "https://rest.uniprot.org/uniprotkb/search"
    try:
        # Pass the query through `params` so names containing spaces, commas,
        # '&', '+' or '#' are percent-encoded instead of corrupting the URL.
        response = requests.get(
            url,
            params={"query": protein_name, "format": "json"},
            timeout=timeout,
        )
        response.raise_for_status()
        entries = response.json().get("results", [])
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(e)
        return None

    for entry in entries:
        for ref in entry.get("uniProtKBCrossReferences", []):
            if ref.get("database") != "STRING":
                continue
            # A cross-reference without an id is skipped rather than crashing.
            string_id = ref.get("id") or ""
            # Match the taxon prefix only: a substring test would also accept
            # ids of other taxa whose digits happen to contain "9606".
            if string_id.startswith("9606."):
                return string_id
    return None

# Resolve each distinct protein name once; names that cannot be resolved are
# reported and left out of `mapping`.
to_search = df["protein"].unique()
mapping = {}
for protein in to_search:
    ensp = query_uniport_to_ENSP(protein)
    if not ensp:
        print(f"Failed to map {protein}")
        continue
    mapping[protein] = ensp
mapping

Failed to map 5-HT7L
Failed to map 5-HT7S
Failed to map 5-HT4b
Failed to map 5-HT4c
Failed to map 5-HT4d
Failed to map adrenergic Alpha2A
Failed to map 5-HT7b
Failed to map NPY-Y1
Failed to map PARATHYROID HT1
Failed to map GABA A Alpha2Beta3Gamma2
Failed to map GABA A Alpha3Beta3Gamma2
Failed to map GABA A Alpha5Beta3Gamma2
Failed to map GABA A Alpha6Beta3Gamma2
Failed to map GABA A Alpha1Beta1Gamma2
Failed to map GABA A Alpha1Beta2Gamma2
Failed to map 5-HT4hb
Failed to map GABA A Alpha4Beta3Gamma2
Failed to map GABA A Alpha4Gamma2
Failed to map GABA A Alpha2Beta1Gamma2
Failed to map GABA A Alpha3Beta1Gamma2
Failed to map GABA A Alpha5Beta1Gamma2
Failed to map GABA A Alpha4Beta2Gamma2
Failed to map GABA A Alpha6Beta2Gamma2
Failed to map Cholinergic, Nicotinic Alpha2Beta4
Failed to map Cholinergic, Nicotinic Alpha3Beta4x
Failed to map Cholinergic, Nicotinic Alpha4Beta4
Failed to map Cholinergic, muscarinic M3
Failed to map Cholinergic, muscarinic M4
Failed to map adrenergic Alpha1B
Fai

{'5-HT2C': '9606.ENSP00000276198',
 '5-HT1A': '9606.ENSP00000316244',
 '5-HT1D': '9606.ENSP00000363748',
 '5-HT2A': '9606.ENSP00000437737',
 '5-HT3': '9606.ENSP00000322617',
 'DOPAMINE D1': '9606.ENSP00000377353',
 'DOPAMINE D2': '9606.ENSP00000354859',
 'DOPAMINE D3': '9606.ENSP00000373169',
 'OPIATE Mu': '9606.ENSP00000394624',
 'OPIATE Delta': '9606.ENSP00000394624',
 'OPIATE Kappa': '9606.ENSP00000394624',
 '5-HT1F': '9606.ENSP00000322924',
 '5-HT1B': '9606.ENSP00000358963',
 '5-HT1E': '9606.ENSP00000307766',
 '5-HT2B': '9606.ENSP00000258400',
 '5-HT7': '9606.ENSP00000337949',
 'DOPAMINE D2 Short': '9606.ENSP00000354859',
 'DOPAMINE D2 Long': '9606.ENSP00000354859',
 'DOPAMINE D4': '9606.ENSP00000176183',
 'DOPAMINE D5': '9606.ENSP00000306129',
 '5-HT6': '9606.ENSP00000289753',
 'Dopamine Transporter': '9606.ENSP00000270349',
 '5-HT4': '9606.ENSP00000353915',
 'Neurokinin NK1': '9606.ENSP00000303522',
 '5-HT4a': '9606.ENSP00000400333',
 '5-HT4S': '9606.ENSP00000353915',
 'adrenergi

In [13]:
# Look up the identifier found for each protein name; names absent from
# `mapping` become NaN.
df["ENSP"] = df["protein"].map(mapping)
# Strip only the leading "9606." prefix. The pattern is anchored and the dot
# escaped, with regex=True stated explicitly, because the default of `regex`
# differs between pandas versions and an unanchored "9606." regex would also
# delete any "9606" + one-character run inside the identifier itself.
df["ENSP"] = df["ENSP"].str.replace(r"^9606\.", "", regex=True)
df.to_csv("data_filtered_ENSP.csv",index=False)
df["ENSP"].unique().shape

(445,)

In [11]:
# drug: target
import pandas as pd
# Reload the saved table, discard rows that have no ENSP value, and write the
# result back to the same file.
df = pd.read_csv("data_filtered_ENSP.csv").dropna(subset=["ENSP"])
df.to_csv("data_filtered_ENSP.csv", index=False)

In [12]:
# Build {drug name: [ENSP, ...]} with one list entry per table row, so a
# protein appears as many times as the drug has rows for it.
drugs = df["drug"].unique()
print(drugs)
drug_mapping = {drug: [] for drug in drugs}
# Walk the two columns directly instead of iterrows(), which builds a Series
# for every row.
for drug_name, target_id in zip(df["drug"], df["ENSP"]):
    # pandas stores missing values as NaN, which an `is not None` test never
    # rejects; pd.notna() is the check that actually skips them.
    if pd.notna(target_id):
        drug_mapping[drug_name].append(target_id)
drug_mapping

['8-OH-DPAT' '5-CT' '5-Hydroxy Tryptamine' ...
 '3-[2-[Diphenethylamino]ethyl]phenol'
 '4-[2-[(Cyclobutylmethyl)(phenethyl)amino]ethyl]phenol Hydrochloride\t'
 '4-[2-[(Cyclopropylmethyl)(phenethyl)amino]ethyl]phenol Hydrochloride']


{'8-OH-DPAT': ['ENSP00000276198',
  'ENSP00000316244',
  'ENSP00000316244',
  'ENSP00000363748',
  'ENSP00000363748',
  'ENSP00000437737',
  'ENSP00000377353',
  'ENSP00000354859',
  'ENSP00000316244',
  'ENSP00000358963',
  'ENSP00000289753',
  'ENSP00000258400',
  'ENSP00000337949',
  'ENSP00000322924',
  'ENSP00000307766',
  'ENSP00000354859',
  'ENSP00000373169',
  'ENSP00000316244',
  'ENSP00000176183',
  'ENSP00000176183',
  'ENSP00000316244',
  'ENSP00000363748',
  'ENSP00000358963',
  'ENSP00000358963',
  'ENSP00000316244',
  'ENSP00000316244',
  'ENSP00000316244',
  'ENSP00000316244',
  'ENSP00000316244',
  'ENSP00000316244',
  'ENSP00000316244',
  'ENSP00000316244',
  'ENSP00000316244',
  'ENSP00000358963',
  'ENSP00000358963',
  'ENSP00000358963',
  'ENSP00000358963',
  'ENSP00000358963',
  'ENSP00000337949',
  'ENSP00000316244',
  'ENSP00000316244',
  'ENSP00000322924',
  'ENSP00000316244',
  'ENSP00000337949',
  'ENSP00000437737',
  'ENSP00000258400',
  'ENSP00000276198',


In [13]:
import json
# Persist the drug -> target list mapping as JSON.
with open("drug_target.json", mode="w") as out_file:
    json.dump(drug_mapping, fp=out_file)

In [None]:
import networkx as nx
import scipy.io

# Build the protein-protein interaction graph from a MATLAB .mat file.
mat_file = "adjacency_matrix.mat"
# loadmat returns a dict-like object keyed by the variable names in the file.
mat = scipy.io.loadmat(mat_file)
# NOTE(review): presumably "adj_matrix" is stored as a scipy sparse matrix,
# since it is handed to from_scipy_sparse_array below - confirm.
adjacency_matrix = mat["adj_matrix"]
# With no create_using argument this yields an undirected nx.Graph.
PPI = nx.from_scipy_sparse_array(adjacency_matrix)

<networkx.classes.graph.Graph at 0x15c803f1160>

In [None]:
# Lookup tables between ENSP identifiers and integer node indices.
# Each file is opened in a `with` block so its handle is closed as soon as it
# has been parsed, instead of being left to the garbage collector.
with open("node_to_index.json") as f:
    node_to_index = json.load(f)
# JSON object keys are always strings, so this table is keyed by str(index).
with open("index_to_node.json") as f:
    index_to_node = json.load(f)

{'ENSP00000000233': 0,
 'ENSP00000158762': 1,
 'ENSP00000357048': 2,
 'ENSP00000262305': 3,
 'ENSP00000329419': 4,
 'ENSP00000469035': 5,
 'ENSP00000256682': 6,
 'ENSP00000314615': 7,
 'ENSP00000331342': 8,
 'ENSP00000389095': 9,
 'ENSP00000480301': 10,
 'ENSP00000261890': 11,
 'ENSP00000434442': 12,
 'ENSP00000249923': 13,
 'ENSP00000306010': 14,
 'ENSP00000429900': 15,
 'ENSP00000482620': 16,
 'ENSP00000356737': 17,
 'ENSP00000324287': 18,
 'ENSP00000263245': 19,
 'ENSP00000296557': 20,
 'ENSP00000440005': 21,
 'ENSP00000418915': 22,
 'ENSP00000359000': 23,
 'ENSP00000484121': 24,
 'ENSP00000307634': 25,
 'ENSP00000281419': 26,
 'ENSP00000449270': 27,
 'ENSP00000000412': 28,
 'ENSP00000438085': 29,
 'ENSP00000376792': 30,
 'ENSP00000386443': 31,
 'ENSP00000311962': 32,
 'ENSP00000371175': 33,
 'ENSP00000349437': 34,
 'ENSP00000221957': 35,
 'ENSP00000001008': 36,
 'ENSP00000351646': 37,
 'ENSP00000350815': 38,
 'ENSP00000444810': 39,
 'ENSP00000401645': 40,
 'ENSP00000437125': 41,
 '

In [22]:
def protein_neighbors(ENSP):
    """Return the ENSP identifiers of all nodes adjacent to ``ENSP`` in ``PPI``.

    The identifier is translated to its node index via ``node_to_index``, and
    each neighbouring index is translated back via ``index_to_node``, which is
    keyed by the string form of the index. Raises ``KeyError`` when ``ENSP``
    is not in ``node_to_index``.
    """
    node = node_to_index[ENSP]
    names = []
    for neighbor_index in PPI.neighbors(node):
        names.append(index_to_node[str(neighbor_index)])
    return names

In [24]:
# shortest path length between two proteins
def shortest_path_length(ENSP1,ENSP2):
    """Return the shortest-path length between two proteins in ``PPI``.

    Both identifiers are translated to node indices through ``node_to_index``
    (an unknown identifier raises ``KeyError``). Returns -1 when networkx
    reports that no path connects the two nodes.
    """
    source = node_to_index[ENSP1]
    destination = node_to_index[ENSP2]
    try:
        return nx.shortest_path_length(PPI, source, destination)
    except nx.NetworkXNoPath:
        return -1

In [23]:
import numpy as np

In [26]:
def drug_target_interections(drug,target):
    """Score how close the known targets of ``drug`` are to ``target``.

    Every entry ``p`` of ``drug_mapping[drug]`` that has a path to ``target``
    adds ``exp(-d**2)`` to the score, where ``d`` is
    ``shortest_path_length(p, target)``. Entries for which that function
    returns -1 (no path) add nothing. Repeated entries count once per
    occurrence.

    Parameters
    ----------
    drug : str
        Key into ``drug_mapping``.
    target : str
        ENSP identifier of the protein to score against.

    Returns
    -------
    The accumulated score; the integer 0 when nothing contributes.
    """
    # The target list usually repeats the same protein many times, so remember
    # each path length instead of searching the graph again for every repeat.
    lengths = {}
    res = 0
    for p in drug_mapping[drug]:
        if p not in lengths:
            lengths[p] = shortest_path_length(p, target)
        length = lengths[p]
        if length != -1:
            # -length**2 parses as -(length**2): a Gaussian-style decay.
            res += np.exp(-length**2)
    return res

# Example: score one drug against a single target protein.
drug, target = "8-OH-DPAT", "ENSP00000269305"
drug_target_interections(drug, target)

0.31630225327194805