In [1]:
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
import networkx as nx
from node2vec import Node2Vec



In [126]:
drugs_used = pd.read_csv('../data/Drug_sensitivity_dose-level_(PRISM_Repurposing_Secondary_Screen).csv', index_col=0)
drugs_used = drugs_used.columns.str.split(' ').str[0].drop_duplicates()
drugs_used

Index(['8-BROMO-CGMP', 'NORETYNODREL', 'PREDNISOLONE-ACETATE', 'BETAMETHASONE',
       'MEPIVACAINE', 'XL888', 'METOPROLOL', 'METHSCOPOLAMINE', 'LAPPACONITE',
       'TERFENADINE',
       ...
       'LORLATINIB', 'HEXYLRESORCINOL', 'BOSUTINIB', 'AMMONIUM-LACTATE',
       'NEMONAPRIDE', 'CROMAKALIM', 'EFONIDIPINE-MONOETHANOLATE',
       'DICHLOROACETATE', 'TYLOXAPOL', 'SEVELAMER'],
      dtype='object', length=1442)

(1442,)

In [141]:
df = pd.read_csv('../data/Repurposing_Public_23Q2_Extended_Primary_Compound_List.csv')[['Drug.Name', 'repurposing_target', 'MOA']]
df = df.rename(columns = {'Drug.Name': 'DRUG_NAME', 'repurposing_target': 'TARGET', 'MOA': 'TARGET_PATHWAY'})
df = df.dropna() #df[(df['TARGET']!= 'NaN') & (df['TARGET_PATHWAY']!= 'NaN')]
df = df[np.isin(df['DRUG_NAME'], drugs_used)]
df.iloc[20:30,:]

Unnamed: 0,DRUG_NAME,TARGET,TARGET_PATHWAY
111,CARVEDILOL,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",ADRENERGIC RECEPTOR ANTAGONIST
113,NADIDE,"AHCY, AKR1A1, ALDH2, BLVRA, DHPS, DLD, GALE, H...",ELECTRON ACCEPTOR
120,LORAZEPAM,"GABRA1, GABRA2, GABRA3, GABRA4, GABRA5, GABRA6...",BENZODIAZEPINE RECEPTOR AGONIST
122,NUTLIN-3,"MDM2, TP53",MDM INHIBITOR
124,USNIACIN-(+),PTPN1,MAP KINASE ACTIVATOR
127,POMALIDOMIDE,"CRBN, PTGS2, TNF","ANGIOGENESIS INHIBITOR, TUMOR NECROSIS FACTOR ..."
129,FLUOROMETHOLONE,NR3C1,GLUCOCORTICOID RECEPTOR AGONIST
132,ORNITHINE,"ARG1, ARG2, GATM, GPRC6A, OAT, OAZ1, OAZ2, OAZ...",NFKB PATHWAY MODULATOR
135,TC1,BACE1,BETA-SECRETASE INHIBITOR
137,OCTOPAMINE,"F10, TAAR1",TRACE AMINE ASSOCIATED RECEPTOR AGONIST


In [142]:
df  = df.set_index(['DRUG_NAME', 'TARGET_PATHWAY']).apply(lambda x: x.str.split(',').explode()).reset_index()

In [143]:
PATHWAY_network = df[['DRUG_NAME', 'TARGET_PATHWAY']].rename(columns={'TARGET_PATHWAY': 'TARGET'})
DRUG_NAME_network = pd.concat([df['DRUG_NAME'], df[['DRUG_NAME']].rename(columns= {'DRUG_NAME': 'TARGET'})], axis=1)
DRUG_NAME_network['TARGET'] = DRUG_NAME_network['TARGET'] + '_tar'

small_network = df[['DRUG_NAME', 'TARGET']]
#small_network.to_csv('../use_data/small_compund_network.csv')
small_network

Unnamed: 0,DRUG_NAME,TARGET
0,8-BROMO-CGMP,PRKG1
1,NORETYNODREL,PGR
2,PREDNISOLONE-ACETATE,NR3C1
3,BETAMETHASONE,NR3C1
4,MEPIVACAINE,SCN10A
...,...,...
3870,IMATINIB,NTRK1
3871,IMATINIB,PDGFRA
3872,IMATINIB,PDGFRB
3873,IMATINIB,RET


In [144]:
g = nx.from_pandas_edgelist(small_network, 'DRUG_NAME', 'TARGET')
plt.rcParams['figure.figsize'] = [15, 15]

In [145]:
#nx.draw(g, node_size=5)

In [146]:
large_network = pd.concat([small_network, PATHWAY_network, DRUG_NAME_network], axis=0).drop_duplicates()
g_large = nx.from_pandas_edgelist(large_network, 'DRUG_NAME', 'TARGET')
g_large
large_network.to_csv('../use_data/compound_edge_list.csv')

In [147]:
#nx.draw(g_large, node_size=10)

In [148]:
node2vec = Node2Vec(g_large, dimensions=500, walk_length=20, num_walks=200, workers=7) #4 walk_length:30

Computing transition probabilities:   0%|          | 0/3981 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 29/29 [00:07<00:00,  3.71it/s]
Generating walks (CPU: 2): 100%|██████████| 29/29 [00:07<00:00,  3.74it/s]
Generating walks (CPU: 5): 100%|██████████| 28/28 [00:07<00:00,  3.72it/s]
Generating walks (CPU: 6): 100%|██████████| 28/28 [00:07<00:00,  3.80it/s]
Generating walks (CPU: 3): 100%|██████████| 29/29 [00:07<00:00,  3.63it/s]
Generating walks (CPU: 7): 100%|██████████| 28/28 [00:07<00:00,  3.85it/s]
Generating walks (CPU: 4): 100%|██████████| 29/29 [00:08<00:00,  3.57it/s]


In [149]:
model = node2vec.fit(window=10, min_count=1, batch_words=4) #batch_words=4

In [150]:
embeddings = pd.DataFrame({drugname: model.wv[drugname] for drugname in df['DRUG_NAME']})

In [151]:
embeddings.to_csv('../use_data/embeddings.csv')

In [160]:

from numpy import dot
from numpy.linalg import norm

def cos(a,b):
    return dot(a, b)/(norm(a)*norm(b))
print(cos(embeddings['POZIOTINIB'], embeddings['DACOMITINIB']))
print(cos(embeddings['POZIOTINIB'], embeddings['CISPLATIN']))
print(cos(embeddings['VEMURAFENIB'], embeddings['DABRAFENIB']))
print(cos(embeddings['VEMURAFENIB'], embeddings['CISPLATIN']))
print(cos(embeddings['VEMURAFENIB'], embeddings['DACOMITINIB']))


0.8476811
0.312396
0.8235181
0.33285716
0.34151053


In [161]:
res = []
filt = np.isin(embeddings.columns, drugs_used)

filtered_embeddings  = embeddings.iloc[:, filt]

dat = np.array(filtered_embeddings)

for row in range(dat.shape[1]):
#for row in range(50):
    if row%200==0:
        print(row)
        
    cols = []
    for col in range(dat.shape[1]):
        cols.append(cos(dat[:,row], dat[:,col]))
    res.append(np.array(cols))

0
200
400
600
800
1000


In [162]:
results = np.stack(res, axis=0)
results.shape
np.fill_diagonal(results,0)

In [163]:
results.argmax()

38766

In [164]:
#results.sort()

In [165]:
results_frame = pd.DataFrame(results, index = filtered_embeddings.columns, columns = filtered_embeddings.columns)

In [166]:
results_frame_long = results_frame.melt(ignore_index=False)

In [167]:
sorted_values = results_frame_long.sort_values('value', ascending=False)
sorted_values

Unnamed: 0,variable,value
TALTIRELIN,PROTIRELIN,0.995579
PROTIRELIN,TALTIRELIN,0.995579
IMIDAPRIL,TEMOCAPRIL,0.994493
TEMOCAPRIL,IMIDAPRIL,0.994493
CHICAGO-SKY-BLUE-6B,4-IODO-6-PHENYLPYRIMIDINE,0.991556
...,...,...
TRAPIDIL,TRAPIDIL,0.000000
SB-366791,SB-366791,0.000000
SB-205384,SB-205384,0.000000
CLOMIFENE,CLOMIFENE,0.000000


In [170]:
sorted_values.to_numpy()[:50]

array([['PROTIRELIN', 0.9955785274505615],
       ['TALTIRELIN', 0.9955785274505615],
       ['TEMOCAPRIL', 0.9944934844970703],
       ['IMIDAPRIL', 0.9944934844970703],
       ['4-IODO-6-PHENYLPYRIMIDINE', 0.9915556907653809],
       ['CHICAGO-SKY-BLUE-6B', 0.9915556907653809],
       ['BI-78D3', 0.9890983700752258],
       ['SU3327', 0.9890983700752258],
       ['FK-866', 0.9885023832321167],
       ['STF-118804', 0.9885023832321167],
       ['DOXYCYCLINE', 0.986400842666626],
       ['WAY-170523', 0.986400842666626],
       ['SAXAGLIPTIN', 0.9843536019325256],
       ['ALOGLIPTIN', 0.9843536019325256],
       ['EMBELIN', 0.9692172408103943],
       ['CISPLATIN', 0.9692172408103943],
       ['TERIFLUNOMIDE', 0.965831995010376],
       ['BREQUINAR', 0.965831995010376],
       ['PF-3845', 0.9635732173919678],
       ['PF-04457845', 0.9635732173919678],
       ['PUROMYCIN', 0.9634006023406982],
       ['ANISOMYCIN', 0.9634006023406982],
       ['NAPABUCASIN', 0.9615297317504883],
     