__Aim:__
- [x] Exploring PrimeKG to find potential drugs for targets listed through multi-omics data integration.
- [x] List of potential drugs for combination with DAC to target genes listed through multi-omics data integration.
- [x] Evaluation of _Clinical Phase_ for listed drugs.
- [ ] Rank drugs for potential experimental validations.
- [ ] GDSC ...

__Contributions:__
- Expanding TDC data loader for PrimeKG
  - https://github.com/mims-harvard/PrimeKG#dataloader-therapeutics-data-commons
  - https://github.com/mims-harvard/PrimeKG/pull/12#issuecomment-1741878955

<!-- 
__Related works/links:__

- https://github.com/AstraZeneca/skywalkR-graph-features

> KR4SL: knowledge graph reasoning for explainable prediction of synthetic lethality 
> - https://doi.org/10.1093/bioinformatics/btad261

- https://tdcommons.ai/multi_pred_tasks/ppi/ -->

___

### Load ...

In [1]:
import numpy as np 
import pandas as pd
import anndata as ad
import screenpro

from screenpro.load import loadScreenProcessingData, read_adata_pkl

In [2]:
import matplotlib.pyplot as plt

from matplotlib import font_manager as fm
from matplotlib import rcParams

# Register every TrueType font found on the system with matplotlib's font
# manager so that a non-default family can be selected below.
font_files = fm.findSystemFonts(fontext='ttf', fontpaths=None)
for font_file in font_files:
    fm.fontManager.addfont(font_file)

# To inspect the registered family names:
#   {f.name for f in fm.fontManager.ttflist}

# Use Arial for all figure text.
rcParams['font.family'] = ['Arial']

___

In [3]:
import igraph as ig

### Drug KG

In [4]:
# Create the local directory that the TDC loaders below use as their download
# location (no error if it already exists). Done with pathlib rather than
# `!mkdir -p` so the cell does not depend on a POSIX shell being available.
from pathlib import Path

Path('datasets').mkdir(parents=True, exist_ok=True)

In [5]:
import pandas as pd

from tdc.multi_pred import DrugRes
from tdc.resource import PrimeKG

In [6]:
from tdc.knowledge_graph import KnowledgeGraph

In [7]:
# Drug Response Prediction Task Overview
# Y is the log-normalized IC50. GDSC2 is version 2 of GDSC, which uses improved experimental procedures.

# https://tdcommons.ai/multi_pred_tasks/drugres/

In [8]:
# Drug-response datasets (both GDSC releases), stored under ./datasets.
GDSC1 = DrugRes(path='./datasets/GDSC1', name='GDSC1')
GDSC2 = DrugRes(path='./datasets/GDSC2', name='GDSC2')

# PrimeKG, converted to the TDC knowledge-graph wrapper used for all queries below.
primekg = PrimeKG(path='./datasets/PrimeKG').to_KG()

Found local copy...
Loading...
Done!
Found local copy...
Loading...
Done!
Found local copy...
Loading...


### DAC + X Drug

In [9]:
# Work on a copy so the full graph in `primekg` stays available for later queries.
primekg_drug_target = primekg.copy()

# Restrict the copy to drug-protein edges annotated as "target".
# The return value of run_query is not used; the filtered graph is read from the object afterwards.
primekg_drug_target.run_query(
    query='relation == "drug_protein" & display_relation == "target"'
)

In [10]:
# All nodes whose source is DrugBank, i.e. the drugs in PrimeKG.
# NOTE: `durgs` is a typo for "drugs"; the name is kept because the next cell displays it.
durgs = primekg.get_nodes_by_source(source='DrugBank')

In [11]:
durgs

Unnamed: 0,id,type,name,source
0,DB09130,drug,Copper,DrugBank
1,DB09140,drug,Oxygen,DrugBank
2,DB00180,drug,Flunisolide,DrugBank
3,DB00240,drug,Alclometasone,DrugBank
4,DB00253,drug,Medrysone,DrugBank
...,...,...,...,...
7952,DB01486,drug,Cathine,DrugBank
7953,DB11104,drug,Sulfur hexafluoride,DrugBank
7954,DB00639,drug,Butoconazole,DrugBank
7955,DB00538,drug,Gadoversetamide,DrugBank


In [12]:
# Sub-graph of edges labelled "synergistic interaction" that have
# Decitabine (DAC) on either end.
primekg_dac_synergy = primekg.copy()
primekg_dac_synergy.run_query(
    '(x_name == "Decitabine" | y_name == "Decitabine")'
    '&(display_relation == "synergistic interaction")'
)

In [13]:
# Names of all DrugBank nodes left in the DAC-synergy sub-graph.
primekg_dac_synergy_drug_names = (
    primekg_dac_synergy
    .get_nodes_by_source(source='DrugBank')['name']
    .to_list()
)

___

see Figure 4B – https://biorxiv.org/content/10.1101/2022.12.14.518457v2

In [14]:
# Candidate target genes to look up in PrimeKG (see the figure referenced above).
target_genes = [
    'PMPCA',
    'RNF126',
    'SLC7A6',
    'DHODH',
    'ZNF777',
    'SQLE',
    'MYBBP1A',
    'RBM14-RBM4',
    'INTS5',
    'INO80D',
    'BCL2',
]
# Optionally extend with: + ['DNMT1']

In [15]:
target_genes

['PMPCA',
 'RNF126',
 'SLC7A6',
 'DHODH',
 'ZNF777',
 'SQLE',
 'MYBBP1A',
 'RBM14-RBM4',
 'INTS5',
 'INO80D',
 'BCL2']

In [16]:
# From the drug-target edges, keep those that touch any of the target genes
# (the gene may be on either end of the edge).
primekg_drugs_for_combo = primekg_drug_target.copy()
primekg_drugs_for_combo.run_query(
    f'x_name in {target_genes} | y_name in {target_genes}'
)

In [17]:
# Show the NCBI-sourced (gene/protein) nodes left after the filter above,
# i.e. the target genes that have at least one drug-target edge.
primekg_drugs_for_combo.get_nodes_by_source('NCBI')

Unnamed: 0,id,type,name,source
0,596,gene/protein,BCL2,NCBI
1,1723,gene/protein,DHODH,NCBI
2,6713,gene/protein,SQLE,NCBI


In [18]:
# Narrow the candidate edges further to those involving a drug that also has
# a synergistic-interaction edge with Decitabine.
primekg_dac_synergy_drugs_for_combo = primekg_drugs_for_combo.copy()
primekg_dac_synergy_drugs_for_combo.run_query(
    f'x_name in {primekg_dac_synergy_drug_names} | y_name in {primekg_dac_synergy_drug_names}'
)

In [19]:
# Drug names that both hit a target gene and are DAC-synergistic in PrimeKG.
primekg_dac_synergy_drugs_for_combo_list = (
    primekg_dac_synergy_drugs_for_combo
    .get_nodes_by_source(source='DrugBank')['name']
    .to_list()
)

### Prepare a table for the paper

In [20]:
# Build the base table from the candidate edges whose x node is a drug.
#
# The whole transformation is one non-mutating chain: the original added a
# column to the result of `.query()` and then sorted it in place, which is the
# chained-assignment pattern that triggers SettingWithCopyWarning and makes the
# cell non-idempotent. `.assign` / `.sort_values` each return a new frame.
table_0 = (
    primekg_drugs_for_combo.df
    .query('x_type=="drug"')
    # Flag drugs that also have a synergistic-interaction edge with Decitabine.
    .assign(
        dac_synergy=lambda edges: edges.x_name.isin(primekg_dac_synergy_drugs_for_combo_list)
    )
    # Descending on both keys: within each target, DAC-synergistic drugs come first.
    .sort_values(['y_name', 'dac_synergy'], ascending=False)
)

In [21]:
# Keep and relabel the columns needed for the manuscript table, indexed by
# (target gene, DrugBank ID).
table_1 = (
    table_0
    .loc[:, ['y_name', 'x_id', 'x_name', 'dac_synergy']]
    .rename(columns={
        'y_name': 'target',
        'x_id': 'DrugBank',
        'x_name': 'Drug full name',
    })
    .set_index(['target', 'DrugBank'])
)

In [22]:
# For every candidate drug, collect the names of all proteins it targets in
# PrimeKG (not only the genes of interest) as a comma-separated string.
drug_targets = {}

# Query each DrugBank ID once, even if the drug is listed under several targets.
for drug in table_1.index.get_level_values('DrugBank').unique():
    drug_kg = primekg_drug_target.copy()
    drug_kg.run_query(f'x_id == "{drug}" | y_id == "{drug}"')

    drug_targets[drug] = ','.join(drug_kg.get_nodes_by_source('NCBI').name.to_list())

# Fill the column by looking each row's DrugBank ID up in the dict. The original
# assigned `drug_targets.values()` positionally, which fails with a length
# mismatch as soon as one drug appears under more than one target gene, because
# the dict then has fewer entries than the table has rows.
table_1['drug_targets'] = [
    drug_targets[drug_id] for drug_id in table_1.index.get_level_values('DrugBank')
]

del drug_targets

In [23]:
table_1

Unnamed: 0_level_0,Unnamed: 1_level_0,Drug full name,dac_synergy,drug_targets
target,DrugBank,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SQLE,DB00735,Naftifine,False,SQLE
SQLE,DB00857,Terbinafine,False,SQLE
SQLE,DB01091,Butenafine,False,SQLE
SQLE,DB08846,Ellagic acid,False,"CA1,CA2,CA4,SQLE,PRKCB,PRKCA,CSNK2A1,SYK,CA12,..."
DHODH,DB01097,Leflunomide,True,"DHODH,PTK2B,AHR"
DHODH,DB03523,Brequinar,True,DHODH
DHODH,DB08880,Teriflunomide,True,DHODH
DHODH,DB01117,Atovaquone,False,DHODH
DHODH,DB02262,Orotic acid,False,DHODH
DHODH,DB02613,Capric dimethyl amine oxide,False,"DHODH,PNLIPRP2"


In [24]:
# Write the candidate table (indexed by target and DrugBank ID) to an Excel
# file in the current working directory.
table_1.to_excel('DAC_combo_candidates.xlsx')

### Clinical Phase

The clinical phase of each listed drug was looked up manually at https://clue.io/repurposing-app
<!--     
    - leflunomide	Launched
    - brequinar	Phase 2
    - teriflunomide	Launched
    - butenafine	Launched
    - terbinafine	Launched
    - atovaquone	Launched
    - manitimus	Preclinical
    - naftifine	Launched -->

In [32]:
# '","'.join(table_1['Drug full name'])

In [30]:
# Cell lines used to filter the GDSC drug-response data below.
cell_lines = [
    'HL-60',
    'MOLM-13',
]

___

### GDSC ...

In [31]:
# GDSC1.get_data().query(f"Drug_ID in {primekg_dac_synergy_drugs_for_combo_list} &  `Cell Line_ID` in {cell_lines}").sort_values('Y',ascending=False)

In [32]:
# GDSC2 response (Y) of the selected cell lines to every drug that targets one
# of the genes of interest, highest Y first. `Drug_ID` is matched against
# PrimeKG drug names.
(
    GDSC2.get_data()
    .query(
        f"Drug_ID in {primekg_drugs_for_combo.get_nodes_by_source('DrugBank').name.to_list()}"
        f" &  `Cell Line_ID` in {cell_lines}"
    )
    .sort_values('Y', ascending=False)
)

Unnamed: 0,Drug_ID,Drug,Cell Line_ID,Cell Line,Y
47559,Leflunomide,CC1=C(C=NO1)C(=O)NC2=CC=C(C=C2)C(F)(F)F,HL-60,"[3.3509853404098497, 2.99626031382298, 9.94965...",4.818404
48115,Leflunomide,CC1=C(C=NO1)C(=O)NC2=CC=C(C=C2)C(F)(F)F,MOLM-13,"[3.4406201832575, 2.92685171367912, 9.11426679...",2.624924
4792,Navitoclax,CC1(CCC(=C(C1)CN2CCN(CC2)C3=CC=C(C=C3)C(=O)NS(...,HL-60,"[3.3509853404098497, 2.99626031382298, 9.94965...",-0.887512
5286,Navitoclax,CC1(CCC(=C(C1)CN2CCN(CC2)C3=CC=C(C=C3)C(=O)NS(...,MOLM-13,"[3.4406201832575, 2.92685171367912, 9.11426679...",-1.97875
67374,Venetoclax,CC1(CCC(=C(C1)C2=CC=C(C=C2)Cl)CN3CCN(CC3)C4=CC...,HL-60,"[3.3509853404098497, 2.99626031382298, 9.94965...",-4.545733
19635,Paclitaxel,CC1=C2[C@H](C(=O)[C@@]3([C@H](C[C@@H]4[C@]([C@...,HL-60,"[3.3509853404098497, 2.99626031382298, 9.94965...",-4.939358
3242,Docetaxel,CC1=C2[C@H](C(=O)[C@@]3([C@H](C[C@@H]4[C@]([C@...,HL-60,"[3.3509853404098497, 2.99626031382298, 9.94965...",-5.69119
67914,Venetoclax,CC1(CCC(=C(C1)C2=CC=C(C=C2)Cl)CN3CCN(CC3)C4=CC...,MOLM-13,"[3.4406201832575, 2.92685171367912, 9.11426679...",-5.782811
20211,Paclitaxel,CC1=C2[C@H](C(=O)[C@@]3([C@H](C[C@@H]4[C@]([C@...,MOLM-13,"[3.4406201832575, 2.92685171367912, 9.11426679...",-5.874419
3786,Docetaxel,CC1=C2[C@H](C(=O)[C@@]3([C@H](C[C@@H]4[C@]([C@...,MOLM-13,"[3.4406201832575, 2.92685171367912, 9.11426679...",-6.550864


### Drug -> AML
Finding links between drugs and AML phenotypes in PrimeKG ...

In [27]:
# `Cell Line_ID` in ["HL-60","MOLM-13"] &

In [28]:
# g = PrimeKG.to_nx()

In [29]:
# g = ig.Graph.from_networkx(g)

this is problematic – https://github.com/mims-harvard/TDC/blob/6af2a41679a0699446ad627be8051504548e86fa/tdc/resource/primekg.py#L30

### Session info

In [33]:
from watermark import watermark

# Record the execution environment: first the interpreter/machine summary,
# then the versions of the packages imported into this notebook's namespace.
print(watermark())
print('_' * 80)
print(watermark(globals_=globals(), iversions=True))

Last updated: 2023-10-15T04:12:44.821774-07:00

Python implementation: CPython
Python version       : 3.9.16
IPython version      : 8.14.0

Compiler    : GCC 11.3.0
OS          : Linux
Release     : 3.10.0-957.27.2.el7.x86_64
Machine     : x86_64
Processor   : x86_64
CPU cores   : 64
Architecture: 64bit

________________________________________________________________________________
anndata   : 0.9.1
matplotlib: 3.7.2
screenpro : 0.2.3
pandas    : 2.0.3
igraph    : 0.10.4
numpy     : 1.24.4

