In [1]:
'''
Get the first-hop PPI partners of the known targets of the drug candidates and intersect them with IPF DEGs.
Run enrichment on the selected genes to look for potential drug mechanisms.
'''
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import requests
from io import StringIO

In [7]:
# read all necessary data including gene id table, PPI table from Toppgene, IPF deg and drug targets
# Move into the project root so the relative paths used below resolve.
# The project folder lives at a different location depending on the machine,
# so each known location is tried in order and the first usable one wins.
_project_roots = (
    'E:/Box Sync/Jake-Jegga/IPF Drug Discovery',
    'M:/Box/Jake-Jegga/IPF Drug Discovery',
)
_chdir_error = None
for path in _project_roots:
    try:
        # Skip the chdir when the working directory already contains this path.
        # NOTE(review): os.getcwd() uses backslashes on Windows, so this
        # substring guard probably never matches there; chdir-ing again is harmless.
        if path not in os.getcwd():
            os.chdir(path)
        break
    except OSError as err:
        # Only a missing/inaccessible folder should trigger the fallback;
        # anything else (e.g. KeyboardInterrupt) must propagate.
        _chdir_error = err
else:
    # None of the locations worked: surface the last failure, as before.
    raise _chdir_error
# IPF differentially expressed genes; the first spreadsheet column becomes the index.
ipf_deg = pd.read_excel('Intermediate results/Deg lists included in paper/IPF FC06 pval005 protein coding DEG network.xlsx',index_col=0)
# Download the ToppGene "Interaction" table. Fail loudly on an HTTP error
# instead of parsing an error page as a TSV, and do not hang forever if the
# server stops responding.
_ppi_response = requests.post("https://toppgene.cchmc.org/table.jsp", data={"category":'Interaction'}, timeout=300)
_ppi_response.raise_for_status()
PPI_data = StringIO(_ppi_response.text)
ppi = pd.read_csv(PPI_data,sep='\t',index_col=0)
# Gene identifier conversion table, read entirely as strings.
geneidtable = pd.read_csv('../../Ontology_Info/Master Gene Conversion Table.csv',index_col=0,dtype = str)
# Drug-candidate report. The first sheet is read by default, so the removed
# `sheetname=0` keyword (a TypeError on current pandas) is simply dropped.
dc_targets = pd.read_excel('./Results/Combined report from 6 IPF mocroarry queries.xlsx',index_col=0)
# --- table preprocessing ---
# Collapse the conversion table to one Series of h_entrez_id values
# (missing and repeated ids removed), indexed by the table's first column
# -- presumably gene symbols, since it is joined against symbol columns below.
geneidtable = geneidtable.h_entrez_id.dropna().drop_duplicates()
_entrez_lookup = geneidtable.to_frame()

# Keep only the reported drug targets that have an entrez id.
dc_targets = dc_targets.merge(_entrez_lookup, how='inner', left_on='Target', right_index=True)

# --- reshape the PPI table ---
# The source gene is the first space-separated token of concept_name.
ppi['source'] = [name.split(' ')[0] for name in ppi.concept_name]
ppi = (
    ppi
    # entrez id of the source gene -> becomes the row index
    .merge(_entrez_lookup, how='left', left_on='source', right_index=True)
    .set_index('h_entrez_id')
    # entrez id of the interacting gene -> stays as column 'h_entrez_id'
    .merge(_entrez_lookup, how='left', left_on='symbol', right_index=True)
    # one row per (source, interactor entrez id) pair
    .drop_duplicates(['source', 'h_entrez_id'])
)
# Discard self-interactions.
ppi = ppi[ppi.symbol != ppi.source]
# get conserved IPF degs
# score = third column minus fourth column of the DEG sheet (positional).
# NOTE(review): confirm which measurements these two columns hold.
ipf_deg['score'] = ipf_deg.iloc[:,2]-ipf_deg.iloc[:,3]
# Keep genes whose absolute score is at least 2.
ipf_cm = ipf_deg[abs(ipf_deg.score)>=2]
# Translate the selected genes to entrez ids. Labels absent from geneidtable
# are filtered out first: .loc with a list containing missing labels raises
# KeyError on pandas >= 1.0 (older versions returned NaN, hence the dropna).
# The original label order is preserved.
_ipf_cm_labels = ipf_cm.index.unique()
_ipf_cm_labels = _ipf_cm_labels[_ipf_cm_labels.isin(geneidtable.index)]
ipf_cm_deg = geneidtable.loc[_ipf_cm_labels].dropna().values
# get new clue repurpose data
# The NEW data is very different from our old one!
# Candidates from the combined report plus two extra compounds.
drug_candidates = dc_targets.index.tolist() + ['nintedanib','pirfenidone']
clue = pd.read_table('../../Lincs_data/All 6402 cmpd metadata from clue.txt',index_col=0)
# Explode the comma-separated Target field into one row per (compound, target).
clue_targets = clue.Target.str.split(', ',expand=True).stack().reset_index()
# After set_index('Name') the remaining columns are the stack level and the
# target string; keep the target string as a Series indexed by compound name.
clue_targets = clue_targets.set_index('Name').iloc[:,1]
# Restrict to the drug candidates. Candidates that clue does not list are
# filtered out up front, because .loc with missing labels raises KeyError on
# pandas >= 1.0 (older versions produced NaN rows, which dropna removed).
_candidates_in_clue = pd.Index(drug_candidates)
_candidates_in_clue = _candidates_in_clue[_candidates_in_clue.isin(clue_targets.index)]
clue_targets = clue_targets.loc[_candidates_in_clue].dropna()
# Attach entrez ids; targets without an id are dropped by the inner join.
clue_targets = pd.merge(pd.DataFrame(clue_targets),pd.DataFrame(geneidtable),left_on=0,right_index=True,how='inner')
# Repeated candidate names yield repeated rows; collapse them.
clue_targets = clue_targets.reset_index().drop_duplicates().set_index('Name')
clue_targets.columns = ['Target','h_entrez_id']

In [9]:
# From here on dc_targets is the clue-derived compound -> target table.
dc_targets = clue_targets.copy()
# Set for constant-time membership tests (ipf_cm_deg is an array).
_ipf_cm_deg_ids = set(ipf_cm_deg)
# Per-compound frames are collected and concatenated once at the end;
# DataFrame.append in a loop is quadratic and no longer exists in pandas 2.x.
_network_parts = []
for cmpd in dc_targets.index.unique():
    _targets = dc_targets.loc[cmpd,'h_entrez_id']
    # .loc yields a bare string for a single-target compound, a Series otherwise.
    if isinstance(_targets,str):
        _targets = [_targets]
    else:
        _targets = _targets.tolist()
    # Entrez ids of the first-hop interactors (ppi is indexed by the source
    # gene's entrez id). A boolean mask is used so that targets without any
    # PPI row are skipped instead of raising KeyError.
    _targets_ppi = ppi.loc[ppi.index.isin(_targets),'h_entrez_id'].dropna().values
    # Keep only interactors that are conserved IPF DEGs, then add the targets.
    _targets_ppi = [x for x in _targets_ppi if x in _ipf_cm_deg_ids]
    _targets_ppi += _targets
    # Map the entrez ids back to the index labels of geneidtable.
    _targets_ppi_symbol = geneidtable[geneidtable.isin(_targets_ppi)].index.values
    _targets_symbol = dc_targets.loc[cmpd,'Target']
    # One row per gene, tagged with the compound; direct targets are flagged.
    _tmp_df = pd.DataFrame(cmpd,index = _targets_ppi_symbol,columns=['Compound'])
    _tmp_df.loc[_targets_symbol,'Direct_target'] = 'Yes'
    # Attach the IPF score where available; the gene label becomes column 'index'.
    _tmp_df = _tmp_df.join(ipf_cm.score,how = 'left').reset_index()
    _tmp_df = _tmp_df.drop_duplicates()
    _network_parts.append(_tmp_df)
# pd.concat refuses an empty list, so fall back to an empty frame like the
# original accumulator.
TC_target_network = pd.concat(_network_parts) if _network_parts else pd.DataFrame()
TC_target_network = TC_target_network.set_index('index')
# For every direct target: the share of its PPI rows whose partner is a
# conserved IPF DEG.
_is_direct = TC_target_network.Direct_target.notnull().values
direct_tar = TC_target_network.index[_is_direct].unique()

# All PPI rows whose source is a direct target, and how many each source has.
dt_ppi = ppi.loc[ppi.source.isin(direct_tar)]
dt_ppi_total_counts = dt_ppi['source'].value_counts()

# Same, restricted to partners that map to a conserved IPF DEG id.
_ipf_cm_symbols = geneidtable[geneidtable.isin(ipf_cm_deg)].index.unique()
dt_ppi_ipf = dt_ppi.loc[dt_ppi.symbol.isin(_ipf_cm_symbols)]
dt_ppi_ipf_counts = dt_ppi_ipf['source'].value_counts()

# The division aligns on the source label; a source with no IPF partner is
# absent from the numerator and therefore ends up as NaN rather than 0.
dt_ppi_ratio = (dt_ppi_ipf_counts / dt_ppi_total_counts).to_frame('ratio_ipf_genes')
TC_target_network = TC_target_network.merge(
    dt_ppi_ratio, how='left', left_index=True, right_index=True
)

# Add the PPI edges that connect two genes already present in the network,
# then write everything to disk.
all_genes = TC_target_network.index.unique()
_ppi_by_source = ppi.set_index('source')
# Only look up genes that occur as a PPI source: .loc with labels missing from
# the index raises KeyError on pandas >= 1.0. Gene order is preserved.
_genes_with_ppi = all_genes[all_genes.isin(_ppi_by_source.index)]
valid_ppi = _ppi_by_source.loc[_genes_with_ppi,'symbol']
# Drop self-loops and edges whose partner is outside the network.
valid_ppi = valid_ppi[valid_ppi.index!=valid_ppi.values]
valid_ppi = valid_ppi[valid_ppi.isin(all_genes)]
# Edge rows reuse the 'Compound' column to hold the partner gene.
valid_ppi.name = 'Compound'
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
TC_target_network = pd.concat([TC_target_network, valid_ppi.to_frame()])
TC_target_network.to_csv('Drug Target+PPI network.csv')