In [1]:
import pickle
import networkx as nx
import numpy as np
import pandas as pd
import os
import scipy
import matplotlib.pyplot as plt
from collections import deque
from statsmodels.stats.multitest import fdrcorrection
from obrwr import futils as fu 
from obrwr import path_extract as pe

In [2]:
# Cell lines covered by the analysis.
cell_lines = ['BT20', 'BT549', 'MCF7', 'UACC812']

# Complete panel of activators (stimuli).
list_of_activators = ['EGF', 'FGF1', 'HGF', 'IGF1', 'INS', 'NRG1']

# Activators listed per cell line: BT549 only has four of the six,
# the other cell lines get their own copy of the complete panel.
dict_of_actual_experiments = {
    'BT20': list(list_of_activators),
    'BT549': ['EGF', 'FGF1', 'HGF', 'INS'],
    'MCF7': list(list_of_activators),
    'UACC812': list(list_of_activators),
}

## Building paths without the statistical test

In [5]:
# Sanity check: for every experiment, load the subgraph GML and verify that the
# 'Pi<experiment>' node attribute sums to 1e6.
# NOTE(review): the tolerance here is 1e-2 while the path-building cell uses
# 1e-5 for the same test -- confirm which one is intended.
data_dir = "../05_Output/HPNObrwr_novoom"
for cell_line, activators in dict_of_actual_experiments.items():
    for activator in activators:
        experiment = '_'.join(['HPNObrwr_novoom', cell_line, activator])
        folder_exp = '/'.join([data_dir, cell_line, activator])
        gml_path = folder_exp + '/' + '_'.join(['hpn_subgraph', experiment]) + '.gml'
        try:
            OriginalG = nx.read_gml(gml_path)
        except OSError:
            # Only a missing/unreadable file is treated as "experiment not run".
            # A malformed GML or a Ctrl-C is no longer silently reported as a
            # missing folder.
            print(f'No folder : {folder_exp} \n ... skipping')
            continue

        # Total of the 'Pi<experiment>' values over all nodes carrying it.
        Stot = np.sum(list(nx.get_node_attributes(OriginalG, 'Pi' + experiment).values()))
        if np.abs(Stot - 1e6) < 1e-2:
            print(f'OK {experiment}')
        else:
            print(f'NOK {experiment}')

OK HPNObrwr_novoom_BT20_EGF
OK HPNObrwr_novoom_BT20_FGF1
OK HPNObrwr_novoom_BT20_HGF
OK HPNObrwr_novoom_BT20_IGF1
OK HPNObrwr_novoom_BT20_INS
OK HPNObrwr_novoom_BT20_NRG1
OK HPNObrwr_novoom_BT549_EGF
OK HPNObrwr_novoom_BT549_FGF1
OK HPNObrwr_novoom_BT549_HGF
OK HPNObrwr_novoom_BT549_INS
OK HPNObrwr_novoom_MCF7_EGF
OK HPNObrwr_novoom_MCF7_FGF1
OK HPNObrwr_novoom_MCF7_HGF
OK HPNObrwr_novoom_MCF7_IGF1
OK HPNObrwr_novoom_MCF7_INS
OK HPNObrwr_novoom_MCF7_NRG1
OK HPNObrwr_novoom_UACC812_EGF
OK HPNObrwr_novoom_UACC812_FGF1
OK HPNObrwr_novoom_UACC812_HGF
OK HPNObrwr_novoom_UACC812_IGF1
OK HPNObrwr_novoom_UACC812_INS
OK HPNObrwr_novoom_UACC812_NRG1


In [7]:
# For every experiment whose 'Pi<experiment>' values sum to 1e6, enumerate the
# paths towards each target node, then save the union of the path edges both as
# an edge table (TSV) and as an edge-induced subgraph of the original graph (GML).
data_dir = "../05_Output/HPNObrwr_novoom"
for cell_line, activators in dict_of_actual_experiments.items():
    for activator in activators:
        experiment = '_'.join(['HPNObrwr_novoom', cell_line, activator])
        folder_exp = '/'.join([data_dir, cell_line, activator])
        gml_path = folder_exp + '/' + '_'.join(['hpn_subgraph', experiment]) + '.gml'
        try:
            OriginalG = nx.read_gml(gml_path)
        except OSError:
            # Only a missing/unreadable file means "experiment not run"; other
            # errors (malformed GML, Ctrl-C) must surface.
            print(f'No folder : {folder_exp} \n ... skipping')
            continue

        # get_node_attributes only returns nodes that carry the attribute, so
        # use .get() to treat nodes without 'Targets' as non-targets instead of
        # failing with KeyError.
        targets_attr = nx.get_node_attributes(OriginalG, 'Targets')
        original_targets = [n for n in OriginalG.nodes() if targets_attr.get(n)]

        Stot = np.sum(list(nx.get_node_attributes(OriginalG, 'Pi' + experiment).values()))
        if np.abs(Stot - 1e6) < 1e-5:
            print(f'OK {experiment}')
        else:
            print(f'NOK {experiment}')
            continue

        # Indexed by target below; each entry unpacks into (paths, value).
        pathstotarget = pe.enumerate_path_heap(OriginalG, original_targets, experiment,
                                               source=activator, Tot=Stot, r=1e-2,
                                               mode='percent', verbose=True)

        paths = []
        for target in original_targets:
            target_pi = OriginalG.nodes[target]['Pi' + experiment]
            tpaths, explained = pathstotarget[target]
            paths += tpaths
            print(f'Explained prob. of {target}: {explained/target_pi}')
            print(f'Number of paths : {len(tpaths)}')

        # Consecutive node pairs of every path (duplicates kept, path order kept).
        edges = [(u, v) for path in paths for u, v in zip(path[:-1], path[1:])]
        df = pd.DataFrame({'Sources': [u for u, _ in edges],
                           'Targets': [v for _, v in edges]})
        # NOTE(review): the TSV is named 'paths_noselection_' while the GML is
        # 'paths_no_selection_'; names are kept as-is because other code may
        # already read them.
        df.to_csv(folder_exp + '/paths_noselection_' + experiment + '.tsv', sep='\t', index=False)
        FinG = OriginalG.edge_subgraph(edges).copy()
        nx.write_gml(FinG, folder_exp + '/paths_no_selection_' + experiment + '.gml')

OK HPNObrwr_novoom_BT20_EGF


KeyboardInterrupt: 

## Predicting the effect of an inhibitor downstream of its target

### GSK690693 (AKTi)

In [57]:
def get_inh_nodes(G,substring):
    """Return the nodes of ``G`` whose name contains ``substring``.

    Nodes are returned as a list, in the iteration order of ``G.nodes``.
    """
    return [name for name in G.nodes if substring in name]

def union_of_desc(G,inhibited):
    """Return the set made of every node in ``inhibited`` plus all of their
    descendants in ``G`` (as given by ``nx.descendants``)."""
    reached = set()
    for start in inhibited:
        below = nx.descendants(G, start)
        reached.add(start)
        reached.update(below)
    return reached

def union_of_desc_length(G,inhibited,m):
    """Return every node in ``inhibited`` together with the nodes reported by
    ``nx.single_source_dijkstra_path_length`` from it with ``cutoff=m``."""
    reached = set()
    for start in inhibited:
        within_cutoff = nx.single_source_dijkstra_path_length(G, source=start, cutoff=m)
        reached.add(start)
        # Only the node names matter here, not the distances.
        reached.update(dict(within_cutoff))
    return reached

def get_perturbed(df,cell_line,activator):
    """Return the index labels of ``df`` whose value equals 1 in the column
    ``<cell_line>_<activator>``.

    The activator ``'INS'`` is looked up under the column name ``'Insulin'``.
    """
    column = '_'.join([cell_line, 'Insulin' if activator == 'INS' else activator])
    is_perturbed = df[column] == 1
    return list(df.index[is_perturbed])

def maxdist(G,inhibited,perturbed):
    """Largest hop distance from the inhibited nodes to a reachable perturbed node.

    For every node of ``perturbed`` that is reachable from (or is itself) at
    least one node of ``inhibited``, take the unweighted shortest-path length
    from its closest inhibited node; return the maximum of these values.

    Parameters
    ----------
    G : graph accepted by ``nx.single_source_shortest_path_length``
    inhibited : iterable of nodes of ``G``
    perturbed : iterable of nodes (nodes absent from ``G`` or unreachable
        from every inhibited node are ignored)

    Returns
    -------
    int
        The maximum distance, or 0 when no perturbed node is reachable.
        (A plain Python ``int``; the previous implementation could return a
        numpy integer with the same value.)
    """
    # One BFS per inhibited node. The previous version rebuilt G.reverse() for
    # every (perturbed, inhibited) pair and raised NetworkXNoPath as soon as a
    # perturbed node was reachable from one inhibited node but not from another.
    dist_from = [dict(nx.single_source_shortest_path_length(G, inh)) for inh in inhibited]

    m = 0
    for pert in perturbed:
        # Distances from the inhibited nodes that actually reach `pert`.
        reachable = [dist[pert] for dist in dist_from if pert in dist]
        if reachable:
            m = max(m, min(reachable))
    return m
    

In [62]:
# Inhibitor under study, and the substring used to locate its nodes in a graph.
inh = 'AKTi'
nodeinh = 'AKT'

# Output folders of the two methods being compared.
data_dir = "../05_Output/HPNObrwr_novoom"
phoneme_dir = "../05_Output/phonemes_whole_novoom"

# Table for this inhibitor: tab-separated, first column used as the index.
CDM = pd.read_csv('../00_InputData/inhibitions/'+inh+'.csv',sep='\t',index_col=0)

# Row labels of the table, used as the universe of nodes in the evaluation.
allmeasured = set(CDM.index)
print(len(allmeasured))

# Per-experiment metrics, filled by the evaluation loop of this cell.
obrwrres = {metric: [] for metric in ('sensitivity', 'specificity')}
phonres = {metric: [] for metric in ('sensitivity', 'specificity')}

def get_TP_prop(G,OriginalG,nodinh,CDM,cell_line,activator,name,downstreamM):
    """Score how well "downstream of the inhibited nodes in ``G``" predicts
    the perturbed nodes, and print a short report.

    Parameters
    ----------
    G : graph whose nodes are searched for ``nodinh`` and whose descendants
        define the predicted (downstream) set.
    OriginalG : unused; kept so existing calls keep working.
    nodinh : substring identifying the inhibited nodes (see ``get_inh_nodes``).
    CDM : table passed to ``get_perturbed`` to obtain the perturbed nodes.
    cell_line, activator : select the column of ``CDM``.
    name : label used as prefix in the printed report.
    downstreamM : set of nodes used as the universe from which negatives
        (non-perturbed nodes) are taken.

    Returns
    -------
    (sens, spec) : tuple of float
        ``TP/(TP+FN)`` and ``TN/(TN+FP)``. A ratio whose denominator is zero
        (no perturbed node, or no negative node) is returned as ``nan``
        instead of raising ``ZeroDivisionError``.
    """
    # Use the `nodinh` argument: the previous body read the notebook-level
    # global `nodeinh`, silently ignoring what the caller passed.
    inhibited = set(get_inh_nodes(G,nodinh))
    downstream = set(union_of_desc(G,inhibited))
    perturbed = set(get_perturbed(CDM,cell_line,activator))
    # Negatives: nodes of the universe that are not perturbed.
    negatives = downstreamM.difference(perturbed)
    print("###########")
    print(name)
    print(f'Downstream: {len(downstream)}')
    print(f'Perturbed: {len(perturbed)}')
    TP = len(downstream.intersection(perturbed))
    FP = len(downstream.intersection(negatives))
    FN = len(perturbed.difference(downstream))
    TN = len(negatives.difference(downstream))
    print(name+f' TP: {TP}')
    print(name+f' FP: {FP}')
    sens = TP/(TP+FN) if (TP+FN) else float('nan')
    spec = TN/(TN+FP) if (TN+FP) else float('nan')
    print(name+f' sensitivity:{sens}')
    print(name+f' specificity:{spec}')
    return sens,spec
        
# For every experiment, compare the two inferred networks (OBRWR paths graph
# and PHONEME edge list) on sensitivity/specificity, and collect the results
# in `obrwrres` / `phonres`.
for cell_line,activators in dict_of_actual_experiments.items():
    for activator in activators :
        experiment = '_'.join(['HPNObrwr_novoom',cell_line,activator])
        folder_exp = data_dir+'/'+'/'.join([cell_line,activator])

        OriginalG = nx.read_gml(folder_exp+'/'+'_'.join(['hpn_subgraph',experiment])+'.gml')
        Stot = np.sum(list(nx.get_node_attributes(OriginalG,'Pi'+experiment).values()))

        # Skip experiments whose 'Pi<experiment>' values do not sum to 1e6.
        if np.abs(Stot - 1e6) < 1e-5 :
            print(f'OK {experiment}')
        else:
            print(f'NOK {experiment}')
            continue

        try:
            obrwrG = nx.read_gml(folder_exp+'/'+'_'.join(['paths_no_selection',experiment])+'.gml')
            dfphon = pd.read_csv(phoneme_dir+'/'+'_'.join([cell_line,activator,'SIF'])+'.csv')
            phonemeG = nx.from_pandas_edgelist(dfphon,source='Node1',target='Node2', create_using=nx.DiGraph)
        except OSError:
            # Only missing/unreadable result files are skipped (this also covers
            # a missing OBRWR paths file, reported with the same message).
            # Any other error is a real problem and is allowed to propagate
            # instead of being hidden by a bare `except`.
            print('PHONEME not run on '+experiment)
            continue

        sens,spec = get_TP_prop(obrwrG,OriginalG,nodeinh,CDM,cell_line,activator,'OBRWR',allmeasured)
        obrwrres['sensitivity'].append(sens)
        obrwrres['specificity'].append(spec)
        sens,spec = get_TP_prop(phonemeG,OriginalG,nodeinh,CDM,cell_line,activator,'PHON',allmeasured)
        phonres['sensitivity'].append(sens)
        phonres['specificity'].append(spec)
                             

53
OK HPNObrwr_novoom_BT20_EGF
###########
OBRWR
Downstream: 17
Perturbed: 16
OBRWR TP: 8
OBRWR FP: 0
OBRWR sensitivity:0.5
OBRWR specificity:1.0
###########
PHON
Downstream: 7
Perturbed: 16
PHON TP: 4
PHON FP: 1
PHON sensitivity:0.25
PHON specificity:0.972972972972973
OK HPNObrwr_novoom_BT20_FGF1
###########
OBRWR
Downstream: 10
Perturbed: 13
OBRWR TP: 3
OBRWR FP: 0
OBRWR sensitivity:0.23076923076923078
OBRWR specificity:1.0
###########
PHON
Downstream: 1
Perturbed: 13
PHON TP: 1
PHON FP: 0
PHON sensitivity:0.07692307692307693
PHON specificity:1.0
OK HPNObrwr_novoom_BT20_HGF
###########
OBRWR
Downstream: 24
Perturbed: 15
OBRWR TP: 9
OBRWR FP: 3
OBRWR sensitivity:0.6
OBRWR specificity:0.9210526315789473
###########
PHON
Downstream: 2
Perturbed: 15
PHON TP: 2
PHON FP: 0
PHON sensitivity:0.13333333333333333
PHON specificity:1.0
OK HPNObrwr_novoom_BT20_IGF1
PHONEME not run on HPNObrwr_novoom_BT20_IGF1
OK HPNObrwr_novoom_BT20_INS
PHONEME not run on HPNObrwr_novoom_BT20_INS
OK HPNObrwr_novo

In [64]:
# Mean sensitivity, then mean specificity, of OBRWR over the collected experiments.
print(*(np.mean(obrwrres[metric]) for metric in ('sensitivity', 'specificity')))

0.5095471806141142 0.9416264029418611


In [65]:
# Mean sensitivity, then mean specificity, of PHONEME over the collected experiments.
print(*(np.mean(phonres[metric]) for metric in ('sensitivity', 'specificity')))

0.22911759028749876 0.9343972621378507
