In [None]:
%matplotlib inline
import sys
sys.path.append("../..")
from IPython.display import display

import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from magine.networks.utils import subtract_network_from_network
from magine.networks.subgraphs import Subgraph
from magine.networks.visualization.notebooks.view import display_graph
from magine.enrichment.enrichr import Enrichr
from exp_data import exp_data
from collections import Counter
# Shared Enrichr instance; later cells call e.run(...) for enrichment queries.
e = Enrichr()

In [None]:
# expanded canonical network
ecn = nx.read_gml('Networks/canonical_kegg_hmdb_biogrid_reactome_signor.gml')

# data driven network
# NOTE(review): read_gpickle unpickles the file, which can execute arbitrary
# code -- only load pickles from a trusted source. The function was also
# removed in networkx 3.0, so this cell needs networkx < 3.0.
ddn = nx.read_gpickle('Networks/cisplatin_network_w_attributes.p')

In [None]:
# Result of subtracting one network from the other (ddn first, ecn second).
subtracted_net = subtract_network_from_network(ddn, ecn)

nx.write_gml(subtracted_net, 'Networks/subtracted_network.gml')

# make copy to edit
subtracted_net_copy = subtracted_net.copy()

# Convert to undirected and split into connected components, largest first.
# nx.connected_component_subgraphs was removed in networkx 2.4; building the
# subgraphs from nx.connected_components is the documented replacement.
# .copy() keeps the old default of returning independent graphs, not views.
tmp_g = subtracted_net.to_undirected()
sorted_graphs = sorted(
    (tmp_g.subgraph(component).copy()
     for component in nx.connected_components(tmp_g)),
    key=len,
    reverse=True,
)

In [None]:
# Components with at most this many nodes are removed outright.
MAX_TRIVIAL_COMPONENT_SIZE = 2
# A component is kept only when more than this percentage of its nodes have
# sigMeasured == 'red'.
MIN_PERCENT_SIG = 50.

node_list = []            # size of every connected component, kept or not
group_to_nodes = dict()   # 'group<N>' -> set of node names in that component
counter = 0               # running index of kept components

for component in sorted_graphs:
    nodes = set(component.nodes)
    n_nodes = len(nodes)
    node_list.append(n_nodes)
    if n_nodes <= MAX_TRIVIAL_COMPONENT_SIZE:
        subtracted_net_copy.remove_nodes_from(nodes)
        continue

    # Count the nodes whose sigMeasured attribute is 'red'.
    sig_hit = sum(
        1 for _, data in component.nodes(data=True)
        if data['sigMeasured'] == 'red'
    )
    fraction_sig = float(sig_hit) / float(n_nodes) * 100.

    if fraction_sig <= MIN_PERCENT_SIG:
        subtracted_net_copy.remove_nodes_from(nodes)
        continue

    # Graph.nodes[...] replaces Graph.node[...], which was removed in
    # networkx 2.4. The node attribute uses a zero-padded group label while
    # the dict key is unpadded; later cells look groups up as 'group0' etc.
    for node_name in nodes:
        subtracted_net_copy.nodes[node_name]['group'] = 'group{0:05d}'.format(counter)
    group_to_nodes['group{}'.format(counter)] = nodes
    counter += 1

nx.write_gml(subtracted_net_copy, 'Networks/subtracted_network_filtered.gml')

# Histogram of component sizes (size -> number of components).
display(Counter(node_list))

In [None]:
def run(genes, db='Reactome_2016'):
    """Query Enrichr for ``genes`` and display the first rows of the result.

    Parameters
    ----------
    genes : iterable
        Identifiers passed straight to ``e.run``.
    db : str
        Name of the Enrichr library to query.

    Returns
    -------
    The object returned by ``e.run`` with a constant ``sample_id`` column
    added; when it has more than one row it is also filtered in place.
    """
    result = e.run(genes, db)
    result['sample_id'] = 1

    # The in-place filters are only applied when there is more than one row.
    if result.shape[0] > 1:
        result.filter_multi(p_value=0.05, inplace=True)
        result.remove_redundant(inplace=True)

    # Size is checked again because the filters above may have removed rows.
    if result.shape[0] > 1:
        result.dist_matrix(level='sample')

    preview = result.head(20)
    display(preview)
    return result

In [None]:
# Enrich the node set stored under 'group0' against GO_Biological_Process_2017b.
group0_go_bp = run(group_to_nodes['group0'], 'GO_Biological_Process_2017b')

In [None]:
# Enrich the node set stored under 'group0' against Jensen_DISEASES.
group0_disease = run(group_to_nodes['group0'], 'Jensen_DISEASES')

In [None]:
# Enrich the node set stored under 'group1' against Jensen_DISEASES.
group1_disease = run(group_to_nodes['group1'], 'Jensen_DISEASES')

"Deafness/dystonia syndrome thus may be caused by decreased levels of Tim23 in the mitochondrial inner membrane in affected tissues."
https://academic.oup.com/hmg/article/11/5/477/2901604

In [None]:
# Plot the species annotated to the 'deafness_dystonia_syndrome' term of the
# group1 disease enrichment. 'TIMM23_deafness' is the second positional
# argument -- presumably the output name; confirm against plot_species.
exp_data.species.plot_species(group1_disease.term_to_genes('deafness_dystonia_syndrome'),
                      'TIMM23_deafness', image_format='png', plot_type='matplotlib');

In [None]:
# Subgraph helper built on the data-driven network; the cells below use it
# for neighbor and path queries.
ns = Subgraph(network=ddn)

In [None]:
# Neighborhood of TIMM23 queried with max_dist=2; print the resulting nodes.
sub = ns.neighbors('TIMM23', max_dist=2,)
print(sub.nodes)

In [None]:
# Render the subgraph held in `sub`.
display_graph(sub)

In [None]:
# Paths between the genes annotated to '3-methylglutaconic_aciduria'.
# The original cell read the term from `df`, which is only assigned in the
# kinase-enrichment cell further down, so it failed on a fresh kernel.
# NOTE(review): group1_disease is assumed to be the intended enrichment result
# (this is a disease term and the neighbouring cells analyse group1) -- confirm.
sub = ns.paths_between_list(group1_disease.term_to_genes('3-methylglutaconic_aciduria'))
# Expand by one step both upstream and downstream; raise max_dist (e.g. to 3)
# for a wider neighbourhood.
sub = ns.expand_neighbors(sub, nodes=list(sub.nodes), upstream=True, downstream=True, max_dist=1)
display_graph(sub)

In [None]:
# Neighbors of TIMM23 using the default arguments of ns.neighbors, then render.
sub = ns.neighbors('TIMM23')
display_graph(sub)
                   

# Group 1
The MPTP complex is known to be activated under high-ROS conditions and Ca2+ overload, leading to increased fluid uptake due to mitochondrial protein transport, and thus resulting in loss of membrane potential, organellar swelling, and induction of cell death.

This points to a DNA-damage-independent form of cell death induced by cisplatin.

https://www.nature.com/articles/cddis201472

In [None]:
# Enrichment of group0 with run()'s default library (Reactome_2016).
group0 = run(group_to_nodes['group0'])

In [None]:
# Enrichment of group1 with run()'s default library (Reactome_2016).
group1 = run(group_to_nodes['group1'])

In [None]:
# Enrichment of group2 with run()'s default library (Reactome_2016).
group2 = run(group_to_nodes['group2'])

In [None]:
# Enrichment of group3 with run()'s default library (Reactome_2016).
group3 = run(group_to_nodes['group3'])

## Kinases

In [None]:
# Keep labels containing '(ph' and take the part before the first underscore
# (presumably the gene name of a phospho-site label -- confirm label format).
phospho = {
    label.split('_')[0]
    for label in exp_data.ph_silac.sig.label_list
    if '(ph' in label
}

# Enrichment against the KEA_2015 library, with term names upper-cased.
df = e.run(phospho, 'KEA_2015')
df['term_name'] = df['term_name'].str.upper()
df['sample_id'] = 1

# In-place filtering, same thresholds as before.
df.filter_multi(p_value=0.05, combined_score=1, inplace=True)
df.remove_redundant(inplace=True)
df.dist_matrix();

In [None]:
# Show the first 10 rows of the filtered KEA_2015 enrichment.
display(df.head(10))

## CDK2 activation linked to Nephrotoxicity
CDK2 is the top enriched kinase. This paper shows that kidney cells (non-cancerous, so this is an off-target side effect) have high levels of CDK2, and that inhibiting CDK2 alleviates apoptosis in these cells.

http://jasn.asnjournals.org/content/17/9/2434.full

In [None]:
# NOTE(review): the surrounding section discusses CDK2, but this call plots
# CDK1 -- confirm which gene is intended.
exp_data.genes.plot_species(['CDK1'], 'CDK1_plot', plot_type='matplotlib');

# Auditory system clue
GSK3B is highly active. This paper shows that its inhibition could save auditory cells. 
https://www.ncbi.nlm.nih.gov/pubmed/19666099

In [None]:
# Plot GSK3B. NOTE(review): the second argument is the placeholder 'test',
# unlike the descriptive names used in the other plot cells -- consider renaming.
exp_data.genes.plot_species(['GSK3B'], 'test', plot_type='matplotlib');

In [None]:
# Upstream-only neighbors of GSK3B (upstream=True, downstream=False),
# restricted via include_only to the ids in exp_data.species.sig.id_list.
display_graph(ns.neighbors('GSK3B', upstream=True, downstream=False,
                           include_only=exp_data.species.sig.id_list))

In [None]:
# Ids from exp_data.proteins.sig.id_list that are not nodes of the
# data-driven network.
all_genes = exp_data.proteins.sig.id_list
nodes = set(ddn.nodes)
missing = all_genes.difference(nodes)
print(len(missing))

# remove orfs and non-read transcripts
acs = {gene for gene in missing if gene.startswith('AC') and '.' in gene}
rps = {gene for gene in missing if gene.startswith('RP') and '.' in gene}
orfs = {gene for gene in missing if 'orf' in gene}
mirs = {gene for gene in missing if gene.startswith('MIR')}

# Drop each category in turn and report how many ids remain after each step.
for excluded in (acs, rps, orfs, mirs):
    missing.difference_update(excluded)
    print(len(missing))


In [None]:
# List the remaining ids that are absent from the network, alphabetically.
print(sorted(missing))

In [None]:
# Enrich the ids missing from the network against GO_Biological_Process_2017b.
missing_df = run(missing, 'GO_Biological_Process_2017b')
