 ## Set up

In [17]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns

import config
from config import LEVELS, sm, with_sm, no_sm, with_unique_sm, GRAPHS_PATH
from interactomes import get_interactome, print_interactome_details, get_json_filename, read_graph
from lib.graph_database import get_participants, get_components

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

config.set_root_wd()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Working directory: c:\git\pathwayanalysisplatform\proteoformnetworks


In [12]:
# Columns of the participant records, per entity level:
#   Genes:           Pathway, Reaction, Entity, Name, Type, Id, Database, Role
#   Proteins:        Pathway, Reaction, Entity, Name, Type, Id, PrevId, Database, Role
#   Proteoforms:     Pathway, Reaction, Entity, Name, Type, Id, PrevId, Database, Role
#   Small molecules: Pathway, Reaction, Entity, Name, Type, Id, Database, Role

# One participants table per level in LEVELS, followed by the small molecules.
participant_records = {level: get_participants(level) for level in [*LEVELS, sm]}

participant_records

{'genes':           Pathway       Reaction         Entity          Name  \
 0   R-HSA-1059683  R-HSA-1067640   R-HSA-447100           IL6   
 1   R-HSA-1059683  R-HSA-1067640  R-HSA-1067661        IL6R-2   
 2   R-HSA-1059683  R-HSA-1067640  R-HSA-1067661        IL6R-2   
 3   R-HSA-1059683  R-HSA-1067640   R-HSA-447100           IL6   
 4   R-HSA-1059683  R-HSA-1067646   R-HSA-877348          TYK2   
 ..            ...            ...            ...           ...   
 95  R-HSA-1059683  R-HSA-1112538   R-HSA-914048  p-Y705-STAT3   
 96  R-HSA-1059683  R-HSA-1112565   R-HSA-197824          IL6R   
 97  R-HSA-1059683  R-HSA-1112565  R-HSA-1112543  p-Y1034-JAK1   
 98  R-HSA-1059683  R-HSA-1112565   R-HSA-873799  p-Y1007-JAK2   
 99  R-HSA-1059683  R-HSA-1112565  R-HSA-1112598  p-Y1054-TYK2   
 
                              Type     Id Database    Role  
 0   EntityWithAccessionedSequence    IL6  UniProt   input  
 1   EntityWithAccessionedSequence   IL6R  UniProt   input  
 2   EntityWit

In [13]:
# Columns of the complex-component records, per entity level:
#   Genes:           Complex, Entity, Name, Type, Id
#   Proteins:        Complex, Entity, Name, Type, Id, PrevId
#   Proteoforms:     Complex, Entity, Name, Type, Id, PrevId
#   Small molecules: Complex, Entity, Name, Type, Id

# One components table per level in LEVELS, followed by the small molecules.
components_records = {level: get_components(level) for level in [*LEVELS, sm]}

components_records

{'genes':           Complex         Entity              Name  \
 0   R-HSA-1006173   R-HSA-976788               CFH   
 1   R-HSA-1008206  R-HSA-1008221              NFE2   
 2   R-HSA-1008206  R-HSA-1008261              MAFF   
 3   R-HSA-1008206  R-HSA-1008212              MAFG   
 4   R-HSA-1008206  R-HSA-1008242              MAFK   
 ..            ...            ...               ...   
 95  R-HSA-1027359   R-HSA-193545            CREBBP   
 96  R-HSA-1027359   R-HSA-381325             EP300   
 97  R-HSA-1027359   R-HSA-177654    p-4S,T404-IRF3   
 98  R-HSA-1027360   R-HSA-450270  p-S477,S479-IRF7   
 99  R-HSA-1027360   R-HSA-381325             EP300   
 
                              Type      Id  
 0   EntityWithAccessionedSequence     CFH  
 1   EntityWithAccessionedSequence    NFE2  
 2   EntityWithAccessionedSequence    MAFF  
 3   EntityWithAccessionedSequence    MAFG  
 4   EntityWithAccessionedSequence    MAFK  
 ..                            ...     ...  
 95  EntityWit

In [18]:
# Read the stored gene-level graph for the "no small molecules" method.
# The method and graphs path use the names imported from `config`, the same
# ones passed to `get_interactome`, instead of a hard-coded string and a
# second spelling of the path.
json_file = get_json_filename(config.genes, no_sm, GRAPHS_PATH)

G = read_graph(json_file)
G

<networkx.classes.graph.Graph at 0x25bc90050a0>

In [19]:
# Show the node identifiers of the gene-level graph as a plain list.
print(list(G.nodes))

['IL6', 'IL6R', 'TYK2', 'JAK2', 'IL6ST', 'JAK1', 'STAT3', 'STAT1', 'CFH', 'NFE2', 'MAFF', 'MAFG', 'MAFK', 'IRF1', 'RBSN', 'EHD1', 'EHD2', 'EHD3', 'VPS45', 'RAB5A', 'DOCK3', 'DOCK8', 'DOCK2', 'DOCK7', 'CDC42', 'DOCK1', 'DOCK5', 'DOCK4', 'DOCK9', 'RAC1', 'DOCK10', 'DOCK6', 'DOCK11', 'KCNJ9', 'KCNJ15', 'GNB2', 'GNB1', 'GNG8', 'GNG2', 'KCNJ12', 'KCNJ4', 'GNGT1', 'GNG5', 'KCNJ10', 'GABBR1', 'KCNJ6', 'GNG7', 'GNG11', 'GABBR2', 'GNG4', 'GNG12', 'KCNJ3', 'KCNJ5', 'GNB5', 'GNGT2', 'GNB4', 'GNB3', 'GNG13', 'KCNJ2', 'GNG10', 'KCNJ16', 'GNG3', 'CABLES2', 'CDK5', 'ABL1', 'CABLES1', 'WEE1', 'CDK2', 'IRF9', 'STAT2', 'B4GALT6', 'B4GALT5', 'KIFC1', 'LMAN1', 'MCFD2', 'IRF2', 'CREBBP', 'EP300', 'IRF3', 'IRF7']


In [20]:
# Build one interactome per entity level for each of the three methods
# imported from config: no_sm, with_sm and with_unique_sm.
interactomes_no_sm = {level: get_interactome(level, no_sm, participant_records, components_records, GRAPHS_PATH) for level in LEVELS}
interactomes_with_sm = {level: get_interactome(level, with_sm, participant_records, components_records, GRAPHS_PATH) for level in LEVELS}
interactomes_with_unique_sm = {level: get_interactome(level, with_unique_sm, participant_records, components_records, GRAPHS_PATH) for level in LEVELS}

# The degree and bridge/articulation-point cells further down look graphs up
# by level in a mapping called `graphs`, which no cell defined, so they raised
# NameError on a fresh kernel. One of those cells filters out "SimpleEntity"
# nodes, so `graphs` is bound to interactomes that were built with small
# molecules.
# NOTE(review): confirm `with_sm` (rather than `with_unique_sm`) is the
# intended set for those analyses.
graphs = interactomes_with_sm

# Print the details of every interactome, method by method, level by level.
for interactomes in (interactomes_no_sm, interactomes_with_sm, interactomes_with_unique_sm):
    for interactome in interactomes.values():
        print_interactome_details(interactome)

Graph for genes 
Graph edges: 532
Graph nodes: 80
Graph genes nodes: 80
Graph small molecule nodes: 0
Graph for proteins 
Graph edges: 532
Graph nodes: 80
Graph proteins nodes: 80
Graph small molecule nodes: 0
Graph for proteoforms 
Graph edges: 577
Graph nodes: 88
Graph proteoforms nodes: 88
Graph small molecule nodes: 0
Graph for genes 
Graph edges: 532
Graph nodes: 137
Graph genes nodes: 80
Graph small molecule nodes: 57
Graph for proteins 
Graph edges: 532
Graph nodes: 137
Graph proteins nodes: 80
Graph small molecule nodes: 57
Graph for proteoforms 
Graph edges: 577
Graph nodes: 145
Graph proteoforms nodes: 88
Graph small molecule nodes: 57
Graph for genes 
Graph edges: 532
Graph nodes: 274
Graph genes nodes: 80
Graph small molecule nodes: 194
Graph for proteins 
Graph edges: 532
Graph nodes: 274
Graph proteins nodes: 80
Graph small molecule nodes: 194
Graph for proteoforms 
Graph edges: 577
Graph nodes: 282
Graph proteoforms nodes: 88
Graph small molecule nodes: 194


In [None]:
print("Number of edges: ")

# Per-level graphs built without small molecules, in LEVELS order.
level_graphs = [interactomes_no_sm[level] for level in LEVELS]

# Edge count, plus the node counts stored in each graph's attribute dict.
edge_counts = pd.Series([graph.size() for graph in level_graphs], index=LEVELS)
entity_counts = pd.Series(
    [interactomes_no_sm[level].graph['num_' + level] for level in LEVELS],
    index=LEVELS,
)
small_molecule_counts = pd.Series(
    [graph.graph['num_small_molecules'] for graph in level_graphs],
    index=LEVELS,
)

sizes = pd.DataFrame(
    {
        "Interactions": edge_counts,
        "Accessioned Entities": entity_counts,
        "Simple Entities": small_molecule_counts,
    },
    index=LEVELS,
)
sizes.columns.name = "Entity Type"
sizes

In [None]:
# Degree of every node, collected per entity level.
degrees = {}
for level in LEVELS:
    graph = graphs[level]
    degrees[level] = [degree for _node, degree in graph.degree()]

# Minimum, maximum and mean degree for each level.
level_degrees = [degrees[level] for level in LEVELS]
mins = pd.Series(list(map(min, level_degrees)), index=LEVELS)
maxs = pd.Series(list(map(max, level_degrees)), index=LEVELS)
avgs = pd.Series([sum(values) / len(values) for values in level_degrees], index=LEVELS)

degrees_mma = pd.DataFrame({"Min": mins, "Max": maxs, "Avg": avgs}, index=LEVELS)
degrees_mma.columns.name = "Entity Type"
degrees_mma

In [None]:
# Long-format table: one row per node, labelled with its entity level.
datas = {}
degrees_dfs = []
for level in LEVELS:
    node_degrees = degrees[level]
    datas[level] = {'ENTITY': np.repeat(level, len(node_degrees)), 'DEGREE': node_degrees}
    degrees_dfs.append(pd.DataFrame(datas[level], columns=['ENTITY', 'DEGREE']))

degrees_df = pd.concat(degrees_dfs)

# Degree distribution per level as a violin plot.
sns.set(style="ticks")
ax = sns.violinplot(x="ENTITY", y="DEGREE", data=degrees_df)

In [None]:
# Kernel density estimate of the node degrees, one curve per entity level.
# `plt` comes from the notebook's import cell; a plain loop replaces the list
# comprehension that was used only for its side effects.
fig, ax = plt.subplots()
for level in LEVELS:
    sns.kdeplot(degrees[level], label=level, ax=ax)
ax.set(xlabel="Degree", ylabel="Density")
ax.legend();

In [None]:
# Degree of every node whose 'type' attribute is not "SimpleEntity",
# collected per entity level.
degrees = {}
for level in LEVELS:
    graph = graphs[level]
    degrees[level] = [
        graph.degree(node)
        for node, node_type in graph.nodes(data='type')
        if node_type != "SimpleEntity"
    ]

# Minimum, maximum and mean degree for each level.
level_degrees = [degrees[level] for level in LEVELS]
mins = pd.Series(list(map(min, level_degrees)), index=LEVELS)
maxs = pd.Series(list(map(max, level_degrees)), index=LEVELS)
avgs = pd.Series([sum(values) / len(values) for values in level_degrees], index=LEVELS)

degrees_df = pd.DataFrame({"Min": mins, "Max": maxs, "Avg": avgs}, index=LEVELS)
degrees_df.columns.name = "Entity Type"
degrees_df

In [None]:
# Long-format table built from the current `degrees`: one row per node,
# labelled with its entity level.
datas = {}
degrees_dfs = []
for level in LEVELS:
    node_degrees = degrees[level]
    datas[level] = {'ENTITY': np.repeat(level, len(node_degrees)), 'DEGREE': node_degrees}
    degrees_dfs.append(pd.DataFrame(datas[level], columns=['ENTITY', 'DEGREE']))

degrees_df = pd.concat(degrees_dfs)

# Degree distribution per level as a violin plot.
sns.set(style="ticks")
ax = sns.violinplot(x="ENTITY", y="DEGREE", data=degrees_df)

In [None]:
# Kernel density estimate of the current `degrees`, one curve per entity level.
# `plt` comes from the notebook's import cell; a plain loop replaces the list
# comprehension that was used only for its side effects.
fig, ax = plt.subplots()
for level in LEVELS:
    sns.kdeplot(degrees[level], label=level, ax=ax)
ax.set(xlabel="Degree", ylabel="Density")
ax.legend();

## Bridges and Articulation points

In [None]:
# Count articulation points and bridges in each level's graph.
articulation_point_counts = [
    sum(1 for _ in nx.articulation_points(graphs[level])) for level in LEVELS
]
bridge_counts = [sum(1 for _ in nx.bridges(graphs[level])) for level in LEVELS]

br_ap = pd.DataFrame(
    {"Articulation Points": articulation_point_counts, "Bridges": bridge_counts},
    index=LEVELS,
)
br_ap.columns.name = "Entity Type"

br_ap

In [None]:
#plot_low_level_pathways(figures_path="figures/pathways/", graphs_path="reports/pathways/")

In [None]:
# TODO: Write function that plots and highlights the articulation points of an interaction network.
# TODO: Write function that plots and highlights also the bridges of the network.