 ## Set up

In [54]:
from lib.networks import get_multiindex
import networkx as nx
import pandas as pd
import seaborn as sns

import config
from config import LEVELS, sm, with_sm, no_sm, with_unique_sm, GRAPHS_PATH
from lib.graph_database_access import get_participants, get_components, get_pathways
from lib.networks import get_multiindex
from lib.networks import get_or_create_interaction_network, print_interactome_details, get_sizes

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

config.set_root_wd()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Working directory: c:\git\pathwayanalysisplatform\proteoformnetworks


In [55]:
# Columns of the participant records loaded for each entity level:
#
# Genes:           Pathway, Reaction, Entity, Name, Type, Id, Database, Role
# Proteins:        Pathway, Reaction, Entity, Name, Type, Id, PrevId, Database, Role
# Proteoforms:     Pathway, Reaction, Entity, Name, Type, Id, PrevId, Database, Role
# Small molecules: Pathway, Reaction, Entity, Name, Type, Id, Database, Role

# One entry per entity level, plus one entry for the small molecules.
participant_records = {level: get_participants(level, GRAPHS_PATH) for level in LEVELS}
participant_records[sm] = get_participants(sm, GRAPHS_PATH)

# Jupyter only displays the last top-level expression of a cell; a bare
# expression inside a loop is discarded. Report explicitly how many records
# were loaded for each level instead.
for level in [*LEVELS, sm]:
    print(f"{level}: {len(participant_records[level])} participant records")

In [56]:
# Columns of the complex-component records loaded for each entity level:
#
# Genes:           Complex, Entity, Name, Type, Id
# Proteins:        Complex, Entity, Name, Type, Id, PrevId
# Proteoforms:     Complex, Entity, Name, Type, Id, PrevId
# Small molecules: Complex, Entity, Name, Type, Id

# One entry per entity level, followed by the entry for the small molecules.
components_records = {
    entity_level: get_components(entity_level, GRAPHS_PATH)
    for entity_level in [*LEVELS, sm]
}

components_records

{'genes':         Unnamed: 0        Complex         Entity          Name  \
 0                0  R-HSA-1006173   R-HSA-976788           CFH   
 1                1  R-HSA-1008206  R-HSA-1008221          NFE2   
 2                2  R-HSA-1008206  R-HSA-1008261          MAFF   
 3                3  R-HSA-1008206  R-HSA-1008212          MAFG   
 4                4  R-HSA-1008206  R-HSA-1008242          MAFK   
 ...            ...            ...            ...           ...   
 108399      108399   R-NUL-997399   R-HSA-879433         S100B   
 108400      108400   R-NUL-997399   R-HSA-976740  APP(672-711)   
 108401      108401   R-NUL-997399   R-HSA-879382         HMGB1   
 108402      108402   R-NUL-997399  R-HSA-2457833  SAA1(19-122)   
 108403      108403   R-NUL-997399   R-HSA-197639          AGER   
 
                                  Type     Id  
 0       EntityWithAccessionedSequence    CFH  
 1       EntityWithAccessionedSequence   NFE2  
 2       EntityWithAccessionedSequence   

In [57]:
# One interaction network per entity level, requested with the `no_sm` method.
interactomes_no_sm = {
    entity_level: get_or_create_interaction_network(
        entity_level, no_sm, participant_records, components_records, GRAPHS_PATH
    )
    for entity_level in LEVELS
}

# Print the summary of every network, in the order of LEVELS.
for level, interactome in interactomes_no_sm.items():
    print_interactome_details(interactome)

Graph for genes 
Graph edges: 453137
Graph nodes: 10968
Graph genes nodes: 10968
Graph small molecule nodes: 0

***********************


Graph for proteins 
Graph edges: 462430
Graph nodes: 11066
Graph proteins nodes: 11066
Graph small molecule nodes: 0

***********************


Graph for proteoforms 
Graph edges: 677057
Graph nodes: 14295
Graph proteoforms nodes: 14295
Graph small molecule nodes: 0

***********************




In [None]:
# One interaction network per entity level, requested with the `with_sm` method.
interactomes_with_sm = {
    entity_level: get_or_create_interaction_network(
        entity_level, with_sm, participant_records, components_records, GRAPHS_PATH
    )
    for entity_level in LEVELS
}

# Print the summary of every network, in the order of LEVELS.
for level, interactome in interactomes_with_sm.items():
    print_interactome_details(interactome)

In [None]:
# One interaction network per entity level, requested with the
# `with_unique_sm` method.
interactomes_with_unique_sm = {
    entity_level: get_or_create_interaction_network(
        entity_level, with_unique_sm, participant_records, components_records, GRAPHS_PATH
    )
    for entity_level in LEVELS
}

# Print the summary of every network, in the order of LEVELS.
for level, interactome in interactomes_with_unique_sm.items():
    print_interactome_details(interactome)


In [None]:
# Shared row index for the summary tables of this notebook: it is reused as
# the index of df_sizes, df_degrees and br_ap.
# NOTE(review): presumably a MultiIndex over (small-molecule method, entity
# level) with one row per interactome — confirm in lib.networks.
index = get_multiindex()
index

In [None]:
# Size summary of each family of interactomes, one get_sizes() call per
# small-molecule method (none, included, unique).
nums_no_sm, nums_with_sm, nums_with_unique_sm = (
    get_sizes(interactome_family)
    for interactome_family in (
        interactomes_no_sm,
        interactomes_with_sm,
        interactomes_with_unique_sm,
    )
)

In [None]:
# Position 0, 1 and 2 of every get_sizes() result feed one column each; the
# values of the three families are concatenated in the order
# no_sm, with_sm, with_unique_sm.
size_columns = ("Interactions", "Accessioned Entities", "Small Molecules")
size_results = (nums_no_sm, nums_with_sm, nums_with_unique_sm)

df_sizes = pd.DataFrame(
    {
        column: [value for result in size_results for value in result[position]]
        for position, column in enumerate(size_columns)
    },
    index=index,
)
df_sizes.columns.name = "Sizes"
df_sizes

In [None]:
# Number of genes, proteins and proteoforms (taken from the networks without
# small molecules) followed by the number of small molecules.
# `.iloc[0]` picks the first row by position; plain `[0]` on a label-indexed
# Series relies on a deprecated positional fallback.
num_nodes = [
    *df_sizes.loc[['Not Included']]["Accessioned Entities"],
    df_sizes.loc[['Included']]["Small Molecules"].iloc[0],
]

# Bar labels matching num_nodes: all entity levels plus the small molecules.
node_types = [*LEVELS, "small molecules"]

# Number of interactions per entity level without the small molecules ...
num_interactions_no_sm = df_sizes.loc[['Not Included']]["Interactions"]

# ... and with the small molecules included.
num_interactions_with_sm = df_sizes.loc[['Included']]["Interactions"]

# Shared upper y-limit of the two interaction panels: at least 800000 (the
# limit that was hard-coded before), raised automatically when a count is
# larger so that no bar is clipped.
maxy = max(800000, *num_interactions_with_sm, *num_interactions_no_sm)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from config import COLORS_BORDER

# Figure 4 layout: panel A spans the top row, panels B and C share the bottom
# row. The axes are created directly on an empty figure; creating a 2x2 grid
# with plt.subplots() first leaves four unused axes underneath on matplotlib
# versions that no longer remove overlapped axes automatically.
figure = plt.figure()
plot1 = plt.subplot2grid((2, 2), (0, 0), rowspan=1, colspan=2, fig=figure)
plot2 = plt.subplot2grid((2, 2), (1, 0), rowspan=1, colspan=1, fig=figure)
plot3 = plt.subplot2grid((2, 2), (1, 1), rowspan=1, colspan=1, fig=figure)

# A) Number of nodes per entity type.
# (The former `plt.xticks(rotation=90)` acted on the current axes, i.e. plot3,
# and was overridden there, so it never affected this panel; it is dropped.)
plot1.bar(node_types, num_nodes, color=config.COLORS_FILL, edgecolor=COLORS_BORDER)
plot1.set_title('A) Number of Entities')

# B) and C) Number of interactions without / with small molecules. Both
# panels share the same y-limit so that the bars are directly comparable.
interaction_panels = [
    (plot2, num_interactions_no_sm, 'B) Interactions without S. M.'),
    (plot3, num_interactions_with_sm, 'C) Interactions with S. M.'),
]
for panel, interaction_counts, panel_title in interaction_panels:
    panel.bar(LEVELS, interaction_counts, color=config.COLORS_FILL, edgecolor=COLORS_BORDER)
    panel.set_title(panel_title)
    panel.set_ylim([0, maxy])
    # Rotate the existing category labels instead of overwriting them with
    # set_xticklabels(), which requires fixed tick positions.
    panel.tick_params(axis="x", labelrotation=45)

# Pack the panels, save the figure and only then display it, so that the
# saved file does not depend on what the backend does to the figure on show().
plt.tight_layout()
figure.savefig('figures/figure_4.png')
plt.show()
plt.close(figure)

In [None]:
# Every interactome, grouped by small-molecule method: first the networks
# without small molecules, then with them, then with unique small molecules.
interactomes = [
    *interactomes_no_sm.values(),
    *interactomes_with_sm.values(),
    *interactomes_with_unique_sm.values(),
]


def _accessioned_degree_sequence(graph):
    """Return the degrees of the non-"Simple" nodes of `graph`, largest first.

    A node is kept when its 'type' attribute does not start with "Simple".
    """
    selected = [
        name
        for name, attributes in graph.nodes(data=True)
        if not attributes['type'].startswith("Simple")
    ]
    degrees = [degree for _, degree in graph.degree(selected)]
    degrees.sort(reverse=True)
    return degrees


degree_sequences = [_accessioned_degree_sequence(graph) for graph in interactomes]

len(degree_sequences)

In [None]:
# Degree of every node of the gene interactome without small molecules,
# listed from the lowest to the highest degree.
i = interactomes_no_sm["genes"]
df = pd.DataFrame(data=list(i.degree()), columns=['Node', 'Degree']).sort_values(by=['Degree'])
df

In [None]:
# Degree and neighbours of the GPR35 node in the gene interactome of each
# small-molecule method.
for label, family in (
    ("No SM", interactomes_no_sm),
    ("With SM", interactomes_with_sm),
    ("With Unique SM", interactomes_with_unique_sm),
):
    gene_graph = family['genes']
    print(f"(Genes, {label}) {gene_graph.degree['GPR35']}")
    print(f"Neighbours: {list(gene_graph.neighbors('GPR35'))}")


In [None]:
# Minimum, maximum and mean degree of every interactome, one row per entry of
# `index` (same order as `degree_sequences`).
mins = pd.Series(list(map(min, degree_sequences)), index=index)
maxs = pd.Series(list(map(max, degree_sequences)), index=index)
avgs = pd.Series(
    [sum(sequence) / len(sequence) for sequence in degree_sequences],
    index=index,
)

df_degrees = pd.DataFrame({"Min": mins, "Max": maxs, "Avg": avgs}, index=index)
df_degrees


In [None]:
# Long-format table with one row per node degree, consumed by the violin plot.
#
# The frame is indexed with `index` so that reset_index() turns the index
# levels into regular label columns. Without it, only a positional "index"
# column exists and the plot cannot find the "Small Molecules" and
# "Entity Level" columns it asks for.
# NOTE(review): assumes the levels of `index` are named "Small Molecules" and
# "Entity Level" — confirm in get_multiindex().
df_seq = pd.DataFrame({"Sequence": degree_sequences}, index=index).reset_index()

lst_col = 'Sequence'

# Explode the list column: repeat every label column once per degree of its
# row and flatten the degree lists into a single column.
df_seq = pd.DataFrame({
      col: np.repeat(df_seq[col].values, df_seq[lst_col].str.len())
      for col in df_seq.columns.drop(lst_col)}
    ).assign(**{lst_col: np.concatenate(df_seq[lst_col].values)})[df_seq.columns]

df_seq = df_seq.rename(columns={'Sequence': 'Degree'})
# Store log10(degree); degree 0 is mapped to 0 (via log10(1)) instead of -inf.
df_seq['Degree'] = np.log10(df_seq['Degree'].mask(df_seq['Degree'] == 0, 1))

df_seq

In [None]:
from matplotlib import ticker as mticker

# Figure 5: distribution of the (log10) node degrees per small-molecule method
# and entity level. The figure is created at its final size (10 x 6 inches)
# and the plot is drawn on `ax` explicitly rather than on the current axes.
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
# sns.violinplot returns the Axes it drew on; the name `fig5` is kept so that
# code referring to it keeps working.
fig5 = sns.violinplot(
    x="Small Molecules",
    y="Degree",
    hue="Entity Level",
    data=df_seq,
    palette=config.COLORS_BORDER,
    ax=ax,
)
ax.yaxis.set_major_formatter(mticker.StrMethodFormatter("$10^{{{x:.0f}}}$")) # Convert ticks to powers of 10
ax.legend(bbox_to_anchor=(1.0, 1), borderaxespad=0.5) # Put the legend out of the figure
# Save before showing, so the file does not depend on what show() does to the figure.
fig.savefig('figures/figure_5.png')
plt.show()

## Connected components

- Number of connected components
- Size of the largest connected component
- Average size of the connected components
- Size of the smallest connected component

In [None]:
import itertools

def getTuples(i):
    """Return one (method, level, size) tuple per connected component of `i`.

    The method and level are read from ``i.graph``; the size is the number of
    nodes of the component. Tuples go from the largest component to the
    smallest.
    """
    component_sizes = sorted(map(len, nx.connected_components(i)), reverse=True)
    return [
        (i.graph["method"], i.graph["level"], size)
        for size in component_sizes
    ]

# One list of (method, level, size) tuples per interactome, flattened into a
# single table with one row per connected component.
tuples = list(map(getTuples, interactomes))
data = [row for interactome_rows in tuples for row in interactome_rows]
df_cc = pd.DataFrame(data, columns=['Method', 'Level', 'Size'])
df_cc

In [None]:
# Requires creating a dataframe with two columns
g = sns.FacetGrid(df_cc, col="Method")

g.map(sns.stripplot, "Level", "Size", alpha=.7)
g.map(sns.boxplot, "Level", "Size")
g.add_legend()

In [None]:
import itertools

def getTuples(i):
    """Return one (method, level, relative size) tuple per connected component of `i`.

    The relative size is the number of nodes of the component divided by the
    number of nodes of the whole graph. The method and level are read from
    ``i.graph``; tuples go from the largest component to the smallest.

    NOTE: this redefines the `getTuples` of the previous section, which
    reports absolute component sizes.
    """
    component_sizes = sorted(map(len, nx.connected_components(i)), reverse=True)
    return [
        (i.graph["method"], i.graph["level"], size / i.number_of_nodes())
        for size in component_sizes
    ]

# One list of (method, level, size) tuples per interactome, flattened into a
# single table with one row per connected component. This replaces the df_cc
# built earlier in the notebook.
tuples = list(map(getTuples, interactomes))
data = [row for interactome_rows in tuples for row in interactome_rows]
df_cc = pd.DataFrame(data, columns=['Method', 'Level', 'Size'])
df_cc

In [None]:
# Requires creating a dataframe with two columns
with sns.axes_style("white"):
    g = sns.FacetGrid(df_cc, col="Method")
g.map(sns.stripplot, "Level", "Size", alpha=.7)
g.map(sns.boxplot, "Level", "Size")
g.set_axis_labels("Level", "Relative Size")
g.add_legend()

## Articulation points and bridges

In [None]:
# Number of articulation points and of bridges of every interactome, one row
# per entry of `index` (same order as `interactomes`).
articulation_point_counts = [
    sum(1 for _ in nx.articulation_points(graph)) for graph in interactomes
]
bridge_counts = [sum(1 for _ in nx.bridges(graph)) for graph in interactomes]

br_ap = pd.DataFrame(
    {"Articulation Points": articulation_point_counts, "Bridges": bridge_counts},
    index=index,
)
br_ap.columns.name = "Entity Type"

br_ap

In [None]:
# Articulation-point counts with the innermost level of the row index pivoted into columns.
br_ap["Articulation Points"].unstack()

In [None]:
# Grouped (non-stacked) bar chart of the articulation-point counts.
br_ap["Articulation Points"].unstack().plot.bar(stacked=False, color=COLORS_BORDER, title="Articulation Points")

In [None]:
# Bridge counts with the innermost level of the row index pivoted into columns.
br_ap["Bridges"].unstack()

In [None]:
# Grouped (non-stacked) bar chart of the bridge counts.
br_ap["Bridges"].unstack().plot.bar(stacked=False, color=COLORS_BORDER, title="Bridges")

In [None]:
# Load the pathway table and display its "stId" column.
# NOTE(review): "stId" presumably holds stable identifiers like the
# "R-HSA-..." id used further below — confirm against get_pathways().
df_pathways = get_pathways()
df_pathways["stId"]

## Articulation points and bridges per pathway

- [ ] Get bridges and articulation points for all pathways
- [ ] Calculate changes across the 3 levels with small molecules and with unique small molecules
- [ ] Plot the top 5 with most changes

In [None]:
from lib.networks import create_pathway_interaction_networks
from bokeh.io import show

pathway1 = "R-HSA-9673163"  # Regulation of glycolysis by fructose 2,6-bisphosphate metabolism
graphs = create_pathway_interaction_networks(pathway1, "resources/pathway_networks/")

# The loop used to read a variable `G` that is never defined in this
# notebook, so the cell failed with a NameError on a fresh kernel. Iterate
# over the graphs created above instead.
# NOTE(review): assumes `graphs` is a collection of networkx graphs — either a
# dict whose values are the graphs or a plain sequence — and that every node
# carries an 'Articulation Point' attribute; confirm in lib.networks.
pathway_graphs = graphs.values() if isinstance(graphs, dict) else graphs
for G in pathway_graphs:
    for node in G.nodes:
        print(f"{node}: {G.nodes[node]['Articulation Point']}")

In [None]:
from visualization.visualize_single_network import plot_pathway_all_levels, Coloring

# Build the plot of pathway1 from the networks in `graphs`, coloured by entity
# type, and display it with bokeh's show().
# NOTE(review): presumably draws one panel per entity level — confirm in
# visualization.visualize_single_network.
p = plot_pathway_all_levels(pathway1, out_path="resources/pathway_networks/", graphs=graphs, coloring=Coloring.ENTITY_TYPE)
show(p)