# Node Degree

## Load dependencies

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker as mticker
from lib.dictionaries import read_dictionary_one_to_set
import re
import dataframe_image as dfi
from statistics import median, mean
import visualization.visualize_single_network as v
import networkx as nx

import config
from config import LEVELS, INTERACTOMES_PATH, METHODS, DATA_REACTOME_PATH, PATHWAY_GRAPHS_PATH, genes, proteins, proteoforms, COLOR_GENES_FILL, COLOR_PROTEOFORMS_FILL
from lib.networks import get_interactomes, get_combinations, get_combinations_with_pathways
from lib.networks import get_multiindex, get_sizes, get_json_filename, create_pathway_interaction_network, read_graph, \
    get_pathways_with_multiple_proteoforms, get_increase_percentage, create_pathway_interaction_networks
from lib.graph_database_access import get_pathways
from queries import QUERY_GET_PATHWAYS_BY_PROTEIN
from lib.graph_database_access import get_query_result

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

config.set_root_wd()

Initial working directory: C:\git\ProteoformNetworks\src\Python
New working directory: c:\git\ProteoformNetworks


# Degree by Interactomes

## Load interactomes

In [None]:
# Row labels shared by the per-interactome tables and series built below.
index = get_multiindex()

# One collection of interactomes per small-molecule handling method.
interactomes_no_sm, interactomes_with_sm, interactomes_with_unique_sm = get_interactomes(
    DATA_REACTOME_PATH, INTERACTOMES_PATH
)

# Flat list ordered as: no_sm-genes, no_sm-proteoforms, with_sm-genes,
# with_sm-proteoforms, with_unique_sm-genes, with_unique_sm-proteoforms.
interactomes = [
    interactomes_by_method[entity_level]
    for interactomes_by_method in (interactomes_no_sm, interactomes_with_sm, interactomes_with_unique_sm)
    for entity_level in (genes, proteoforms)
]

## Degree of all proteoforms and their genes

In [None]:
# Build a table with one row per proteoform node of the no-small-molecule
# interactome, holding its degree and the degree of a gene it maps to.
map_proteins_to_genes = read_dictionary_one_to_set(INTERACTOMES_PATH, "mapping_proteins_to_genes.tsv", col_indices=(0, 1))
map_proteins_to_genes
# (node, degree) pairs for every node of the proteoform-level interactome.
proteoform_degrees = [(node, val) for (node, val) in interactomes_no_sm[proteoforms].degree()]
proteoform_degrees

df_degrees_genes_vs_all_proteoforms = pd.DataFrame(proteoform_degrees, columns=["Proteoform", "Proteoform Degree"])
# The text before the first ';' or '-' of the proteoform id is used as the key
# into the mapping; only the first element of the mapped collection is kept
# (presumably a set, so the choice is arbitrary when several genes are mapped).
df_degrees_genes_vs_all_proteoforms["Gene"] = df_degrees_genes_vs_all_proteoforms['Proteoform'].apply(lambda proteoform: list(map_proteins_to_genes[proteoform[:re.search("[;-]", proteoform).start()]])[0])
# Degree of that gene in the gene-level interactome built without small molecules.
df_degrees_genes_vs_all_proteoforms["Gene Degree"] = df_degrees_genes_vs_all_proteoforms["Gene"].apply(lambda gene: interactomes_no_sm[genes].degree[gene])
# A proteoform is flagged as annotated when its id contains ':' or '-'.
df_degrees_genes_vs_all_proteoforms["Annotations"] = df_degrees_genes_vs_all_proteoforms['Proteoform'].apply(lambda proteoform: any((c in {':', '-'}) for c in str(proteoform)))
df_degrees_genes_vs_all_proteoforms

## Plot degree distribution in the interactomes

In [None]:
def _accessioned_degree_sequence(interactome):
    """Return the degrees of the nodes whose type does not start with "Simple", largest first."""
    kept_nodes = [
        node
        for node, attributes in interactome.nodes(data=True)
        if not attributes['type'].startswith("Simple")
    ]
    degrees = [degree for _, degree in interactome.degree(kept_nodes)]
    degrees.sort(reverse=True)
    return degrees


# One descending degree sequence per interactome, labelled with the shared index.
degree_sequences = pd.Series(
    [_accessioned_degree_sequence(interactome) for interactome in interactomes],
    index=index,
)
degree_sequences

In [None]:
# Turn the series of degree lists into a long table with one row per degree
# value, then draw a box plot of the degrees and save it.
df_seq = pd.DataFrame({"Sequence": degree_sequences})
# Move the index levels into regular columns so they can be repeated per value.
df_seq.reset_index(inplace=True)

lst_col = 'Sequence'

# Manual "explode": every non-list column is repeated once per element of the
# row's list, and the lists themselves are concatenated into a single column.
# The trailing [df_seq.columns] restores the original column order.
df_seq = pd.DataFrame({
    col: np.repeat(df_seq[col].values, df_seq[lst_col].str.len())
    for col in df_seq.columns.drop(lst_col)}
).assign(**{lst_col: np.concatenate(df_seq[lst_col].values)})[df_seq.columns]

df_seq = df_seq.rename(columns={'Sequence': 'Degree'})
df_seq
# "Small Molecules" and "Entity Level" are presumably the index level names
# produced by get_multiindex() -- confirm against lib.networks.
ax = sns.boxplot(x="Small Molecules", y="Degree", hue="Entity Level",
                 data=df_seq, palette=[COLOR_GENES_FILL, COLOR_PROTEOFORMS_FILL])
ax.set_yscale('log')
ax.figure.savefig('figures/degree_interactomes.png')

In [None]:
# Report how the average node degree changes between the gene-level and the
# proteoform-level interactomes labelled "Included" in the shared index.
degrees_genes = degree_sequences["Included", "genes"]
degrees_proteoforms = degree_sequences["Included", "proteoforms"]

mean_degree_genes = mean(degrees_genes)
mean_degree_proteoforms = mean(degrees_proteoforms)

# The percentage must be computed from the mean degrees quoted in the message;
# using len() here would compare the number of nodes instead.
change = get_increase_percentage(mean_degree_genes, mean_degree_proteoforms)
print(f"the degree in the complete proteoform network is increased in average from {mean_degree_genes} to {mean_degree_proteoforms} connections ({change} %)")

## Degree of proteoforms with annotations

In [None]:
# Split the proteoform nodes of the no-small-molecule interactome by whether
# their id contains ':' or '-' (the marker used in this notebook for annotations).
proteoforms_no_annotations = [node for node in interactomes_no_sm['proteoforms'].nodes if not any((c in {':', '-'}) for c in str(node))]
proteoforms_with_annotations = [node for node in interactomes_no_sm['proteoforms'].nodes if any((c in {':', '-'}) for c in str(node))]

# (node, degree) pairs restricted to the annotated proteoforms.
modified_proteoform_degrees = [(node, val) for (node, val) in interactomes_no_sm["proteoforms"].degree(proteoforms_with_annotations)]

df_degrees_genes_vs_proteoforms_with_annotations = pd.DataFrame(modified_proteoform_degrees, columns=["Proteoform", "Proteoform Degree"])
# The text before the first ';' or '-' is the lookup key; only the first mapped gene is kept.
df_degrees_genes_vs_proteoforms_with_annotations["Gene"] = df_degrees_genes_vs_proteoforms_with_annotations['Proteoform'].apply(lambda proteoform: list(map_proteins_to_genes[proteoform[:re.search("[;-]", proteoform).start()]])[0])
df_degrees_genes_vs_proteoforms_with_annotations["Gene Degree"] = df_degrees_genes_vs_proteoforms_with_annotations["Gene"].apply(lambda gene: interactomes_no_sm[genes].degree[gene])
# Recomputed with the same rule used for the selection above, so it is True for every row here.
df_degrees_genes_vs_proteoforms_with_annotations["Annotations"] = df_degrees_genes_vs_proteoforms_with_annotations['Proteoform'].apply(lambda proteoform: any((c in {':', '-'}) for c in str(proteoform)))
# Export a random sample of 20 rows as an image; the sample displayed below is
# drawn independently, so the two samples generally differ.
dfi.export(df_degrees_genes_vs_proteoforms_with_annotations.set_index("Proteoform").sample(20), 'figures/df_degrees_genes_vs_modified_proteoforms.png')
df_degrees_genes_vs_proteoforms_with_annotations.sample(20)

In [None]:
# Compare the mean degree of annotated proteoform nodes with the mean degree
# of the distinct genes they map to.
degrees_proteoforms_with_annotations = df_degrees_genes_vs_proteoforms_with_annotations["Proteoform Degree"]
print(f"Selected {len(degrees_proteoforms_with_annotations)} unique proteoforms.")
# Keep one row per gene so that each gene degree is counted once.
degrees_genes = df_degrees_genes_vs_proteoforms_with_annotations.drop_duplicates(subset= ["Gene"])["Gene Degree"]
print(f"Selected {len(degrees_genes)} unique genes.")

mean_degree_genes = mean(degrees_genes)
mean_degree_proteoforms = mean(degrees_proteoforms_with_annotations)

# The percentage has to describe the same quantities as the message (mean
# degrees); len() would give the change in the number of nodes instead.
change = get_increase_percentage(mean_degree_genes, mean_degree_proteoforms)
print(f"the degree of annotated proteoform nodes in the complete proteoform network changes from {mean_degree_genes} to {mean_degree_proteoforms} connections ({change} %)")

## Plot degree of proteoforms with annotations

In [None]:
# The interactomes are 0: no_sm-genes, 1: no_sm-proteoforms, 2: with_sm-genes, 3: with_sm-proteoforms, 4: with_unique_sm-genes, 5: with_unique_sm-proteoforms
# Replace the proteoform-level sequences (positions 1, 3, 5) with the degrees
# of the annotated proteoforms only; the gene-level sequences stay as they are.
# The replacement is done on a plain list: assigning through an integer key on
# a Series with a non-integer index relies on a deprecated positional fallback.
sequences = degree_sequences.tolist()
for i in [1, 3, 5]:
    sequences[i] = [degree for proteoform_node, degree in interactomes[i].degree(proteoforms_with_annotations)]

degree_sequences = pd.Series(sequences, index=index)

df_seq = pd.DataFrame({"Sequence": degree_sequences})
df_seq
# Move the index levels into regular columns so they can be repeated per value.
df_seq.reset_index(inplace=True)

lst_col = 'Sequence'

# Manual "explode": repeat every non-list column once per element of the row's
# list and concatenate the lists into one column, keeping the column order.
df_seq = pd.DataFrame({
    col: np.repeat(df_seq[col].values, df_seq[lst_col].str.len())
    for col in df_seq.columns.drop(lst_col)}
).assign(**{lst_col: np.concatenate(df_seq[lst_col].values)})[df_seq.columns]

df_seq = df_seq.rename(columns={'Sequence': 'Degree'})
df_seq
ax = sns.boxplot(x="Small Molecules", y="Degree", hue="Entity Level",
                 data=df_seq, palette=[COLOR_GENES_FILL, COLOR_PROTEOFORMS_FILL])
ax.set_yscale('log')
ax.figure.savefig('figures/degree_interactomes_proteoforms_with_annotations.png')

## Count proteoforms with higher or lower degree than genes

In [None]:
def getChange(value):
    """Return "+" for a positive value, "=" for zero and "-" for anything else."""
    if value > 0:
        return "+"
    if value == 0:
        return "="
    return "-"

# Difference between the degree of each proteoform and the degree of its gene,
# plus the sign of that difference as "+", "=" or "-".
df_degrees_genes_vs_all_proteoforms["Degree Difference"] = df_degrees_genes_vs_all_proteoforms["Proteoform Degree"] - df_degrees_genes_vs_all_proteoforms["Gene Degree"]
df_degrees_genes_vs_all_proteoforms["Change"] = df_degrees_genes_vs_all_proteoforms["Degree Difference"].apply(lambda d: getChange(d))
df_degrees_genes_vs_all_proteoforms

# Count and percentage of each kind of change over all proteoforms.
print(f"For all proteoforms: ")
df_cases = pd.DataFrame(df_degrees_genes_vs_all_proteoforms.groupby(['Change']).size(), columns=["Count"])

df_cases["Percentage"] = df_cases["Count"] / len(df_degrees_genes_vs_all_proteoforms) * 100
print(df_cases)

# Same summary restricted to the rows flagged as annotated.
print(f"For proteoforms with annotations: ")
cases = df_degrees_genes_vs_all_proteoforms.loc[df_degrees_genes_vs_all_proteoforms["Annotations"] == True].groupby(['Change']).size()
df_cases = pd.DataFrame(cases, columns=["Count"])
df_cases["Percentage"] = df_cases["Count"] / len(df_degrees_genes_vs_all_proteoforms.loc[df_degrees_genes_vs_all_proteoforms["Annotations"] == True]) * 100
print(df_cases)

# Same summary restricted to the rows not flagged as annotated.
print(f"For proteoforms no annotations: ")
df_cases = pd.DataFrame(df_degrees_genes_vs_all_proteoforms.loc[df_degrees_genes_vs_all_proteoforms["Annotations"] == False].groupby(['Change']).size(), columns=["Count"])
df_cases["Percentage"] = df_cases["Count"] / len(df_degrees_genes_vs_all_proteoforms.loc[df_degrees_genes_vs_all_proteoforms["Annotations"] == False]) * 100
print(df_cases)

## Examples of proteoforms that have higher degree than gene nodes

In [None]:
# Ascending sort, so the last 20 rows are the proteoforms with the largest
# "Degree Difference" (proteoform degree minus gene degree).
df_degrees_genes_vs_all_proteoforms.sort_values(by = "Degree Difference").tail(20)

## Example of how the degree of some proteoforms increases

In [2]:
# Example gene/protein pair; `gene` is not referenced again in this cell.
gene = "COL7A1"
protein = "Q02388"

# Get pathways in which gene X participates
# The protein accession is inserted into the query by replacing the empty
# identifier placeholder. The result is not used further in this cell.
pathways = get_query_result(QUERY_GET_PATHWAYS_BY_PROTEIN.replace("identifier:\"\"", f"identifier:\"{protein}\""))
# Pathway inspected in the rest of this example (hard-coded).
pathway = "R-HSA-8934903"
# Check plots at gene and proteoform level

graphs = create_pathway_interaction_networks(pathway, config.PATHWAY_GRAPHS_PATH)
# Show the proteoform-level nodes of the network built with unique small molecules.
list(graphs[config.with_unique_sm][proteoforms].nodes())


['P67870;',
 'P19784;',
 'P68400;',
 'Q8IVP5;',
 'Q8IVP5;00046:13',
 'sm_R-HSA-8948039_ATP',
 'sm_R-HSA-8948039_ADP',
 'O75385;',
 'Q96HS1-2;',
 'P12931-1;',
 'Q8IVP5;00046:13,00048:18',
 'sm_R-HSA-8948143_ATP',
 'sm_R-HSA-8948143_ADP',
 'Q8IVP5;00046:17',
 'sm_R-HSA-8948146_ATP',
 'sm_R-HSA-8948146_ADP',
 'Q9H1Y0;',
 'O94817;',
 'Q9GZQ8;',
 'Q9H492;']

In [3]:

# Looks up the "prevId" node attribute of the proteoform network without small
# molecules; the returned value is neither stored nor displayed, because it is
# not the last expression of the cell.
nx.get_node_attributes(graphs[config.no_sm][config.proteoforms], "prevId")

# Plot the example pathway from the prepared graphs, coloured by entity type,
# with articulation points and bridges highlighted and the toolbar hidden.
p = v.plot_pathway_all_levels(pathway, out_path=PATHWAY_GRAPHS_PATH, graphs=graphs,
                                coloring=v.Coloring.ENTITY_TYPE, outline_line_width=1,
                                node_size=12,
                                inner_plot_size=350,
                                highlight_articulations=True,
                                highlight_bridges=True,
                                toolbar_location=None)

Processing node P67870;
Processing node P19784;
Processing node P68400;
Processing node Q8IVP5;
Processing node Q8IVP5;00046:13
Processing node O75385;
Processing node Q96HS1-2;
Processing node P12931-1;
Processing node Q8IVP5;00046:13,00048:18
Processing node Q8IVP5;00046:17
Processing node Q9H1Y0;
Processing node O94817;
Processing node Q9GZQ8;
Processing node Q9H492;
Plotting network for genes
with method: no_sm
Plotting network for proteoforms
with method: no_sm
Plotting network for genes
with method: with_sm
Plotting network for proteoforms
with method: with_sm
Plotting network for genes
with method: with_unique_sm
Plotting network for proteoforms
with method: with_unique_sm
Generated figure: networks/pathways/R-HSA-8934903_ENTITY_TYPE_network.html


## Plot degree of all proteoforms compared to gene degree

In [None]:


# Marker shape per value of the "Annotations" flag.
markers = {True: "s", False: "X"}
sns.set_style("ticks")

# Single scatter plot with both groups, on log-log axes.
plot = sns.scatterplot(data=df_degrees_genes_vs_all_proteoforms, x="Proteoform Degree", y="Gene Degree", hue="Annotations", style="Annotations", markers=markers, palette=[COLOR_GENES_FILL, COLOR_PROTEOFORMS_FILL])
plot.set(xscale="log", yscale="log")

# One panel per value of "Annotations".
plot = sns.FacetGrid(df_degrees_genes_vs_all_proteoforms, col="Annotations", hue="Annotations", palette=[COLOR_GENES_FILL, COLOR_PROTEOFORMS_FILL])
plot.map(sns.scatterplot, "Proteoform Degree", "Gene Degree", alpha=.7, markers=markers)
plot.add_legend()
# Switch to log scales before adding the reference line.
plot.set(xscale="log", yscale="log")
for ax in plot.axes_dict.values():
    # Reference line y = 0.2 * x. On log axes it must be given by two points
    # with positive coordinates: (0, 0) has no position on a log scale and the
    # `slope` argument of axline is not supported with non-linear scales.
    ax.axline((1, .2), (10, 2), c=".2", ls="--", zorder=0)


In [None]:
# Plot proteoform vs gene degree for non-annotated proteoforms

# Keep only the rows whose "Annotations" flag is False.
df = df_degrees_genes_vs_all_proteoforms.loc[df_degrees_genes_vs_all_proteoforms["Annotations"] == False]

plot = sns.scatterplot(data=df, x="Proteoform Degree", y="Gene Degree", hue="Annotations", style="Annotations", markers=markers, palette=[COLOR_GENES_FILL])
plot.set(xscale="log", yscale="log")

# Draw the line x = y over the range shared by both axes.
x0, x1 = plot.get_xlim()
y0, y1 = plot.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
plot.plot(lims, lims, '-r')

In [None]:
# Plot proteoform vs gene degree for annotated proteoforms

# Keep only the rows whose "Annotations" flag is True.
df = df_degrees_genes_vs_all_proteoforms.loc[df_degrees_genes_vs_all_proteoforms["Annotations"] == True]
# Plot the filtered frame and pass `hue`, mirroring the non-annotated plot:
# seaborn only applies `palette` when a hue variable is given.
plot = sns.scatterplot(data=df, x="Proteoform Degree", y="Gene Degree", hue="Annotations", style="Annotations", markers=markers, palette=[COLOR_PROTEOFORMS_FILL])
plot.set(xscale="log", yscale="log")

# Draw the line x = y over the range shared by both axes.
x0, x1 = plot.get_xlim()
y0, y1 = plot.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
plot.plot(lims, lims, '-r')

## Plot degree of small molecules

In [None]:
# TODO: Plot degree of small molecules

# Degree by Pathways

In [None]:
# Column names of the per-pathway table; the order matches the tuple returned
# by get_pathway_tuples in this cell.
columns = [
    'Pathway',
    'Small Molecules',
    'Increase Nodes Genes-Proteins',
    'Increase Interactions Genes-Proteins',
    'Increase Nodes Proteins-Proteoforms',
    'Increase Interactions Proteins-Proteoforms'
]


def get_pathway_tuples(pathway, graphs):
    """Summarise how a pathway network grows from genes to proteins to proteoforms.

    Args:
        pathway: Pathway identifier, copied unchanged into the result.
        graphs: Mapping from entity level (genes, proteins, proteoforms) to the
            network of the pathway at that level.

    Returns:
        A tuple (pathway, method, node increase genes->proteins, interaction
        increase genes->proteins, node increase proteins->proteoforms,
        interaction increase proteins->proteoforms). The method is read from
        the "method" graph attribute of the gene-level network and the
        increases are computed with get_increase_percentage.
    """
    increase_nodes_genes_proteins = get_increase_percentage(graphs[genes].number_of_nodes(),
                                                            graphs[proteins].number_of_nodes())
    increase_links_genes_proteins = get_increase_percentage(graphs[genes].number_of_edges(),
                                                            graphs[proteins].number_of_edges())
    increase_nodes_proteins_proteoforms = get_increase_percentage(graphs[proteins].number_of_nodes(),
                                                                  graphs[proteoforms].number_of_nodes())
    # The baseline is the protein-level network; comparing the proteoform
    # network with itself would always report no change.
    increase_links_proteins_proteoforms = get_increase_percentage(graphs[proteins].number_of_edges(),
                                                                  graphs[proteoforms].number_of_edges())

    return (
        pathway,
        graphs[genes].graph["method"],
        increase_nodes_genes_proteins,
        increase_links_genes_proteins,
        increase_nodes_proteins_proteoforms,
        increase_links_proteins_proteoforms
    )


# os.path.exists is used below; `os` is not imported in the dependencies cell.
import os

# Collect one row per (pathway, method) for the pathways selected below.
data_pathways = []
# pathways = get_pathways()["stId"]
pathways = get_pathways_with_multiple_proteoforms(INTERACTOMES_PATH, PATHWAY_GRAPHS_PATH)
for pathway in pathways:
    for method in METHODS:
        pathway_graphs = {}
        for level in LEVELS:
            filename = get_json_filename(level, method, PATHWAY_GRAPHS_PATH, pathway)
            # Build the network file first when it has not been generated yet.
            if not os.path.exists(filename):
                create_pathway_interaction_network(pathway, level, method, PATHWAY_GRAPHS_PATH)
            G = read_graph(filename)
            pathway_graphs[level] = G
        # Keep only pathways whose gene network has at least one node and one
        # interaction, and fewer than 20 nodes.
        if pathway_graphs[genes].number_of_nodes() > 0\
                and pathway_graphs[genes].number_of_edges() > 0\
                and pathway_graphs[genes].number_of_nodes() < 20:
            data_pathways.append(get_pathway_tuples(pathway, pathway_graphs))

df_pathways = pd.DataFrame.from_records(data_pathways, columns=columns)
df_pathways

In [None]:
from bokeh.io import show
from visualization.visualize_single_network import Coloring, plot_pathway_all_levels
from lib.networks import create_pathway_interaction_networks

# Example nodes genes-proteins
# Sort in place (this reorders df_pathways for later cells too) and take the
# distinct pathways among the 20 rows with the largest node increase.
df_pathways.sort_values(by=['Increase Nodes Genes-Proteins'], ascending=False, inplace=True)
examples = df_pathways.head(20)["Pathway"].unique()
print(examples)

# Build and show the plot of every example pathway.
# NOTE(review): the path is hard-coded here while other cells use
# PATHWAY_GRAPHS_PATH -- confirm both point to the same directory.
for example in examples:
    graphs = create_pathway_interaction_networks(example, "resources/pathway_networks/")
    p = plot_pathway_all_levels(example, out_path="resources/pathway_networks/", graphs=graphs,
                                coloring=Coloring.ENTITY_TYPE, outline_line_width=1,
                                node_size = 12,
                                inner_plot_size=350,
                                highlight_articulations=True,
                                highlight_bridges=True,
                                toolbar_location='below')
    show(p)
df_pathways.head(20)

In [None]:
# Re-sort in place by the node increase from proteins to proteoforms and list
# the distinct pathways among the first 10 rows.
df_pathways.sort_values(by=['Increase Nodes Proteins-Proteoforms'], ascending=False, inplace=True)
examples = df_pathways.head(10)["Pathway"].unique()
print(examples)
# Display the first 20 rows with both proteins-to-proteoforms increases.
df_pathways[["Pathway", "Increase Nodes Proteins-Proteoforms", "Increase Interactions Proteins-Proteoforms"]].head(20)

In [None]:
# Re-sort in place by the interaction increase from proteins to proteoforms
# and display the first 12 rows.
df_pathways.sort_values(by=['Increase Interactions Proteins-Proteoforms'], ascending=False, inplace=True)
df_pathways[["Pathway", "Increase Nodes Proteins-Proteoforms", "Increase Interactions Proteins-Proteoforms"]].head(12)

In [None]:
# Table with degrees.
# Column names for the degree table of the interactomes: two label columns
# followed by four values for accessioned entities ("Acc. Ent.") and four for
# small molecules ("S. Mol."), matching the tuple built by get_tuples below.
# This rebinds the `columns` name used by earlier cells.
columns = [
    "Small Molecules",
    "Entity Level",
    "Q_1 Acc. Ent.",
    "Q_2 Acc. Ent.",
    "Q_3 Acc. Ent.",
    "Q_4 Acc. Ent.",
    "Q_1 S. Mol.",
    "Q_2 S. Mol.",
    "Q_3 S. Mol.",
    "Q_4 S. Mol.",
]

def get_tuples(G):
    """Return the degree quantiles of a network as one flat row.

    Nodes whose 'type' attribute starts with "Simple" are treated as small
    molecules, but only when the graph attribute 'num_small_molecules' is
    positive; otherwise every node counts as an accessioned entity.

    Returns:
        (method, level, q25, q50, q75, max of accessioned-entity degrees,
        q25, q50, q75, max of small-molecule degrees). The small-molecule
        values are all 0 when there are no small molecules.

    NOTE: a later cell of this notebook defines another `get_tuples` with a
    different parameter; the definition executed last is the one in effect.
    """
    quantile_points = [0, 0.25, 0.5, 0.75, 1]

    def is_small_molecule(node):
        return G.nodes[node]['type'].startswith("Simple")

    if G.graph['num_small_molecules'] > 0:
        accessioned = [node for node in G.nodes if not is_small_molecule(node)]
        small_molecules = [node for node in G.nodes if is_small_molecule(node)]
    else:
        accessioned = list(G.nodes)
        small_molecules = []

    accessioned_degrees = [degree for _, degree in G.degree(accessioned)]
    accessioned_quantiles = np.quantile(accessioned_degrees, quantile_points)

    if small_molecules:
        small_molecule_degrees = [degree for _, degree in G.degree(small_molecules)]
        small_molecule_quantiles = np.quantile(small_molecule_degrees, quantile_points)
    else:
        small_molecule_quantiles = [0] * 5

    # The first quantile (the minimum) is not part of the row.
    return (
        G.graph["method"],
        G.graph["level"],
        *accessioned_quantiles[1:],
        *small_molecule_quantiles[1:],
    )

In [None]:
# One row of degree quantiles per interactome, labelled with the shared index.
values = [get_tuples(I) for I in interactomes]
df_deg_interactomes = pd.DataFrame(values, columns=columns, index=index)
# DataFrame.drop returns a new frame, so the result has to be assigned for the
# two label columns to actually be removed.
df_deg_interactomes = df_deg_interactomes.drop(["Small Molecules","Entity Level"], axis=1)
dfi.export(df_deg_interactomes, "figures/df_degree_interactomes.png")
df_deg_interactomes

In [None]:
# Report, per small-molecule setting, how the median degree of accessioned
# entities changes from the gene interactome to the proteoform interactome.
# The interactome table built in this notebook has only gene and proteoform
# rows and quantile columns, so the median column "Q_2 Acc. Ent." is compared
# and a single genes-to-proteoforms increase is printed.
# NOTE(review): assumes config.SMALL_MOLECULES holds the labels of the first
# index level -- confirm against get_multiindex().
degree_column = "Q_2 Acc. Ent."
for method in config.SMALL_MOLECULES:
    print(f"Small Molecules: {method}")
    median_genes = df_deg_interactomes.loc[(method, 'genes'), degree_column]
    median_proteoforms = df_deg_interactomes.loc[(method, 'proteoforms'), degree_column]
    inc_genes_proteoforms = get_increase_percentage(median_genes, median_proteoforms)
    print(f"There is an increase of median degree: {inc_genes_proteoforms}% from genes to proteoforms")


In [None]:
# Combinations consumed one by one by get_tuples in the next definition, which
# reads positions 0, 1 and 2 as method, level and pathway.
# NOTE(review): the meaning of the argument 3 is defined in lib.networks -- confirm.
combinations = get_combinations_with_pathways(3)

# Column names of the per-pathway degree table; the order matches the tuple
# returned by get_tuples below. This rebinds `columns` once more.
columns = [
    "Small Molecules",
    "Entity Level",
    "Pathway",
    "Min. Degree Accessioned Entities",
    "Avg. Degree Accessioned Entities",
    "Max. Degree Accessioned Entities",
    "Min. Degree Small Molecules",
    "Avg. Degree Small Molecules",
    "Max. Degree Small Molecules"
]

# Directory passed to create_pathway_interaction_network by get_tuples.
out_path = "resources/pathway_networks/"

def get_tuples(combination):
    """Return min, mean and max node degree of one pathway network as a flat row.

    Args:
        combination: Indexable whose positions 0, 1 and 2 hold the method, the
            entity level and the pathway.

    Returns:
        (method, level, pathway, min/mean/max degree of accessioned entities,
        min/mean/max degree of small molecules). The small-molecule values are
        0 when the network has no small molecules.

    NOTE: this replaces the `get_tuples(G)` defined in an earlier cell.
    """
    method, level, pathway = combination[0], combination[1], combination[2]
    G = create_pathway_interaction_network(
        method=method, level=level, pathway=pathway, out_path=out_path
    )

    def is_small_molecule(node):
        return G.nodes[node]['type'].startswith("Simple")

    # Nodes are split by type only when the graph reports small molecules.
    if G.graph['num_small_molecules'] > 0:
        accessioned = [node for node in G.nodes if not is_small_molecule(node)]
        small_molecules = [node for node in G.nodes if is_small_molecule(node)]
    else:
        accessioned = list(G.nodes)
        small_molecules = []

    accessioned_degrees = [degree for _, degree in G.degree(accessioned)]

    if small_molecules:
        small_molecule_degrees = [degree for _, degree in G.degree(small_molecules)]
        small_molecule_stats = (
            min(small_molecule_degrees),
            mean(small_molecule_degrees),
            max(small_molecule_degrees),
        )
    else:
        small_molecule_stats = (0, 0, 0)

    return (
        method, level, pathway,
        min(accessioned_degrees), mean(accessioned_degrees), max(accessioned_degrees),
        *small_molecule_stats,
    )

In [None]:
# One row of degree statistics per combination; get_tuples creates the pathway
# network for each combination.
df_deg_pathways = pd.DataFrame([get_tuples(combination) for combination in combinations], columns=columns)
df_deg_pathways

In [None]:
# Degree statistics averaged over all pathways, per small-molecule setting and
# entity level.
degree_columns = [
    "Min. Degree Accessioned Entities",
    "Avg. Degree Accessioned Entities",
    "Max. Degree Accessioned Entities",
    "Min. Degree Small Molecules",
    "Avg. Degree Small Molecules",
    "Max. Degree Small Molecules"
]
grouped_by_setting = df_deg_pathways.groupby(['Small Molecules', 'Entity Level'])
df_deg_pathways_agg = grouped_by_setting.agg({column: ['mean'] for column in degree_columns})
# Aggregating with a list of functions yields two-level column labels; assign
# the plain names again.
df_deg_pathways_agg.columns = list(degree_columns)
# Relabel the rows with the index shared by the other tables.
df_pathways_grouped = df_deg_pathways_agg.set_index(get_multiindex())
df_pathways_grouped

In [None]:
import matplotlib.pyplot as plt

# White figure background for the plots created from here on.
sns.set(rc = {"figure.facecolor": "white"})

# The size given to subplots is overridden by set_size_inches right after.
fig, ax = plt.subplots(1, 1, figsize=(5, 6))
fig.set_size_inches(10, 6)
# Distribution over pathways of the average degree of accessioned entities,
# split by small-molecule setting and entity level.
f = sns.violinplot(x="Small Molecules", y="Avg. Degree Accessioned Entities", hue="Entity Level",
                   data=df_deg_pathways, palette=config.COLORS_BORDER)
#ax.yaxis.set_major_formatter(mticker.StrMethodFormatter("$10^{{{x:.0f}}}$")) # Convert ticks to powers of 10
# Place the legend outside the axes, to the right.
f.legend(loc='center left', bbox_to_anchor=(1, 0.5))
f.set_title("Distribution of Avg. Degree of Accessioned Entities in all Pathways")

In [None]:
import matplotlib.pyplot as plt

# The size given to subplots is overridden by set_size_inches right after.
fig, ax = plt.subplots(1, 1, figsize=(5, 6))
fig.set_size_inches(10, 6)
# Distribution over pathways of the average degree of small molecules, split
# by small-molecule setting and entity level.
f = sns.violinplot(x="Small Molecules", y="Avg. Degree Small Molecules", hue="Entity Level",
                   data=df_deg_pathways, palette=config.COLORS_BORDER)
#ax.yaxis.set_major_formatter(mticker.StrMethodFormatter("$10^{{{x:.0f}}}$")) # Convert ticks to powers of 10
# Place the legend outside the axes, to the right.
f.legend(loc='center left', bbox_to_anchor=(1, 0.5))
f.set_title("Distribution of Avg. Degree of Small Molecules in all Pathways")

In [None]:
# Show only the rows for proteoform-level networks built with unique small molecules.
df_deg_pathways[
    (df_deg_pathways["Small Molecules"] == 'with_unique_sm') & (df_deg_pathways["Entity Level"] == 'proteoforms')]

In [None]:
# Move the index levels into regular columns, in place. Running this cell a
# second time would apply reset_index again to the already-reset frame.
df_pathways_grouped.reset_index(inplace=True)
df_pathways_grouped

In [None]:
# Select the two average-degree columns of the interactome table and move its
# index levels into regular columns.
# NOTE(review): the cell that builds df_deg_interactomes in this notebook names
# its columns "Q_1 Acc. Ent." etc.; no "Avg. Degree ..." column is created
# there, so this selection looks stale -- confirm which table is intended.
df_interactomes_temp = df_deg_interactomes[["Avg. Degree Accessioned Entities",
    "Avg. Degree Small Molecules"]]
df_interactomes_temp.reset_index(inplace=True)
df_interactomes_temp

In [None]:
# Put the interactome averages and the per-pathway averages side by side
# (column-wise concatenation, aligned on the row labels of both frames).
df_degrees = pd.concat([df_interactomes_temp, df_pathways_grouped[["Avg. Degree Accessioned Entities",
    "Avg. Degree Small Molecules"]]], axis=1)
# Label the rows with the shared index.
df_degrees.set_index(index, inplace=True)
# Drop the first two columns by position; these are presumably the columns
# created by reset_index on df_interactomes_temp -- confirm.
df_degrees.drop(df_degrees.columns[[0,1]],axis=1,inplace=True)
df_degrees