# Abstraction of the "Signaling by EGFR" pathway (R-HSA-177929) from Reactome

#### Load libraries

In [21]:
import networkx as nx
import pandas as pd

#### Load the pathway extracted in `01_extraction_signaling_by_egfr_pathway_reactome.ipynb`

In [22]:
# Empty table that will receive the abstracted graph
abstracted_graph = pd.DataFrame(columns=["subject", "predicate", "object", "new_relation"])

# Edge list of the EGF pathway extracted from Reactome (tab-separated, no header row)
graph = pd.read_csv(
    "../Results/Final_pathway_egf_completed.tsv",
    sep="\t",
    header=None,
)

# Directed networkx graph: column 0 is the source node, column 2 the target node,
# and column 1 is stored on each edge under the attribute key 1
Graphtype = nx.DiGraph()
egf_pathway = nx.from_pandas_edgelist(
    graph,
    source=0,
    target=2,
    edge_attr=1,
    create_using=Graphtype,
)

#### Extract "bp3:nextStep" edges from the original graph

In [23]:
# Keep the (source, target) node pairs of every edge whose attribute 1 is "bp3:nextStep"
next_step_edges = [
    (step_from, step_to)
    for step_from, step_to, attributes in egf_pathway.edges(data=True)
    if attributes[1] == 'bp3:nextStep'
]
print("Next step edges", next_step_edges, sep="\n")

Next step edges
[('reactome:PathwayStep14662', 'reactome:PathwayStep16677'), ('reactome:PathwayStep14662', 'reactome:PathwayStep14663'), ('reactome:PathwayStep14661', 'reactome:PathwayStep14667'), ('reactome:PathwayStep14661', 'reactome:PathwayStep15089'), ('reactome:PathwayStep14661', 'reactome:PathwayStep3294'), ('reactome:PathwayStep14661', 'reactome:PathwayStep14662'), ('reactome:PathwayStep14661', 'reactome:PathwayStep15106'), ('reactome:PathwayStep14661', 'reactome:PathwayStep14913'), ('reactome:PathwayStep14661', 'reactome:PathwayStep15391'), ('reactome:PathwayStep14658', 'reactome:PathwayStep14659'), ('reactome:PathwayStep14659', 'reactome:PathwayStep2747'), ('reactome:PathwayStep14657', 'reactome:PathwayStep14658'), ('reactome:PathwayStep14696', 'reactome:PathwayStep14698'), ('reactome:PathwayStep14696', 'reactome:PathwayStep14699'), ('reactome:PathwayStep14686', 'reactome:PathwayStep3291'), ('reactome:PathwayStep14686', 'reactome:PathwayStep14687'), ('reactome:PathwayStep1468

#### Create a dictionary of pathway steps and associated biochemical reactions

In [24]:
# Map every node that appears in a nextStep edge to the BiochemicalReaction
# nodes found on its outgoing edges (empty list when there are none)
dico_ps_reactions = dict()

for step_pair in next_step_edges:
    for step in step_pair:
        # both endpoints of each outgoing edge are inspected, as node names
        dico_ps_reactions[step] = [
            str(endpoint)
            for out_edge in egf_pathway.out_edges(step)
            for endpoint in out_edge
            if "BiochemicalReaction" in endpoint
        ]

print("Dico pathway steps reactions", dico_ps_reactions, sep="\n")


Dico pathway steps reactions
{'reactome:PathwayStep14662': ['reactome:BiochemicalReaction12392'], 'reactome:PathwayStep16677': [], 'reactome:PathwayStep14663': ['reactome:BiochemicalReaction12393'], 'reactome:PathwayStep14661': ['reactome:BiochemicalReaction12391'], 'reactome:PathwayStep14667': ['reactome:BiochemicalReaction12396'], 'reactome:PathwayStep15089': [], 'reactome:PathwayStep3294': [], 'reactome:PathwayStep15106': [], 'reactome:PathwayStep14913': [], 'reactome:PathwayStep15391': [], 'reactome:PathwayStep14658': ['reactome:BiochemicalReaction12389'], 'reactome:PathwayStep14659': ['reactome:BiochemicalReaction12390'], 'reactome:PathwayStep2747': [], 'reactome:PathwayStep14657': ['reactome:BiochemicalReaction12388'], 'reactome:PathwayStep14696': ['reactome:BiochemicalReaction12423'], 'reactome:PathwayStep14698': ['reactome:BiochemicalReaction12425'], 'reactome:PathwayStep14699': ['reactome:BiochemicalReaction12426'], 'reactome:PathwayStep14686': ['reactome:BiochemicalReaction12

#### Link biochemical reactions by a next step if their pathway steps are successive

In [25]:
# Inferred "bp3:nextStep" triples between biochemical reactions, stored as four
# parallel lists (one entry per inferred edge).
subject_list = list()
predicate_list = list()
object_list = list()
inferred_relation = list()

# Walk the nextStep edges directly instead of scanning ordered pairs of dictionary
# keys: an edge (u, v) must be found whatever the insertion order of u and v in
# dico_ps_reactions, otherwise links such as PathwayStep14657 -> PathwayStep14658
# are silently lost.
for step_from, step_to in next_step_edges:
    reactions_from = dico_ps_reactions.get(step_from, [])
    reactions_to = dico_ps_reactions.get(step_to, [])
    # Link every reaction of the first step to every reaction of the following
    # step; nothing is emitted when either step has no reaction.
    for br1 in reactions_from:
        for br2 in reactions_to:
            subject_list.append(br1)
            predicate_list.append("bp3:nextStep")
            object_list.append(br2)
            inferred_relation.append("yes")

#### Abstraction of the pathway steps

In [26]:
def abstract_biopax_pathway_steps(input_graph, output_graph,
                                  output_path="../Results/Abstraction_pathway_egf.tsv"):
    """Build the abstracted pathway table and export it as a TSV file.

    The table starts with the inferred reaction-to-reaction triples held in the
    notebook-level lists ``subject_list``, ``predicate_list``, ``object_list``
    and ``inferred_relation``. It then lists every edge of ``input_graph`` whose
    predicate does not contain ``bp3:stepProcess``, ``bp3:nextStep`` or
    ``bp3:pathwayOrder``; those rows get ``"no"`` in ``new_relation``.

    The notebook-level lists are only read, never extended, so the function can
    be called (and its cell re-run) any number of times with the same result.

    Parameters
    ----------
    input_graph : networkx graph
        Graph whose edges store their predicate under the edge-attribute key ``1``.
    output_graph : pandas.DataFrame
        Kept so existing calls keep working; it is neither read nor modified.
    output_path : str, optional
        Destination of the tab-separated export (for visualization in Cytoscape).

    Returns
    -------
    pandas.DataFrame
        Columns ``subject``, ``predicate``, ``object`` and ``new_relation``.

    Raises
    ------
    AssertionError
        If the four notebook-level lists do not have the same length.
    """
    assert len(subject_list) == len(predicate_list) == len(object_list) == len(inferred_relation)

    # predicates that are replaced by the inferred reaction-to-reaction links
    abstracted_predicates = ("bp3:stepProcess", "bp3:nextStep", "bp3:pathwayOrder")

    # sequence of biochemical reactions (copied: the module-level lists stay untouched)
    rows = list(zip(subject_list, predicate_list, object_list, inferred_relation))

    # add triplets that do not have to be abstracted
    for source, target, attributes in input_graph.edges(data=True):
        predicate = attributes[1]
        if not any(tag in predicate for tag in abstracted_predicates):
            rows.append((source, predicate, target, "no"))

    result = pd.DataFrame(rows, columns=["subject", "predicate", "object", "new_relation"])

    # export graph for visualization in Cytoscape
    result.to_csv(output_path, sep="\t", index=False)
    return result


abstracted_graph = abstract_biopax_pathway_steps(egf_pathway, abstracted_graph)