# Abstraction of the "Signaling by EGFR" pathway (R-HSA-177929) from Reactome

#### Load libraries

In [1]:
import networkx as nx
import pandas as pd

#### Load the pathway extracted in `01_extraction_signaling_by_egfr_pathway_reactome.ipynb`

In [2]:
# read edgelist of EGF pathway from Reactome
graph = pd.read_csv("../Results/Final_pathway_egf_completed.tsv", sep="\t", header=None)

# Convert it as a networkx object
Graphtype = nx.DiGraph()
egf_pathway = nx.from_pandas_edgelist(graph, source=0, target=2, edge_attr=1, create_using=Graphtype)

# create empty abstracted graph
abstracted_graph = pd.DataFrame(columns=["subject", "predicate", "object", "new_relation"])

### Simplification of the BioPAX graph

In [3]:
simplified_graph = egf_pathway
print(simplified_graph)
nodes_names_to_omit = ["Protein", "Complex", "SmallMolecule", "LEFT-TO-RIGHT", "Stoichiometry", "Catalysis", "Control"]
nodes_to_remove = list()
for node in simplified_graph.nodes():
    for node_name in nodes_names_to_omit:
        if node_name in node:
            nodes_to_remove.append(node)
simplified_graph.remove_nodes_from(nodes_to_remove)
print(simplified_graph)
nx.write_edgelist(simplified_graph, "../Results/Simplification_pathway_egf.csv")

DiGraph with 323 nodes and 611 edges
DiGraph with 180 nodes and 277 edges


#### Extract "bp3:nextStep" edges from simplified graph

In [4]:
# Select next step edges and associated nodes (pathways)
next_step_edges = [(u,v) for u,v,e in simplified_graph.edges(data=True) if e[1] == 'bp3:nextStep']
print("Next step edges")
print(next_step_edges)

Next step edges
[('reactome:PathwayStep14662', 'reactome:PathwayStep16677'), ('reactome:PathwayStep14662', 'reactome:PathwayStep14663'), ('reactome:PathwayStep14661', 'reactome:PathwayStep14667'), ('reactome:PathwayStep14661', 'reactome:PathwayStep15089'), ('reactome:PathwayStep14661', 'reactome:PathwayStep3294'), ('reactome:PathwayStep14661', 'reactome:PathwayStep14662'), ('reactome:PathwayStep14661', 'reactome:PathwayStep15106'), ('reactome:PathwayStep14661', 'reactome:PathwayStep14913'), ('reactome:PathwayStep14661', 'reactome:PathwayStep15391'), ('reactome:PathwayStep14658', 'reactome:PathwayStep14659'), ('reactome:PathwayStep14659', 'reactome:PathwayStep2747'), ('reactome:PathwayStep14657', 'reactome:PathwayStep14658'), ('reactome:PathwayStep14696', 'reactome:PathwayStep14698'), ('reactome:PathwayStep14696', 'reactome:PathwayStep14699'), ('reactome:PathwayStep14686', 'reactome:PathwayStep3291'), ('reactome:PathwayStep14686', 'reactome:PathwayStep14687'), ('reactome:PathwayStep1468

#### Create a dictionary of pathway steps and associated step processes

In [5]:
# create empty dictionary to store the BiochemicalReactions associated to each PathwayStep
dico_ps_reactions = dict()

# build dictionary
for edge in next_step_edges:
    for step in edge:
        out_nodes = egf_pathway.out_edges(step)
        dico_ps_reactions[step] = list()
        for nodes in out_nodes:
            for node in nodes:
                if not "PathwayStep" in node:
                    dico_ps_reactions[step] += [str(node)]

print("Dico pathway steps reactions")
print(dico_ps_reactions)
print(len(set(dico_ps_reactions.keys())))


Dico pathway steps reactions
{'reactome:PathwayStep14662': ['reactome:BiochemicalReaction12392'], 'reactome:PathwayStep16677': [], 'reactome:PathwayStep14663': ['reactome:BiochemicalReaction12393'], 'reactome:PathwayStep14661': ['reactome:BiochemicalReaction12391'], 'reactome:PathwayStep14667': ['reactome:BiochemicalReaction12396'], 'reactome:PathwayStep15089': [], 'reactome:PathwayStep3294': [], 'reactome:PathwayStep15106': [], 'reactome:PathwayStep14913': [], 'reactome:PathwayStep15391': [], 'reactome:PathwayStep14658': ['reactome:BiochemicalReaction12389'], 'reactome:PathwayStep14659': ['reactome:BiochemicalReaction12390'], 'reactome:PathwayStep2747': [], 'reactome:PathwayStep14657': ['reactome:BiochemicalReaction12388'], 'reactome:PathwayStep14696': ['reactome:BiochemicalReaction12423'], 'reactome:PathwayStep14698': ['reactome:BiochemicalReaction12425'], 'reactome:PathwayStep14699': ['reactome:BiochemicalReaction12426'], 'reactome:PathwayStep14686': ['reactome:BiochemicalReaction12

#### Link biochemical reactions by a next step if their pathway steps are successive

In [6]:
keys = list(dico_ps_reactions.keys())
values = list(dico_ps_reactions.values())
subject_list = list()
predicate_list = list()
object_list = list()
inferred_relation = list()
for i in range(0, len(dico_ps_reactions)):
    for j in range(i, len(dico_ps_reactions)):
        #print(keys[i], keys[j])
        for edges in next_step_edges:
            if edges == (str(keys[i]), str(keys[j])):
                if values[i] != [] and values[j] != []:
                    for value1 in values[i]:
                        br1 = value1
                    for value2 in values[j]:
                        br2 = value2
                    print(br1, br2)
                    subject_list.append(br1)
                    predicate_list.append("http://abstraction/NextStepBiochemicalReaction/")
                    object_list.append(br2)
                    inferred_relation.append("yes")
            if edges == (str(keys[j]), str(keys[i])):
                if values[i] != [] and values[j] != []:
                    for value1 in values[j]:
                        br1 = value1
                    for value2 in values[i]:
                        br2 = value2
                    print(br1, br2)
                    subject_list.append(br1)
                    predicate_list.append("http://abstraction/NextStepBiochemicalReaction/")
                    object_list.append(br2)
                    inferred_relation.append("yes")

reactome:BiochemicalReaction12392 reactome:BiochemicalReaction12393
reactome:BiochemicalReaction12391 reactome:BiochemicalReaction12392
reactome:BiochemicalReaction12386 reactome:BiochemicalReaction12392
reactome:BiochemicalReaction12387 reactome:BiochemicalReaction12392
reactome:BiochemicalReaction12391 reactome:BiochemicalReaction12396
reactome:BiochemicalReaction12396 reactome:BiochemicalReaction12397
reactome:BiochemicalReaction12395 reactome:BiochemicalReaction12396
reactome:BiochemicalReaction12389 reactome:BiochemicalReaction12390
reactome:BiochemicalReaction12388 reactome:BiochemicalReaction12389
reactome:BiochemicalReaction12387 reactome:BiochemicalReaction12388
reactome:BiochemicalReaction12423 reactome:BiochemicalReaction12425
reactome:BiochemicalReaction12423 reactome:BiochemicalReaction12426
reactome:BiochemicalReaction12413 reactome:BiochemicalReaction12414
reactome:BiochemicalReaction12414 reactome:BiochemicalReaction12415
reactome:BiochemicalReaction12410 reactome:Bioch

#### Abstraction on the pathway steps

In [8]:
def abstract_biopax_pathway_steps(input_graph, output_graph):
    # add sequence of biochemical reactions
    output_graph["subject"] = subject_list
    output_graph["predicate"] = predicate_list
    output_graph["object"] = object_list
    output_graph["new_relation"] = inferred_relation

    # read input graph as pandas dataframe 
    input_graph_dataframe = nx.to_pandas_edgelist(input_graph)
    input_graph_dataframe = input_graph_dataframe[['source', 1, 'target']]

    # add triplets that do not have to be abstracted
    for index, row in input_graph_dataframe.iterrows():
        if not "bp3:stepProcess" in row[1] and not "bp3:nextStep" in row[1] and not "bp3:pathwayOrder" in row[1]:
            print(row['source'], row[1], row['target'])
            subject_list.append(row['source'])
            predicate_list.append(row[1])
            object_list.append(row['target'])
            inferred_relation.append("no")

    assert len(subject_list) == len(predicate_list) == len(object_list) == len(inferred_relation)
    output_graph = output_graph.reindex(range(len(subject_list)))
    output_graph["subject"] = subject_list
    output_graph["predicate"] = predicate_list
    output_graph["object"] = object_list
    output_graph["new_relation"] = inferred_relation
    # export graph for visualization in Cytoscape
    output_graph.to_csv("../Results/Abstraction_pathway_egf_v2.tsv", sep="\t", index=False)


abstract_biopax_pathway_steps(simplified_graph, abstracted_graph)

reactome:Pathway2275 bp3:pathwayComponent reactome:BiochemicalReaction12383
reactome:Pathway2275 bp3:pathwayComponent reactome:BiochemicalReaction12384
reactome:Pathway2275 bp3:pathwayComponent reactome:Pathway2277
reactome:Pathway2275 bp3:pathwayComponent reactome:BiochemicalReaction12385
reactome:Pathway2275 bp3:pathwayComponent reactome:Pathway2276
reactome:Pathway2275 bp3:pathwayComponent reactome:BiochemicalReaction12381
reactome:Pathway2275 bp3:pathwayComponent reactome:BiochemicalReaction12386
reactome:Pathway2275 bp3:pathwayComponent reactome:Pathway2280
reactome:Pathway2275 bp3:pathwayComponent reactome:BiochemicalReaction12382
reactome:Pathway2275 bp3:pathwayComponent reactome:BiochemicalReaction12387
reactome:Pathway2275 bp3:pathwayComponent reactome:Pathway2279
reactome:Pathway2275 bp3:pathwayComponent reactome:Pathway2278
reactome:BiochemicalReaction12383 bp3:displayName FAM83B, (FAM83A, FAM83D) bind EGFR
reactome:BiochemicalReaction12384 bp3:displayName EGFR binds EGF lig

#### Collapsing of subpathways

In [None]:
abstracted_graph = pd.read_csv("../Results/Abstraction_pathway_egf_v2.tsv", sep="\t")

def abstract_collapse_biopax_pathway_steps(input_graph):
    output_graph = pd.DataFrame(columns=["subject", "predicate", "object", "new_relation"])
    subject_list = list()
    predicate_list = list()
    object_list = list()
    direct_br = list()
    pathways = ['reactome:Pathway2275']
    
    for index, row in input_graph.iterrows():
        # get triplets of root pathway
        if row[0] == "reactome:Pathway2275":
            subject_list.append(row[0])
            predicate_list.append(row[1])
            object_list.append(row[2])
            # get direct biochemical reactions
            if "pathwayComponent" in row[1] and "BiochemicalReaction" in row[2]:
                direct_br.append(row[2])
            if "pathwayComponent" in row[1] and "Pathway" in row[2]:
                pathways.append(row[2])
    
    # get the subpathways associated to the next steps of the BR
    # get information about direct BR
    for index, row in input_graph.iterrows():
        for br in direct_br:
            if row[0] == br and row[1] == "http://abstraction/NextStepBiochemicalReaction/":
                if not row[2] not in direct_br:
                    print(row[0], row[1], row[2])
                    subject_list.append(row[0])
                    predicate_list.append(row[1])
                    object_list.append(row[2])
            if row[0] == br and row[1] == "bp3:displayName":
                if not row[2] in direct_br:
                    subject_list.append(row[0])
                    predicate_list.append(row[1])
                    object_list.append(row[2])
            if row[0] == br and row[1] == "http://abstraction/NextStepBiochemicalReaction/":
                next_step = row[2]
                # if the next step belongs to another pathway
                if not next_step in direct_br:
                    for index, row in input_graph.iterrows():
                        if row[1] == "bp3:pathwayComponent" and row[2] == next_step:
                            subject_list.append(br)
                            predicate_list.append("http://abstraction/NextStepPathway/")
                            object_list.append(row[0])   
    
    # get information about direct Pathway
    dico_subpathways_and_components = dict()
    for pathway in pathways:
        print(pathway)
        dico_subpathways_and_components[pathway] = list()
        list_components = list()
        for index, row in input_graph.iterrows():
            if row[0] == pathway and row[1] == "bp3:displayName":
                subject_list.append(row[0])
                predicate_list.append(row[1])
                object_list.append(row[2])
            if row[0] == pathway and row[1] == "bp3:pathwayComponent":
                list_components.append(row[2])
        dico_subpathways_and_components[pathway] = list_components

    # link subpathways if their biochemical reactions share a next step
    for i in range(len(pathways)):
        for j in range(i + 1, len(pathways)):
            pathway1 = pathways[i]
            pathway2 = pathways[j]
            liste_br1 = dico_subpathways_and_components[pathway1]
            liste_br2 = dico_subpathways_and_components[pathway2]
            
            for val1 in liste_br1:
                for val2 in liste_br2:
                    for index, row in input_graph.iterrows():
                        if row[0] == val1 and row[1] == "http://abstraction/NextStepBiochemicalReaction/" and row[2] == val2:
                            if pathway2 != "reactome:Pathway2275" and pathway1 != "reactome:Pathway2275":
                                subject_list.append(str(pathway1))
                                predicate_list.append("http://abstraction/NextStepPathway/")
                                object_list.append(str(pathway2))
                        if row[0] == val2 and row[1] == "http://abstraction/NextStepBiochemicalReaction/" and row[2] == val1:
                            if pathway2 != "reactome:Pathway2275" and pathway1 != "reactome:Pathway2275":
                                subject_list.append(str(pathway2))
                                predicate_list.append("http://abstraction/NextStepPathway/")
                                object_list.append(str(pathway1))
            
    output_graph["subject"] = subject_list
    output_graph["predicate"] = predicate_list
    output_graph["object"] = object_list
    output_graph.to_csv("../Results/Abstraction_collapse_egf_pathway_v3.tsv", sep="\t", index=False)
    
abstract_collapse_biopax_pathway_steps(abstracted_graph)

  if row[0] == "reactome:Pathway2275":
  subject_list.append(row[0])
  predicate_list.append(row[1])
  object_list.append(row[2])
  if "pathwayComponent" in row[1] and "BiochemicalReaction" in row[2]:
  direct_br.append(row[2])
  if "pathwayComponent" in row[1] and "Pathway" in row[2]:
  pathways.append(row[2])
  if row[0] == br and row[1] == "http://abstraction/NextStepBiochemicalReaction/":
  if row[0] == br and row[1] == "bp3:displayName":
  if row[0] == br and row[1] == "http://abstraction/NextStepBiochemicalReaction/":
  if not row[2] not in direct_br:
  next_step = row[2]
  if row[1] == "bp3:pathwayComponent" and row[2] == next_step:
  object_list.append(row[0])
  print(row[0], row[1], row[2])
  subject_list.append(row[0])
  predicate_list.append(row[1])
  object_list.append(row[2])
  if not row[2] in direct_br:
  subject_list.append(row[0])
  predicate_list.append(row[1])
  object_list.append(row[2])
  if row[0] == pathway and row[1] == "bp3:displayName":
  if row[0] == pathway 

reactome:BiochemicalReaction12385 http://abstraction/NextStepBiochemicalReaction/ reactome:BiochemicalReaction12386
reactome:BiochemicalReaction12384 http://abstraction/NextStepBiochemicalReaction/ reactome:BiochemicalReaction12385
reactome:BiochemicalReaction12381 http://abstraction/NextStepBiochemicalReaction/ reactome:BiochemicalReaction12384
reactome:BiochemicalReaction12385 http://abstraction/NextStepBiochemicalReaction/ reactome:BiochemicalReaction12387
reactome:Pathway2275
reactome:Pathway2277
reactome:Pathway2276
reactome:Pathway2280
reactome:Pathway2279
reactome:Pathway2278
