# Abstraction of the "Signaling by EGFR" pathway (R-HSA-177929) from Reactome

#### Load libraries

In [10]:
import networkx as nx
import pandas as pd

#### Load the pathway extracted in `01_extraction_signaling_by_egfr_pathway_reactome.ipynb`

In [11]:
# Load the edge list of the EGF pathway exported from Reactome.
# The file is tab-separated and has no header row, so the columns are
# labelled 0, 1 and 2.
graph = pd.read_csv(
    "../Results/Final_pathway_egf_completed.tsv",
    sep="\t",
    header=None,
)

# Build a directed networkx graph: column 0 holds the source node, column 2
# the target node, and column 1 is stored on each edge under the key 1.
Graphtype = nx.DiGraph()
egf_pathway = nx.from_pandas_edgelist(
    graph,
    source=0,
    target=2,
    edge_attr=1,
    create_using=Graphtype,
)

# Empty frame that will receive the abstracted triples.
abstracted_graph = pd.DataFrame(
    columns=["subject", "predicate", "object", "new_relation"]
)

### Simplification of the BioPAX graph

In [None]:
# NOTE: this binds a second name to the same graph object (no copy is made),
# so the node removals below are also visible through `egf_pathway`.
simplified_graph = egf_pathway
print(simplified_graph)

# A node is dropped when its identifier contains one of these substrings.
nodes_names_to_omit = [
    "Protein",
    "Complex",
    "SmallMolecule",
    "LEFT-TO-RIGHT",
    "Stoichiometry",
    "Catalysis",
    "Control",
]

# One entry is collected per (node, matching substring) pair; a node matching
# several substrings is therefore listed several times.
nodes_to_remove = [
    node
    for node in simplified_graph.nodes()
    for node_name in nodes_names_to_omit
    if node_name in node
]
simplified_graph.remove_nodes_from(nodes_to_remove)

# Export the remaining edges (source, target, attribute dict) as a TSV file.
edgelist = list(simplified_graph.edges(data=True))
df = pd.DataFrame(edgelist, columns=["Source", "Target", "Attributes"])
df.to_csv("../Results/Simplification_pathway_egf.tsv", sep='\t', index=False)

#### Extract the "bp3:nextStep" edges from the simplified graph

In [None]:
# Keep the (source, target) pair of every edge whose attribute stored under
# the key 1 is the "bp3:nextStep" predicate.
next_step_edges = [
    (source, target)
    for source, target, attributes in simplified_graph.edges(data=True)
    if attributes[1] == 'bp3:nextStep'
]
print("Next step edges", next_step_edges, sep="\n")

#### Create a dictionary of pathway steps and associated step processes

In [None]:
# Map every pathway step that appears in a next-step edge to the nodes found
# on its outgoing edges, leaving out any node whose name contains
# "PathwayStep" (which removes the step itself and its successor steps).
dico_ps_reactions = {}

for edge in next_step_edges:
    for step in edge:
        # A step can occur in several edges; its entry is simply rebuilt.
        dico_ps_reactions[step] = [
            str(endpoint)
            for out_edge in egf_pathway.out_edges(step)
            for endpoint in out_edge
            if "PathwayStep" not in endpoint
        ]

print("Dico pathway steps reactions")
print(dico_ps_reactions)
# Number of distinct pathway steps collected.
print(len(dico_ps_reactions))


#### Link biochemical reactions by a next step if their pathway steps are successive

In [None]:
# Pathway steps (keys) and their associated reactions (values), index-aligned.
keys = list(dico_ps_reactions.keys())
values = list(dico_ps_reactions.values())

# Parallel lists describing the inferred reaction-to-reaction triples.
subject_list, predicate_list, object_list, inferred_relation = [], [], [], []

for i, step_i in enumerate(keys):
    for j in range(i, len(keys)):
        # Both steps need at least one associated reaction to be linked.
        if not values[i] or not values[j]:
            continue
        forward = (str(step_i), str(keys[j]))
        backward = (str(keys[j]), str(step_i))
        for edges in next_step_edges:
            # Test the edge in both directions; when i == j both directions
            # are the same pair and a matching edge is recorded twice.
            for step_pair, first, second in ((forward, i, j), (backward, j, i)):
                if edges != step_pair:
                    continue
                # Only the last reaction listed for each step is linked.
                br1 = values[first][-1]
                br2 = values[second][-1]
                print(br1, br2)
                subject_list.append(br1)
                predicate_list.append("http://abstraction/NextStepBiochemicalReaction/")
                object_list.append(br2)
                inferred_relation.append("yes")

#### Abstraction of the pathway steps

In [None]:
def abstract_biopax_pathway_steps(input_graph, output_graph):
    """Write the abstracted pathway to ``../Results/Abstraction_egfr_pathway.tsv``.

    The exported table starts with the inferred reaction links held in the
    module-level lists ``subject_list``, ``predicate_list``, ``object_list``
    and ``inferred_relation`` (flagged "yes" there), followed by every edge
    of ``input_graph`` whose predicate is not one of the abstracted ones
    (``bp3:stepProcess``, ``bp3:nextStep``, ``bp3:pathwayOrder``), flagged
    "no".

    Args:
        input_graph: networkx graph whose edges carry the predicate under the
            attribute key ``1``.
        output_graph: DataFrame used as the template of the exported table
            (its columns are kept). It is not modified.

    Raises:
        AssertionError: if the four module-level lists differ in length.
    """
    # Work on copies: appending to the module-level lists would duplicate the
    # untouched triples every time the function is called again.
    subjects = list(subject_list)
    predicates = list(predicate_list)
    objects = list(object_list)
    relations = list(inferred_relation)

    # read input graph as pandas dataframe
    input_graph_dataframe = nx.to_pandas_edgelist(input_graph)
    input_graph_dataframe = input_graph_dataframe[['source', 1, 'target']]

    # add triplets that do not have to be abstracted
    abstracted_predicates = ("bp3:stepProcess", "bp3:nextStep", "bp3:pathwayOrder")
    for source, predicate, target in input_graph_dataframe.itertuples(index=False, name=None):
        if any(name in predicate for name in abstracted_predicates):
            continue
        print(source, predicate, target)
        subjects.append(source)
        predicates.append(predicate)
        objects.append(target)
        relations.append("no")

    assert len(subjects) == len(predicates) == len(objects) == len(relations)

    # reindex returns a new frame sized for all triples, so the caller's
    # frame is left untouched and its current length does not matter.
    result = output_graph.reindex(range(len(subjects)))
    result["subject"] = subjects
    result["predicate"] = predicates
    result["object"] = objects
    result["new_relation"] = relations
    # export graph for visualization in Cytoscape
    result.to_csv("../Results/Abstraction_egfr_pathway.tsv", sep="\t", index=False)


# Run the abstraction on the simplified graph; the function exports the
# result to ../Results/Abstraction_egfr_pathway.tsv.
abstract_biopax_pathway_steps(simplified_graph, abstracted_graph)

#### Collapsing of subpathways

In [None]:
# Reload the abstracted triples from the tab-separated file; this replaces
# the `abstracted_graph` frame created earlier in the notebook.
abstracted_graph = pd.read_csv("../Results/Abstraction_egfr_pathway.tsv", sep="\t")

def abstract_collapse_biopax_pathway_steps(
    input_graph,
    root_pathway="reactome:Pathway1389",
    output_path="../Results/Abstraction_collapse_egfr_pathway.tsv",
):
    """Collapse the sub-pathways of the root pathway into single nodes.

    The first three columns of ``input_graph`` are read, by position, as
    (subject, predicate, object) triples. The collapsed graph contains, in
    this order:

    1. every triple whose subject is ``root_pathway``;
    2. for the reactions that are direct components of the root: their
       next-step links to other direct reactions, their display names, and a
       ``NextStepPathway`` link to each pathway that lists the next reaction
       as a component when that reaction is not a direct one;
    3. the display names of the root and of its direct sub-pathways;
    4. a ``NextStepPathway`` link between two sub-pathways for every
       next-step link found between their components.

    Args:
        input_graph: DataFrame of triples (subject, predicate, object, ...).
        root_pathway: identifier of the pathway whose content is collapsed.
        output_path: TSV file to write; pass ``None`` to skip the export.

    Returns:
        The collapsed graph as a DataFrame with the columns ``subject``,
        ``predicate``, ``object`` and ``new_relation`` (the latter is left
        empty).
    """
    next_step_reaction = "http://abstraction/NextStepBiochemicalReaction/"
    next_step_pathway = "http://abstraction/NextStepPathway/"
    display_name = "bp3:displayName"
    pathway_component = "bp3:pathwayComponent"

    # Plain tuples taken by column position: avoids integer indexing on a
    # string-labelled Series, which pandas has deprecated.
    triples = list(input_graph.iloc[:, :3].itertuples(index=False, name=None))

    subject_list = []
    predicate_list = []
    object_list = []

    def add_triple(subject, predicate, obj):
        subject_list.append(subject)
        predicate_list.append(predicate)
        object_list.append(obj)

    # One pass over the triples builds the lookups that replace repeated
    # full scans of the frame; row order is kept inside every list.
    parents_by_component = {}
    components_by_pathway = {}
    display_names_by_subject = {}
    next_step_positions = {}
    for position, (subject, predicate, obj) in enumerate(triples):
        if predicate == pathway_component:
            parents_by_component.setdefault(obj, []).append(subject)
            components_by_pathway.setdefault(subject, []).append(obj)
        elif predicate == display_name:
            display_names_by_subject.setdefault(subject, []).append(obj)
        elif predicate == next_step_reaction:
            next_step_positions.setdefault((subject, obj), []).append(position)

    # 1. triples of the root pathway, plus its direct reactions/sub-pathways
    direct_br = []
    pathways = [root_pathway]
    for subject, predicate, obj in triples:
        if subject != root_pathway:
            continue
        add_triple(subject, predicate, obj)
        if "pathwayComponent" in predicate:
            if "BiochemicalReaction" in obj:
                direct_br.append(obj)
            if "Pathway" in obj:
                pathways.append(obj)

    # 2. information about the direct reactions. Each row is examined on its
    # own: no inner loop rebinds the row being processed.
    direct_br_set = set(direct_br)
    for subject, predicate, obj in triples:
        if subject not in direct_br_set:
            continue
        if predicate == next_step_reaction:
            if obj in direct_br_set:
                # next step is another direct reaction: keep the link as is
                print(subject, predicate, obj)
                add_triple(subject, predicate, obj)
            else:
                # next step belongs to another pathway: link to that pathway
                for parent in parents_by_component.get(obj, ()):
                    add_triple(subject, next_step_pathway, parent)
        elif predicate == display_name and obj not in direct_br_set:
            add_triple(subject, predicate, obj)

    # 3. display names of the root pathway and of its direct sub-pathways
    dico_subpathways_and_components = {}
    for pathway in pathways:
        print(pathway)
        for name in display_names_by_subject.get(pathway, ()):
            add_triple(pathway, display_name, name)
        dico_subpathways_and_components[pathway] = components_by_pathway.get(pathway, [])

    # 4. link sub-pathways whose reactions are linked by a next step
    for i, pathway1 in enumerate(pathways):
        for pathway2 in pathways[i + 1:]:
            # the root pathway itself is never linked to a sub-pathway
            if root_pathway in (pathway1, pathway2):
                continue
            for val1 in dico_subpathways_and_components[pathway1]:
                for val2 in dico_subpathways_and_components[pathway2]:
                    forward = next_step_positions.get((val1, val2), ())
                    backward = next_step_positions.get((val2, val1), ())
                    # Sorting by row position emits the links in the order
                    # their supporting rows appear in the input.
                    matches = sorted(
                        [(position, 0) for position in forward]
                        + [(position, 1) for position in backward]
                    )
                    for _, is_backward in matches:
                        if is_backward:
                            add_triple(str(pathway2), next_step_pathway, str(pathway1))
                        else:
                            add_triple(str(pathway1), next_step_pathway, str(pathway2))

    output_graph = pd.DataFrame(columns=["subject", "predicate", "object", "new_relation"])
    output_graph["subject"] = subject_list
    output_graph["predicate"] = predicate_list
    output_graph["object"] = object_list
    if output_path is not None:
        output_graph.to_csv(output_path, sep="\t", index=False)
    return output_graph
    
# Collapse the sub-pathways of the abstracted graph reloaded at the top of
# this cell.
abstract_collapse_biopax_pathway_steps(abstracted_graph)