In [None]:
version = "v0.1.0"

In [None]:
# `display`/`HTML` are part of the public `IPython.display` API; importing them
# from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Widen the notebook container so that wide DataFrames are easier to read.
display(HTML("<style>.container { width:80% !important; }</style>"))

# Import neo4j DB: 4/5

Code to translate `v2.7.4_PIS-model.xlsx` into a neo4j database.

## Setup

In [None]:
from collections import defaultdict

In [None]:
import pandas as pd
import re
import numpy as np
import os

In [None]:
from py2neo import Graph, Node, Relationship

In [None]:
import helpers

In [None]:
from importlib import reload

Connect to graph via docker-compose link. See http://localhost:7474/browser/

In [None]:
graph = Graph(host="neo4j")

In [None]:
from pathlib import Path

base_path = Path("..")
parsed_path = base_path / "data" / "parsed"

## Components summary

In [None]:
# Fetch the distinct `name` property over all nodes currently in the graph
# and keep the values as a set for membership checks.
q = '''MATCH (n) RETURN DISTINCT n.name AS name'''
nodes = graph.run(q).data()
all_nodes_in_components = {record["name"] for record in nodes}

In [None]:
all_nodes_in_components

In [None]:
len(all_nodes_in_components)

In [None]:
reload(helpers)

In [None]:
# Map every reaction-participant label to the set of node names carrying it.
node_dict = {}
for label in helpers.reaction_participant_labels:
    # Nodes that are also labelled MetaboliteFamily are excluded from the
    # plain "Metabolite" set; every other label is taken as-is.
    where_clause = " WHERE NOT n:MetaboliteFamily" if label == "Metabolite" else ""
    q = "MATCH (n:%s)" % label + where_clause + " RETURN DISTINCT n.name"
    s = {record['n.name'] for record in graph.run(q).data()}
    print(label, len(s))
    node_dict[label] = s

    

In [None]:
all_species = ['ath', 'osa', 'stu', 'sly']

### Reactions sheet

In [None]:
df_edges = pd.read_csv(parsed_path / "edges-sheet.tsv", sep="\t", index_col=0)

In [None]:
df_edges.head(10)

In [None]:
df_edges[df_edges['input1_ID']=="DZ"][['input1_ID', 'input1_newID', 'input1_label']]

In [None]:
helpers.empty_strings

In [None]:
def generate_list(subdf, ids, new_name):
    """Collect per-participant columns into list-valued columns, in place.

    For each of the four participant attributes, the columns
    ``<id><suffix>`` of every prefix in ``ids`` are gathered row-wise into a
    list and stored in ``subdf`` under ``<new_name><new suffix>``
    (``_newID`` is stored as ``_name``; the other suffixes keep their name).

    Parameters
    ----------
    subdf : pandas.DataFrame
        Frame to read from and add the new columns to (mutated).
    ids : list of str
        Column prefixes to combine, e.g. ``['input1', 'input2']``.
    new_name : str
        Prefix of the generated columns, e.g. ``'substrate'``.
    """
    suffix_pairs = (
        ('_newID', '_name'),
        ('_location', '_location'),
        ('_label', '_label'),
        ('_form', '_form'),
    )
    for source_suffix, target_suffix in suffix_pairs:
        source_cols = [prefix + source_suffix for prefix in ids]
        # One list per row, ordered like `ids`.
        subdf[new_name + target_suffix] = subdf[source_cols].apply(
            lambda row: list(row.values), axis=1
        )
        
        
def rename_target(subdf, id_, new_name):
    """Copy one participant's columns to a new prefix, in place.

    The columns ``<id_>_newID``, ``<id_>_location``, ``<id_>_label`` and
    ``<id_>_form`` are copied to ``<new_name>_name``, ``<new_name>_location``,
    ``<new_name>_label`` and ``<new_name>_form``. The source columns are kept.

    Parameters
    ----------
    subdf : pandas.DataFrame
        Frame to read from and add the new columns to (mutated).
    id_ : str
        Prefix of the existing columns, e.g. ``'output1'``.
    new_name : str
        Prefix of the copied columns, e.g. ``'product'``.
    """
    for source_suffix, target_suffix in (
        ('_newID', '_name'),
        ('_location', '_location'),
        ('_label', '_label'),
        ('_form', '_form'),
    ):
        subdf[new_name + target_suffix] = subdf[id_ + source_suffix]

In [None]:
def get_x_nodes(df, x):
    """Return the index labels of rows in which any participant ID is in ``x``.

    The ``_newID`` columns of ``input1``, ``input2``, ``input3`` and
    ``output1`` are checked for membership in ``x``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the four ``*_newID`` columns.
    x : container
        Identifiers to look for (anything supporting ``in``).

    Returns
    -------
    set
        Index labels of the matching rows.
    """
    id_columns = [prefix + "_newID" for prefix in ('input1', 'input2', 'input3', 'output1')]
    return {
        idx
        for idx, record in df.iterrows()
        if any(record[col] in x for col in id_columns)
    }

In [None]:
def number_input_different(df, catalyst=False):
    '''Build substrate/catalyst/product columns for rows with two or three inputs.

    Rows are split on whether ``input3_newID`` is missing. The inputs are
    gathered into list-valued ``substrate_*`` columns (via ``generate_list``).
    If ``catalyst`` is True, the last filled "input" column (``input2`` for
    two-input rows, ``input3`` for three-input rows) is instead copied to
    ``catalyst_*`` columns. ``output1`` is always copied to ``product_*``.

    Parameters
    ----------
    df : pandas.DataFrame
        Reaction rows with ``input1``..``input3`` and ``output1`` columns.
    catalyst : bool, default False
        Whether the last input column holds the catalyst.

    Returns
    -------
    pandas.DataFrame
        A new frame: the two-input rows followed by the three-input rows,
        with the added columns. ``df`` itself is not modified.
    '''
    has_third_input = df["input3_newID"].notna()
    subdf2 = df[~has_third_input].copy()  # two inputs
    subdf3 = df[has_third_input].copy()   # three inputs

    if catalyst:
        # two inputs, input2 -> catalyst
        generate_list(subdf2, ['input1'], 'substrate')
        rename_target(subdf2, 'input2', 'catalyst')

        # three inputs, input3 -> catalyst
        generate_list(subdf3, ['input1', 'input2'], 'substrate')
        rename_target(subdf3, 'input3', 'catalyst')
    else:
        generate_list(subdf2, ['input1', 'input2'], 'substrate')
        generate_list(subdf3, ['input1', 'input2', 'input3'], 'substrate')

    # combine; DataFrame.append was removed in pandas 2.0, pd.concat is the
    # equivalent that works on old and new pandas alike.
    new_subdf = pd.concat([subdf2, subdf3])
    rename_target(new_subdf, 'output1', 'product')

    return new_subdf

In [None]:
# Column-name groups used when selecting columns for the exported TSV files.
participant_fields = ['_name', '_label', '_form', '_location']

substrate_cols = ['substrate' + field for field in participant_fields]
catalyst_cols = ['catalyst' + field for field in participant_fields]
product_cols = ['product' + field for field in participant_fields]

# Columns describing the reaction itself; they are written to every file.
reaction_standard_columns = [
    'AddedBy',
    'Species',
    'AdditionalInfo',
    'external_links',
    'trust_level',
    'ModelV',
    'ReactionEffect',
    'reaction_type',
    'Modifications',
    'reaction_id',
]

# Add reactions

In [None]:
def pretty_print_result(t, df, qr, input_type, multiplier=1):
    """Print one summary line comparing expected and reported counts.

    The line shows ``t``, the number of rows in ``df`` and the statistic
    reported by ``qr``. A warning is appended when the statistic differs
    from ``df.shape[0] * multiplier``.

    Parameters
    ----------
    t : str
        Label printed at the start of the line.
    df : object with a ``shape`` attribute
        The rows that were sent to the database.
    qr : object with a ``stats()`` method returning a mapping
        Result of the executed query.
    input_type : str
        ``'nodes'`` or ``'relationships'`` are looked up as
        ``'<input_type>_created'``; any other value is used as the key as-is.
    multiplier : int, default 1
        Expected number of created items per row of ``df``.
    """
    if input_type in ['nodes', 'relationships']:
        key = input_type + '_created'
    else:
        key = input_type

    # Best effort: a missing key or an unusable result counts as 0.
    # `except Exception` (instead of a bare `except`) keeps KeyboardInterrupt
    # and SystemExit propagating.
    try:
        stat = qr.stats()[key]
    except Exception:
        stat = 0

    expected = df.shape[0] * multiplier
    print(f"{t:20}\t{df.shape[0]:3}\t{stat:3}", end="")
    if expected == stat:
        print()
    elif expected < stat:
        print(f"\t**too many {input_type} created**")
    else:
        print(f"\t**not all {input_type} created**")

In [None]:
df_edges.index.duplicated().sum()

## binding / oligomerisation

In [None]:
key = 'binding/oligomerisation'
reaction_type = "BINDING_OGLIMERISATION"
subdf = df_edges.loc[df_edges['reaction_type'] == key].copy()
print(reaction_type, subdf.shape[0])

In [None]:
binding_wo_catalyst = subdf.loc[subdf['Modifications'] != 'with catalyst']
binding_w_catalyst = subdf.loc[subdf['Modifications'] == 'with catalyst']

In [None]:
subdf_wo_catalyst = number_input_different(binding_wo_catalyst)
subdf_w_catalyst = number_input_different(binding_w_catalyst, catalyst=True)

In [None]:
# make reaction nodes
# Write the reaction-level columns of every binding row to the import folder.
f = f"{reaction_type}-reaction.tsv"
subdf[reaction_standard_columns].to_csv(f"../data/import/{f}", sep="\t", index=None)
# NOTE(review): presumably builds a query that loads the file above and creates
# one Reaction node per line, named after `reaction_id` - confirm in helpers.
query = helpers.reaction_node_query(f, name="line.reaction_id")

qr = graph.run(query)
# Compare the reported number of created nodes with the number of rows.
pretty_print_result('Reaction', subdf, qr, 'nodes')

### Without catalyst

In [None]:
explode_cols = substrate_cols
exploded_new_subdf = helpers.unnesting(subdf_wo_catalyst, explode_cols)#.drop_duplicates()

In [None]:
reload(helpers)

In [None]:
# binding substrate edges
want_cols = reaction_standard_columns + substrate_cols

for t, this_subdf in exploded_new_subdf.groupby("substrate_label"):
    f = f"{reaction_type}-{t}-substrate_edges.tsv"
    this_subdf[want_cols].to_csv(f"../data/import/{f}", index=None, sep="\t")
    if t in helpers.plant_node_labels:
        labels = ['FunctionalCluster', t]
    else:
        labels = t
    query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE', 
                                                    "substrate", "dne",
                                                    source_label=labels, 
                                                    target_label="Reaction", 
                                                    target_name="line.reaction_id"
                                                   )

    qr = graph.run(query)

    pretty_print_result(t, this_subdf, qr, 'relationships')


In [None]:
subdf_wo_catalyst.shape[0]

In [None]:
# binding product edges
want_cols = reaction_standard_columns + product_cols

for t, this_subdf in subdf_wo_catalyst.groupby("product_label"):
    f = f"{reaction_type}-{t}-product_edges.tsv"
    this_subdf[want_cols].to_csv(f'../data/import/{f}', sep="\t", index=None)
    if t in helpers.plant_node_labels:
        labels = ['FunctionalCluster', t]
    else:
        labels = t
    query = helpers.make_create_reaction_edge_query(f, "PRODUCT", 
                                                    "dne", "product",
                                                    source_name="line.reaction_id", 
                                                    source_label="Reaction",
                                                    target_label=labels
                                                   )
    qr = graph.run(query)
    pretty_print_result('-', this_subdf, qr, 'relationships')

### With catalyst

In [None]:
explode_cols = substrate_cols
exploded_new_subdf = helpers.unnesting(subdf_w_catalyst, explode_cols).drop_duplicates()

In [None]:
# binding substrate edges
want_cols = reaction_standard_columns + substrate_cols

for t, this_subdf in exploded_new_subdf.groupby("substrate_label"):
    f = f"{reaction_type}-wicat-{t}-substrate_edges.tsv"
    this_subdf[want_cols].to_csv(f"../data/import/{f}", index=None, sep="\t")
    if t in helpers.plant_node_labels:
        labels = ['FunctionalCluster', t]
    else:
        labels = t
    query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE', 
                                                    "substrate", "dne",
                                                    source_label=labels, 
                                                    target_label="Reaction", 
                                                    target_name="line.reaction_id"
                                                   )

    qr = graph.run(query)

    pretty_print_result(t, this_subdf, qr, 'relationships')


In [None]:
# binding catalyst edges

want_cols = reaction_standard_columns + catalyst_cols

# Each reaction has a single catalyst, so iterate over the un-exploded frame
# and group by the catalyst's own label. Grouping the exploded substrate frame
# by `substrate_label` would repeat the catalyst once per substrate and match
# it against the substrate's node label.
for t, this_subdf in subdf_w_catalyst.groupby("catalyst_label"):
    f = f"{reaction_type}-wicat-{t}-catalyst_label_edges.tsv"
    this_subdf[want_cols].to_csv(f"../data/import/{f}", index=None, sep="\t")
    if t in helpers.plant_node_labels:
        labels = ['FunctionalCluster', t]
    else:
        labels = t
    query = helpers.make_create_reaction_edge_query(f, 'ACTIVATES', 
                                                    "catalyst", "dne",
                                                    source_label=labels, 
                                                    target_label="Reaction", 
                                                    target_name="line.reaction_id"
                                                   )

    qr = graph.run(query)

    pretty_print_result(t, this_subdf, qr, 'relationships')
    

In [None]:
f = f"{reaction_type}-wicat-catalyst-product_edges.tsv"
want_cols = reaction_standard_columns + product_cols
subdf_w_catalyst[want_cols].to_csv(f'../data/import/{f}', sep="\t", index=None)

In [None]:
# binding product edges
want_cols = reaction_standard_columns + product_cols

for t, this_subdf in subdf_w_catalyst.groupby("product_label"):
    f = f"{reaction_type}-wicat-{t}-catalyst-product_edges.tsv"
    this_subdf[want_cols].to_csv(f'../data/import/{f}', sep="\t", index=None)
    if t in helpers.plant_node_labels:
        labels = ['FunctionalCluster', t]
    else:
        labels = t
    query = helpers.make_create_reaction_edge_query(f, "PRODUCT", 
                                                    "dne", "product",
                                                    source_name="line.reaction_id", 
                                                    source_label="Reaction",
                                                    target_label=labels
                                                   )
    qr = graph.run(query)
    pretty_print_result(t, this_subdf, qr, 'relationships')

## catalysis / auto-catalysis

In [None]:
key = 'catalysis'
reaction_type = 'CATALYSIS'
subdf = df_edges[df_edges['reaction_type'] == key]
print(reaction_type, subdf.shape[0])

In [None]:
subdf.duplicated().sum()

In [None]:
# make reaction nodes
f = f"{reaction_type}-reaction.tsv"
subdf[reaction_standard_columns].to_csv(f"../data/import/{f}", sep="\t", index=None)
query = helpers.reaction_node_query(f, name="line.reaction_id")

qr = graph.run(query)
pretty_print_result('Reaction', subdf, qr, 'nodes')

In [None]:
# Rows with neither a second nor a third input have no catalyst.
# Both parts are copied because columns are added to them further down;
# assigning into a plain slice raises SettingWithCopyWarning and may not stick.
no_extra_inputs = subdf['input2_ID'].isna() & subdf['input3_ID'].isna()
catalysis_wo_catalyst = subdf[no_extra_inputs].copy()
catalysis_w_catalyst = subdf[~no_extra_inputs].copy()

### With catalyst

In [None]:
subdf_w_catalyst = number_input_different(catalysis_w_catalyst, catalyst=True)
# The frame above already carries the substrate/catalyst/product columns.
# Passing it through number_input_different a second time only recomputed the
# same columns, so an independent copy is used instead.
new_subdf = subdf_w_catalyst.copy()

In [None]:
# substrate to reaction
exploded_new_subdf = helpers.unnesting(new_subdf, substrate_cols)
want_cols = reaction_standard_columns + substrate_cols
for t, this_subdf in exploded_new_subdf.groupby("substrate_label"):
    f = f"{reaction_type}-wicat-{t}-substrate_edges.tsv"
    this_subdf[want_cols].to_csv(f"../data/import/{f}", index=None, sep="\t")
    if t in helpers.plant_node_labels:
        labels = ['FunctionalCluster', t]
    else:
        labels = t    
    query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE', 
                                                    "substrate", "dne",
                                                    source_label=labels, target_label="Reaction", 
                                                    target_name="line.reaction_id"
                                                   )
    qr = graph.run(query)
    pretty_print_result(t, this_subdf, qr, 'relationships')

In [None]:
# catalyst to reaction
want_cols = reaction_standard_columns + catalyst_cols

for t, this_subdf in subdf_w_catalyst.groupby("catalyst_label"):
    f = f"{reaction_type}-wicat-{t}-catalyst_edges.tsv"
    this_subdf[want_cols].to_csv(f"../data/import/{f}", index=None, sep="\t")
    if t in helpers.plant_node_labels:
        labels = ['FunctionalCluster', t]
    else:
        labels = t        
    query = helpers.make_create_reaction_edge_query(f, 'ACTIVATES', 
                                                    "catalyst", "dne",
                                                    source_label=labels, target_label="Reaction", 
                                                    target_name="line.reaction_id"
                                                   )
    qr = graph.run(query)
    pretty_print_result(t, this_subdf, qr, 'relationships')

In [None]:
# product to reaction
want_cols = reaction_standard_columns + product_cols
for t, this_subdf in subdf_w_catalyst.groupby("product_label"):
    f = f"{reaction_type}-wicat-{t}-product_edges.tsv"    
    this_subdf[want_cols].to_csv(f"../data/import/{f}", index=None, sep="\t")
    if t in helpers.plant_node_labels:
        labels = ['FunctionalCluster', t]
    else:
        labels = t       
    query = helpers.make_create_reaction_edge_query(f, 'PRODUCT', 
                                                    "dne", "product",
                                                    target_label=labels, source_label="Reaction", 
                                                    source_name="line.reaction_id"
                                                   )    
    qr = graph.run(query)

    pretty_print_result(t, this_subdf, qr, 'relationships')

### Without catalyst

In [None]:
rename_target(catalysis_wo_catalyst, 'output1', 'product')
rename_target(catalysis_wo_catalyst, 'input1',  'substrate')

In [None]:
catalysis_wo_catalyst['reaction_id']

In [None]:
# binding substrate edges
want_cols = reaction_standard_columns + substrate_cols

for t, this_subdf in catalysis_wo_catalyst.groupby("substrate_label"):
    f = f"{reaction_type}-{t}-substrate_edges.tsv"
    this_subdf[want_cols].to_csv(f"../data/import/{f}", index=None, sep="\t")
    if t in helpers.plant_node_labels:
        labels = ['FunctionalCluster', t]
    else:
        labels = t          
    query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE', 
                                                    "substrate", "dne",
                                                    source_label=labels, 
                                                    target_label="Reaction", 
                                                    target_name="line.reaction_id"
                                                   )

    qr = graph.run(query)

    pretty_print_result(t, this_subdf, qr, 'relationships')


In [None]:
# binding product edges
want_cols = reaction_standard_columns + product_cols

for t, this_subdf in catalysis_wo_catalyst.groupby("product_label"):
    f = f"{reaction_type}-{t}-product_edges.tsv"
    this_subdf[want_cols].to_csv(f'../data/import/{f}', sep="\t", index=None)
    if t in helpers.plant_node_labels:
        labels = ['FunctionalCluster', t]
    else:
        labels = t
    query = helpers.make_create_reaction_edge_query(f, "PRODUCT", 
                                                    "dne", "product",
                                                    source_name="line.reaction_id", 
                                                    source_label="Reaction",
                                                    target_label=labels
                                                   )
    qr = graph.run(query)
    pretty_print_result('-', this_subdf, qr, 'relationships')

In [None]:
q = '''MATCH ()-[r:SUBSTRATE {reaction_type:"catalysis"}]->() RETURN r'''
c = graph.run(q).data()
len(c) # should be ??

In [None]:
q = '''MATCH ()-[r:PRODUCT {reaction_type:"catalysis"}]->() RETURN r'''
c = graph.run(q).data()
len(c) # should be ??

## dissociation

In [None]:
key = 'dissociation'
reaction_type = 'DISSOCIATION'
subdf = df_edges[df_edges['reaction_type'] == key].copy()
print(reaction_type, subdf.shape[0])

In [None]:
subdf

In [None]:
rename_target(subdf, 'output1', 'product')
rename_target(subdf, 'input1',  'substrate')
rename_target(subdf, 'input2',  'catalyst')

In [None]:
# make reaction nodes
f = f"{reaction_type}-reaction.tsv"
subdf[reaction_standard_columns].to_csv(f"../data/import/{f}", sep="\t", index=None)
query = helpers.reaction_node_query(f, name="line.reaction_id")

qr = graph.run(query)
pretty_print_result('Reaction', subdf, qr, 'nodes')

In [None]:
# substrate to reaction edge
# The file name must not interpolate `t`: here it would be a leftover loop
# variable from an earlier section, unrelated to the dissociation rows.
f = f"{reaction_type}-substrate_edges.tsv"
want_cols = reaction_standard_columns + substrate_cols
# Export only the columns needed for this edge type.
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE',
                                                "substrate", "dne", 
                                                target_label="Reaction", 
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result("-", subdf, qr, 'relationships')

In [None]:
# catalyst to reaction edge
# The file name must not interpolate `t`: here it would be a leftover loop
# variable from an earlier section, unrelated to the dissociation rows.
f = f"{reaction_type}-catalyst_edges.tsv"
want_cols = reaction_standard_columns + catalyst_cols
# Export only the columns needed for this edge type.
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'ACTIVATES',
                                                "catalyst" , "dne", 
                                                target_label="Reaction", 
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result("-", subdf, qr, 'relationships')

In [None]:
reload(helpers)

In [None]:
# product to reaction edge
# The file name must not interpolate `t`: here it would be a leftover loop
# variable from an earlier section, unrelated to the dissociation rows.
f = f"{reaction_type}-product_edges.tsv"
want_cols = reaction_standard_columns + product_cols
# Export only the columns needed for this edge type.
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'PRODUCT',
                                                "dne" , "product", 
                                                source_name="line.reaction_id", 
                                                source_label="Reaction"
                              )
qr = graph.run(query)
pretty_print_result("-", subdf, qr, 'relationships')

## degradation

In [None]:
key = 'degradation/secretion'
reaction_type = 'DEGRADATION_SECRETION'
subdf = df_edges[df_edges['reaction_type'] == key].copy()
print(reaction_type, subdf.shape[0])

In [None]:
rename_target(subdf, 'input1',  'catalyst')
rename_target(subdf, 'input2',  'substrate')

In [None]:
# make reaction nodes
f = f"{reaction_type}-reaction.tsv"
subdf[reaction_standard_columns].to_csv(f"../data/import/{f}", sep="\t", index=None)
query = helpers.reaction_node_query(f, name="line.reaction_id")

qr = graph.run(query)
pretty_print_result('Reaction', subdf, qr, 'nodes')

In [None]:
# substrate to reaction edge

f =  f"{reaction_type}-substrate_edges.tsv" 
want_cols = reaction_standard_columns + substrate_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE',
                                                "substrate", "dne", 
                                                target_label="Reaction", 
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

In [None]:
# catalyst to reaction edge

f =  f"{reaction_type}-catalyst_edges.tsv" 
want_cols = reaction_standard_columns + catalyst_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'ACTIVATES',
                                                "catalyst", "dne", 
                                                target_label="Reaction", 
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

## translocation

In [None]:
key = 'translocation'
reaction_type = 'TRANSLOCATION'
subdf = df_edges[df_edges['reaction_type'] == key].copy()
print(reaction_type, subdf.shape[0])

In [None]:
rename_target(subdf, 'output1', 'product')
rename_target(subdf, 'input1',  'substrate')
rename_target(subdf, 'input2',  'catalyst')

In [None]:
# make reaction nodes
f = f"{reaction_type}-reaction.tsv"
subdf[reaction_standard_columns].to_csv(f"../data/import/{f}", sep="\t", index=None)
query = helpers.reaction_node_query(f, name="line.reaction_id")

qr = graph.run(query)
pretty_print_result('Reaction', subdf, qr, 'nodes')

In [None]:
# substrate to reaction edge

f =  f"{reaction_type}-substrate_edges.tsv" 
want_cols = reaction_standard_columns + substrate_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'TRANSLOCATE_FROM',
                                                "substrate", "dne", 
                                                target_label="Reaction", 
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

In [None]:
# catalyst to reaction edge

f =  f"{reaction_type}-catalyst_edges.tsv" 
want_cols = reaction_standard_columns + catalyst_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'ACTIVATES',
                                                "catalyst", "dne", 
                                                target_label="Reaction", 
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

In [None]:
# product edge
f =  f"{reaction_type}--product_edges.tsv" 
want_cols = reaction_standard_columns + product_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'TRANSLOCATE_TO',
                                                "dne" , "product", 
                                                source_name="line.reaction_id", 
                                                source_label="Reaction"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

## protein activation

In [None]:
key = 'protein activation'
reaction_type = 'PROTEIN_ACTIVATION'
subdf = df_edges[df_edges['reaction_type'] == key].copy()
print(reaction_type, subdf.shape[0])

In [None]:
rename_target(subdf, 'output1', 'product')
rename_target(subdf, 'input2',  'catalyst')
rename_target(subdf, 'input1',  'substrate')

In [None]:
# make reaction nodes
f = f"{reaction_type}-reaction.tsv"
subdf[reaction_standard_columns].to_csv(f"../data/import/{f}", sep="\t", index=None)
query = helpers.reaction_node_query(f, name="line.reaction_id")

qr = graph.run(query)
pretty_print_result('Reaction', subdf, qr, 'nodes')

In [None]:
# substrate to reaction edge

f =  f"{reaction_type}-substrate_edges.tsv" 
want_cols = reaction_standard_columns + substrate_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE',
                                                "substrate", "dne", 
                                                target_label="Reaction", 
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

In [None]:
# catalyst to reaction edge

f =  f"{reaction_type}-catalyst_edges.tsv" 
want_cols = reaction_standard_columns + catalyst_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'ACTIVATES',
                                                "catalyst", "dne", 
                                                target_label="Reaction", 
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

In [None]:
# product to reaction edge
f =  f"{reaction_type}-product_edges.tsv" 
want_cols = reaction_standard_columns + product_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'PRODUCT', "dne" , "product", 
                                                source_name="line.reaction_id", 
                                                source_label="Reaction"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

## protein deactivation

In [None]:
key = 'protein deactivation'
reaction_type = 'PROTEIN_DEACTIVATION'
subdf = df_edges[df_edges['reaction_type'] == key].copy()
print(reaction_type, subdf.shape[0])

In [None]:
rename_target(subdf, 'output1', 'product')
rename_target(subdf, 'input2',  'substrate')
rename_target(subdf, 'input1',  'catalyst')

In [None]:
# make reaction nodes
f = f"{reaction_type}-reaction.tsv"
subdf[reaction_standard_columns].to_csv(f"../data/import/{f}", sep="\t", index=None)
query = helpers.reaction_node_query(f, name="line.reaction_id")

qr = graph.run(query)
pretty_print_result('Reaction', subdf, qr, 'nodes')

In [None]:
# substrate to reaction edge

f =  f"{reaction_type}-substrate_edges.tsv" 
want_cols = reaction_standard_columns + substrate_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE',
                                                "substrate", "dne", 
                                                target_label="Reaction", 
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

In [None]:
# catalyst to reaction edge

f =  f"{reaction_type}-catalyst_edges.tsv" 
want_cols = reaction_standard_columns + catalyst_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'ACTIVATES',
                                                "catalyst", "dne", 
                                                target_label="Reaction", 
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

In [None]:
# product to reaction edge
f =  f"{reaction_type}-product_edges.tsv" 
want_cols = reaction_standard_columns + product_cols
subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.make_create_reaction_edge_query(f, 'PRODUCT', 
                                                "dne" , "product", 
                                                source_name="line.reaction_id", 
                                                source_label="Reaction"
                              )
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

## transcriptional / translational regulation

In [None]:
reaction_type = 'TRANSCRIPTIONAL_TRANSLATIONAL'
keys = ['transcriptional/translational activation', 'transcriptional/translational repression']
subdf = df_edges[df_edges['reaction_type'].isin(keys)]
print(reaction_type, subdf.shape[0])

In [None]:
def number_input_different_reverse(df):
    '''Build catalyst/substrate/product columns with the catalysts listed first.

    Rows are split on whether ``input3_newID`` is missing. The leading
    "input" columns are gathered into list-valued ``catalyst_*`` columns
    (via ``generate_list``); the last filled input (``input2`` for two-input
    rows, ``input3`` for three-input rows) is copied to ``substrate_*``.
    ``output1`` is copied to ``product_*``.

    Parameters
    ----------
    df : pandas.DataFrame
        Reaction rows with ``input1``..``input3`` and ``output1`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame: the two-input rows followed by the three-input rows,
        with the added columns. ``df`` itself is not modified.
    '''
    has_third_input = df["input3_newID"].notna()

    # two inputs, input1 -> catalyst
    subdf2 = df[~has_third_input].copy()
    generate_list(subdf2, ['input1'], 'catalyst')
    rename_target(subdf2, 'input2', 'substrate')

    # three inputs, input1, input2 -> catalyst
    subdf3 = df[has_third_input].copy()
    generate_list(subdf3, ['input1', 'input2'], 'catalyst')
    rename_target(subdf3, 'input3', 'substrate')

    # combine; DataFrame.append was removed in pandas 2.0, pd.concat is the
    # equivalent that works on old and new pandas alike.
    new_subdf = pd.concat([subdf2, subdf3])
    rename_target(new_subdf, 'output1', 'product')

    return new_subdf

In [None]:
new_subdf = number_input_different_reverse(subdf)

In [None]:
new_subdf[new_subdf['substrate_name'] != new_subdf['product_name']][["Status", "ConnID", 'substrate_name', 'product_name' ]]

In [None]:
new_subdf[(  (new_subdf['substrate_form'] == 'gene') & (new_subdf['substrate_location'] != 'nucleus')  )][["Status", "ConnID"] + substrate_cols]

In [None]:
# manualfix: every gene substrate listed by the check above is moved to the nucleus.
gene_outside_nucleus = new_subdf['substrate_form'].eq('gene') & new_subdf['substrate_location'].ne('nucleus')
new_subdf.loc[gene_outside_nucleus, 'substrate_location'] = 'nucleus'

In [None]:
# Show every row whose product is an RNA form (all columns).
# A location filter and a column subset were drafted for this cell but are disabled.
rna_product = new_subdf['product_form'].isin(['rna', 'ncRNA'])
new_subdf[rna_product]

In [None]:
# Sanity check: RNA substrates that are not located in the cytoplasm.
# The location literal was misspelled ('cytoplams'), which made the location test
# always true and listed every RNA substrate regardless of where it is.
rna_outside_cytoplasm = new_subdf['substrate_form'].isin(['rna', 'ncRNA']) & (new_subdf['substrate_location'] != 'cytoplasm')
new_subdf.loc[rna_outside_cytoplasm, ["Status", "ConnID"] + substrate_cols]

In [None]:
# Sanity check: protein products that are not located in the cytoplasm.
protein_outside_cytoplasm = new_subdf['product_form'].eq('protein') & new_subdf['product_location'].ne('cytoplasm')
new_subdf.loc[protein_outside_cytoplasm, ["Status", "ConnID"] + product_cols]

In [None]:
# manualfix: every protein product listed by the check above is moved to the cytoplasm.
protein_outside_cytoplasm = new_subdf['product_form'].eq('protein') & new_subdf['product_location'].ne('cytoplasm')
new_subdf.loc[protein_outside_cytoplasm, 'product_location'] = 'cytoplasm'

In [None]:
# Sanity check: substrates whose label is neither PlantCoding nor PlantNonCoding.
plant_labels = ["PlantCoding", "PlantNonCoding"]
unexpected_substrate = ~new_subdf['substrate_label'].isin(plant_labels)
new_subdf.loc[unexpected_substrate, ["Status", "ConnID"] + substrate_cols]

In [None]:
# Sanity check: products whose label is neither PlantCoding nor PlantNonCoding.
plant_labels = ["PlantCoding", "PlantNonCoding"]
unexpected_product = ~new_subdf['product_label'].isin(plant_labels)
new_subdf.loc[unexpected_product, ["Status", "ConnID"] + product_cols]

## Reaction nodes

In [None]:
# Spot-check two specific reactions across all participant columns.
spot_check_ids = ['rx00148', 'rx00185']
participant_cols = substrate_cols + product_cols + catalyst_cols
new_subdf.loc[new_subdf['reaction_id'].isin(spot_check_ids), participant_cols]

In [None]:
# Reaction nodes: export the standard reaction columns, then load one node per row.
f = f"{reaction_type}-reaction.tsv"
reaction_rows = new_subdf[reaction_standard_columns]
reaction_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.reaction_node_query(f, name="line.reaction_id")
qr = graph.run(query)
pretty_print_result('Reaction', new_subdf, qr, 'nodes')

## induction

In [None]:
# Activation rows only. reaction_type_nice is the slash-free variant used in file names.
edge_label = "ACTIVATES"
reaction_type = 'transcriptional/translational activation'
reaction_type_nice = 'transcriptional_translational_induction'
is_activation = new_subdf['reaction_type'].eq(reaction_type)
act_subdf = new_subdf[is_activation]

In [None]:
# Substrate -> reaction: export the substrate columns, then load them as SUBSTRATE edges.
f = f"{reaction_type_nice}-substrate_edges.tsv"
want_cols = reaction_standard_columns + substrate_cols
substrate_rows = act_subdf[want_cols]
substrate_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge target, identified by line.reaction_id; "dne" fills
# the other positional slot (presumably a placeholder, since label and name are explicit).
reaction_target = dict(target_label="Reaction", target_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE', "substrate", "dne", **reaction_target)
qr = graph.run(query)
pretty_print_result('-', act_subdf, qr, 'relationships')

In [None]:
# Catalyst -> reaction: the catalyst columns are unnested first (helpers.unnesting,
# presumably one row per catalyst), then loaded as ACTIVATES edges.
f = f"{reaction_type_nice}-catalyst_edges.tsv"
want_cols = reaction_standard_columns + catalyst_cols
exploded_new_subdf = helpers.unnesting(act_subdf, catalyst_cols)
catalyst_rows = exploded_new_subdf[want_cols]
catalyst_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge target, identified by line.reaction_id.
reaction_target = dict(target_label="Reaction", target_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'ACTIVATES', "catalyst", "dne", **reaction_target)
qr = graph.run(query)
pretty_print_result('-', exploded_new_subdf, qr, 'relationships')

In [None]:
# Reaction -> product: export the product columns, then load them as PRODUCT edges.
f = f"{reaction_type_nice}-product_edges.tsv"
want_cols = reaction_standard_columns + product_cols
product_rows = act_subdf[want_cols]
product_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge source, identified by line.reaction_id.
reaction_source = dict(source_label="Reaction", source_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'PRODUCT', "dne", "product", **reaction_source)
qr = graph.run(query)
pretty_print_result('-', act_subdf, qr, 'relationships')

## repression

In [None]:
# Repression rows only. reaction_type_nice is the slash-free variant used in file names.
reaction_type = 'transcriptional/translational repression'
reaction_type_nice = 'transcriptional_translational_repression'
is_repression = new_subdf['reaction_type'].eq(reaction_type)
inh_subdf = new_subdf[is_repression]

In [None]:
# Substrate -> reaction: export the substrate columns, then load them as SUBSTRATE edges.
f = f"{reaction_type_nice}-substrate_edges.tsv"
want_cols = reaction_standard_columns + substrate_cols
substrate_rows = inh_subdf[want_cols]
substrate_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge target, identified by line.reaction_id.
reaction_target = dict(target_label="Reaction", target_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE', "substrate", "dne", **reaction_target)
qr = graph.run(query)
pretty_print_result('-', inh_subdf, qr, 'relationships')

In [None]:
# Catalyst -> reaction: the catalyst columns are unnested first (helpers.unnesting,
# presumably one row per catalyst), then loaded as INHIBITS edges.
f = f"{reaction_type_nice}-catalyst_edges.tsv"
want_cols = reaction_standard_columns + catalyst_cols
exploded_new_subdf = helpers.unnesting(inh_subdf, catalyst_cols)
catalyst_rows = exploded_new_subdf[want_cols]
catalyst_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge target, identified by line.reaction_id.
reaction_target = dict(target_label="Reaction", target_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'INHIBITS', "catalyst", "dne", **reaction_target)
qr = graph.run(query)
pretty_print_result('-', exploded_new_subdf, qr, 'relationships')

In [None]:
# Reaction -> product: export the product columns, then load them as PRODUCT edges.
f = f"{reaction_type_nice}-product_edges.tsv"
want_cols = reaction_standard_columns + product_cols
product_rows = inh_subdf[want_cols]
product_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge source, identified by line.reaction_id.
reaction_source = dict(source_label="Reaction", source_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'PRODUCT', "dne", "product", **reaction_source)
qr = graph.run(query)
pretty_print_result('-', inh_subdf, qr, 'relationships')

## cleavage/auto-cleavage

In [None]:
#Image(filename='./reaction_types/activation/cleavage_autocleavage.png')

In [None]:
# Independent copy of the cleavage rows, so later edits do not touch df_edges.
key = 'cleavage/auto-cleavage'
reaction_type = 'CLEAVAGE_AUTOCLEAVAGE'
is_cleavage = df_edges['reaction_type'].eq(key)
subdf = df_edges[is_cleavage].copy()
print(reaction_type, len(subdf))

In [None]:
# Reaction nodes for ALL cleavage rows (with and without catalyst), despite the
# "wicat" tag in the file name: export the standard columns, then load one node per row.
f = f"{reaction_type}-wicat-reaction.tsv"
reaction_rows = subdf[reaction_standard_columns]
reaction_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.reaction_node_query(f, name="line.reaction_id")
qr = graph.run(query)
pretty_print_result('Reaction', subdf, qr, 'nodes')

In [None]:
# A cleavage row has no catalyst when neither input2 nor input3 is filled in.
no_catalyst = subdf['input2_ID'].isna() & subdf['input3_ID'].isna()

# Take explicit copies: both frames are passed to rename_target in the following
# cells, which appears to modify its frame in place. Working on bare slices of
# subdf triggers pandas' SettingWithCopyWarning; the other subsets in this
# notebook are copied for the same reason.
cleavage_wo_catalyst = subdf[no_catalyst].copy()
cleavage_w_catalyst = subdf[~no_catalyst].copy()

## with catalyst

In [None]:
# Map the raw columns onto reaction roles for the rows with a catalyst:
# output1 -> product, input1 -> substrate, input2 -> catalyst.
# NOTE(review): these rows were selected because input2 OR input3 is filled, but only
# input2 is mapped to catalyst here; confirm no row relies on input3 alone.
rename_target(cleavage_w_catalyst, 'output1', 'product')
rename_target(cleavage_w_catalyst, 'input1',  'substrate')
rename_target(cleavage_w_catalyst, 'input2',  'catalyst')

In [None]:
# Substrate -> reaction (cleavage with catalyst): export, then load as SUBSTRATE edges.
f = f"{reaction_type}-wicat-substrate_edges.tsv"
want_cols = reaction_standard_columns + substrate_cols
substrate_rows = cleavage_w_catalyst[want_cols]
substrate_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge target, identified by line.reaction_id.
reaction_target = dict(target_label="Reaction", target_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE', "substrate", "dne", **reaction_target)
qr = graph.run(query)
pretty_print_result('-', cleavage_w_catalyst, qr, 'relationships')

In [None]:
# Catalyst -> reaction (cleavage with catalyst): export, then load as ACTIVATES edges.
f = f"{reaction_type}-wicat-catalyst_edges.tsv"
want_cols = reaction_standard_columns + catalyst_cols
catalyst_rows = cleavage_w_catalyst[want_cols]
catalyst_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge target, identified by line.reaction_id.
reaction_target = dict(target_label="Reaction", target_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'ACTIVATES', "catalyst", "dne", **reaction_target)
qr = graph.run(query)
pretty_print_result('-', cleavage_w_catalyst, qr, 'relationships')

In [None]:
# Reaction -> product (cleavage with catalyst): export, then load as PRODUCT edges.
f = f"{reaction_type}-wicat-product_edges.tsv"
want_cols = reaction_standard_columns + product_cols
product_rows = cleavage_w_catalyst[want_cols]
product_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge source, identified by line.reaction_id.
reaction_source = dict(source_label="Reaction", source_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'PRODUCT', "dne", "product", **reaction_source)
qr = graph.run(query)
pretty_print_result('-', cleavage_w_catalyst, qr, 'relationships')

## w/o catalyst

In [None]:
# Map the raw columns onto reaction roles for the catalyst-free rows:
# output1 -> product, input1 -> substrate.
rename_target(cleavage_wo_catalyst, 'output1', 'product')
rename_target(cleavage_wo_catalyst, 'input1',  'substrate')

In [None]:
# Substrate -> reaction (cleavage without catalyst): export, then load as SUBSTRATE edges.
f = f"{reaction_type}-substrate_edges.tsv"
want_cols = reaction_standard_columns + substrate_cols
substrate_rows = cleavage_wo_catalyst[want_cols]
substrate_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge target, identified by line.reaction_id.
reaction_target = dict(target_label="Reaction", target_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE', "substrate", "dne", **reaction_target)
qr = graph.run(query)
pretty_print_result('-', cleavage_wo_catalyst, qr, 'relationships')

In [None]:
# Reaction -> product (cleavage without catalyst): export, then load as PRODUCT edges.
f = f"{reaction_type}-product_edges.tsv"
want_cols = reaction_standard_columns + product_cols
product_rows = cleavage_wo_catalyst[want_cols]
product_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge source, identified by line.reaction_id.
reaction_source = dict(source_label="Reaction", source_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'PRODUCT', "dne", "product", **reaction_source)
qr = graph.run(query)
pretty_print_result('-', cleavage_wo_catalyst, qr, 'relationships')

## undefined

In [None]:
#Image(filename='./reaction_types/...png')

In [None]:
# Independent copy of the 'undefined' rows, so later edits do not touch df_edges.
key = 'undefined'
reaction_type = 'UNDEFINED'
is_undefined = df_edges['reaction_type'].eq(key)
subdf = df_edges[is_undefined].copy()
print(reaction_type, len(subdf))

In [None]:
# Map the raw columns onto reaction roles for the 'undefined' rows:
# output1 -> product, input2 -> substrate, input1 -> catalyst.
rename_target(subdf, 'output1', 'product')
rename_target(subdf, 'input2',  'substrate')
rename_target(subdf, 'input1',  'catalyst')

In [None]:
# Inspect every participant column of the 'undefined' rows.
participant_cols = substrate_cols + product_cols + catalyst_cols
subdf[participant_cols]

In [None]:
# Inspect the standard reaction columns that will be written to the node file.
subdf.loc[:, reaction_standard_columns]

In [None]:
# Split by ReactionEffect; rows with any other effect value land in neither frame.
reaction_effect = subdf['ReactionEffect']
activation_subdf = subdf[reaction_effect.eq('activation')]
inhibition_subdf = subdf[reaction_effect.eq('inhibition')]

In [None]:
# Reaction nodes: export the standard reaction columns, then load one node per row.
f = f"{reaction_type}-reaction.tsv"
reaction_rows = subdf[reaction_standard_columns]
reaction_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

query = helpers.reaction_node_query(f, name="line.reaction_id")
qr = graph.run(query)
pretty_print_result('Reaction', subdf, qr, 'nodes')

In [None]:
# Substrate -> reaction (all 'undefined' rows): export, then load as SUBSTRATE edges.
f = f"{reaction_type}-wicat-substrate_edges.tsv"
want_cols = reaction_standard_columns + substrate_cols
substrate_rows = subdf[want_cols]
substrate_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge target, identified by line.reaction_id.
reaction_target = dict(target_label="Reaction", target_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'SUBSTRATE', "substrate", "dne", **reaction_target)
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

In [None]:
# Reaction -> product (all 'undefined' rows): export, then load as PRODUCT edges.
f = f"{reaction_type}-wicat-product_edges.tsv"
want_cols = reaction_standard_columns + product_cols
product_rows = subdf[want_cols]
product_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge source, identified by line.reaction_id.
reaction_source = dict(source_label="Reaction", source_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'PRODUCT', "dne", "product", **reaction_source)
qr = graph.run(query)
pretty_print_result('-', subdf, qr, 'relationships')

## activation

In [None]:
# Inspect every participant column of the activating 'undefined' rows.
participant_cols = substrate_cols + product_cols + catalyst_cols
activation_subdf[participant_cols]

In [None]:
# Catalyst -> reaction (activating 'undefined' rows): export, then load as ACTIVATES edges.
f = f"{reaction_type}-catalyst_edges.tsv"
want_cols = reaction_standard_columns + catalyst_cols
catalyst_rows = activation_subdf[want_cols]
catalyst_rows.to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge target, identified by line.reaction_id.
reaction_target = dict(target_label="Reaction", target_name="line.reaction_id")
query = helpers.make_create_reaction_edge_query(f, 'ACTIVATES', "catalyst", "dne", **reaction_target)
qr = graph.run(query)
pretty_print_result('-', activation_subdf, qr, 'relationships')

## inhibition

In [None]:
# Inspect every participant column of the inhibiting 'undefined' rows.
participant_cols = substrate_cols + product_cols + catalyst_cols
inhibition_subdf[participant_cols]

In [None]:
# Catalyst -> reaction (inhibiting 'undefined' rows): export, then load as INHIBITS edges.
# The file name carries an "inhibition" tag so this export no longer overwrites the
# activation export, which is written to f"{reaction_type}-catalyst_edges.tsv".
f = f"{reaction_type}-inhibition-catalyst_edges.tsv"
want_cols = reaction_standard_columns + catalyst_cols
inhibition_subdf[want_cols].to_csv(f"../data/import/{f}", sep="\t", index=None)

# The Reaction node is the edge target, identified by line.reaction_id; "dne" fills
# the other positional slot (presumably a placeholder, since label and name are explicit).
query = helpers.make_create_reaction_edge_query(f, 'INHIBITS',
                                                "catalyst", "dne",
                                                target_label="Reaction",
                                                target_name="line.reaction_id"
                              )
qr = graph.run(query)
pretty_print_result('-', inhibition_subdf, qr, 'relationships')

# END 