In [5]:

from dnngior import NN_Predictor
import tensorflow as tf
# import numpy as np
# import pandas as pd
# 
# 
# import cobra

In [11]:
import os, sys
from pathlib import Path
import json

path = Path.cwd()
sys.path.append(path)
file_path = os.path.join(path.parent, 'dnngior', 'files', 'NN')
output_path = os.path.join(path.parent, 'dnngior', 'files', 'custom scores')

#Tensorflow complains because the NN is not optimized perfectly
tf.get_logger().setLevel('ERROR')

## Introduction

The purpose of this notebook is to show some examples of using the neural network (NN) to make predictions based on reaction sets. We can then use those predictions as weights to gapfill models

## Making predictions using the NN

First we need to load in a NN, the NN_Predictor module has a function for this, you can provide a path to your favourite NN:

In [8]:
NN_MS = NN_Predictor.load_NN(path=file_path+'/NN_MS.h5')

Loading network at user provided path


2023-04-04 17:24:17.291943: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-04-04 17:24:17.292087: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-04 17:24:17.292116: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2023-04-04 17:24:17.292137: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2023-04-04 17:24:17.292156: W tensorflow/c

or if left empty it will currently default to the standard ModelSEED one

In [9]:
NN_MS = NN_Predictor.load_NN()

Loading Default NN (ModelSEED)


2023-04-04 17:24:19.280071: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_1_1/bias/Assign' id:563 op device:{requested: '', assigned: ''} def:{{{node dense_1_1/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_1_1/bias, dense_1_1/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-04-04 17:24:19.447134: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_1_1/kernel/v/Assign' id:916 op device:{requested: '', assigned: ''} def:{{{node dense_1_1/kernel/v/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_1_1/kernel/v, dense_1_1/kernel/v/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigge

Load in the data, the NN requires a binary list indicating presences of reactions, it is important that this is in the right order. However, you can give it a dictionary and it will put it in the right order for you.

load in the dictionary:

In [12]:
with open(file_path+'/binary_example.json', 'r') as j:
    binary_example = json.loads(j.read())

In [None]:
binary_example

and make the prediction

In [13]:
prediction = NN_Predictor.predict(input=binary_example)

Using user-provided ids
#reactions not in NN_rxn:  0
Loading Default NN (ModelSEED)


2023-04-04 17:24:39.232212: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_1_2/kernel/Assign' id:1062 op device:{requested: '', assigned: ''} def:{{{node dense_1_2/kernel/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_1_2/kernel, dense_1_2/kernel/Initializer/stateless_random_uniform)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-04-04 17:24:39.409307: W tensorflow/c/c_api.cc:291] Operation '{name:'learning_rate_2/Assign' id:1377 op device:{requested: '', assigned: ''} def:{{{node learning_rate_2/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](learning_rate_2, learning_rate_2/Initializer/initial_value)}}' was changed by setting attribute after it was run by a session. This mutation will have n

This will return a dictionary with for every reaction a prediction of how likely it is that it should be in the reaction set according to the NN based on the given reaction

In [14]:
prediction

{'rxn08713_c0': 3.210943e-05,
 'rxn03137_c0': 0.9981457,
 'rxn09195_c0': 1.880479e-05,
 'rxn01279_c0': 0.01665108,
 'rxn00720_c0': 0.002832767,
 'rxn12639_c0': 0.99999875,
 'rxn20214_c0': 6.767396e-05,
 'rxn08076_c0': 0.99979484,
 'rxn01213_c0': 0.0003554159,
 'rxn01537_c0': 0.27153924,
 'rxn03918_c0': 0.081894025,
 'rxn05252_c0': 0.99997467,
 'rxn10317_c0': 9.243675e-08,
 'rxn23042_c0': 0.9712741,
 'rxn08542_c0': 2.7705e-07,
 'rxn03886_c0': 0.11481701,
 'rxn04751_c0': 0.0037542847,
 'rxn09313_c0': 0.06191987,
 'rxn02296_c0': 4.5195243e-06,
 'rxn03241_c0': 0.99999905,
 'rxn05195_c0': 0.9999999,
 'rxn18575_c0': 5.916748e-07,
 'rxn03540_c0': 0.995783,
 'rxn05335_c0': 0.9999868,
 'rxn02227_c0': 1.8563658e-05,
 'rxn00745_c0': 0.054352637,
 'rxn00510_c0': 0.9544768,
 'rxn05598_c0': 0.0009475703,
 'rxn05289_c0': 0.99943817,
 'rxn00712_c0': 0.00038387507,
 'rxn08528_c0': 0.99947006,
 'rxn00407_c0': 0.9942456,
 'rxn08335_c0': 0.9869699,
 'rxn05361_c0': 0.99999136,
 'rxn00804_c0': 0.81698596,
 

We can for instance save this as an dictionary (.json) if we want to use some other time

In [None]:
p_file = open(output_path+"/prediction_example.json", 'w')
json.dump(str(prediction), p_file)

Of course you might not already have a dictionary with reaction presences. 
To convert a set of reactions ids to a binary array, based on a given set of ids, you may run:

In [15]:
list_of_reactions = [i for i in binary_example if binary_example[i] == 1]
default_ids  = NN_Predictor.load_ids()
binary_input = NN_Predictor.convert_reaction_list(list_of_reactions, NN_reaction_ids = default_ids)

Using user-provided ids
#reactions not in NN_rxn:  0


In [21]:
list_of_reactions[:5]

['rxn03137_c0', 'rxn08076_c0', 'rxn03241_c0', 'rxn05195_c0', 'rxn05335_c0']

In [23]:
default_ids

array(['rxn08713_c0', 'rxn03137_c0', 'rxn09195_c0', ..., 'rxn01465_c0',
       'rxn29120_c0', 'rxn01322_c0'], dtype='<U11')

In [16]:
binary_input = NN_Predictor.convert_reaction_list(list_of_reactions)

TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'

But if you want to use the default NN, the predictor will do this 
automatically for you if you give a list of reactions

In [None]:
NN_Predictor.make_prediction(input=list_of_reactions, NN=NN_MS)

# Multiple predictions

We can also make predictions for multiple models at the same time, starting with a reaction presence dataframe where the rows are the different reactions and the columns are the model ids:

In [None]:
#multiple binary reaction lists (csv)
input_path = file_path+'/Sample_reaction_presence.csv'
df = pd.read_csv(input_path, index_col=0)

We can then create a array of prediction scores with the same order of reaction and model ids

In [None]:
df_p = NN_Predictor.make_prediction(df.T)

Finally we can create a dataframe of predictions so we can see which score corresponds to which model and reaction id

In [None]:
df_p

# Predicting based on a model

Usually however you will probably not have any of the previous stuff, but rather a model:

In [None]:
draft_model_location = os.path.join(path, 'files',  'models', 'E_coli_KTE31_388739.3_r30_removed.sbml')
draft_model = cobra.io.read_sbml_model(draft_model_location) 


We can list the reactions from the model and use those as input

In [None]:
list_of_reactions = draft_model.reactions.list_attr('id')

or if you give a model as input the predictor will try and do this for you

In [None]:
NN_Predictor.make_prediction(input=draft_model, NN=NN_MS)

# Gapfilling with the scores

Unfortunately, I forgot that you need a gurobi license to do the actual gapfilling which is a bit of a hassle. Gapfilling will look a bit like this:

In [None]:
from reaction_class import Reaction
import gapfill_function


draft_model = Reaction(model=draft_model_location)
biochem = os.path.join(path.parent, 'files',  'biochemistry', 'reactions.tsv')
db_reactions = Reaction(biochem_input=biochem)

#Combine all reactions into one object

all_reactions = Reaction()
all_reactions.reactions = all_reactions.add_dict(draft_model.reactions, db_reactions.reactions) 
draft_reaction_ids = set(draft_model.reactions)

In [None]:
def_gf_model, obj, new_reacs = gapfill_function.gapfill(all_reactions, draft_reaction_ids, {}, 'bio1', medium = 'complete', result_selection = 'min_reactions')


In [None]:
p = NN_Predictor.make_prediction(draft_reaction_ids)

weights = {}
for i in p:
    weights[i]  = np.round(1-p[i])

NN_gf_model, obj, new_reacs = gapfill_function.gapfill(all_reactions, draft_reaction_ids, weights, 'bio1', medium = 'complete', result_selection = 'min_reactions')


In [None]:
model_location = os.path.join(path.parent, 'files',  'models')
cobra.io.write_sbml_model(def_gf_model, model_location+'/E_coli_KTE31_388739.3_gf_def.sbml')
cobra.io.write_sbml_model(NN_gf_model, model_location+'/E_coli_KTE31_388739.3_gf_NN.sbml')


# Comparing gapfilled models

Now that we can look at the different gapfilled models to compare

In [None]:
model_location = os.path.join(path.parent, 'files',  'models')
more_complete_model_location = os.path.join(model_location+'/E_coli_KTE31_388739.3_draft.sbml')
def_gf_model_location = os.path.join(path.parent, 'files',  'models', 'E_coli_KTE31_388739.3_gf_def.sbml')
NN_gf_model_location = os.path.join(path.parent, 'files',  'models', 'E_coli_KTE31_388739.3_gf_NN.sbml')

more_complete = cobra.io.read_sbml_model(more_complete_model_location) 
def_gf_model =cobra.io.read_sbml_model(def_gf_model_location) 
NN_gf_model = cobra.io.read_sbml_model(NN_gf_model_location) 

c_reactions = set(more_complete.reactions.list_attr('id'))
def_gf_reactions = set(def_gf_model.reactions.list_attr('id'))
NN_gf_reactions = set(NN_gf_model.reactions.list_attr('id'))

rem_reactions = c_reactions.difference(draft_reaction_ids)
new_reactions_def = def_gf_reactions.difference(draft_reaction_ids)
new_reactions_NN = NN_gf_reactions.difference(draft_reaction_ids)

In [None]:
TP_NN = len(new_reactions_NN.intersection(c_reactions))
FP_NN = len(new_reacs) - TP_NN
FN_NN = len(rem_reactions.difference(new_reacs))
TN_NN = len(all_reactions.reactions) - (FP_NN+TP_NN+FN_NN)


In [None]:
TP_def = len(new_reactions_def.intersection(c_reactions))
FP_def = len(new_reactions_def) - TP_def
FN_def = len(rem_reactions.difference(new_reactions_def))
TN_def = len(all_reactions.reactions) - (FP_def+TP_def+FN_def)

In [None]:
f1_NN = 2*TP_NN/(2*TP_def+FP_def+FN_def)
f1_def = 2*TP_def/(2*TP_def+FP_def+FN_def)

In [None]:
f1_def