Load dependencies

In [3]:
import numpy as np
import pandas as pd
import os
import sys
import json
import tensorflow as tf
import NN_Predictor
import cobra

In [4]:
from pathlib import Path
path = Path.cwd()
sys.path.append(path)
file_path = os.path.join(path.parent,'files', 'NN')
output_path = os.path.join(path.parent,'files', 'custom scores')

#Tensorflow complains because the NN is not optimized perfectly
tf.get_logger().setLevel('ERROR')

## Introduction

The purpose of this notebook is to show some examples of using the NN

## Making predictions using the NN

Load in your favourite Neural Network

In [5]:
NN_MS = NN_Predictor.load_NN(file_path+'/NN_MS.h5')

Loading user provided network


2023-02-09 15:33:09.323010: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-09 15:33:09.977468: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 856 MB memory:  -> device: 0, name: Quadro K620, pci bus id: 0000:03:00.0, compute capability: 5.0


if left empty it will default to the standard ModelSEED one

In [6]:
NN_MS = NN_Predictor.load_NN()

Loading Default NN (ModelSEED)


Load in the data, the NN requires a binary list indicating presences of reactions, it is important that this is in the right order. However, you can give it a dictionary and it will put it in the right order for you.

load in the dictionary:

In [7]:
with open(file_path+'/binary_example.json', 'r') as j:
    binary_example = json.loads(j.read())

and make the prediction

In [8]:
prediction = NN_Predictor.make_prediction(input=binary_example, NN=NN_MS)

Using ModelSEED ids
#reactions not in NN_rxn:  0


In [9]:
prediction

{'rxn08713_c0': 3.210937e-05,
 'rxn03137_c0': 0.9981457,
 'rxn09195_c0': 1.8804754e-05,
 'rxn01279_c0': 0.01665109,
 'rxn00720_c0': 0.0028327655,
 'rxn12639_c0': 0.9999988,
 'rxn20214_c0': 6.76739e-05,
 'rxn08076_c0': 0.9997949,
 'rxn01213_c0': 0.0003554161,
 'rxn01537_c0': 0.27153924,
 'rxn03918_c0': 0.08189404,
 'rxn05252_c0': 0.9999747,
 'rxn10317_c0': 9.243675e-08,
 'rxn23042_c0': 0.97127414,
 'rxn08542_c0': 2.7704942e-07,
 'rxn03886_c0': 0.11481691,
 'rxn04751_c0': 0.0037542812,
 'rxn09313_c0': 0.061919857,
 'rxn02296_c0': 4.5195197e-06,
 'rxn03241_c0': 0.99999905,
 'rxn05195_c0': 0.9999999,
 'rxn18575_c0': 5.916736e-07,
 'rxn03540_c0': 0.995783,
 'rxn05335_c0': 0.99998677,
 'rxn02227_c0': 1.8563675e-05,
 'rxn00745_c0': 0.05435261,
 'rxn00510_c0': 0.9544768,
 'rxn05598_c0': 0.0009475707,
 'rxn05289_c0': 0.9994381,
 'rxn00712_c0': 0.00038387466,
 'rxn08528_c0': 0.99947006,
 'rxn00407_c0': 0.9942456,
 'rxn08335_c0': 0.98696995,
 'rxn05361_c0': 0.9999914,
 'rxn00804_c0': 0.816986,
 '

We can then save this as an dictionary (.json) if we want to use some other time

In [10]:
p_file = open(output_path+"/prediction_example.json", 'w')
json.dump(prediction, p_file)

TypeError: Object of type float32 is not JSON serializable

I wrote a function that allows you convert a set of reactions ids to a binary array, based on a given set of ids

In [12]:
list_of_reactions = [i for i in binary_example if binary_example[i] == 1]
NN_ids = NN_Predictor.load_ids()
binary_input = NN_Predictor.convert_reaction_list(list_of_reactions, NN_ids)

Using user-provided ids
#reactions not in NN_rxn:  0


In [13]:
binary_input

array([0, 1, 0, ..., 1, 0, 0])

But if you want to use the standards NN_ids the predictor will do this automatically

In [15]:
NN_Predictor.make_prediction(input=list_of_reactions, NN=NN_MS)

Converting to binary array:
Using ModelSEED ids
#reactions not in NN_rxn:  0


{'rxn08713_c0': 3.210937e-05,
 'rxn03137_c0': 0.9981457,
 'rxn09195_c0': 1.8804754e-05,
 'rxn01279_c0': 0.01665109,
 'rxn00720_c0': 0.0028327655,
 'rxn12639_c0': 0.9999988,
 'rxn20214_c0': 6.76739e-05,
 'rxn08076_c0': 0.9997949,
 'rxn01213_c0': 0.0003554161,
 'rxn01537_c0': 0.27153924,
 'rxn03918_c0': 0.08189404,
 'rxn05252_c0': 0.9999747,
 'rxn10317_c0': 9.243675e-08,
 'rxn23042_c0': 0.97127414,
 'rxn08542_c0': 2.7704942e-07,
 'rxn03886_c0': 0.11481691,
 'rxn04751_c0': 0.0037542812,
 'rxn09313_c0': 0.061919857,
 'rxn02296_c0': 4.5195197e-06,
 'rxn03241_c0': 0.99999905,
 'rxn05195_c0': 0.9999999,
 'rxn18575_c0': 5.916736e-07,
 'rxn03540_c0': 0.995783,
 'rxn05335_c0': 0.99998677,
 'rxn02227_c0': 1.8563675e-05,
 'rxn00745_c0': 0.05435261,
 'rxn00510_c0': 0.9544768,
 'rxn05598_c0': 0.0009475707,
 'rxn05289_c0': 0.9994381,
 'rxn00712_c0': 0.00038387466,
 'rxn08528_c0': 0.99947006,
 'rxn00407_c0': 0.9942456,
 'rxn08335_c0': 0.98696995,
 'rxn05361_c0': 0.9999914,
 'rxn00804_c0': 0.816986,
 '

# Multiple predictions

We can also make predictions for multiple models at the same time, starting with a reaction presence dataframe where the rows are the different reactions and the columns are the model ids:

In [23]:
#multiple binary reaction lists (csv)
input_path = file_path+'/Sample_reaction_presence.csv'
df = pd.read_csv(input_path, index_col=0)

Unnamed: 0,1827300.3,693748.4,1637747.3,1165841.3,912630.3,1950946.3,1378073.3,589873.29,1919105.4,1951070.3
rxn04602_c0,0,0,0,0,0,0,0,0,0,0
rxn00543_c0,1,1,1,1,1,0,1,1,0,0
rxn02937_c0,1,1,1,1,1,1,1,1,1,1
rxn01967_c0,0,0,1,0,1,0,0,0,0,1
rxn05621_c0,1,0,0,0,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...
rxn00827_c0,0,0,0,0,0,0,0,0,0,0
rxn08475_c0,1,0,1,1,1,0,1,0,0,0
rxn15748_c0,0,0,0,0,0,0,0,0,0,0
rxn10339_c0,0,0,0,0,0,0,0,0,0,0


We can then create a array of prediction scores with the same order of reaction and model ids

In [31]:
df_p = NN_Predictor.make_prediction(df.T)

Loading Default NN (ModelSEED)


Finally we can create a dataframe of predictions so we can see which score corresponds to which model and reaction id

In [32]:
df_p

Unnamed: 0,rxn08713_c0,rxn03137_c0,rxn09195_c0,rxn01279_c0,rxn00720_c0,rxn12639_c0,rxn20214_c0,rxn08076_c0,rxn01213_c0,rxn01537_c0,...,rxn08657_c0,rxn05244_c0,rxn01383_c0,rxn01524_c0,rxn00347_c0,rxn05324_c0,rxn09201_c0,rxn01465_c0,rxn29120_c0,rxn01322_c0
1827300.3,1.4e-05,1.0,1.535646e-07,0.86579,5.699601e-07,1.0,3.358972e-09,1.0,5e-05,0.006633,...,0.109875,0.000876,0.002164763,1.169813e-06,0.999675,0.000102,1.0,1.0,1.296195e-06,4.712854e-07
693748.4,3e-05,0.999973,1.510779e-07,0.046863,1.800756e-07,1.0,1.038376e-08,0.999999,3.7e-05,0.521327,...,0.801017,0.000653,0.4231069,1.97902e-06,0.24354,1.0,0.999999,0.999997,2.679972e-10,0.0001048401
1637747.3,4.7e-05,0.999948,3.092027e-05,0.099882,1.977873e-06,1.0,2.557181e-08,0.999856,0.000958,0.362656,...,0.935619,0.001722,0.5599492,0.0003122896,0.343517,1.0,0.999986,0.999952,2.886286e-06,0.02093159
1165841.3,9e-05,0.999588,1.086691e-08,0.00048,3.28134e-06,0.999954,2.410732e-08,0.997313,0.000407,0.041189,...,0.01247,0.001147,2.724819e-05,2.051416e-08,0.137548,0.999997,0.999977,0.999372,1.701312e-07,0.0003950793
912630.3,2e-06,0.999801,7.027885e-06,0.000479,3.05985e-08,0.999997,2.422469e-10,0.01503,0.000419,0.018196,...,0.898834,0.002469,0.0008418942,7.050913e-08,0.799452,4.7e-05,8.1e-05,0.999764,1.19947e-09,0.0006799239
1950946.3,7e-06,0.981365,1.298398e-08,0.000178,0.0005591758,0.989625,2.807828e-08,0.982109,0.002462,0.000782,...,0.038433,1e-06,6.123112e-07,5.308237e-08,0.082296,0.999983,0.999061,0.974417,8.113082e-10,0.001101118
1378073.3,0.026719,1.0,0.05370793,0.211544,5.483909e-07,1.0,2.318448e-09,0.999999,0.99885,0.09129,...,0.001887,0.000183,0.02392264,3.620508e-07,0.99993,0.999999,1.0,0.999983,0.9999949,0.2604304
589873.29,0.000317,0.999115,2.800052e-06,0.001391,2.791074e-05,0.999999,2.523837e-07,0.999897,0.003489,0.016277,...,0.005693,0.002146,0.0002573193,2.00456e-06,0.031676,3e-06,0.999999,0.997979,6.554672e-07,0.0003097417
1919105.4,1.5e-05,0.998553,2.673585e-06,0.000134,8.523157e-06,0.999788,7.629986e-10,0.954889,0.00191,0.032682,...,0.016926,9.1e-05,0.0001113381,1.331319e-07,0.009748,0.999978,0.999149,0.997747,1.517507e-07,0.001698961
1951070.3,4e-06,0.992488,2.234194e-08,4.8e-05,0.0009644277,0.989264,3.591443e-09,0.939691,0.002721,0.000818,...,0.024073,3e-06,2.060235e-06,7.855996e-08,0.057234,0.999971,0.999521,0.989477,1.004869e-09,0.001141532


of course usually we dont have a binary array but rather a model:

In [44]:
model_path = os.path.join(path.parent, 'files',  'models', 'E_coli_KTE31_388739.3_r30_removed.sbml')
draft_model = cobra.io.read_sbml_model(model_path) 


Set parameter Username
Academic license - for non-commercial use only - expires 2023-03-12


We can find the reaction in the model

In [50]:
list_of_reactions = draft_model.reactions.list_attr('id')

In [53]:
NN_Predictor.make_prediction(input=draft_model, NN=NN_MS)

Using ModelSEED ids
#reactions not in NN_rxn:  4


{'rxn08713_c0': 3.210937e-05,
 'rxn03137_c0': 0.9981457,
 'rxn09195_c0': 1.8804754e-05,
 'rxn01279_c0': 0.01665109,
 'rxn00720_c0': 0.0028327655,
 'rxn12639_c0': 0.9999988,
 'rxn20214_c0': 6.76739e-05,
 'rxn08076_c0': 0.9997949,
 'rxn01213_c0': 0.0003554161,
 'rxn01537_c0': 0.27153924,
 'rxn03918_c0': 0.08189404,
 'rxn05252_c0': 0.9999747,
 'rxn10317_c0': 9.243675e-08,
 'rxn23042_c0': 0.97127414,
 'rxn08542_c0': 2.7704942e-07,
 'rxn03886_c0': 0.11481691,
 'rxn04751_c0': 0.0037542812,
 'rxn09313_c0': 0.061919857,
 'rxn02296_c0': 4.5195197e-06,
 'rxn03241_c0': 0.99999905,
 'rxn05195_c0': 0.9999999,
 'rxn18575_c0': 5.916736e-07,
 'rxn03540_c0': 0.995783,
 'rxn05335_c0': 0.99998677,
 'rxn02227_c0': 1.8563675e-05,
 'rxn00745_c0': 0.05435261,
 'rxn00510_c0': 0.9544768,
 'rxn05598_c0': 0.0009475707,
 'rxn05289_c0': 0.9994381,
 'rxn00712_c0': 0.00038387466,
 'rxn08528_c0': 0.99947006,
 'rxn00407_c0': 0.9942456,
 'rxn08335_c0': 0.98696995,
 'rxn05361_c0': 0.9999914,
 'rxn00804_c0': 0.816986,
 '