Load dependencies

In [1]:
import numpy as np
import pandas as pd
import os
import sys
import json
import tensorflow as tf
from NN_Predictor import NN
import NN_Trainer
import cobra

In [2]:
from pathlib import Path
path = Path.cwd()
sys.path.append(path)
file_path = os.path.join(path.parent,'files', 'NN')
output_path = os.path.join(path.parent,'files', 'custom scores')

#Tensorflow complains because the NN is not optimized perfectly
tf.get_logger().setLevel('ERROR')

## Introduction

The purpose of this notebook is to show some examples of using the NN

## Making predictions using the NN

Load in your favourite Neural Network

In [43]:
NN_MS = NN(path=file_path+'/NN_MS.h5')

Loading network at user provided path


You can also use modeltype

In [20]:
NN_MS = NN(modeltype='ModelSEED')

Loading default ModelSEED NN


Load in the data, the NN requires a binary list indicating presences of reactions, it is important that this is in the right order. However, you can give it a dictionary and it will put it in the right order for you.

load in the dictionary:

In [35]:
with open(file_path+'/binary_example.json', 'r') as j:
    binary_example = json.loads(j.read())

and make the prediction

In [36]:
prediction = NN.predict(NN_MS, binary_example)

#reactions not found in keys:  0 / 922


In [37]:
prediction

{'rxn00743_c0': 0.8809813,
 'rxn02288_c0': 0.9997712,
 'rxn00837_c0': 0.0021290297,
 'rxn47768_c0': 0.9999999,
 'rxn01351_c0': 0.9999527,
 'rxn00881_c0': 0.999943,
 'rxn10220_c0': 0.99739945,
 'rxn05614_c0': 0.15901737,
 'rxn01115_c0': 0.1570352,
 'rxn05232_c0': 0.0004929404,
 'rxn05449_c0': 0.99985385,
 'rxn10298_c0': 0.00056084065,
 'rxn01674_c0': 2.4730758e-05,
 'rxn12510_c0': 0.9959578,
 'rxn03919_c0': 0.13256215,
 'rxn02160_c0': 0.9164231,
 'rxn08805_c0': 0.9999994,
 'rxn00559_c0': 0.31918162,
 'rxn08941_c0': 0.0031160053,
 'rxn01133_c0': 0.11088779,
 'rxn02811_c0': 0.9981363,
 'rxn09206_c0': 0.9963366,
 'rxn02774_c0': 0.81767875,
 'rxn04989_c0': 0.46674755,
 'rxn05326_c0': 0.9998085,
 'rxn08816_c0': 0.9999995,
 'rxn00763_c0': 0.99722207,
 'rxn00324_c0': 0.99771786,
 'rxn02175_c0': 0.9990871,
 'rxn09014_c0': 0.9999448,
 'rxn01636_c0': 0.99578804,
 'rxn05610_c0': 0.021021385,
 'rxn09197_c0': 0.9958782,
 'rxn08933_c0': 0.00027430398,
 'rxn01639_c0': 0.047818765,
 'rxn05512_c0': 0.01

# Multiple predictions

We can also make predictions for multiple models at the same time, starting with a reaction presence dataframe where the rows are the different reactions and the columns are the model ids:

In [3]:
#multiple binary reaction lists (csv)
input_path = file_path+'/Sample_reaction_presence.csv'
df = pd.read_csv(input_path, index_col=0)

In [23]:
df = df.reindex(NN_MS.rxn_keys) #need to replace some files

In [59]:
df = df.fillna(0.0)

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])

We can then create a array of prediction scores with the same order of reaction and model ids

In [61]:
NN_MS.predict(input=df)

Unnamed: 0,1827300.3,693748.4,1637747.3,1165841.3,912630.3,1950946.3,1378073.3,589873.29,1919105.4,1951070.3
rxn00743_c0,2.360819e-01,7.755198e-01,8.893149e-01,5.308281e-01,9.064420e-02,9.716177e-01,6.468525e-01,9.709477e-01,6.910445e-01,9.081649e-01
rxn02288_c0,9.999996e-01,9.997825e-01,9.999524e-01,9.923460e-01,9.978074e-01,3.345670e-01,9.999983e-01,9.999472e-01,5.266663e-01,1.542819e-01
rxn00837_c0,7.506709e-07,1.161691e-04,3.194141e-03,1.471049e-04,2.098575e-04,1.301335e-03,9.978288e-01,4.867154e-03,2.670732e-02,6.418527e-04
rxn47768_c0,1.000000e+00,1.000000e+00,1.000000e+00,9.998272e-01,9.996049e-01,9.998584e-01,1.000000e+00,9.999990e-01,8.915814e-01,9.998854e-01
rxn01351_c0,1.000000e+00,9.995857e-01,9.999107e-01,4.039265e-02,9.953585e-01,9.959032e-01,9.999670e-01,9.965274e-01,8.578017e-01,9.712749e-01
...,...,...,...,...,...,...,...,...,...,...
rxn11684_c0,1.095919e-11,2.015804e-10,1.325182e-10,1.647843e-09,2.649108e-10,1.664223e-09,1.373268e-09,6.274233e-10,8.974430e-08,1.220285e-09
rxn03127_c0,4.228486e-13,2.610083e-09,6.479244e-09,2.937564e-09,3.365066e-11,3.453462e-09,4.974707e-11,1.696757e-08,3.858615e-07,1.345375e-08
rxn11858_c0,5.433693e-12,1.274283e-08,1.235520e-08,2.381408e-10,3.630257e-10,1.552465e-08,4.598281e-11,3.416079e-10,6.010296e-08,1.289154e-08
rxn11859_c0,6.984756e-10,2.554276e-08,5.983279e-08,8.685907e-10,2.541773e-10,4.017748e-09,2.928576e-11,3.953541e-10,5.511446e-08,1.426247e-09


Finally we can create a dataframe of predictions so we can see which score corresponds to which model and reaction id

In [46]:
df_p

Unnamed: 0,1827300.3,693748.4,1637747.3,1165841.3,912630.3,1950946.3,1378073.3,589873.29,1919105.4,1951070.3
rxn00743_c0,,,,,,,,,,
rxn02288_c0,,,,,,,,,,
rxn00837_c0,,,,,,,,,,
rxn47768_c0,,,,,,,,,,
rxn01351_c0,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
rxn11684_c0,,,,,,,,,,
rxn03127_c0,,,,,,,,,,
rxn11858_c0,,,,,,,,,,
rxn11859_c0,,,,,,,,,,


of course usually we dont have a binary array but rather a model:

In [17]:
model_path = os.path.join(path.parent, 'files',  'models', 'E_coli_KTE31_388739.3_r30_removed.sbml')
draft_model = cobra.io.read_sbml_model(model_path) 


Set parameter Username
Academic license - for non-commercial use only - expires 2024-03-09


We can find the reaction in the model

In [18]:
list_of_reactions = draft_model.reactions.list_attr('id')

or just give the model

In [44]:
NN_MS.predict(input=draft_model)

#reactions not found in keys:  69 / 995


  updates=self.state_updates,


{'rxn00743_c0': 0.87475514,
 'rxn02288_c0': 0.99975127,
 'rxn00837_c0': 0.001999906,
 'rxn47768_c0': 0.9999999,
 'rxn01351_c0': 0.9999509,
 'rxn00881_c0': 0.99993765,
 'rxn10220_c0': 0.9973635,
 'rxn05614_c0': 0.16172516,
 'rxn01115_c0': 0.15242338,
 'rxn05232_c0': 0.00049220724,
 'rxn05449_c0': 0.9998247,
 'rxn10298_c0': 0.0005893186,
 'rxn01674_c0': 2.4695855e-05,
 'rxn12510_c0': 0.9959662,
 'rxn03919_c0': 0.13001335,
 'rxn02160_c0': 0.9091895,
 'rxn08805_c0': 0.9999994,
 'rxn00559_c0': 0.31811252,
 'rxn08941_c0': 0.00263028,
 'rxn01133_c0': 0.10429691,
 'rxn02811_c0': 0.9980071,
 'rxn09206_c0': 0.9949649,
 'rxn02774_c0': 0.8168885,
 'rxn04989_c0': 0.45337406,
 'rxn05326_c0': 0.99977094,
 'rxn08816_c0': 0.9999994,
 'rxn00763_c0': 0.9970542,
 'rxn00324_c0': 0.9975266,
 'rxn02175_c0': 0.9990759,
 'rxn09014_c0': 0.9999428,
 'rxn01636_c0': 0.9953614,
 'rxn05610_c0': 0.018839324,
 'rxn09197_c0': 0.99463874,
 'rxn08933_c0': 0.00025703586,
 'rxn01639_c0': 0.049765915,
 'rxn05512_c0': 0.0110

# Training

You need to give a pandas DataFrame with binary presences of reactions, and modeltype

In [6]:
NN_trained = NN_Trainer.train(df, modeltype='ModelSEED')

Num GPUs Available:  1
using data as labels
dataset created
training on data with shape: (300, 2452) with 249710.0 reactions
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               627968    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 2452)              630164    
                                                                 
Total params: 1,258,132
Trainable params: 1,258,132
Non-trainable params: 0
_________________________________________________________________
Train on 300 samples


2023-03-28 14:20:25.851177: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-28 14:20:26.442911: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 741 MB memory:  -> device: 0, name: Quadro K620, pci bus id: 0000:03:00.0, compute capability: 5.0


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
print(len(NN_trained.rxn_keys))
NN_trained.modeltype
NN_trained.network.summary()

2452
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               627968    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 2452)              630164    
                                                                 
Total params: 1,258,132
Trainable params: 1,258,132
Non-trainable params: 0
_________________________________________________________________


In [18]:
NN_trained.predict(draft_model)

#reactions not found in keys:  73 / 995


  updates=self.state_updates,


{'rxn08713_c0': 7.4078743e-09,
 'rxn03137_c0': 1.0,
 'rxn09195_c0': 7.860609e-09,
 'rxn01279_c0': 0.0012579305,
 'rxn00720_c0': 5.5741234e-09,
 'rxn12639_c0': 1.0,
 'rxn20214_c0': 4.837703e-09,
 'rxn08076_c0': 0.9999958,
 'rxn01213_c0': 1.8998808e-06,
 'rxn01537_c0': 0.9495562,
 'rxn03918_c0': 0.03158252,
 'rxn05252_c0': 1.0,
 'rxn10317_c0': 3.6703857e-06,
 'rxn23042_c0': 0.9992754,
 'rxn08542_c0': 5.439383e-09,
 'rxn03886_c0': 0.016803127,
 'rxn04751_c0': 1.7890794e-06,
 'rxn09313_c0': 5.5749762e-08,
 'rxn02296_c0': 2.0847866e-08,
 'rxn03241_c0': 1.0,
 'rxn05195_c0': 1.0,
 'rxn18575_c0': 1.3160019e-09,
 'rxn03540_c0': 0.9948533,
 'rxn05335_c0': 0.9995053,
 'rxn02227_c0': 1.9007225e-09,
 'rxn00745_c0': 0.0047439206,
 'rxn00510_c0': 0.056542158,
 'rxn05598_c0': 0.0034543555,
 'rxn05289_c0': 1.0,
 'rxn00712_c0': 0.0002336608,
 'rxn08528_c0': 0.99869186,
 'rxn00407_c0': 0.9972414,
 'rxn08335_c0': 1.0,
 'rxn05361_c0': 0.99941635,
 'rxn00804_c0': 0.0063445615,
 'rxn05505_c0': 6.0297896e-09,