## **Notebook to make predictions with reprocessed Henson data and SPOT**

By Garrett Roell and Christina Schenk

Tested on biodesign_3.7 kernel on jprime

### Ensemble Method


#### **Set up imports**

In [2]:
import os
import sys
import pandas as pd
import numpy as np
import cobra
import scipy.stats
#import cplex
%matplotlib inline

import matplotlib
from matplotlib import pyplot as plt
from matplotlib.offsetbox import (TextArea, DrawingArea, OffsetImage,
                                  AnnotationBbox)
from matplotlib.cbook import get_sample_data
import matplotlib.image as mpimg
import matplotlib.cm as cm

# from edd_utils import login, export_study, export_metadata

# from sklearn.metrics import r2_score

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import theano.tensor as tt




#### **Load Genome Scale Model**

In [3]:
# Read the genome-scale model from its SBML file with COBRApy.
model = cobra.io.read_sbml_model("../models/Ropacus_annotated.xml")

Scaling...
 A: min|aij| =  1.000e+00  max|aij| =  1.000e+00  ratio =  1.000e+00
Problem data seem to be well scaled


#### **Load glucose data**

In [4]:
# Flux tables for the glucose condition, keyed by the name of the method
# whose CSV file they were read from.
glucose_data = {
    method: pd.read_csv(f'../data/genome_scale_fluxes/glucose_{method}.csv')
    for method in ('fba', 'pfba', 'eflux', 'spot')
}

#### **Standardize data format**

In [5]:
# The E-Flux and SPOT tables use 'Unnamed: 0' / 'fluxes' as column names;
# rename them to the 'reaction_id' / 'flux' names the other tables use.
glucose_data['eflux'].rename(
    columns={'Unnamed: 0': 'reaction_id', 'fluxes': 'flux'}, inplace=True
)
glucose_data['spot'].rename(
    columns={'Unnamed: 0': 'reaction_id', 'fluxes': 'flux'}, inplace=True
)

# Show one untouched table and one renamed table for comparison.
display(glucose_data['fba'].head(), glucose_data['spot'].head())

Unnamed: 0,reaction_id,reaction_name,reaction_reaction,flux
0,12DGR140tipp,"1,2 diacylglycerol transport via flipping (per...",12dgr140_p --> 12dgr140_c,0.0
1,13PPDH,"1,3-propanediol dehydrogenase",3hppnl_c + h_c + nadh_c <=> 13ppd_c + nad_c,0.0
2,1P2CBXLCYCL,1 Pyrroline 2 carboxylate cyclation,5a2opntn_c <=> 1p2cbxl_c + h2o_c + h_c,0.0
3,1P2CBXLR,Delta1 piperideine 2 carboxylate reductase,1p2cbxl_c + 2.0 h_c + nadph_c --> nadp_c + pro...,0.0
4,23CTI1,Decenyl coa cis trans isomerization cis dec 3...,decoa_c --> dc2coa_c + h_c,0.0


Unnamed: 0,reaction_id,flux
0,12DGR140tipp,-2.818634e-12
1,13PPDH,-7.228013999999999e-20
2,1P2CBXLCYCL,-2.534757e-12
3,1P2CBXLR,1.591026e-10
4,23CTI1,7.58361e-13


In [6]:
# Rescale every flux table so that its glucose uptake equals 100, which puts
# the predictions of all methods on a common basis.
for algorithm, flux_df in glucose_data.items():
    glucose_exchange = flux_df.loc[flux_df.reaction_id == 'EX_glc__D_e', 'flux']
    if glucose_exchange.empty:
        raise ValueError(
            f"{algorithm}: no 'EX_glc__D_e' row found, cannot normalize the fluxes"
        )

    # The exchange flux is negated to obtain the uptake rate.
    uptake_rate = -1 * float(glucose_exchange.values[0])

    # A zero uptake would divide by zero, and a negative one would silently
    # flip the sign of every flux while the exchange still ends up at -100.
    # `not ... > 0` also rejects NaN.
    if not uptake_rate > 0:
        raise ValueError(
            f"{algorithm}: glucose uptake rate is {uptake_rate}; "
            "a positive uptake is required to normalize the fluxes"
        )
    scale_factor = 100 / uptake_rate

    flux_df['flux'] = scale_factor * flux_df['flux']
    print(uptake_rate, scale_factor)

100.0 1.0
100.0 1.0
169.09442937869107 0.5913855374623109
0.046794592289116 2136.9990656646687


#### **Ensure glucose uptake rate is 100 for each model**

In [7]:
# Show the glucose exchange row of every table so the normalized value can
# be inspected.
for algorithm, flux_df in glucose_data.items():
    display(flux_df.loc[flux_df['reaction_id'] == 'EX_glc__D_e'])

Unnamed: 0,reaction_id,reaction_name,reaction_reaction,flux
2819,EX_glc__D_e,R_EX_glc__D_e,glc__D_e <=>,-100.0


Unnamed: 0,reaction_id,reaction_name,reaction_reaction,flux
2819,EX_glc__D_e,R_EX_glc__D_e,glc__D_e <=>,-100.0


Unnamed: 0,reaction_id,flux
2819,EX_glc__D_e,-100.0


Unnamed: 0,reaction_id,flux
2819,EX_glc__D_e,-100.0


#### **Load Glucose 13C MFA Data**

In [8]:
# Read the 13C flux table and keep only the rows that have an entry in
# "Forward Reactions", i.e. rows that map to the genome-scale model.
glucose_fluxes = (
    pd.read_csv('../data/central_fluxes/glucose_13C.csv')
    .dropna(subset=["Forward Reactions"])
)

glucose_fluxes.head()

Unnamed: 0,Pathway,Forward Reactions,Reaction,Location on map,Flux,90% Confidence Lower Bound,90% Confidence Upper Bound
0,Substrate Uptake,reverse_EX_glc__D_e,Gluc.ext + ATP -> G6P,"(50, 460)",100.0,100.0,100.0
1,EMP Pathway,PGI,G6P <-> F6P,"(-150, 430)",-1.61,-2.09,1.42
2,EMP Pathway,PFK or reverse_FBP,F6P + ATP -> FBP,"(-220, 195)",0.0,0.0,1.91
3,EMP Pathway,FBA,FBP <-> DHAP + GAP,"(-140, 115)",0.0,0.0,1.91
4,EMP Pathway,TPI,DHAP <-> GAP,"(-270, 150)",0.0,0.0,1.91


#### **Make a grid of possible coefficient values**

#### **Function to convert a list of weight coefficients into a weighted flux solution**

In [25]:
# Spot-check: look up the flux of reaction 13PPDH in the first table of
# glucose_data.
df_list = list(glucose_data.values())
df = df_list[0]

df.loc[df.reaction_id == '13PPDH', 'flux'].values[0]

0.0

In [36]:
def weights_to_solution(weight_list, df_list, show=True):
    """Combine several flux tables into one weighted-sum flux table.

    For every reaction listed in the first table, the flux of that reaction
    in each table is multiplied by the matching weight and the products are
    summed.

    Parameters
    ----------
    weight_list : sequence of float
        One weight per flux table, in the same order as ``df_list``.
    df_list : sequence of pandas.DataFrame
        Flux tables, each with ``reaction_id`` and ``flux`` columns. The
        reaction order of the result follows ``df_list[0]``. If a table lists
        a reaction more than once, its first row is used.
    show : bool, default True
        When True, display the full weighted table and its ``EX_glc__D_e``
        row (requires the IPython ``display`` function).

    Returns
    -------
    pandas.DataFrame
        Columns ``reaction_id`` and ``flux`` holding the weighted fluxes.

    Raises
    ------
    ValueError
        If no tables are given, or the number of weights differs from the
        number of tables.
    KeyError
        If a reaction of the first table is missing from another table.
    """
    weight_list = list(weight_list)
    df_list = list(df_list)
    if not df_list:
        raise ValueError("df_list must contain at least one flux table")
    if len(weight_list) != len(df_list):
        # zip() would silently drop the surplus weights or tables.
        raise ValueError(
            f"got {len(weight_list)} weights for {len(df_list)} flux tables"
        )

    reaction_ids = df_list[0]['reaction_id']

    # Accumulate table by table, in order, so each reaction is summed in the
    # same sequence as the weights are listed.
    weighted_flux = 0.0
    for weight, df in zip(weight_list, df_list):
        # Index the fluxes by reaction id once per table instead of scanning
        # the whole table for every reaction; the first duplicate wins.
        flux_by_id = (
            df.drop_duplicates(subset='reaction_id')
            .set_index('reaction_id')['flux']
        )
        missing = reaction_ids[~reaction_ids.isin(flux_by_id.index)]
        if len(missing) > 0:
            raise KeyError(
                f"reactions missing from a flux table: {list(missing[:5])}"
            )
        weighted_flux = weighted_flux + weight * flux_by_id.reindex(reaction_ids).to_numpy()

    weighted_df = pd.DataFrame(
        {'reaction_id': reaction_ids.to_numpy(), 'flux': weighted_flux}
    )

    if show:
        display(weighted_df)
        display(weighted_df[weighted_df.reaction_id == 'EX_glc__D_e'])

    return weighted_df
    
    
# Equal-weight combination of the four flux tables. The trailing semicolon
# keeps the cell from echoing whatever the call returns.
weights_to_solution(
    weight_list=[0.25, 0.25, 0.25, 0.25],
    df_list=list(glucose_data.values()),
);

Unnamed: 0,reaction_id,flux
0,12DGR140tipp,-1.505854e-09
1,13PPDH,-3.861565e-17
2,1P2CBXLCYCL,-1.354193e-09
3,1P2CBXLR,3.491030e-02
4,23CTI1,4.051542e-10
...,...,...
3014,EX_guaiacol_e,0.000000e+00
3015,guaiacol_transport,5.641977e-17
3016,GUADEM,-2.396466e-16
3017,Growth_Phenol,0.000000e+00


Unnamed: 0,reaction_id,flux
2819,EX_glc__D_e,-100.0


#### **Get the r-squared value for each point in the coefficient grid**

In [None]:
# NOTE(review): add_pred_fluxes_to_13c_df_without_std and glucose_fba_solution
# are not defined in the visible part of this notebook — confirm both exist
# before running this cell. The cell also reassigns glucose_fluxes from
# itself, so re-running it feeds the previous result back in.
glucose_fluxes = add_pred_fluxes_to_13c_df_without_std(glucose_fluxes, glucose_fba_solution, 'FBA', 'WT')

#### **Visualize the data grid and r-squared values**

In [None]:
# NOTE(review): this repeats the call from the previous code cell and does no
# plotting, although it sits under the "Visualize" heading — presumably a
# placeholder; confirm. add_pred_fluxes_to_13c_df_without_std and
# glucose_fba_solution are not defined in the visible part of this notebook.
glucose_fluxes = add_pred_fluxes_to_13c_df_without_std(glucose_fluxes, glucose_fba_solution, 'FBA', 'WT')