# Strain-design Workflow

In [27]:
## IMPORTS
#Cobra dependencies
import cobra
from cobra import Reaction, Metabolite
from cobra.io import read_sbml_model, save_matlab_model
from cobra.flux_analysis import production_envelope, flux_variability_analysis
from cobra.flux_analysis.variability import find_essential_genes
from cobra.sampling import sample
from cobra.sampling import OptGPSampler, ACHRSampler
import gurobipy
#Cameo dependencies
import cameo
from cameo.strain_design.deterministic.linear_programming import OptKnock
from cameo import phenotypic_phase_plane
from cameo.visualization.plotting.with_plotly import PlotlyPlotter
from cameo.flux_analysis.simulation import lmoma, pfba
#Data processing dependencies
import pandas as pd
import ast
import plotly.express as px
from itertools import combinations
#Metabolic design helper functions
from pyfastcore import set_medium, Fastcore
#Our own helper functions
from designFunctions import *

## Phase 1: Generate OptKnock Results

In [28]:
# --- Parameter grid: all combinations of these ranges are explored ---
max_knock_out_range = range(3, 6)      # 3, 4, 5
max_cl_range = range(12, 8, -1)        # 12, 11, 10, 9
replicates = 3

# --- Project-specific settings ---
dir_path = '.'
project_name = 'synecococcus_LA_ko_results'
# Experiment label: the project name without its last two '_'-separated tokens
experiment = '_'.join(project_name.split('_')[:-2])
framework_name = 'cameo'  # either 'mewpy' or 'cameo'
minimum_growth_fraction = 0.1
NEED_TO_SET_UP_MEDIA = False

# Settings bundle that is passed to the helper functions further down
set_up_params = {
    'dir_path': dir_path,
    'project': project_name,
    'framework': framework_name,
    'min_growth': minimum_growth_fraction,
    'max_cl_range': max_cl_range,
    'max_knock_out_range': max_knock_out_range,
    'replicates': replicates,
}

# --- Flux sampling strategy generation ---
# Both parameters take one of:
#   'Normalised_max_Flux', 'Normalised_max_biomass' or 'Presence'
sorting_param = 'Normalised_max_Flux'
secondary_param = 'Presence'

# --- Flux sampling ---
TOP_CANDIDATES = 10
N_OF_DELETIONS = 5
TOP_DELETION_STRAINS = 2
N_OF_DELETION_STRAINS = 6

# --- Selection ---
TOP_STRATEGIES = 5

### 1.1 Model & media specification

In [29]:
# Location of the metabolic model inside the project folder
model_path = 'cyanobacteria_models/paper/iMS837'
model_filename = 'syneco_acetate_mixotrophic_conditions.json'
# Resolve the model under dir_path/project_name, the same root the notebook uses
# when it saves the .mat model and the figures, so that changing dir_path moves
# inputs and outputs together.
model = cobra.io.load_json_model('/'.join([dir_path, project_name, model_path, model_filename]))
# Identifiers that define the design problem
target_biomass = 'BOF_acetate_mixotrophy'
target_metabolites = ['octe_9_12_15_e']
target_reaction = 'DM_ALA'
carbon_source = 'EX_ac_e'
# Display a summary of the model as loaded
model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux
ac_e,EX_ac_e,0.1802,2,17.59%
co2_e,EX_co2_e,0.9083,1,44.34%
cobalt2_e,EX_cobalt2_e,0.002271,0,0.00%
fe3_e,EX_fe3_e,0.009585,0,0.00%
h_e,EX_h_e,0.4743,0,0.00%
hco3_e,EX_hco3_e,0.7798,1,38.07%
k_e,EX_k_e,0.003423,0,0.00%
mg2_e,EX_mg2_e,0.005934,0,0.00%
na1_e,EX_na1_e,0.005821,0,0.00%
nh4_e,EX_nh4_e,0.5291,0,0.00%

Metabolite,Reaction,Flux,C-Number,C-Flux
co_c,DM_co_c,-0.0003007,1,100.00%
pho_loss_c,DM_pho_loss_c,-0.3224,0,0.00%
o2_e,EX_o2_e,-1.775,0,0.00%


### 1.2 Model reduction

In [30]:
# Work on a copy of the loaded model and optimise for the target biomass reaction
configured_model = model.copy()
configured_model.objective = target_biomass

# Define the medium here only when a preparation script has not already done so
if NEED_TO_SET_UP_MEDIA:
    media_definition = {
        'EX_ca2_e': 10,
        'EX_cl_e': 10,
        'EX_co2_e': 100,
        'EX_cobalt2_e': 10,
        'EX_cu2_e': 10,
        'EX_fe2_e': 10,
        'EX_fe3_e': 0,
        'EX_h_e': 100,
        'EX_h2o_e': 100,
        'EX_k_e': 10,
        'EX_mg2_e': 10,
        'EX_mn2_e': 10,
        'EX_mobd_e': 10,
        'EX_na1_e': 10,
        'EX_tungs_e': 1000,
        'EX_zn2_e': 10,
        'EX_ni2_e': 10,
        'EX_sel_e': 1000,
        'EX_slnt_e': 1000,
        'EX_so4_e': 10,
        'EX_nh4_e': 10,
        'EX_pi_e': 10,
        'EX_cbl1_e': 0,
        'EX_o2_e': 30,
        carbon_source: 3.78,
    }
    set_medium(configured_model, media_definition, inplace=True)

configured_model = purge_non_objective_biomass(
    configured_model,
    target_biomass,
    n_of_biomass_reactions=3,
)

# Blocked reactions: every reaction id that get_rxn_with_fva_flux does not return
all_reaction_ids = {rxn.id for rxn in configured_model.reactions}
blocked_reactions = all_reaction_ids - set(get_rxn_with_fva_flux(configured_model))
print('Removing a total of %d blocked reactions...' % len(blocked_reactions))
configured_model.remove_reactions(blocked_reactions)

# Export to .mat format in case gcFront needs to be run afterwards
mat_model_path = '/'.join([dir_path, project_name, "configured_model.mat"])
save_matlab_model(configured_model, mat_model_path)
configured_model.summary()

Read LP format model from file /tmp/tmpxex6fwvh.lp
Reading time = 0.00 seconds
: 835 rows, 1504 columns, 6914 nonzeros
Read LP format model from file /tmp/tmpxjoibuyy.lp
Reading time = 0.00 seconds
: 835 rows, 1504 columns, 6914 nonzeros
Read LP format model from file /tmp/tmpt9bm1a35.lp
Reading time = 0.00 seconds
: 835 rows, 1504 columns, 6914 nonzeros
Removing a total of 140 blocked reactions...


Metabolite,Reaction,Flux,C-Number,C-Flux
ac_e,EX_ac_e,0.1802,2,17.59%
co2_e,EX_co2_e,0.9083,1,44.34%
cobalt2_e,EX_cobalt2_e,0.002271,0,0.00%
fe3_e,EX_fe3_e,0.009585,0,0.00%
h_e,EX_h_e,0.4743,0,0.00%
hco3_e,EX_hco3_e,0.7798,1,38.07%
k_e,EX_k_e,0.003423,0,0.00%
mg2_e,EX_mg2_e,0.005934,0,0.00%
na1_e,EX_na1_e,0.005821,0,0.00%
nh4_e,EX_nh4_e,0.5291,0,0.00%

Metabolite,Reaction,Flux,C-Number,C-Flux
co_c,DM_co_c,-0.0003007,1,100.00%
pho_loss_c,DM_pho_loss_c,-0.3224,0,0.00%
o2_e,EX_o2_e,-1.775,0,0.00%


### 1.3 Generate OptKnock strategies along the parameter space

In [None]:
# Produce the OptKnock results for the parameters held in set_up_params
generate_strategies(
    set_up_params,
    configured_model,
    target_biomass,
    target_reaction,
    carbon_source,
    blocked_reactions,
)

## Phase 2: Strategy comparison & selection

In [None]:
# Analysis modes requested from analyse_results
analysis_type = ['gene_candidates', 'strategies_eval']

raw_df, results_df = analyse_results(
    set_up_params,
    configured_model,
    target_reaction,
    sorting_param,
    analysis_type=analysis_type,
)

In [33]:
# Bar chart per reaction: height = sorting_param, colour = secondary_param
fig = px.bar(
    results_df,
    x="Reaction",
    y=sorting_param,
    color=secondary_param,
    color_continuous_scale='Bluered_r',
)
fig.show()
# Store the chart as a PNG inside the project folder
candidates_png = '/'.join([dir_path, project_name, "candidate_reactions_reduced_parameter_space.png"])
fig.write_image(candidates_png)

In [23]:
# Drop strategies with quasi-null flux: keep rows whose Flux is at least min_flux
min_flux = 0.05
flux_filter = "Flux>="+str(min_flux)
filtered_df = raw_df.query(flux_filter)

# Sorted facet values, handed to plotly as the category order of the grid
deletion_list = sorted(filtered_df["N_of_deletions"].unique().tolist())
carbon_list = sorted(filtered_df["C_limit"].unique().tolist())

fig = px.scatter(
    filtered_df,
    x="Flux",
    y="Biomass",
    color="Biomass",
    facet_col="N_of_deletions",
    facet_row="C_limit",
    hover_name="Strategy",
    category_orders={
        "N_of_deletions": deletion_list,
        "C_limit": carbon_list,
    },
)

# Shorten the facet titles: "N_of_deletions" becomes "dels"
fig.for_each_annotation(lambda a: a.update(text=a.text.replace("N_of_deletions", "dels")))

fig.show()
# Store the chart as a PNG inside the project folder
strategies_png = '/'.join([dir_path, project_name, "optknock_strategies_evaluation_reduced_parameter_space.png"])
fig.write_image(strategies_png)

In [None]:
# Selection thresholds for the best strategies:
#   flux    >= 95 % of the highest flux in raw_df
#   biomass >= minimum_growth_fraction of the highest biomass in raw_df
# Series.max() skips NaN entries; the built-in max() over a plain list returns an
# order-dependent (possibly NaN) value as soon as one entry is NaN.
flux_limit = raw_df['Flux'].max() * 0.95
print(flux_limit)
# The spelling `biomas_limit` is kept on purpose: a later cell reads this name.
biomas_limit = raw_df['Biomass'].max() * minimum_growth_fraction
print(biomas_limit)
# Keep the strategies that satisfy both thresholds
meets_biomass = raw_df['Biomass'] >= biomas_limit
meets_flux = raw_df['Flux'] >= flux_limit
best_ok_df = raw_df.loc[meets_biomass & meets_flux]
best_ok = best_ok_df['Strategy'].tolist()
print(best_ok)
display_KO_candidates_results(configured_model, best_ok, target_biomass, target_reaction, 'cameo', media=None)

#### If the OptKnock/NSGA-II results are not reproducible using cobrapy, launch the gcFront script:

In [10]:
# FIRST: check the reproducibility of the OptKnock/NSGA-II results
# TODO: implement the check function here

# SECOND: write a one-row CSV file with the basic information of the set-up
set_up_dict = {
    'experiment': [experiment],
    'target': [target_reaction],
    'framework': [framework_name],
    'biomass_limit': [biomas_limit],
}

set_up_df = pd.DataFrame(set_up_dict)
set_up_df.to_csv('production_target_tf.csv', index=False)

In [2]:
# THIRD: run the gcFront launcher script with bash.
# NOTE: subprocess.call waits for the command to finish and returns its exit code
# (shown as the cell output); any backgrounding has to happen inside
# launch_gcFront.sh itself.
import subprocess
# An argument list is used instead of a shell string: a fixed command needs no
# extra shell layer, and this avoids shell-quoting pitfalls.
subprocess.call(["bash", "launch_gcFront.sh"])

0