In [1]:
# Setup cell: imports, global warning filter, and loading of the shared `model` object.
import cobra
# NOTE(review): wildcard import; `load_matlab_model` used below comes from here.
# Later cells may depend on other names it leaks, so confirm before narrowing it.
from cobra.io.mat import *
import warnings
# Silence all warnings from this point on (set before the sampler import below).
warnings.simplefilter('ignore')
#from cobra.sampling import sample
from cobra.sampling import OptGPSampler, ACHRSampler
import os
# CPU count reported by the OS; the sampling cells visible here call
# os.cpu_count() directly rather than reading this variable.
cpus = os.cpu_count()
from cobra.io.json import load_json_model
#model = load_matlab_model("./COBRA_models/GEM_Recon2_thermocurated_redHUMAN.mat")
# Load the model from the MATLAB file; `model` is the global that the following cells modify in place.
model = load_matlab_model("./COBRA_models/GEM_Recon3_thermocurated_redHUMAN_AA.mat")


No defined compartments in model model. Compartments will be deduced heuristically using regular expressions.
Using regular expression found the following compartments:c, e, g, i, l, m, n, r, x


In [2]:
# Report the size of the loaded model. len() replaces the direct __len__()
# calls; the printed text is unchanged.
print(f'Number of reactions: {len(model.reactions)}')
print(f'Number of metabolites: {len(model.metabolites)}')
print(f'Number of compartments: {len(model.compartments)}')

Number of reactions: 10602
Number of metabolites: 5835
Number of compartments: 9


In [3]:
# Display the symbolic expression of the model's current objective.
model.objective.expression

1.0*biomass - 1.0*biomass_reverse_01e59

In [4]:

#recon301 =load_matlab_model("./COBRA_models/Recon3DModel_301.mat")

# Block the maintenance biomass reaction first, so the optimum computed on the
# next line is obtained with that reaction already closed.
model.reactions.get_by_id('biomass_maintenance').bounds = (0, 0)

# Require 'biomass' to carry at least half of the current optimal objective value.
half_of_optimum = .5 * model.optimize().objective_value
model.reactions.get_by_id('biomass').bounds = (half_of_optimum, 100)

# Switch the objective to r0399, make it forward-only, and block PHETHPTOX2.
model.objective = 'r0399'
r0399_rxn = model.reactions.get_by_id("r0399")
r0399_rxn.bounds = (0, 1e5)
phethptox2_rxn = model.reactions.get_by_id("PHETHPTOX2")
phethptox2_rxn.bounds = (0, 0.0)

In [5]:
def turn_off_rxn(rxn_id):
    """Block reaction `rxn_id` in the global `model` by setting its bounds to (0, 0).

    Returns the reaction's bounds as read back from the model after the change.
    """
    target = model.reactions.get_by_id(rxn_id)
    target.bounds = (0, 0)
    return target.bounds
    


# Block every reaction that involves tyr_L_e or dhbpt_e (results not displayed).
for met_id in ("tyr_L_e", "dhbpt_e"):
    for rxn_id in [r.id for r in model.metabolites.get_by_id(met_id).reactions]:
        turn_off_rxn(rxn_id)

# Same for thbpt_e; as the cell's last expression, the resulting bounds are displayed.
[turn_off_rxn(rxn_id) for rxn_id in [r.id for r in model.metabolites.get_by_id("thbpt_e").reactions]]

[(0, 0), (0, 0)]

In [6]:
import itertools  # kept in case cells outside this view rely on it


# Reduced costs whose magnitude is at or below this value are treated as zero.
REDUCED_COST_TOL = 1e-12

# Reaction ids whose reduced cost is numerically non-zero at the current optimum.
reduced_costs = model.optimize().reduced_costs
nonzero_cost_ids = reduced_costs[reduced_costs.abs() > REDUCED_COST_TOL].index.tolist()

# Drop reactions whose bounds are both zero (already blocked). A plain
# comprehension avoids depending on `np`, which this notebook never imports
# explicitly, and also yields an empty list when nothing passes the threshold
# instead of raising on an empty array.
regulatory_reactions = [
    rxn_id
    for rxn_id in nonzero_cost_ids
    if any(bound != 0 for bound in model.reactions.get_by_id(rxn_id).bounds)
]

# Show the reaction equations of the selected reactions.
[model.reactions.get_by_id(rr).reaction for rr in regulatory_reactions]

['5mthf_c + dhbpt_c --> mlthf_c + thbpt_c',
 'thbpt4acam_c --> dhbpt_c + h2o_c',
 'o2_c + thbpt_c + trp_L_c --> 5htrp_c + dhbpt_c + h2o_c',
 'dhbpt_c + 2.0 h_c + nadh_c --> nad_c + thbpt_c',
 'dhbpt_c + 2.0 h_c + nadph_c --> nadp_c + thbpt_c',
 'o2_c + thbpt_c + tyr_L_c --> 34dhphe_c + dhbpt_c + h2o_c']

In [7]:
def set_rxn_bounds(rxn_id, bounds=(0, 33.5)):
    """Set the flux bounds of reaction `rxn_id` in the global `model`.

    Parameters
    ----------
    rxn_id : str
        Identifier of the reaction to constrain.
    bounds : tuple, optional
        (lower, upper) bounds to apply. Defaults to (0, 33.5), the value that
        was previously hard-coded, so one-argument calls (including use
        through `map`) behave exactly as before.

    Returns
    -------
    tuple
        The reaction's bounds as read back from the model after assignment.
    """
    rxn = model.reactions.get_by_id(rxn_id)
    rxn.bounds = bounds
    return rxn.bounds


# Apply the default bounds to every selected reaction; the per-reaction
# return values are not needed here.
for rxn_id in regulatory_reactions:
    set_rxn_bounds(rxn_id)

# Re-optimize and display the objective value under the new bounds.
model.optimize().objective_value

100.5

In [8]:
from cobra.sampling import OptGPSampler


##HEALTHY
# Constrain r0399 between 99 and the current optimal objective value, and keep
# PHETHPTOX2 blocked.
current_optimum = model.optimize().objective_value
model.reactions.get_by_id("r0399").bounds = (99, current_optimum)
model.reactions.get_by_id("PHETHPTOX2").bounds = (0, 0)

# Small run (thinning=1, 10 samples). Alternative full-size settings kept for reference:
#   optgp = OptGPSampler(model, processes=os.cpu_count(), thinning=500)
#   samples = optgp.sample(20_000)
#   samples.to_parquet("./results/fluxes/flux_samples_CONTROL_20_000.parquet.gzip", compression='gzip')
n_workers = os.cpu_count()
optgp = OptGPSampler(model, thinning=1, processes=n_workers)
samples = optgp.sample(10)
samples

Unnamed: 0,10FTHF5GLUtl,10FTHF5GLUtm,10FTHF6GLUtl,10FTHF6GLUtm,10FTHF7GLUtl,10FTHF7GLUtm,10FTHFtl,10FTHFtm,11DOCRTSLtm,11DOCRTSLtr,...,PROTEIN_BS,DM_PROTEIN,ATPS4mi,CYOR_u10mi,Htmi,NADH2_u10mi,CYOOm3i,CYOOm2i,ARTPLM1,ARTPLM2
0,0.019678,0.011827,0.002702,0.002425,0.002846,0.0026,-0.025226,97.201351,0.003455,-0.001842,...,0.0,0.0,99.981347,32.819516,94.292157,9.786381,80.80687,0.141619,0.983175,0.01248
1,0.018174,0.010923,0.002495,0.00224,0.002629,0.002402,-0.023298,0.651139,0.003191,-0.001701,...,0.0,0.0,99.982773,2.933521,1.85624,0.345863,97.036654,0.130795,0.984461,0.979167
2,0.035481,0.021324,0.004871,0.004373,0.749692,0.749249,-0.790044,1.271193,0.00623,-0.003322,...,0.0,0.0,99.966368,56.477877,97.306577,19.091094,48.468697,0.255345,0.955453,0.945118
3,0.023711,0.01425,2.173395,2.173062,0.00343,0.003133,-2.200535,-0.470506,0.004163,-0.00222,...,0.0,0.0,99.977525,56.302924,2.421695,0.4191,42.526907,1.334017,0.021943,0.015037
4,0.029319,0.017621,0.004025,0.003614,0.004241,0.003874,-0.037585,91.930187,0.005148,-0.002745,...,0.0,0.0,99.972209,27.927452,97.77436,24.368028,71.909322,0.210998,0.950022,0.018594
5,0.017433,0.801268,0.793184,0.002149,0.002522,0.002304,-0.813139,1.415381,0.003061,-0.001632,...,0.0,0.0,99.983475,82.147087,98.676609,0.308144,42.071935,0.125462,0.951506,0.980017
6,0.038012,0.022845,0.005219,0.004685,0.005498,0.005023,-0.048729,-90.919679,0.006674,-0.003559,...,0.0,0.0,99.963969,44.734089,97.114415,28.793078,50.441843,0.273563,0.957995,0.946923
7,0.02487,0.014947,0.003414,0.003065,0.003597,0.003287,-0.031882,0.891038,1.461916,-1.459878,...,0.0,0.0,99.976426,56.519406,95.709839,0.439597,66.54468,0.400203,0.978736,0.938621
8,0.028777,0.017295,0.003951,0.003547,0.004163,0.003803,-0.036891,1.031018,0.005053,-0.002694,...,0.0,0.0,99.972723,29.912441,5.532891,0.508657,70.727746,0.207101,0.975395,0.044188
9,0.032995,0.01983,0.00453,0.004067,0.004773,0.00436,-0.042297,1.182116,0.005793,-0.003089,...,0.0,0.0,99.968725,31.557274,5.204292,5.661343,63.813729,0.237452,0.048879,0.020925


I have a pandas DataFrame named "samples". Using to_parquet, I want to save the output to /results/fluxes/flux_samples_PKU_20_000.parquet.gzip, which is a subfolder at the same level as the subfolder where the script is. Create the code to automatically export it as I want.



In [9]:
##PKU
# Create the output folder before sampling: to_parquet does not create missing
# parent directories, and a failure at the final write would come only after
# the 20,000-sample run at thinning 500 has finished.
pku_output_path = "./results/fluxes/flux_samples_PKU_20_000.parquet.gzip"
os.makedirs(os.path.dirname(pku_output_path), exist_ok=True)

# Cap r0399 at 5 and keep PHETHPTOX2 blocked for this condition.
model.reactions.get_by_id("r0399").bounds       = (0, 5)
model.reactions.get_by_id("PHETHPTOX2").bounds  = (0, 0)

optgp = OptGPSampler(model, processes=os.cpu_count(), thinning=500)
samples_pku = optgp.sample(20_000)
samples_pku.to_parquet(pku_output_path, compression='gzip')