# Load and execute the model

In [1]:
import platform
print("python version " + platform.python_version())
import sys
import json
import re
import os
from os.path import exists
if exists("config.cfg"):
    from configparser import ConfigParser
    config = ConfigParser()
    config.read("config.cfg")
    paths = config.get("script", "syspaths").split(";")
    for path in paths:
        sys.path.append(path)

python version 3.9.12


In [2]:
from datetime import datetime
%run mscommfitting.py

graphs = [
    {
        'trial':'B4',
        'content': 'OD',
        'experimental_data': True
    }
]

mscommfit = MSCommFitting()
mscommfit.load_data(phenotypes_csv_path = 'PhenotypeStoichiometry.csv', 
                           signal_tsv_paths = {'RFP_pf-ec_4-29-22.txt':'ecoli', 'GFP_pf-ec_4-29-22.txt':'pf', 'OD_pf-ec_4-29-22.txt':'OD'},
                           media_conc_path = 'media_conc.csv',
                           species_abundance_path = 'species_abundances.csv',
                           carbon_conc_series = {'rows': {
                               'cpd00136': {'B':0, 'C': 0, 'D': 1, 'E': 1, 'F': 4, 'G': 4},
                               'cpd00179': {'B':5, 'C': 5, 'D':5, 'E': 5, 'F': 5, 'G': 5},
                               }},
                           ignore_trials = {'rows': ['C', 'D', 'E', 'F']}, # 'wells': ['C2', 'G3', 'C12']},  # 'columns': [6,7,8,9,10,11,12]},
                           # ignore_timesteps = [x for x in range(20,167)],
                           zip_path='mscomfit.zip'
                   )
print(datetime.today())
mscommfit.define_problem(parameters={
    'v':0.3,
    'cvmin': 100,
    'bcv': 100,
    'diffpos': 100,
    'diffneg': 100
    }, zip_name='simple_full_community.zip',final_relative_carbon_conc = 0.1, metabolites_to_track = ["cpd00179","cpd00029","cpd00136"])
print(datetime.today())
%time mscommfit.compute(graphs, zip_name='simple_full_community.zip')
print(datetime.today())
print('growth Vmax:', mscommfit.parameters['v'], 'objective value:', mscommfit.problem.objective.value, '\n\n\n\n')



TypeError: load_data() got an unexpected keyword argument 'phenotypes_csv_path'

# Change the parameters

## Explore Vmax values

In [None]:
from numpy import linspace
%run mscommfitting.py

for vmax in linspace(.1,.4,10):
    mscommfit.change_parameters(vmax=-vmax, export_zip_name='simple_full_community_edited.zip', extract_zip_name='simple_full_community.zip')
    %time mscommfit.compute(graphs, 'simple_full_community_edited.zip')
    print(datetime.today())
    print('growth Vmax:', vmax, 'objective value:', mscommfit.problem.objective.value, '\n\n\n\n')

## Explore relative final concentrations

In [None]:
from numpy import linspace
%run mscommfitting.py

previous_relative_conc = .1
for final_relative_carbon_conc in linspace(.1,.5,5):
    mscommfit.change_parameters(final_relative_carbon_conc=final_relative_carbon_conc, previous_relative_conc=previous_relative_conc, export_zip_name='simple_full_community_edited.zip', extract_zip_name='simple_full_community.zip')
    %time mscommfit.compute(graphs, 'simple_full_community_edited.zip')
    print(datetime.today())
    print('final_relative_carbon_conc:', final_relative_carbon_conc, 'objective value:', mscommfit.problem.objective.value, '\n\n\n\n')

## Km calculation for maltose

In [None]:
from datetime import datetime
%run mscommfitting.py

# define the content
vmax = {
    'cpd00179': 2.266667
}
km = {
    'cpd00179': 2
}
phenotype_met = {
    'ecoli_malt': 'cpd00179'
}


graphs = [
    {
        'trial':'B4',
        'content': 'all_biomass',
        'experimental_data': True
    }
]

mscommfit = MSCommFitting()
mscommfit.load_data(phenotypes_csv_path = 'PhenotypeStoichiometry.csv', 
                           signal_csv_paths = {'path':'PF-EC 4-29-22 ratios and 4HB changes (1).xlsx', 'Raw OD(590)':'OD', 'mNeonGreen':'pf', 'mRuby':'ecoli'},
                           media_conc_path = 'media_conc.csv',
                           species_abundance_path = 'species_abundances.csv',
                           phenotype_met = phenotype_met,
                           carbon_conc_series = {'rows': {
                               'cpd00136': {'B':0, 'C': 0, 'D': 1, 'E': 1, 'F': 4, 'G': 4},
                               'cpd00179': {'B':5, 'C': 5, 'D':5, 'E': 5, 'F': 5, 'G': 5},
                               }},
                            ignore_trials = {'rows': ['C', 'D', 'E', 'F', 'G'], 'columns': [1,2,3,5,6,7,8,9,10,11,12]}, # 'wells': ['C2', 'G3', 'C12']}
                           # ignore_timesteps = [x for x in range(20,167)],
                           # zip_path='mscomfit.zip'
                   )
print(datetime.today())
mscommfit.define_problem(parameters={
    'v':0.3,
    'cvmin': 100,
    'bcv': 100,
    'diffpos': 100,
    'diffneg': 100
    }, 
    # zip_name='simple_full_community.zip',
    final_relative_carbon_conc = 0.1, metabolites_to_track = ["cpd00179","cpd00029","cpd00136"])
%time mscommfit.compute(graphs, 'simple_full_community_edited.zip')
mscommfit.change_parameters(vmax=vmax, km=km, strain='ecoli_malt', graphs=graphs,
                            export_zip_name='simple_full_community_edited.zip', extract_zip_name='simple_full_community.zip')

print(datetime.today())

# B4 sans bad data

In [None]:
from datetime import datetime
%run mscommfitting.py

graphs = [
    {
        'trial':'B4',
        'content': 'OD',
        'experimental_data': True
    }
]

mscommfit = MSCommFitting()
mscommfit.load_data(phenotypes_csv_path = 'PhenotypeStoichiometry.csv', 
                    signal_tsv_paths = {'RFP_pf-ec_4-29-22.txt':'ecoli', 'GFP_pf-ec_4-29-22.txt':'pf', 'OD_pf-ec_4-29-22.txt':'OD'},
                    media_conc_path = 'media_conc.csv',
                    species_abundance_path = 'species_abundances.csv',
                    carbon_conc_series = {'rows': {
                        'cpd00136': {'B':0, 'C': 0, 'D': 1, 'E': 1, 'F': 4, 'G': 4},
                        'cpd00179': {'B':5, 'C': 5, 'D':5, 'E': 5, 'F': 5, 'G': 5},
                        }},
                    ignore_trials = {'rows': ['C', 'D', 'E', 'F', 'G'], 'columns': [1,2,3,5,6,7,8,9,10,11,12]}, # 'wells': ['C2', 'G3', 'C12']}
                   # ignore_timesteps = [x for x in range(20,167)],
                    zip_path='mscomfit.zip'
                   )
print(datetime.today())
mscommfit.define_problem(parameters={
                                    'v':0.3,
                                    'cvmin': 100,
                                    'bcv': 100,
                                    'diffpos': 100,
                                    'diffneg': 100
                                    }, 
                         zip_name='simple_full_community.zip', 
                         final_relative_carbon_conc = 0.1, 
                         metabolites_to_track = ["cpd00179","cpd00029","cpd00136"], 
                         bad_data_timesteps = {'*':'23:'})
print(datetime.today())
%time mscommfit.compute(graphs, zip_name='simple_full_community.zip')
print(datetime.today())
print('growth Vmax:', mscommfit.parameters['v'], 'objective value:', mscommfit.problem.objective.value, '\n\n\n\n')

# Poster figure

In [None]:
from datetime import datetime
%run mscommfitting.py

graphs = [
    {
        'trial':'B4',
        'content': 'all_biomass',
        'experimental_data': True
    }
]

mscommfit = MSCommFitting()
mscommfit.load_data(phenotypes_csv_path = 'PhenotypeStoichiometry.csv', 
                    signal_tsv_paths = {'RFP_pf-ec_4-29-22.txt':'ecoli', 'GFP_pf-ec_4-29-22.txt':'pf', 'OD_pf-ec_4-29-22.txt':'OD'},
                    media_conc_path = 'media_conc.csv',
                    species_abundance_path = 'species_abundances.csv',
                    carbon_conc_series = {'rows': {
                        'cpd00136': {'B':0, 'C': 0, 'D': 1, 'E': 1, 'F': 4, 'G': 4},
                        'cpd00179': {'B':5, 'C': 5, 'D':5, 'E': 5, 'F': 5, 'G': 5},
                        }},
                    ignore_trials = {'rows': ['C', 'D', 'E', 'F', 'G'], 'columns': [1,2,3,5,6,7,8,9,10,11,12]}, # 'wells': ['C2', 'G3', 'C12']}
                   # ignore_timesteps = [x for x in range(20,167)],
                    zip_path='mscomfit.zip'
                   )
print(datetime.today())
mscommfit.define_problem(parameters={
                                    'v':0.3,
                                    'cvmin': 100,
                                    'bcv': 100,
                                    'diffpos': 100,
                                    'diffneg': 100
                                    }, 
                         zip_name='simple_full_community.zip', 
                         final_relative_carbon_conc = 0.1, 
                         metabolites_to_track = ["cpd00179","cpd00029","cpd00136"], 
                         bad_data_timesteps = {'*':'23:'})
print(datetime.today())
%time mscommfit.compute(graphs, zip_name='simple_full_community.zip', publishing=True)
print(datetime.today())
print('growth Vmax:', mscommfit.parameters['v'], 'objective value:', mscommfit.problem.objective.value, '\n\n\n\n')

## Export concentration figures

In [None]:
from datetime import datetime
from zipfile import ZipFile
from pandas import read_csv
import os
%run mscommfitting.py
# with ZipFile('msComFit.zip', 'r') as zp:
#     zp.extractall()
# fluxes_df = read_csv('PhenotypeStoichiometry.csv')
graphs = []
metabolites_to_track = ["cpd00179","cpd00029","cpd00136"]
for metabolite in metabolites_to_track: # fluxes_df['rxn'].to_list():
    graphs.append({
        'trial':'B4',
        'content': metabolite,
        'species': 'ecoli',
        'phenotype': 'malt'
    })

mscommfit = MSCommFitting()
mscommfit.load_data(phenotypes_csv_path = 'PhenotypeStoichiometry.csv', 
                    signal_csv_paths = {'path':'PF-EC 4-29-22 ratios and 4HB changes (1).xlsx', 'Raw OD(590)':'OD', 'mNeonGreen':'pf', 'mRuby':'ecoli'},
                    media_conc_path = 'media_conc.csv',
                    species_abundance_path = 'species_abundances.csv',
                    carbon_conc_series = {'rows': {
                        'cpd00136': {'B':0, 'C': 0, 'D': 1, 'E': 1, 'F': 4, 'G': 4},
                        'cpd00179': {'B':5, 'C': 5, 'D':5, 'E': 5, 'F': 5, 'G': 5},
                        }},
                    ignore_trials = {'rows': ['C', 'D', 'E', 'F', 'G'], 'columns': [1,2,3,5,6,7,8,9,10,11,12]}, # 'wells': ['C2', 'G3', 'C12']}
                   # ignore_timesteps = [x for x in range(20,167)],
                    extract_zip_path='figures.zip'
                   )
print(datetime.today())
mscommfit.define_problem(parameters={
                                    'v':0.3,
                                    'cvmin': 100,
                                    'bcv': 100,
                                    'diffpos': 100,
                                    'diffneg': 100
                                    }, 
                         export_zip_name='figures.zip', 
                         final_relative_carbon_conc = 0.1, 
                         metabolites_to_track = metabolites_to_track, 
                         bad_data_timesteps = {'*':'23:'},
                         zero_start=["cpd00029"])
print(datetime.today())
%time mscommfit.compute(graphs, export_zip_name='figures.zip', publishing=True)
print(datetime.today())
print('growth Vmax:', mscommfit.parameters['v'], 'objective value:', mscommfit.problem.objective.value, '\n\n\n\n')

# Full system sans bad data

In [None]:
from datetime import datetime
%run mscommfitting.py

graphs = [
    {
        'trial':'B4',
        'content': 'OD',
        'experimental_data': True
    }
]

mscommfit = MSCommFitting()
mscommfit.load_data(phenotypes_csv_path = 'PhenotypeStoichiometry.csv', 
                    signal_tsv_paths = {'RFP_pf-ec_4-29-22.txt':'ecoli', 'GFP_pf-ec_4-29-22.txt':'pf', 'OD_pf-ec_4-29-22.txt':'OD'},
                    media_conc_path = 'media_conc.csv',
                    species_abundance_path = 'species_abundances.csv',
                    carbon_conc_series = {'rows': {
                        'cpd00136': {'B':0, 'C': 0, 'D': 1, 'E': 1, 'F': 4, 'G': 4},
                        'cpd00179': {'B':5, 'C': 5, 'D':5, 'E': 5, 'F': 5, 'G': 5},
                        }},
                    # ignore_trials = {'rows': ['C', 'D', 'E', 'F', 'G'], 'columns': [1,2,3,5,6,7,8,9,10,11,12]}, # 'wells': ['C2', 'G3', 'C12']}
                   # ignore_timesteps = [x for x in range(20,167)],
                    zip_path='mscomfit.zip'
                   )
print(datetime.today())
mscommfit.define_problem(parameters={
                                    'v':0.3,
                                    'cvmin': 100,
                                    'bcv': 100,
                                    'diffpos': 100,
                                    'diffneg': 100
                                    }, 
                         zip_name='simple_full_community.zip', 
                         final_relative_carbon_conc = 0.1, 
                         metabolites_to_track = ["cpd00179","cpd00029","cpd00136"], 
                         bad_data_timesteps = {'*':'23:'})
print(datetime.today())
%time mscommfit.compute(graphs, zip_name='simple_full_community.zip')
print(datetime.today())
print('growth Vmax:', mscommfit.parameters['v'], 'objective value:', mscommfit.problem.objective.value, '\n\n\n\n')

# plotting phenotypes of each species

In [None]:
# define the environment path 
import os
local_cobrakbase_path = os.path.join('C:', 'Users', 'Andrew Freiburger','Documents','Argonne','cobrakbase')
os.environ["HOME"] = local_cobrakbase_path

# import the models
import cobrakbase
token = 'RBP7R34IILI6IQN5533VVSVUZM6H7YU7'
kbase_api = cobrakbase.KBaseAPI(token)
# model1 = kbase_api.get_from_ws("iML1515",76994)
# model2 = kbase_api.get_from_ws("iSB1139.kb.gf",30650)

In [None]:
from datetime import datetime
from time import process_time
from pandas import set_option
set_option("display.max_rows", None)
# import the KBase objects
graphs = [
    {
        'trial':'B4',
        'content': 'biomass',
        # "species": ["ecoli"],
        'experimental_data': True
    },
    {
        'trial':'B4',
        'content': "cpd00029",
    }
]

# run MSCommFitting
%run mscommfitting.py
time1 = process_time()
mscommfit = MSCommFitting()
mscommfit.load_data(#phenotypes_csv_path = 'PhenotypeStoichiometry.csv', 
                    community_members = {
                        kbase_api.get_from_ws("iML1515",76994): {
                            'name': 'ecoli', 
                            'phenotypes': {#'acetate': kbase_api.get_from_ws('93465/13/1'), 
                                        'malt': {"cpd00179":[-1,-1]} #kbase_api.get_from_ws("93465/23/1")} #'93465/9/1')}   # !!! The phenotype name must align with the experimental IDs for the graphs to find the appropriate data
                            }
                        },
                        kbase_api.get_from_ws("iSB1139.kb.gf",30650): {
                            'name': 'pf',
                            'phenotypes': {'acetate': {"cpd00029":[-1,-1]}, # kbase_api.get_from_ws("93465/25/1"), #'93465/11/1'), 
                                        '4HB': {"cpd00136":[-1,-1]} # kbase_api.get_from_ws('	93465/27/1')} #93465/15/1')}
                            }
                        }
                    },
                    base_media = kbase_api.get_from_ws("93465/19/1"),
                    signal_csv_paths = {'path':'Jeffs_data/PF-EC 4-29-22 ratios and 4HB changes (1).xlsx', 'Raw OD(590)':'OD', 'mNeonGreen':'pf', 'mRuby':'ecoli'},
                    media_conc_path = 'Jeffs_data/media_conc.csv',
                    carbon_conc_series = {'rows': {
                        'cpd00136': {'B':0, 'C': 0, 'D': 1, 'E': 1, 'F': 4, 'G': 4},
                        'cpd00179': {'B':5, 'C': 5, 'D':5, 'E': 5, 'F': 5, 'G': 5},
                        }},
                    ignore_trials = {'rows': ['C', 'D', 'E', 'F', 'G'], 'columns': [1,2,3,5,6,7,8,9,10,11,12]}, # 'wells': ['C2', 'G3', 'C12']}
                   # ignore_timesteps = [x for x in range(20,167)],
                    # zip_path='mscomfit.zip'
                   )
time2 = process_time()
print(datetime.today(), (time2-time1)/60, 'minutes later')
mscommfit.define_problem(parameters={
                                    'v':.3,
                                    'cvmin': 100,
                                    'bcv': 100,
                                    'diffpos': 100,
                                    'diffneg': 100
                                    }, 
                         # zip_name='simple_full_community.zip', 
                         final_relative_carbon_conc = 0.1, 
                         metabolites_to_track = ["cpd00179","cpd00029","cpd00136"], 
                         bad_data_timesteps = {'*':'23:'}, zero_start=["cpd00029"])
print(datetime.today())
%time mscommfit.compute(graphs) #, zip_name='simple_full_community.zip')
print('growth Vmax:', mscommfit.parameters['v'], 'objective value:', mscommfit.problem.objective.value, '\n\n\n\n')

# Demonstrating expedited speed of dictionaries versus Optlang API

In [None]:
from time import process_time
from optlang import Variable, Constraint, Objective, Model
from modelseedpy.core.optlanghelper import OptlangHelper
from json import dump

# optlang API and OptlangHelper API
time_1 = process_time()
model = Model()
variables, constraints = [], []
dict_vars, dict_cons = [], []
for t in range(1000):
    # define the variable
    var_name = f"g_{t}" ; var_bounds = (0, 1000) ; dict_vars.append((var_name, var_bounds, "continuous"))
    var = Variable(var_name, lb=0, ub=1000)
    variables.append(var)
    
    # define the constraint
    cons_name = f"const_{t}" ; cons_bounds = (-1000, 1000) ; cons_expr = [(t,var_name)]
    dict_cons.append((cons_name, cons_bounds, cons_expr))
    constraints.append(Constraint(t*var, name=cons_name, lb=-1000, ub=1000))
model.add(variables) ; model.add(constraints)

# define the objective
model.objective = Objective(sum(variables))
dict_obj = ("Test Helper Dictionary", [var.name for var in variables], "max")

with open("API_Optlang_model.json", 'w') as out:
    dump(model.to_json(), out, indent=3)
    
dict_model = OptlangHelper.define_model("OptlangHelper test", dict_vars, dict_cons, dict_obj)
with open("OptlangHelper_model.json", 'w') as out:
    dump(dict_model, out, indent=3)
    
time_2 = process_time()
print(f'Optlang method {time_2-time_1} seconds')

# dictionary
time_3 = process_time()
model_json = {
    'name':"Test JSON model",
    'variables':[],
    'constraints':[],
    "objective": {
        "name": "sample_objective",
        "expression":{"type": "Add", "args":[]},
        "direction": "max"
    }
}
for t in range(1000):
    model_json['variables'].append({"name": f"g_{t}", "lb": 0, "ub": 1000, "type": "continuous"})
    model_json['constraints'].append({
        "name": f"const_{t}",
        "expression": {
              "type": "Mul",
              "args": [
                 {
                    "type": "Number",
                    "value": t
                 },
                 {
                    "type": "Symbol",
                    "name": f"g_{t}"
                 }
              ]
           },
         "lb": -1000,
         "ub": 1000,
         "indicator_variable": None,
         "active_when": 1
    })
    model_json['objective']["expression"]["args"].append({
        "type": "Mul",
        "args": [
          {
             "type": "Number",
             "value": 1.0
          },
          {
             "type": "Symbol",
             "name": f"g_{t}",
          }
        ]
    })
model2 = Model.from_json(model_json)
model3 = Model.from_json(dict_model)
time_4 = process_time()
print(f'dictionary method {time_4-time_3} seconds')
model.optimize(); model1_opt = model.objective.value
model2.optimize(); model2_opt = model2.objective.value
model3.optimize(); model3_opt = model3.objective.value
if model1_opt != model2_opt:
    print(model1_opt, model2_opt)
elif model1_opt != model3_opt:
    print(model1_opt, model3_opt)
else:
    print("models parity")

In [None]:
print(type(model))
print(type(model2))

In [None]:
from pprint import pprint
# from typing import Union
from json import dump

def define_constraint_expression(cons_expression):
    if len(cons_expression) > 1:
        constraint_expression = {
            "type": "Add",
            "args": []
        }
        for term in cons_expression:
            term_expression = {
                "type": "Mul",
                "args": []
            }
            for value in term:
                if isinstance(value, str):
                    term_expression["args"].append({
                        "type":"Symbol",
                        "name": value
                    })
                elif isinstance(value, (float, int)):
                    term_expression["args"].append({
                        "type":"Number",
                        "name": value
                    })
            constraint_expression["args"].append(term_expression)
    else:
        for value in cons_expression[0]:
            constraint_expression = {
                "type": "Mul",
                "args": []
            }
            if isinstance(value, str):
                constraint_expression["args"].append({
                    "type":"Symbol",
                    "name": value
                })
            elif isinstance(value, (float, int)):
                constraint_expression["args"].append({
                    "type":"Number",
                    "name": value
                })
    return constraint_expression



cons_expression = [("var1", 123), ("var2", 1)]
constraint = {
        "name": "test",
        "expression": define_constraint_expression(cons_expression),
         "lb": 0,
         "ub": 1000,
         "indicator_variable": None,
         "active_when": 1
    }
with open("test.json", 'w') as out:
    dump(constraint, out, indent=3)
pprint(constraint)

# debug the bad_timesteps arguemnt

In [None]:
bad_data_timesteps = {'*':'23:'}
trials = {1, 2, 3, 4, 5, 6, 7}

In [None]:
# define the environment path 
import os
local_cobrakbase_path = os.path.join('C:', 'Users', 'Andrew Freiburger','Documents','Argonne','cobrakbase')
os.environ["HOME"] = local_cobrakbase_path

# import the models
import cobrakbase
token = 'YY57WB6SZ3ZZNP6Q4ERNV675XZWV4EI5'
kbase_api = cobrakbase.KBaseAPI(token)

# define the simulation

from datetime import datetime
from time import process_time
from pandas import set_option
set_option("display.max_rows", None)
## import the KBase objects
graphs = [
    {
        'trial':'B4',
        'content': 'biomass',
        # "species": ["ecoli"],
        'experimental_data': True
    },
    {
        'trial':'B4',
        'content': "cpd00029",
    }
]

## run MSCommFitting
%run mscommfitting.py
time1 = process_time()
mscommfit = MSCommFitting()
mscommfit.load_data(#phenotypes_csv_path = 'PhenotypeStoichiometry.csv', 
                    community_members = {
                        kbase_api.get_from_ws("iML1515",76994): {
                            'name': 'ecoli', 
                            'phenotypes': {#'acetate': kbase_api.get_from_ws('93465/13/1'), 
                                        'malt': {"cpd00179":[-1,-1]} #kbase_api.get_from_ws("93465/23/1")} #'93465/9/1')}   # !!! The phenotype name must align with the experimental IDs for the graphs to find the appropriate data
                            }
                        },
                        kbase_api.get_from_ws("iSB1139.kb.gf",30650): {
                            'name': 'pf',
                            'phenotypes': {'acetate': {"cpd00029":[-1,-1]}, # kbase_api.get_from_ws("93465/25/1"), #'93465/11/1'), 
                                        '4HB': {"cpd00136":[-1,-1]} # kbase_api.get_from_ws('	93465/27/1')} #93465/15/1')}
                            }
                        }
                    },
                    base_media = kbase_api.get_from_ws("93465/19/1"),
                    signal_csv_paths = {'path':'data/Jeffs_data/PF-EC 4-29-22 ratios and 4HB changes (1).xlsx', 
                                        'Raw OD(590)':'OD', 'mNeonGreen':'pf', 'mRuby':'ecoli'},
                    carbon_conc_series = {'rows': {
                        'cpd00136': {'B':0, 'C': 0, 'D': 1, 'E': 1, 'F': 4, 'G': 4},
                        'cpd00179': {'B':5, 'C': 5, 'D':5, 'E': 5, 'F': 5, 'G': 5},
                        }},
                    ignore_trials = {'rows': ['C', 'D', 'E', 'F', 'G'], 'columns': [1,2,3,5,6,7,8,9,10,11,12]}, # 'wells': ['C2', 'G3', 'C12']}
                   # ignore_timesteps = [x for x in range(20,167)],
                    # zip_path='mscomfit.zip'
                   )
time2 = process_time()
print(datetime.today(), (time2-time1)/60, 'minutes later')
mscommfit.define_problem(parameters={
                                    'v':.3,
                                    'cvmin': 100,
                                    'bcv': 100,
                                    'diffpos': 100,
                                    'diffneg': 100
                                    }, 
                         # zip_name='simple_full_community.zip', 
                         final_relative_carbon_conc = 0.1, 
                         metabolites_to_track = ["cpd00179","cpd00029","cpd00136"], 
                         bad_data_timesteps = {'*':'23:'}, zero_start=["cpd00029"])
print(datetime.today())
%time mscommfit.compute(graphs) #, zip_name='simple_full_community.zip')
print('growth Vmax:', mscommfit.parameters['v'], 'objective value:', mscommfit.problem.objective.value, '\n\n\n\n')

# Programmatically remove timesteps in an OD plateau

In [1]:
# define the environment path 
import os
# local_cobrakbase_path = os.path.join('/Users/afreiburger/Documents')
local_cobrakbase_path = os.path.join('C:', 'Users', 'Andrew Freiburger','Documents','Argonne','cobrakbase')
os.environ["HOME"] = local_cobrakbase_path

# import the models
import cobrakbase
# with open("/Users/afreiburger/Documents/kbase_token.txt") as token_file:
with open("C:/Users/Andrew Freiburger/Documents/Argonne/kbase_token.txt") as token_file:
    kbase_api = cobrakbase.KBaseAPI(token_file.readline())
    
import warnings
warnings.filterwarnings(action='once')

from pandas import set_option
set_option("display.max_rows", None)



cobrakbase 0.2.8


In [3]:
%run ./data/standardized_data/datastandardization.py
growth_data_path="data/Jeffs_data/PF-EC 4-29-22 ratios and 4HB changes.xlsx"
experimental_metadata, growth_df, fluxes_df, standardized_carbon_conc, signal_species, trial_name_conversion, species_phenos_df, data_timestep_hr, simulation_timestep, media_conc = GrowthData.process(
    base_media=kbase_api.get_from_ws("93465/19/1"),
    community_members = {
        kbase_api.get_from_ws("iML1515",76994): {
            'name': 'ecoli', 
            'phenotypes': {'acetate': {"cpd00029":[-1,-1]}, #kbase_api.get_from_ws('93465/13/1'), 
                        'malt': {"cpd00179":[-1,-1]} #kbase_api.get_from_ws("93465/23/1")} #'93465/9/1')}   # !!! The phenotype name must align with the experimental IDs for the graphs to find the appropriate data
            }
        },
        kbase_api.get_from_ws("iSB1139.kb.gf",30650): {
            'name': 'pf',
            'phenotypes': {'acetate': {"cpd00029":[-1,-1]}, # kbase_api.get_from_ws("93465/25/1"), #'93465/11/1'), 
                        '4HB': {"cpd00136":[-1,-1]} # kbase_api.get_from_ws('	93465/27/1')} #93465/15/1')}
            }
        }
    },
    data_paths = {'path':growth_data_path, 'Raw OD(590)':'OD', 'mNeonGreen':'pf', 'mRuby':'ecoli'},
    species_abundances = {
        1:{"ecoli":0, "pf":1},
        2:{"ecoli":1, "pf":50},
        3:{"ecoli":1, "pf":20},
        4:{"ecoli":1, "pf":10},
        5:{"ecoli":1, "pf":3},
        6:{"ecoli":1, "pf":1},
        7:{"ecoli":3, "pf":1},
        8:{"ecoli":10, "pf":1},
        9:{"ecoli":20, "pf":1},
        10:{"ecoli":1, "pf":0},
        11:{"ecoli":0, "pf":0}
      },
    carbon_conc_series = {'rows': {
        'cpd00136': {'B':0, 'C': 0, 'D': 1, 'E': 1, 'F': 4, 'G': 4},
        'cpd00179': {'B':5, 'C': 5, 'D':5, 'E': 5, 'F': 5, 'G': 5},
    }},
    ignore_trials = {'rows': ['C', 'D', 'E', 'F', 'G'], 'columns': [1,2,3,5,6,7,8,9,10,11,12]},
    # ignore_timesteps="10:",  # The 
    species_identities_rows = {
        1:{"ecoli":"mRuby"},
        2:{"ecoli":"ACS"},
        3:{"ecoli":"mRuby"},
        4:{"ecoli":"ACS"},
        5:{"ecoli":"mRuby"},
        6:{"ecoli":"ACS"}
    }
)

  met_id = re.sub('(\_\w\d+)', '', met)
  medium = pd.Series()
  medium = pd.Series()
  medium = pd.Series()
  medium = pd.Series()


The ['B2', 'B3', 'B5', 'B6', 'B7', 'B8', 'B9', 'B10', 'B11', 'B12', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10', 'D11', 'D12', 'E2', 'E3', 'E4', 'E5', 'E6', 'E7', 'E8', 'E9', 'E10', 'E11', 'E12', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10', 'F11', 'F12', 'G2', 'G3', 'G4', 'G5', 'G6', 'G7', 'G8', 'G9', 'G10', 'G11', 'G12'] trials were dropped from the OD measurements per the ignore_trials parameter.
[1.19240942 1.1927582  1.19249437 1.1894573  1.18660331 1.18786633
 1.18184414 1.17995412]
The [159, 160, 161, 162, 163, 164, 165, 166] timesteps were removed for the OD OD data since the OD plateaued and is no longer valid.
The ['B2', 'B3', 'B5', 'B6', 'B7', 'B8', 'B9', 'B10', 'B11', 'B12', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10', 'D11', 'D12', 'E2', 'E3', 'E4', 'E5', 'E6', 'E7', 'E8', 'E9', 'E10', 'E11', 'E12', 'F2', 'F3

In [37]:
def _check_plateau(org_df, signal, name, significant_deviation, timesteps_len):
    dataframe = org_df.copy()  # this prevents an irrelevant warning from pandas
    dropped = []
    for trial, row in dataframe.iterrows():
        row_array = np.array(row.to_list())
        values = []
        ## remove trials for which the biomass growth did not change by the determined minimum deviation
        for index, val in enumerate(row_array):
            if val / row_array[0] >= significant_deviation:
                values.append(val)
                if len(values) > timesteps_len:
                    del values[0]
                remaining_values = list(dataframe.columns[index-len(values)+1:])
                if all([len(values) == timesteps_len, values[-1] <= values[0],
                        remaining_values[-1] <= remaining_values[0]*1.1]):
                    # the entire plateau, minus the first point of plateau, are removed
                    dropped = remaining_values
                    for col in remaining_values:
                        dataframe.drop(col, axis=1, inplace=True)
                    break
    print(values)
    print(f"The {dropped} timesteps were removed for the {name} {signal} data"
          f" since the OD plateaued and is no longer valid.")
    return dataframe


_check_plateau(values_df, "OD", "OD", 2, 5)

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,157,158,159,160,161,162,163,164,165,166
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
B4,0.067329,0.069574,0.064329,0.064587,0.063576,0.067418,0.064351,0.06755,0.065555,0.064353,...,1.120975,1.172624,1.192409,1.192758,1.192494,1.189457,1.186603,1.187866,1.181844,1.179954


[1.19240942001343, 1.1927582025528, 1.19249436855316, 1.18945729732513, 1.186603307724]
The [159, 160, 161, 162, 163, 164, 165, 166] timesteps were removed for the OD OD data since the OD plateaued and is no longer valid.


Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,149,150,151,152,153,154,155,156,157,158
Well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
B4,0.067329,0.069574,0.064329,0.064587,0.063576,0.067418,0.064351,0.06755,0.065555,0.064353,...,0.924227,0.953201,0.994628,1.020234,1.044841,1.073502,1.099927,1.119189,1.120975,1.172624
