In [11]:
# import statements
import math
import os
import pickle
import sys

import numpy as np
import pandas as pd

# project-local helper modules are imported from ../functions/
sys.path.insert(0, '../functions/')
import basal_model_calcs as bmc
import create_cAct_cInh_vals as cv
import interface_GAMS as iG
import mRNA_ratios as mr
import parameter_optimization as po

In [12]:
# settings
# Run-wide configuration dictionary; add additional settings that should be
# changed between runs here.
flags = dict(
    # ---- general flags ----
    force_rerun=False,
    sanity_plots=True,
    central_gene='b1101',
    basal_conditions=['control__wt_glc__1', 'control__wt_glc__2'],
    grid_use=-1,  # which of the generated constants grids to use; -1 is the highest KdRNAP

    # ---- basal model flags ----
    basal_bool=False,  # if True, use imported values for basal expression

    # ---- mRNA ratios flags ----
    basal_or_hard_val='basal',  # use basal conditions for the ratio, or some hard value
    hard_val=10,  # the basal log tpm value
    use_zerod_A_matrix=True,  # recalculate the ICA matrices after zeroing out gene values in iModulons not of interest

    # ---- picking KdRNAPCrp values ----
    initial_guess_ratio=.3,  # commonly needs to be raised or lowered by hand to get a good value; not automated yet
    base_cInhibitor_val=0.1,  # set minimum value used when solving cActivator, to set its dynamic range
    base_cActivator_val=0.1,  # defines the range of cInhibitor values available

    # ---- cAct & cInh grid flags ----
    auto_set_max_range=True,  # if True, set the range based on the set-to-zero values of the KdRNAPCrp calculator
    additional_tolerance=0.10,  # add this percentage of the maximum to the top
    cActivator=[-2, 2],  # uses a log10 range
    cInhibitor=[-2, 2],  # uses a log10 range
    run_greedy=False,  # set False to skip the greedy algo and save time

    # ---- GA: best cActivator/cInhibitor flags ----
    neg_grid_toss_OR_zero='toss',  # options: toss or zero; what to do with negative grid values
    seed=42,
    n_ind=100,  # starting population size
    mu=100,  # number of individuals to select for the next generation
    lambda_=100,  # number of offspring to produce
    cxpb=0.6,  # chance of crossover
    cx_prob=0.6,  # chance that a condition is crossed
    mutpb=0.4,  # chance an individual undergoes mutation
    mt_prob=0.1,  # chance that a condition in an individual is mutated
    n_gen=100,  # number of generations
    verbose=False,  # show the logbook as the GA runs
    # NOTE: To modify the selection algorithm, modify the toolbox in the GA section of the notebook

    # ---- greedy ----
    n_iter=5,  # number of greedy individuals to produce, takes ~1 min for each
    max_steps=30,  # maximum number of steps before moving on to the next condition
    n_rounds=100,  # number of loops of the shuffled conditions with the max steps for each greedy individual

    # ---- GAMs ----
    use_greedy=False,  # use the greedy algo values (if False, uses the results of the GA)
    run_on_all=False,  # run on all genes that are in the saved output folder
    limit_samples=['b1101', 'b1817', 'b1818', 'b1819'],  # if run_on_all is False, limit to these samples (or whichever of them are available)
)


# Maps each gene (locus tag) to a two-element list of iModulon names.
# Element 0 and element 1 are passed, in that order, to the mRNA-ratio
# calculation later in the notebook, so the order within a pair matters.
gene_to_act_inh_iMs = {
    gene: [first_iM, second_iM]
    for gene, first_iM, second_iM in (
        ('b1101', 'Crp-2', 'DhaR/Mlc'),
        ('b1817', 'Crp-2', 'DhaR/Mlc'),
        ('b1818', 'Crp-2', 'DhaR/Mlc'),
        ('b1819', 'Crp-2', 'DhaR/Mlc'),
        ('b2151', 'Crp-1', 'Crp-2'),
        ('b3601', 'Crp-1', 'Crp-2'),
        ('b2239', 'Crp-2', 'GlpR'),
        ('b2240', 'Crp-2', 'GlpR'),
        ('b0723', 'Crp-2', 'ArcA-1'),
        ('b1415', 'Crp-2', 'Crp-1'),
        ('b2597', 'Crp-2', 'CpxR'),
        ('b3403', 'Crp-2', 'crp-KO'),
        ('b4267', 'Crp-2', 'GntR/TyrR'),
        ('b2143', 'Crp-2', 'YieP'),
    )
}

# Grid constants, use these if basal_bool = False
# NOTE: The names of these variables must match the sympy equation
t_half_life_deg = 300
grid_constants = dict(
    KdRNAP=10**-5,
    KdRNAPCrp=2.5118864315095796e-07*1.4,
    # KeqOpening (10**-0.34444956947383365) is deliberately absent; it gets set later
    RNAP=10**-6,
    mRNA_total=1800,  # Total mRNA/cell from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3554401
    cell_volume=10**-15,  # Liters from https://bionumbers.hms.harvard.edu/bionumber.aspx?id=100004&ver=19
    k_d_TF=1,  # May change depending on model
    kDeg=np.log(2)/t_half_life_deg,  # Rate of degradation, derived from the half-life above
    promoterConcVal=10**-9,  # Promoter concentration
    TF=0,  # Concentration of TF
    u=1/3600,  # Growth Rate
)

# Equation for mRNARatio as a string, in terms of cActivator, cInhibitor,
# KdRNAP, KdRNAPCrp, KeqOpening and RNAP. Adjacent literals are concatenated
# by the parser, so the pieces below form one single-line string.
eq_str = (
    'Eq(mRNARatio,'
    '((cActivator*KdRNAP + KdRNAPCrp)*(KdRNAP + RNAP + KeqOpening*RNAP))'
    '/((1 + cActivator + cInhibitor)*KdRNAP*KdRNAPCrp'
    ' + cActivator*KdRNAP*(1 + KeqOpening)*RNAP'
    ' + KdRNAPCrp*(1 + KeqOpening)*RNAP))'
)

# Load previously saved grid constants for the central gene, if a file exists.
# When there is no saved file, the default grid_constants defined above are
# kept as-is; they used to be replaced by an empty dict here, which left the
# later basal-model step with no constants to work from.
gene_grid_name = '../data/gene_grid_constants/'+flags['central_gene']+'.pkl'
if os.path.exists(gene_grid_name):
    # NOTE: pickle.load can execute arbitrary code; only load files written by this pipeline.
    with open(gene_grid_name, 'rb') as pickle_in:
        saved_grid_constants = pickle.load(pickle_in)
    if isinstance(saved_grid_constants, dict):
        grid_constants = saved_grid_constants
    else:
        # A file holding anything other than a dict of constants is unusable; keep the defaults.
        print('Ignoring '+gene_grid_name+': expected a dict of grid constants, found '
              +type(saved_grid_constants).__name__)
    
# Seeded NumPy random generator (seed taken from flags['seed']) so that the
# GA's random draws are reproducible between runs
rng = np.random.default_rng(flags['seed'])

SyntaxError: unmatched ')' (2788649890.py, line 95)

In [3]:
# to do
# 1 - add sanity plots, likely collected throughout
# 2 - better enforce flags['force_rerun'] to save runtime
# 3 - put "per every gene" functions into one function
# 4 - figure out scaled TF concentration

# per every gene

In [5]:
# create mRNA ratios and MA values
# The result is cached as a CSV whose name encodes the central gene and the
# use_zerod_A_matrix setting; the cache is reused unless force_rerun is set.
ratios_dir = '../data/saved_mRNA_ratios_MA_vals/'
df_name = flags['central_gene']+'_zerod'+str(flags['use_zerod_A_matrix'])+'_mRNA_ratios_and_MA_vals.csv'
ratios_path = ratios_dir+df_name
if not flags['force_rerun'] and os.path.exists(ratios_path):
    ratios_df = pd.read_csv(ratios_path, index_col = 0)
else:
    # The two iModulon names registered for the central gene are passed in list order.
    iM_pair = gene_to_act_inh_iMs[flags['central_gene']]
    ratios_df = mr.calculate_mRNA_ratios_and_MA_values(iM_pair[0], iM_pair[1], flags)
    # Make sure the cache directory exists so a first run on a fresh checkout can save.
    os.makedirs(ratios_dir, exist_ok = True)
    ratios_df.to_csv(ratios_path)

In [6]:
# create TF concentration
# Placeholder cell: this step is not implemented yet, so nothing is computed here.
pass # not done yet

In [7]:
# pick KdRNAPCrp value, limit cActivator and cInhibitor based on it
# The resulting grid constants are cached per central gene as a pickle.
gene_grid_name = '../data/gene_grid_constants/'+flags['central_gene']+'.pkl'

# Try the cache first (unless a rerun is forced).
cached_grid_constants = None
if not flags['force_rerun'] and os.path.exists(gene_grid_name):
    # NOTE: pickle.load can execute arbitrary code; only load files written by this pipeline.
    with open(gene_grid_name, 'rb') as pickle_in:
        cached_grid_constants = pickle.load(pickle_in)
    if not isinstance(cached_grid_constants, dict):
        # Earlier revisions of this cell pickled the file path instead of the
        # constants; treat such a file as a cache miss and recompute.
        cached_grid_constants = None

if cached_grid_constants is None:
    # basal model calculations
    grid_constants = bmc.basal_values(grid_constants, eq_str, flags)

    # pick KdRNAPCrp
    po.create_shared_lambda_df(eq_str, grid_constants)
    grid_constants['KdRNAPCrp'] = po.pick_KdRNAPCrp(ratios_df, flags)

    # save off grid constants (the dict itself, not the path to it)
    with open(gene_grid_name, 'wb') as pickle_out:
        pickle.dump(grid_constants, pickle_out)
else:
    grid_constants = cached_grid_constants

In [7]:
# determine cActivator and cInhibitor values, and greedy
# Results are cached as pickles per central gene. The normal (GA) frame is
# always saved; the greedy frame is only saved when flags['run_greedy'] is set.
greedy_path = '../data/cAct_cInh_vals/'+flags['central_gene']+'_greedy.pkl'
norm_path = '../data/cAct_cInh_vals/'+flags['central_gene']+'.pkl'

# The cache is usable when the normal pickle exists and, if greedy results
# were requested, the greedy pickle exists too.
want_greedy = flags['run_greedy']
cache_ready = os.path.exists(norm_path) and (not want_greedy or os.path.exists(greedy_path))

if flags['force_rerun'] or not cache_ready:
    # (re)compute, then save whatever this run is configured to produce
    greedy_cAct_cInh_df, cAct_cInh_df = cv.create_cAct_cInh_for_gene(ratios_df, grid_constants, eq_str, flags)
    with open(norm_path, 'wb') as pickle_out:
        pickle.dump(cAct_cInh_df, pickle_out)
    if want_greedy:
        with open(greedy_path, 'wb') as pickle_out:
            pickle.dump(greedy_cAct_cInh_df, pickle_out)
else:
    # NOTE: pickle.load can execute arbitrary code; only load files written by this pipeline.
    with open(norm_path, 'rb') as pickle_in:
        cAct_cInh_df = pickle.load(pickle_in)
    if os.path.exists(greedy_path):
        # Load greedy results whenever they are on disk, even if not requested this run.
        with open(greedy_path, 'rb') as pickle_in:
            greedy_cAct_cInh_df = pickle.load(pickle_in)
    else:
        # Greedy results were not requested and none are cached; keep the name defined.
        greedy_cAct_cInh_df = None

# don't do per gene

In [6]:
# run GAMs
# Calls run_GAMs from the project's interface_GAMS module with the settings
# dict. The recorded output of this cell is a GAMS solver log (job cAct_model).
iG.run_GAMs(flags)

--- Job cAct_model Start 11/03/23 11:57:18 44.1.1 27c4d1f8 LEX-LEG x86 64bit/Linux
--- Applying:
    /opt/gams/gams44.1_linux_x64_64_sfx/gmsprmun.txt
--- GAMS Parameters defined
    Input /home/chris/github/regulonML/GAMs/cAct_model.gms
    ScrDir /home/chris/github/regulonML/GAMs/225a/
    SysDir /opt/gams/gams44.1_linux_x64_64_sfx/
Licensee: GAMS Demo, for EULA and demo limitations see   G230706/0001CB-GEN
          https://www.gams.com/latest/docs/UG%5FLicense.html         DC0000
          /opt/gams/gams44.1_linux_x64_64_sfx/gamslice.txt
          Demo license for demonstration and instructional purposes only
Processor information: 2 socket(s), 12 core(s), and 24 thread(s) available
GAMS 44.1.1   Copyright (C) 1987-2023 GAMS Development. All rights reserved
--- Starting compilation
--- cAct_model.gms(10) 2 Mb
--- call csv2gdx ../data/save_for_GAMs/composite_cAct_vals.csv id=cEff index=1 values=2..lastCol useHeader=y trace=0 output=./input_GDX/input.gdx
--- cAct_model.gms(11) 2 Mb
--

    461   4        2.7240493608E+02 9.9E-06   253 2.1E-04    2 F  T
    466   4        2.7240493608E+02 9.9E-06   252 8.3E-04    1 F  T
    471   4        2.7240493607E+02 9.7E-06   251 5.2E-01    1 F  T
    481   4        2.7240493605E+02 9.7E-06   250 9.4E+00    1 F  T
    491   4        2.7240493604E+02 9.7E-06   249 1.0E+00    1 F  T
 
   Iter Phase Ninf     Objective     RGmax    NSB   Step InItr MX OK
    496   4        2.7240493603E+02 9.9E-06   249 9.9E-05    1 F  T
    501   4        2.7240493603E+02 9.7E-06   249 1.0E+00      F  T
    506   4        2.7240493603E+02 9.9E-06   249 7.3E-05    1 F  T
    511   4        2.7240493603E+02 9.9E-06   249 1.8E-05    1 F  T
    516   4        2.7240493602E+02 9.7E-06   249 1.0E+00    1 F  T
    521   4        2.7240493600E+02 9.7E-06   249 1.0E+00    1 F  T
    531   4        2.7240493592E+02 9.9E-06   249 5.3E-06      F  T
    536   4        2.7240493587E+02 9.9E-06   249 4.4E-04    1 F  T
    541   4        2.7240493587E+02 9.7E-06  

     81   4        2.9753577726E+02 6.8E-01   107 4.3E+00      F  T
     86   4        2.9701070257E+02 1.5E+00   106 1.0E+01    1 F  T
     91   4        2.9688536133E+02 6.3E-01   104 2.4E-02    1 F  T
    101   4        2.9562864096E+02 2.2E+00   124 5.1E-01      F  T
    106   4        2.9508489116E+02 8.0E-01   123 4.2E-02    1 F  T
    111   4        2.9489105321E+02 8.7E-01   123 1.0E+00    1 F  T
    116   4        2.9484054932E+02 1.9E-01   122 8.8E+00    1 F  T
 
   Iter Phase Ninf     Objective     RGmax    NSB   Step InItr MX OK
    121   4        2.9482663275E+02 6.2E-01   121 1.0E+00    1 F  T
    126   4        2.9481534737E+02 1.9E-01   120 1.0E+00    1 F  T
    131   4        2.9377337356E+02 1.4E+00   139 1.0E+00      F  T
    141   4        2.9273125766E+02 1.1E+00   136 1.0E+01    1 F  T
    146   4        2.9265858379E+02 3.5E-01   135 1.8E+00    1 F  T
    151   4        2.9262161405E+02 7.4E-01   133 1.0E-02    1 F  T
    156   4        2.9261135576E+02 5.3E-01  

    751   4        2.8617128768E+02 1.2E+00   121 4.8E-06    1 F  T
    756   4        2.8617118139E+02 7.0E-01   121 1.9E+01    1 F  T
    761   4        2.8617055504E+02 8.2E-01   121 3.4E-05    1 F  T
    766   4        2.8617010236E+02 1.8E+00   121 1.3E-01    1 F  T
    771   4        2.8616955617E+02 1.5E+00   120 1.4E-02      F  T
    781   4        2.8616815359E+02 5.3E-01   119 6.9E-05    1 F  T
 
   Iter Phase Ninf     Objective     RGmax    NSB   Step InItr MX OK
    786   4        2.8616760896E+02 2.9E+00   118 1.0E+00    1 F  T
    791   4        2.8615153617E+02 2.1E+00   125 4.1E+00    1 F  T
    796   4        2.8613266810E+02 8.9E-01   123 7.6E+00    1 F  T
    801   4        2.8604123566E+02 1.7E+00   122 1.8E-01      F  T
    811   4        2.8601272269E+02 7.9E-01   120 4.2E+00    1 F  T
    816   4        2.8600986677E+02 1.6E+00   119 1.0E+00    1 F  T
    821   4        2.8600794352E+02 7.6E-01   119 6.9E-01    1 F  T
    826   4        2.8600590993E+02 7.0E-01  

   1461   4        2.8493571698E+02 1.4E+00    78 6.4E-06    1 F  T
   1466   4        2.8493569491E+02 9.3E-01    77 3.3E+00    1 F  T
   1471   4        2.8493564765E+02 2.1E+00    77 4.5E-06    1 F  T
   1476   4        2.8493557340E+02 5.8E-01    77 1.0E+00    1 F  T
   1481   4        2.8493552500E+02 3.0E+00    77 6.2E-06    1 F  T
 
   Iter Phase Ninf     Objective     RGmax    NSB   Step InItr MX OK
   1486   4        2.8493544058E+02 7.3E-01    77 5.2E-06    1 F  T
   1491   4        2.8493541817E+02 3.6E+00    75 2.0E+01    1 F  T
   1496   4        2.8493538730E+02 1.5E+00    74 3.9E-06    1 F  T
   1501   4        2.8493530240E+02 4.7E+00    78 5.4E-04      F  T
   1511   4        2.8493527506E+02 4.2E+00    78 1.9E+02    1 F  T
   1516   4        2.8493524871E+02 9.8E-01    78 1.6E-05    1 F  T
   1521   4        2.8493523150E+02 6.1E-01    75 1.0E+00    1 F  T
   1531   4        2.8493519858E+02 8.5E-01    73 2.5E-01    1 F  T
   1536   4        2.8493518197E+02 1.1E+00  

   2186   4        2.8438260610E+02 9.2E-01    35 1.3E-07    1 F  T
   2191   4        2.8438260566E+02 1.8E+00    35 6.2E-06      F  T
   2201   4        2.8438260408E+02 9.2E-01    34 4.6E-01    1 F  T
   2206   4        2.8438260362E+02 1.5E+00    33 1.4E-08    1 F  T
   2211   4        2.8438260308E+02 2.4E+00    35 2.8E-08      F  T
   2221   4        2.8438260301E+02 2.4E+00    33 1.0E+00    1 F  T
   2231   4        2.8438260298E+02 1.4E+00    33 2.2E-10      F  T
   2236   4        2.8438260298E+02 2.9E+00    33 5.3E-10    1 F  T
 
   Iter Phase Ninf     Objective     RGmax    NSB   Step InItr MX OK
   2241   4        2.8438260295E+02 8.9E-01    34 1.0E-07      F  T
   2246   4        2.8438260295E+02 1.3E+00    33 4.6E-10    2 F  T
   2251   4        2.8438260293E+02 2.5E+00    33 1.0E+00    1 F  T
   2261   4        2.8438260275E+02 8.1E-01    32 9.4E-06      F  T
   2266   4        2.8438260255E+02 8.1E-01    31 9.4E-10    1 F  T
   2271   4        2.8438260246E+02 8.1E-01  

   2861   4        2.8380788409E+02 9.0E-01    31 1.0E+00    1 F  T
   2866   4        2.8380765051E+02 1.0E+00    31 3.4E-01    1 F  T
   2871   4        2.8380740994E+02 5.4E+00    31 5.5E-05    1 F  T
   2881   4        2.8380575326E+02 9.0E-01    30 4.1E-03      F  T
   2886   4        2.8380357400E+02 1.6E+00    29 1.0E-04    1 F  T
   2891   4        2.8379673050E+02 1.8E+00    29 1.2E-04    1 F  T
   2901   4        2.8379502505E+02 6.9E-01    29 2.7E-06      F  T
 
   Iter Phase Ninf     Objective     RGmax    NSB   Step InItr MX OK
   2906   4        2.8379495559E+02 2.3E+00    28 4.4E+00    1 F  T
   2911   4        2.8379483650E+02 9.0E-01    26 2.4E+00    1 F  T
   2921   4        2.8378456914E+02 2.4E+00    31 2.5E-03    1 F  T
   2926   4        2.8378375628E+02 8.0E-01    29 3.3E+00    1 F  T
   2931   4        2.8378289984E+02 3.0E+00    28 6.3E-02      F  T
   2941   4        2.8378241716E+02 1.4E+00    27 1.0E-04    1 F  T
   2951   4        2.8378225832E+02 6.9E-01  

 
   Iter Phase Ninf     Objective     RGmax    NSB   Step InItr MX OK
   3611   4        2.8325348363E+02 1.4E+00    38 1.4E+01    1 F  T
   3616   4        2.8319425254E+02 3.3E+00    37 1.1E-02    1 F  T
   3621   4        2.8310027638E+02 1.4E+00    36 1.0E+00    1 F  T
   3626   4        2.8303147467E+02 2.6E+00    37 5.3E-03    1 F  T
   3631   4        2.8301317861E+02 8.2E-01    35 1.0E+00    1 F  T
   3636   4        2.8300222337E+02 1.9E+00    36 4.1E-03    1 F  T
   3641   4        2.8297874207E+02 6.9E-01    33 3.0E-02    1 F  T
   3651   4        2.8293287112E+02 8.2E-01    31 5.0E-01      F  T
   3656   4        2.8291771216E+02 1.8E+00    34 7.2E+00    1 F  T
   3661   4        2.8289358429E+02 1.2E+00    33 1.3E-03    3 F  T
 
   Iter Phase Ninf     Objective     RGmax    NSB   Step InItr MX OK
   3671   4        2.8288637857E+02 1.7E+00    30 2.0E+00    1 F  T
   3676   4        2.8288436620E+02 1.3E+00    29 2.0E-04    1 F  T
   3681   4        2.8288382328E+02 1.3E+0

   4321   4        2.8274470564E+02 9.2E-01     8 3.4E-03      F  T
   4331   4        2.8274470440E+02 9.2E-01     8 1.6E-06    1 F  T
   4336   4        2.8274470377E+02 1.9E+00     7 3.3E-07    1 F  T
 
   Iter Phase Ninf     Objective     RGmax    NSB   Step InItr MX OK
   4341   4        2.8274470371E+02 9.2E-01     7 2.9E-11      F  T
   4346   4        2.8274470368E+02 9.6E-01     6 1.5E-08    1 F  T
   4351   4        2.8274470266E+02 8.1E-01     6 1.0E+00    1 F  T
   4361   4        2.8274470263E+02 6.1E-01     6 3.1E-07    1 F  T
   4366   4        2.8274470263E+02 6.1E-01     6 3.1E-07    1 F  T
   4371   4        2.8274469904E+02 1.2E+00    23 3.0E-05      F  T
   4381   4        2.8274292572E+02 1.0E+00    21 1.2E-04      F  T
   4391   4        2.8274254406E+02 1.4E+00    17 4.4E-04    1 F  T
   4401   4        2.8274186375E+02 1.6E+00    15 1.0E+00    1 F  T
   4411   4        2.8274169218E+02 4.8E+00    15 5.8E-06    1 F  T
 
   Iter Phase Ninf     Objective     RGmax 

   5066   4        2.8267906042E+02 9.2E-01    11 6.1E+00    1 F  T
   5071   4        2.8267904707E+02 9.9E-01    10 1.3E-01      F  T
   5076   4        2.8267904408E+02 8.2E-01    10 5.3E-01    1 F  T
   5081   4        2.8267904383E+02 2.6E+00    10 5.8E-08    1 F  T
   5086   4        2.8267904382E+02 1.4E+00     9 4.0E+00    1 F  T
   5091   4        2.8267904377E+02 7.7E-01     8 1.0E+00    1 F  T
   5096   4        2.8267904376E+02 1.1E+00    10 7.2E-09    1 F  T
 
   Iter Phase Ninf     Objective     RGmax    NSB   Step InItr MX OK
   5101   4        2.8267904373E+02 1.8E+00     9 6.4E-09    1 F  T
   5111   4        2.8267902268E+02 1.5E+00     8 1.0E+00    1 F  T
   5116   4        2.8267902207E+02 5.9E-01     8 1.5E-01    1 F  T
   5121   4        2.8267902206E+02 2.2E+00     8 9.5E-09    1 F  T
   5131   4        2.8267902203E+02 8.9E-01     7 1.9E-09    1 F  T
   5136   4        2.8267902203E+02 8.9E-01     5 3.6E-01    1 F  T
   5141   4        2.8267902203E+02 1.4E+00  

AttributeError: module 'interface_GAMS' has no attribute 'read_GAMs'

In [3]:
# read GAMs
# Unpacks six values from iG.read_GAMs(flags): three named for cActivator
# (calc_cAct, cAct_kd_df, cAct_TF_conc_df) and three for cInhibitor
# (calc_cInh, cInh_kd_df, cInh_TF_conc_df).
# NOTE(review): the saved notebook output records
# "AttributeError: module 'interface_GAMS' has no attribute 'read_GAMs'" --
# confirm that interface_GAMS defines read_GAMs, and restart the kernel if the
# module was edited after it was first imported.
calc_cAct, cAct_kd_df, cAct_TF_conc_df, calc_cInh, cInh_kd_df, cInh_TF_conc_df = iG.read_GAMs(flags)


In [None]:
# run GAMs