In [1]:
# imports and loading
import pandas as pd
import numpy as np
import os
import ast
import multiprocessing
import sys
sys.path.insert(0, '../functions/')
import create_data_for_single_gene as cdg

# settings
testing = True # True -> load the test flags file, False -> load the full flags file

# pick the per-gene flags file that matches the testing switch
flags_filepath = '../options/test_gene_flags.csv' if testing else '../options/gene_flags.csv'

# read the per-gene flags; the first CSV column becomes the index,
# and each index entry is treated as one gene when the run inputs are built
flags_df = pd.read_csv(flags_filepath, index_col = 0)
    
# below are the default flags used if nothing is pre-set
# NOTE(review): an earlier note said to set flags by editing "saved_flags.csv" in the
# ../data folder, but the per-gene flags in this notebook are read from ../options/ --
# confirm which file is the current one
t_half_life_deg = 300 # only used below: cell_constants_kDeg = ln(2) / t_half_life_deg

# buffer applied to every GAMs bound below: each *_lo value is divided by it and
# each *_up value is multiplied by it, so changing it widens / narrows all ranges at once
# NOTE: a buffer of 100 (instead of 1000000000) was previously recorded as "best for argR"
bound_buffer = 1000000000

stable_flags = { # these do not change gene by gene
    # overall
    'only_create_ratios' : False,
    'only_check_KdRNAPCrp' : False, # if True, quit out of code after generating KdRNAPCrp, done to see if it is generating valid values through sanity check plots
    'include_Amy_samples' : True, # append on Amy's stationary phase samples to analysis
    'remove_outliers' : True, # removes samples that do not correlate well with others, see ../data_cleaning/1_locate_outliers_to_drop.ipynb
    'drop_basal_conds' : True, # if True, removes basal conditions from sample after they're used to calculate ratios (useful when they're outliers)
    
    # KdRNAPCrp optimization
    'KdRNAPCrp_sanity' : True, # if True, return sanity plots from this optimization
    
    # GAMs
    'limit_TF_conc_by_actual' : False, # limits the TF concentrations for the model by the actual values, otherwise lets it be a very wide range
    'supress_output' : False,
    'use_greedy' : True, # use the greedy algo values (if False, uses the results of the GA)
    'run_on_all' : False, # run on all genes that are in the saved output folder
    'limit_samples' : flags_df.index.to_list(), # if run_on_all is False, limit to these samples (or which of them are available)
    'delete_old' : True,
    'run_seperate' : False, # run cActivator and cInhibitor solvers separately
    
    # input constants for GAMs (all get logged inside GAMs so pass in un-logged)
    'act_TF_conc_lo' : 2.902870141566294e-13 / bound_buffer, # minimum TF conc found in Heineman data
    'act_TF_conc_up' : 0.00014190659526601638 * bound_buffer, # max of ^
    'act_Kd_lo' : 11e-9 / bound_buffer, # 11 - 35 nM (1e-9) is the answer here - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4646316/
    'act_Kd_up' : 35e-9 * bound_buffer, # from above
    'inh_TF_conc_lo' : 2.902870141566294e-13 / bound_buffer, # minimum TF conc found in Heineman data
    'inh_TF_conc_up' : 0.00014190659526601638 * bound_buffer, # max of ^
    'inh_Kd_lo' : 11e-9 / bound_buffer, # 11 - 35 nM (1e-9) is the answer here - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4646316/
    'inh_Kd_up' : 35e-9 * bound_buffer, # from above
    'inh_metab_Total_lo' : 0.000038 / bound_buffer, # minimum of arginine concentration in stationary phase samples, div a buffer
    'inh_metab_Total_up' : 0.000408 * bound_buffer, # maximum of arginine concentration in stationary phase samples, mult a buffer
    'act_metab_Total_lo' : 0.000038 / bound_buffer, # minimum of arginine concentration in stationary phase samples, div a buffer
    'act_metab_Total_up' : 0.000408 * bound_buffer, # maximum of arginine concentration in stationary phase samples, mult a buffer
    
    # objective function weightings
    'weight_act_obj1' : 1,
    'weight_inh_obj1' : 1,
    'weight_act_obj2' : 0,
    'weight_inh_obj2' : 0,
    'weight_mRNA_match' : 1.0001,
    'weight_act_corr' : 0.00000000000000001,
    'weight_inh_corr' : 0.00000000000000001,
    
    # misc
    # NOTE: the backslash continuations sit inside the string literal, so the leading
    # whitespace of each continued line is part of the stored equation string
    'eq_str' : 'Eq(mRNARatio,((cActivator*KdRNAP + KdRNAPCrp)*(KdRNAP + RNAP + \
            KeqOpening*RNAP))/((1 + cActivator + cInhibitor)*KdRNAP*KdRNAPCrp + \
            cActivator*KdRNAP*(1 + KeqOpening)*RNAP + KdRNAPCrp*(1 + \
            KeqOpening)*RNAP))',
    
    # cell_constants (stored flat here with a 'cell_constants_' key prefix)
    'cell_constants_RNAP': 10**-6,
    'cell_constants_mRNA_total': 1800, # Total mRNA/cell from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3554401
    'cell_constants_cell_volume': 10**-15, # Liters from https://bionumbers.hms.harvard.edu/bionumber.aspx?id=100004&ver=19
    'cell_constants_kDeg': np.log(2)/t_half_life_deg, # Rate of degradation
    'cell_constants_promoterConcVal': 10**-9, # Promoter concentration
    'cell_constants_u': 1/3600, # Growth rate
}

In [2]:
# multiprocess run
# Builds one flags dictionary per gene in flags_df (per-gene CSV values plus the
# shared values from stable_flags), then maps cdg.create_data_for_gene over them
# in a worker pool.

# per-gene columns that are stored in the CSV as strings and must be parsed into Python literals
literal_columns = ['basal_conditions', 'target_range', 'cActivator', 'cInhibitor']

# cell constants are stored flat in stable_flags under 'cell_constants_<name>'
cell_constant_names = ['RNAP', 'mRNA_total', 'cell_volume', 'kDeg', 'promoterConcVal', 'u']

# stable flags copied unchanged into every gene's flags
shared_flag_names = [
    'eq_str',
    'include_Amy_samples',
    'only_check_KdRNAPCrp',
    'only_create_ratios',
    'KdRNAPCrp_sanity',
    'remove_outliers',
    'drop_basal_conds',
]

# upper limit on the number of worker processes
max_processes = 16

# setup inputs
gene_flags = []
for gene in flags_df.index:
    # start from this gene's row of the flags table
    temp_flags = dict(flags_df.loc[gene])
    temp_flags['central_gene'] = gene
    
    # need to convert some flags from strings to lists
    for col in literal_columns:
        temp_flags[col] = ast.literal_eval(temp_flags[col])
    
    # convert cell constants into a dictionary (keys lose the 'cell_constants_' prefix)
    temp_flags['cell_constants'] = {
        name : stable_flags['cell_constants_' + name] for name in cell_constant_names
    }
    
    # convert some additional flags over
    for flag_name in shared_flag_names:
        temp_flags[flag_name] = stable_flags[flag_name]
    
    # add flags to the list of runs
    gene_flags.append(temp_flags)

# run pool
# never start more workers than there are genes to process (and always at least one,
# since Pool rejects a process count below 1)
n_processes = max(1, min(max_processes, len(gene_flags)))

# the with-block guarantees the workers are cleaned up even if a gene's run raises
with multiprocessing.Pool(processes = n_processes) as pool:
    results = pool.map(cdg.create_data_for_gene, gene_flags)
    pool.close()
    pool.join()