In [1]:
# generate a matrix of different options, run GAMs a ton of times
# we are just using the current GAMs input files placed in data/save_for_GAMs

import matplotlib.pyplot as plt
from multiprocessing import Pool
from itertools import product
import numpy as np
import pandas as pd
import os
import shutil
import sys
sys.path.insert(0, '../functions/')
import interface_GAMS as iG
import parameter_optimization as po

# Half-life used only to derive cell_constants_kDeg below as ln(2) / t_half_life_deg.
# NOTE(review): units are not stated here (presumably seconds) -- confirm.
t_half_life_deg = 300

# Baseline settings shared by every GAMs run in this notebook. The whole dict is
# passed unchanged to each run; the parameter sweep further down scales some of
# its numeric entries and hands the scaled values over separately.
stable_flags = { # these do not change gene by gene
    # overall
    'only_check_KdRNAPCrp' : False, # if True, quit out of code after generating KdRNAPCrp, done to see if it is generating valid values through sanity check plots
    'save_results' : True, # saves resulting figures and cAct/cInh values of the previous run to the save_results_run folder
    'include_Amy_samples' : True, # append on Amy's stationary phase samples to analysis
    
    # GAMs
    'supress_output' : True, # don't show output from GAMs
    'use_greedy' : True, # use the greedy algo values (if False, uses the results of the GA)
    'run_on_all' : False, # run on all genes that are in the saved output folder
    'limit_samples' : ['b1101', 'b1817', 'b1818', 'b1819'], # if run_on_all is False, limit to these samples (or which of them are available)
    'delete_old' : True, # NOTE(review): undocumented; presumably clears earlier GAMs files -- confirm in interface_GAMS
    'run_seperate' : False, # run cActivator and cInhibitor solvers separately
    # input constants for GAMs (all get logged inside GAMs so pass in un-logged)
    # *_lo / *_up are the lower / upper limits of each quantity
    'act_TF_conc_lo' : 1e-10,
    'act_TF_conc_up' : 1e-5,
    'act_Kd_lo' : 1e-10,
    'act_Kd_up' : 1e-6,
    'inh_TF_conc_lo' : 1e-10,
    'inh_TF_conc_up' : 1e-5,
    'inh_Kd_lo' : 1e-10,
    'inh_Kd_up' : 1e-6,
    # objective function weightings
    'weight_act_obj1' : 1,
    'weight_inh_obj1' : 1,
    'weight_act_obj2' : 0,
    'weight_inh_obj2' : 0,
    'weight_mRNA_match' : .1,
    'weight_act_corr' : 0.00000000000000001, # = 1e-17
    'weight_inh_corr' : 0.00000000000000001, # = 1e-17
    
    
    # misc
    # Equation giving mRNARatio in terms of cActivator, cInhibitor, KdRNAP,
    # KdRNAPCrp, KeqOpening and RNAP. The backslashes sit inside the string
    # literal, so the pieces are joined without newlines but the leading spaces
    # of each continuation line stay in the string -- do not re-indent.
    # NOTE(review): presumably parsed as a sympy expression by
    # parameter_optimization -- confirm.
    'eq_str' : 'Eq(mRNARatio,((cActivator*KdRNAP + KdRNAPCrp)*(KdRNAP + RNAP + \
            KeqOpening*RNAP))/((1 + cActivator + cInhibitor)*KdRNAP*KdRNAPCrp + \
            cActivator*KdRNAP*(1 + KeqOpening)*RNAP + KdRNAPCrp*(1 + \
            KeqOpening)*RNAP))',
    
    # cell constants (copied into the separate cell_constants dict further down)
    'cell_constants_RNAP': 10**-6,
    'cell_constants_mRNA_total': 1800, # Total mRNA/cell from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3554401
    'cell_constants_cell_volume': 10**-15, # Liters from https://bionumbers.hms.harvard.edu/bionumber.aspx?id=100004&ver=19
    'cell_constants_kDeg': np.log(2)/t_half_life_deg, # Rate of degradation
    'cell_constants_promoterConcVal': 10**-9, # Promoter concentration
}

# Saved flags table, indexed by its first column; passed as the first argument
# of every GAMs run below.
flags_df = pd.read_csv('../data/saved_flags.csv', index_col = 0)

In [2]:
# generate large list of sets of stable_flags with varying constants to run on
# Every swept parameter takes n_range linearly spaced values between min_val and
# max_val times its value in stable_flags, and one GAMs run is queued for every
# combination of those values (n_range ** len(paras) runs in total).
n_range = 3
min_val = 0.1
max_val = 1
paras = ['act_TF_conc_lo', 'act_TF_conc_up', 'act_Kd_lo', 'act_Kd_up', 'weight_act_obj1', 'weight_mRNA_match', 'weight_act_corr']
para_to_list = {
    para : np.linspace(min_val*stable_flags[para], max_val*stable_flags[para], n_range)
    for para in paras
}
lists = [para_to_list[para] for para in paras]
combos = product(*lists)

# cell constants are handed to GAMs under their short names
cell_constants = {
    'RNAP' : stable_flags['cell_constants_RNAP'],
    'mRNA_total' : stable_flags['cell_constants_mRNA_total'],
    'cell_volume' : stable_flags['cell_constants_cell_volume'],
    'kDeg' : stable_flags['cell_constants_kDeg'],
    'promoterConcVal' : stable_flags['cell_constants_promoterConcVal'],
}

inputs = []     # one argument tuple per run, in run-number order
i_to_index = {} # run number -> position of that run in the n_range^len(paras) grid

# Walk the grid positions alongside the value combinations. Both products
# iterate in the same order, so each position belongs to the combo it is paired
# with. Taking the position from the iteration itself (instead of searching each
# list for the float value) stays correct even if a list holds repeated values,
# e.g. when the base value of a swept parameter is 0.
grid_positions = product(range(n_range), repeat = len(paras))
for i, (index, combo) in enumerate(zip(grid_positions, combos)):
    # look the values up by name so reordering paras cannot mis-wire them
    value = dict(zip(paras, combo))
    para_dict = {
        # the inhibitor limits are swept together with the activator ones
        'act_TF_conc_lo' : value['act_TF_conc_lo'],
        'act_TF_conc_up' : value['act_TF_conc_up'],
        'act_Kd_lo' : value['act_Kd_lo'],
        'act_Kd_up' : value['act_Kd_up'],
        'inh_TF_conc_lo' : value['act_TF_conc_lo'],
        'inh_TF_conc_up' : value['act_TF_conc_up'],
        'inh_Kd_lo' : value['act_Kd_lo'],
        'inh_Kd_up' : value['act_Kd_up'],

        # likewise the inhibitor weights follow the activator weights
        'weight_act_obj1' : value['weight_act_obj1'],
        'weight_inh_obj1' : value['weight_act_obj1'],
        'weight_act_obj2' : 0,
        'weight_inh_obj2' : 0,
        'weight_mRNA_match' : value['weight_mRNA_match'],
        'weight_act_corr' : value['weight_act_corr'],
        'weight_inh_corr' : value['weight_act_corr'],
    }
    i_to_index[i] = index
    GAMs_run_dir = '../GAMs/optimization_runs/run_'+str(i)
    inputs.append((flags_df, stable_flags, 'b3357', 'b1594', cell_constants, GAMs_run_dir, para_dict))

In [None]:
# run GAMs
def mask_func(inp0, inp1, inp2, inp3, inp4, inp5, inp6):
    """Prepare a clean run directory and launch one GAMs run in it.

    Takes positional arguments only so that it can be used with Pool.starmap.
    In this notebook the argument tuples are
    (flags_df, stable_flags, 'b3357', 'b1594', cell_constants, GAMs_run_dir, para_dict).

    inp0-inp4 : forwarded unchanged as the first five arguments of iG.run_GAMs
    inp5      : path of the run directory; an existing directory at this path is
                deleted and recreated together with its four sub-folders
    inp6      : forwarded to iG.run_GAMs as parameter_flags

    Returns None. Raises FileExistsError if an old run directory could not be
    removed, so that stale files never get mixed into a new run.
    """
    if os.path.exists(inp5):
        shutil.rmtree(inp5, ignore_errors = True)
    # makedirs (unlike mkdir) also creates missing parent folders, so the first
    # run works even when the optimization_runs folder does not exist yet
    os.makedirs(inp5)
    for sub_dir in ('input_files', 'output_files', 'input_GDX', 'output_GDX'):
        os.makedirs(os.path.join(inp5, sub_dir))
    iG.run_GAMs(inp0, inp1, inp2, inp3, inp4, inp5, parameter_flags = inp6)
# announce the size of the sweep before launching it
total_runs = len(inputs)
print(total_runs, end = ' total runs to be done ... ')
print('\nto check progress, look at number of run_N folders in GAMs/optimization_runs/')

# spread the runs over 16 worker processes; mask_func has no return value, so
# results ends up as a list holding one None per run
n_workers = 16
with Pool(n_workers) as pool:
    results = pool.starmap(mask_func, inputs)

2187 total runs to be done ... 
to check progress, look at number of run_N folders in GAMs/optimization_runs/


In [None]:
# interpret results to look for best values
# Each saved_* array has one axis per swept parameter (n_range entries each).
# The entry at i_to_index[run number] receives that run's mean correlation;
# runs that are skipped below keep the initial value of 0.
size = (n_range,) * len(paras)
saved_act_corrs = np.zeros(size)
saved_inh_corrs = np.zeros(size)
saved_mRNA_corrs = np.zeros(size)

# reference inputs are read once, from run_0 only
# NOTE(review): assumes every run was given the same input files -- confirm
mRNA_ratio = pd.read_csv('../GAMs/optimization_runs/run_0/input_files/actual_mRNA_ratio.csv', index_col = 0)
input_cAct = pd.read_csv('../GAMs/optimization_runs/run_0/input_files/composite_cAct_vals.csv', index_col = 0)
input_cInh = pd.read_csv('../GAMs/optimization_runs/run_0/input_files/composite_cInh_vals.csv', index_col = 0)
grid = pd.read_csv('../GAMs/optimization_runs/run_0/input_files/grid_constants.csv', index_col = 0)
real_act_TF_conc = pd.read_csv('../GAMs/optimization_runs/run_0/input_files/exported_act_TF_conc.csv', index_col = 0)
real_inh_TF_conc = pd.read_csv('../GAMs/optimization_runs/run_0/input_files/exported_inh_TF_conc.csv', index_col = 0)

skipped_runs = [] # run folders whose GAMs output could not be read
for f in os.listdir('../GAMs/optimization_runs/'):
    # only folders named exactly run_<N> are results; resolve the grid position
    # up front so an unexpected name cannot fail after the expensive work below
    if not f.startswith('run_') or not f[len('run_'):].isdecimal(): continue
    run_id = int(f[len('run_'):])
    if run_id not in i_to_index: continue # left over from a different sweep
    index = i_to_index[run_id]

    # read in data
    try:
        calc_cAct, cAct_kd_df, cAct_TF_conc_df, calc_cInh, cInh_kd_df, cInh_TF_conc_df = iG.read_GAMs('../GAMs/optimization_runs/'+f)
    except Exception:
        # unfinished or failed run: skip it, but remember it so it can be
        # reported (a bare except would also swallow KeyboardInterrupt)
        skipped_runs.append(f)
        continue

    # get average cAct/cInh correlations
    act_corrs = []
    inh_corrs = []
    for gene in calc_cAct.columns:
        act_corrs.append(np.corrcoef(list(calc_cAct[gene].values), list(input_cAct[gene].values))[0][1])
        inh_corrs.append(np.corrcoef(list(calc_cInh[gene].values), list(input_cInh[gene].values))[0][1])
    act_corr = np.mean(act_corrs)
    inh_corr = np.mean(inh_corrs)

    # I need recreated cActivators and cInhibitors
    recons_mRNAs = []
    for gene in calc_cAct.columns:
        # must be called for each gene before converting that gene's values
        po.create_shared_lambda_df(stable_flags['eq_str'], dict(grid[gene]))
        KdRNAPCrp = grid[gene].loc['KdRNAPCrp']
        recons_mRNAs.append([
            po.cActivator_cInhibitor_to_mRNA(cAct, cInh, KdRNAPCrp = KdRNAPCrp)
            for cAct, cInh in zip(calc_cAct[gene], calc_cInh[gene])
        ])
    # rows were built per gene, so transpose back to samples x genes
    recon_df = pd.DataFrame(recons_mRNAs, columns = calc_cAct.index, index = calc_cAct.columns).T
    corrs = [np.corrcoef(recon_df[column], mRNA_ratio[column])[0][1] for column in mRNA_ratio.columns]
    mRNA_corr = np.mean(corrs)

    # save values
    saved_act_corrs[index] = act_corr
    saved_inh_corrs[index] = inh_corr
    saved_mRNA_corrs[index] = mRNA_corr

if skipped_runs:
    print(len(skipped_runs), 'run folders could not be read and keep a correlation of 0, e.g.', sorted(skipped_runs)[:10])

In [None]:
# look through the saved correlation arrays for hints on how to set each parameter

# plotting this many dimensions against each other is not practical, and with
# only 3 points per dimension it may not be informative anyway
# two options:
# 1 - per parameter, box-plot the results at each of its 3 values and hope that
#     one value stands out as best
# 2 - look at the top 20 or so results and the parameters that produced them

# first method: one figure per swept parameter, one panel per correlation type
panel_titles = ['cActivators', 'cInhibitors', 'mRNA Ratio']
panel_arrays = [saved_act_corrs, saved_inh_corrs, saved_mRNA_corrs]
for ct, para in enumerate(paras):
    # the tick labels show the swept values as powers of ten
    tick_labels = ['1e{:.2f}'.format(np.log10(val)) for val in para_to_list[para]]

    fig, axs = plt.subplots(1, 3, figsize = (9, 3))
    for ax, title, corr_array in zip(axs, panel_titles, panel_arrays):
        # fix this parameter (axis ct) at each of its values in turn and pool
        # the results over all the other parameters
        pooled = [np.take(corr_array, n, axis = ct).flatten() for n in range(n_range)]
        ax.boxplot(pooled)
        ax.set_ylabel('Correlation')
        ax.set_title(title)
        ax.set_xticklabels(tick_labels)

    fig.suptitle(para)
    plt.tight_layout()
    plt.show()