In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# enables concurrent editing of base.py
from importlib import reload
reload(base)

### Load data

Circuit tuning data (`data`)

In [None]:
# Root folders for exported flow data; each `kasey` experiment keeps its files under `<experiment>/export`
attune_path = rd.datadir/'instruments'/'data'/'attune'
base_path = attune_path/'kasey'

def _export_dir(experiment_folder):
    """Return the `export` subfolder of one experiment folder under `base_path`."""
    return base_path/experiment_folder/'export'

exp90_path = _export_dir('2024.03.31_exp90')
exp90_2_path = _export_dir('2024.04.02_exp90.2')
exp90_3_path = _export_dir('2024.04.02_exp90.3')
exp90_4_path = _export_dir('2024.04.05_exp90.4')
exp91_path = _export_dir('2024.04.08_exp91')
exp92_path = _export_dir('2024.04.12_exp92')

# NOTE(review): the variable says "60" but the folder is EXP61 (and `plates` labels it 'elp_exp61') — confirm the name
exp_elp60_path = attune_path/'Emma'/'2024.06.09_EXP61'/'data_singlets'

# One record per plate: (data_path, yaml_path, biorep, exp).
# Several plates reuse another plate's well-layout YAML (e.g. exp90.2-exp90.4 use the exp90 files).
plate_records = [
    (exp90_path/'plate1',    exp90_path/'exp90_plate1_wells.yaml',   1, 'exp90'),
    (exp90_path/'plate2',    exp90_path/'exp90_plate2_wells.yaml',   1, 'exp90'),
    (exp90_2_path,           exp90_path/'exp90_plate2_wells.yaml',   2, 'exp90.2'),
    (exp90_4_path,           exp90_path/'exp90_plate1_wells.yaml',   2, 'exp90.4'),
    (exp90_3_path/'plate1',  exp90_path/'exp90_plate1_wells.yaml',   3, 'exp90.3'),
    (exp90_3_path/'plate2',  exp90_path/'exp90_plate2_wells.yaml',   3, 'exp90.3'),
    (exp91_path/'plate1.1',  exp91_path/'exp91_plate1_wells.yaml',   1, 'exp91'),
    (exp91_path/'plate1.2',  exp91_path/'exp91_plate1_wells.yaml',   2, 'exp91'),
    (exp91_path/'plate1.3',  exp91_path/'exp91_plate1_wells.yaml',   3, 'exp91'),
    (exp91_path/'plate2.1',  exp91_path/'exp91_plate2.1_wells.yaml', 1, 'exp91'),
    (exp91_path/'plate2.2',  exp91_path/'exp91_plate2.2_wells.yaml', 2, 'exp91'),
    (exp91_path/'plate2.3',  exp91_path/'exp91_plate2.3_wells.yaml', 3, 'exp91'),
    (exp92_path/'plate1.1',  exp92_path/'exp92_plate1_wells.yaml',   1, 'exp92'),
    (exp92_path/'plate1.2',  exp92_path/'exp92_plate1.2_wells.yaml', 2, 'exp92'),
    (exp92_path/'plate1.3',  exp92_path/'exp92_plate1_wells.yaml',   3, 'exp92'),
    (exp92_path/'plate2.1',  exp92_path/'exp92_plate2_wells.yaml',   1, 'exp92'),
    (exp92_path/'plate2.2',  exp92_path/'exp92_plate2.2_wells.yaml', 2, 'exp92'),
    (exp92_path/'plate2.3',  exp92_path/'exp92_plate2_wells.yaml',   3, 'exp92'),
    (exp_elp60_path,         exp_elp60_path/'metadata.yaml',         4, 'elp_exp61'),
]
plates = pd.DataFrame(plate_records, columns=['data_path', 'yaml_path', 'biorep', 'exp'])

output_path = rd.rootdir/'output'/'compare-to-simulations'
cache_path = rd.rootdir/'output'/'fig_overview'/'data.gzip'
metadata_path = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'

# Load data: reuse the parquet cache when it exists, otherwise read the plates and write the cache
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events whose value is strictly positive in every channel
    # (non-positive values are removed), then drop rows with missing values
    all_channels_positive = (data[channel_list] > 0).all(axis=1)
    data = data[all_channels_positive].dropna()

    data.to_parquet(rd.outfile(cache_path))

# Attach construct metadata to every event (left join keeps events whose construct has no metadata row)
metadata = base.get_metadata(metadata_path)
data = data.merge(metadata, on='construct', how='left')
display(data)

In [None]:
# Add shades to the color palette. As applied below:
#  dark  = EFS promoter, miR.FF4, controllers with 4 target sites
#  light = miRE.FF5, controllers with 2 target sites
# NOTE(review): EFS and miR.FF4 receive the *dark* shade here — confirm this is the intended palette.
is_controller = metadata['group']=='controller'
shade_rules = [
    (metadata['promoter']=='EFS',             base.get_dark_color),
    (metadata['miR']=='miR.FF4',              base.get_dark_color),
    (metadata['miR']=='miRE.FF5',             base.get_light_color),
    (is_controller & (metadata['ts_num']==2), base.get_light_color),
    (is_controller & (metadata['ts_num']==4), base.get_dark_color),
]
# Rules are applied in order, so a construct matching several rules is shaded repeatedly
for selected, shade in shade_rules:
    metadata.loc[selected, 'color'] = metadata.loc[selected, 'color'].apply(shade)

# Create dicts to specify colors/markers, keyed by construct
metadata_dict = metadata.set_index('construct').to_dict(orient='dict')
main_palette = metadata_dict['color']
main_markers = metadata_dict['markers']

# Grey colormap restricted to the 0.2-0.7 portion of matplotlib's 'Greys'
grey_samples = matplotlib.colormaps['Greys'](np.linspace(0.2, 0.7, 256))
greys = matplotlib.colors.ListedColormap(grey_samples)

In [None]:
# Gate cells
# Per-experiment threshold for each channel: the 99.9th percentile of that channel
# among rows whose construct is 'UT' (presumably the untransfected control — confirm).
gates = pd.DataFrame()
channel_list = ['mGL-A', 'mRuby2-A']
for channel in channel_list:
    gates[channel] = data[data['construct']=='UT'].groupby(['exp'])[channel].apply(lambda x: x.quantile(0.999))
# Move the 'exp' group key out of the index into a regular column
gates.reset_index(inplace=True)

# Add missing gates
# Append a placeholder row for exp90.4, then overwrite its thresholds with exp90.2's.
# NOTE(review): presumably exp90.4 has no 'UT' sample of its own — confirm.
gates.loc[len(gates.index)] = ['exp90.4',0,0,]
gates.loc[gates['exp']=='exp90.4', channel_list] = gates.loc[gates['exp']=='exp90.2', channel_list].values

# Indicate which channels are relevant for each experiment
# (the same marker/output pair is assigned to every experiment)
gates.sort_values(['exp'], inplace=True)
gates['marker'] = 'mGL-A'
gates['output'] = 'mRuby2-A'

# Gate data by transfection marker expression
# `base.gate_data` is applied to each experiment's rows together with the full `gates` table;
# it presumably adds the boolean 'expressing' column used below — verify in base.py.
data = data.groupby('exp')[data.columns].apply(lambda x: base.gate_data(x,gates))
data.reset_index(inplace=True, drop=True)
# Analysis subset: expressing events only, excluding the 'UT' construct and the 'elp_exp61' experiment
df = data[(data['expressing']) & (data['construct']!='UT') & (data['exp']!='elp_exp61')]

In [None]:
# Bin the gated data (10 bins) and compute statistics; the third returned item is unused here
binned_results = base.calculate_bins_stats(df, num_bins=10)
df_quantiles, stats, _, fits = binned_results

# Annotate both summary tables with construct metadata
stats, fits = (
    table.merge(metadata, how='left', on='construct')
    for table in (stats, fits)
)

In [None]:
# Display labels for each `ts_kind` value; values not listed here are left unchanged by `replace`.
# Note: `stats` and `fits` were merged with `metadata` in an earlier cell, before this column is added.
ts_label = {
    'na': 'base',
    'NT': 'OL',
    'T': 'CL',
    'none': '–',
}
metadata['ts_label'] = metadata['ts_kind'].replace(to_replace=ts_label)

Load modeling parameter sweeps

In [None]:
# Parameter-sweep results written by the modeling step; a missing file leaves `sim_data` as an empty frame.
# NOTE(review): later cells group by the `param` column, which an empty frame lacks — consider failing here instead.
simulation_path = rd.rootdir/'output'/'modeling'/'julia_param_sweeps'/'per_param'/'sweep_df.gzip'
sim_data = pd.read_parquet(simulation_path) if simulation_path.is_file() else pd.DataFrame()

In [None]:
# Normalize parameter values such that original (middle) value = 1
def normalize_param_val(df):
    """Return a copy of `df` with a `param_val_norm` column added.

    `param_val_norm` is `param_val` divided by the middle of the sorted unique
    `param_val` values (the upper-middle one when their count is even).
    The input frame is not modified.
    """
    unique_vals = sorted(df['param_val'].unique())
    reference_val = unique_vals[len(unique_vals) // 2]
    return df.assign(param_val_norm=df['param_val'] / reference_val)

# Normalize within each swept parameter, then flatten back to a plain row index
sim_by_param = sim_data.groupby('param')[sim_data.columns]
sim_data = sim_by_param.apply(normalize_param_val).reset_index(drop=True)
display(sim_data)

In [None]:
# List the distinct parameter names present in the sweep table
swept_params = sim_data['param'].unique()
display(swept_params)

In [None]:
# Compute the value of an unregulated gene
# Rate constants copied from `miR_iFFL.jl`
alpha_rna = 4.67e-2
delta_mrna = 2.88e-4
alpha_p = 3.33e-4
delta_p = 9.67e-5

# unreg = copy_num * (alpha_rna * alpha_p) / (delta_mrna * delta_p)
sim_data['unreg'] = (
    sim_data['copy_num']
    .mul(alpha_rna * alpha_p)
    .div(delta_mrna * delta_p)
)

In [None]:
# Compute instantaneous slope for each param & param_val 
def get_slope_instant(df, x, y):
    """Return the finite-difference slope of `y` vs `x` between the two rows with the largest `x`.

    `df` is sorted by column `x`; the slope is (y[-1] - y[-2]) / (x[-1] - x[-2]).
    Raises IndexError when `df` has fewer than two rows.
    """
    ordered = df.sort_values(x)
    x_vals = list(ordered[x])
    y_vals = list(ordered[y])
    rise = y_vals[-1] - y_vals[-2]
    run = x_vals[-1] - x_vals[-2]
    return rise / run

# One slope per (param, param_val, param_val_norm): protein vs copy_num at the highest copy numbers
col_list = ['copy_num','protein']
slopes = (
    sim_data.groupby(['param','param_val','param_val_norm'])[sim_data.columns]
    .apply(lambda x: get_slope_instant(x, *col_list))
    .rename('slope')
    .reset_index()
)

# Reciprocal of the unregulated per-copy level used for `sim_data['unreg']`.
# Reuses the rate constants (alpha_rna, delta_mrna, alpha_p, delta_p) defined once in the
# earlier cell, rather than re-declaring them here where the two copies could drift apart.
slopes['base_norm_factor'] = (delta_mrna * delta_p) / (alpha_rna * alpha_p)

def modify_norm_factor(df):
    """Return the group's normalization factor as a Series named `norm_factor`.

    `df` is one group of rows sharing a single `param` (only the first row's
    `param` and `param_val_norm` are read). The result has the same row labels as `df`:
      - param in {'α_im', 'α_p'}: base_norm_factor * param_val_norm
      - param in {'δ_m', 'δ_p'}: base_norm_factor / param_val_norm
      - any other param:         base_norm_factor unchanged

    NOTE(review): `base_norm_factor` is the reciprocal of the unregulated per-copy
    level; if the intent is to track that level as the swept rate changes, the α
    branch would divide and the δ branch multiply — confirm the intended direction.
    """
    param = df['param'].values[0]
    base_factor = df['base_norm_factor']
    if param in ('α_im', 'α_p'):
        factor = base_factor * df['param_val_norm'].values[0]
    elif param in ('δ_m', 'δ_p'):
        factor = base_factor / df['param_val_norm'].values[0]
    else:
        factor = base_factor.copy()
    return factor.rename('norm_factor')

# Scale the base normalization factor per (param, param_val_norm) group.
# Each per-group Series keeps the row labels of `slopes`, so the concatenated result is
# assigned by label. This avoids relying on a second, independently sorted group-by
# producing rows in exactly the same order as `slopes`.
norm_factor_parts = [
    modify_norm_factor(group)
    for _, group in slopes.groupby(['param','param_val_norm'])
]
slopes['norm_factor'] = pd.concat(norm_factor_parts)
slopes['slope_norm'] = slopes['slope'] * slopes['norm_factor']
display(slopes)

### Exploratory plots comparing simulations to data

In [None]:
# Regulated output ('protein') vs. an unregulated gene ('unreg') across copy number,
# shown on a linear (left) and a log (right) y-axis for the baseline `Rtot` sweep.
fig, axes = plt.subplots(1,2, figsize=(5,2), gridspec_kw=dict(wspace=0.3))

# Baseline simulation: rows of the `Rtot` sweep at the original (normalized == 1) value
plot_df = sim_data[sim_data['param']=='Rtot']
baseline_df = plot_df[plot_df['param_val_norm']==1]

# Both panels show the same two curves; only the axis formatting differs
for ax in axes:
    sns.lineplot(data=baseline_df, x='copy_num', y='protein', color='black', ax=ax, linewidth=1.5,
                 errorbar=None,)
    sns.lineplot(data=baseline_df, x='copy_num', y='unreg', color=base.colors['gray'], ax=ax,
                 errorbar=None,)

# linear space
ax = axes[0]
yticks = list(np.linspace(0,1e4,5))
yticks.insert(1, 1e3)   # extra tick at 1k, matching the lower dotted guide line
ax.set(xticks=np.linspace(0,100,5), yticks=yticks, xlabel='copy #', ylim=(0,12e3),
       yticklabels=[f'{float(tick)/1000:.1f}k' for tick in yticks], title='Linear space')
# Shade copy numbers 55-100
ax.axvspan(55,100, color=base.get_light_color(base.colors['gray']), alpha=0.2)

# log space
ax = axes[1]
ax.set(xticks=np.linspace(0,100,5), xlabel='copy #', ylim=(4e2,1e5), title='Log space',
       yscale='log', ylabel='')

# Dotted guides at 1e3 and 1e4 on both panels, added last so they sit above the shaded span.
# `xmin`/`xmax` of axhline are axes fractions in [0, 1]; the defaults already span the full
# width, so the previous out-of-range `xmax=100` is dropped.
for ax in axes:
    for level in (1e3, 1e4):
        ax.axhline(level, color='black', ls=':', zorder=1)