In [None]:
import base
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# Re-import base on every run so that edits made to base.py while the kernel is live take effect
from importlib import reload
reload(base)

# Plot styling applied through seaborn for the figures drawn below
sns.set_style('ticks')
sns.set_context(
    'talk',
    rc={
        'font.family': 'sans-serif',
        'font.sans-serif': ['Helvetica Neue'],
    },
)

## Setup

- Load data
- Add metadata
- Draw gates
- Gate transfected cells

Result from this section: DataFrame `df` representing transfected cells.

Load exp93, which compares single-transcript, dual-transcript, and dual-vector implementations

In [None]:
# Location of the exported flow cytometry data for exp93
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'
exp93_path = base_path/'2024.04.14_exp93'/'export'

# One row per plate: plate N is recorded as biorep N, and all plates share the same well map
plates = pd.DataFrame({
    'data_path': [exp93_path/f'plate{i}' for i in (1, 2, 3)],
    'yaml_path': [exp93_path/'exp93_wells.yaml' for _ in range(3)],
    'biorep': [1, 2, 3],
    'exp': ['exp93' for _ in range(3)],
})

# Figures and the cached event table are written to the same output folder
output_path = rd.rootdir/'output'/'two-gene'
cache_path = output_path/'data.gzip'

# Plot every distinct well map and collect the metadata keys it defines
metadata_keys = set()
for p in plates['yaml_path'].unique():
    rd.plot.plot_well_metadata(p)
    metadata_keys |= set(rd.flow.load_well_metadata(p).keys())
display(metadata_keys)

In [None]:
# Load data: reuse the cached table when it exists, otherwise read the raw exports and cache the result
data = pd.DataFrame()
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mRuby2-A','FSC-A','SSC-A','SNAP-647-A','mGL-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every channel
    for c in channel_list:
        data = data[data[c]>0]

    # Drop rows with any missing value, then write the cache (parquet format despite the .gzip suffix)
    data = data.dropna()
    data.to_parquet(rd.outfile(cache_path))
display(data)

In [None]:
# Add metadata for constructs
# base.get_metadata is a project helper (not shown here); its result is left-merged on 'construct',
# so every event is kept and unmatched constructs get NaN in the added columns.
# NOTE: this cell reassigns `data`, so running it twice merges the metadata columns a second time.
metadata = base.get_metadata(rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx')
data = data.merge(metadata, how='left', on='construct')

# Create dicts to specify colors/markers
# to_dict('dict') gives {column: {construct: value}}, so each lookup below maps construct -> value
metadata_dict = metadata.set_index('construct').to_dict('dict')
construct_palette = metadata_dict['color']
construct_markers = metadata_dict['markers']

# Rename far-red channel
data.rename(columns={'SNAP-647-A': 'iRFP-A'}, inplace=True)
display(data)

In [None]:
# Add metadata for second construct
# Left merge on 'construct2': every event is kept; a construct2 value without a metadata row
# gets NaN in the added columns.
metadata_construct2 = pd.read_excel(rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct2-metadata.xlsx')
display(metadata_construct2)
data = data.merge(metadata_construct2, how='left', on='construct2')
display(data)

In [None]:
# Label each construct / construct2 pairing as '<construct>_<construct2>'; used as the grouping key below
data['condition'] = data['construct'] + '_' + data['construct2']

Draw gates for the output channels from the control population (construct GEEC555, 99.9th percentile), set the iRFP gate manually, then gate transfected cells

In [None]:
# Gates for mGL and mRuby2: 99.9th percentile of the GEEC555 events, computed per experiment
gates = pd.DataFrame()
channel_list = ['mGL-A', 'mRuby2-A']
for channel in channel_list:
    gates[channel] = data[data['construct']=='GEEC555'].groupby(['exp'])[channel].apply(lambda x: x.quantile(0.999))
gates.reset_index(inplace=True)

# Indicate which channels are relevant for each experiment
gates['marker'] = 'iRFP-A'
gates['output'] = 'mRuby2-A'

# Add missing iRFP gate: a manually chosen threshold, drawn on each biorep's iRFP distribution
# so that its placement can be checked by eye
gate_iRFP = 2.5e2
g = sns.displot(data=data, x='iRFP-A', col='biorep', hue='construct', palette=construct_palette,
                 kind='kde', fill=False, legend=False, common_norm=True, log_scale=True)
for name, ax in g.axes_dict.items():
    ax.axvline(gate_iRFP, color='black')
g.figure.savefig(rd.outfile(output_path/'hist_gate_iRFP.svg'), bbox_inches='tight')

# Scalar assignment broadcasts the threshold to every experiment row; a one-element list
# would raise as soon as `gates` holds more than one experiment
gates['iRFP-A'] = gate_iRFP

display(gates)

Gate data per experiment based on transfection marker expression

In [None]:
# Apply the gates experiment by experiment. base.gate_data is a project helper (not shown here);
# the filter below reads the 'expressing' and 'output' columns of its result.
data = data.groupby('exp')[data.columns].apply(lambda x: base.gate_data(x,gates))
data.reset_index(inplace=True, drop=True)

# Upper bound on output; named max_output so the builtin max() is not shadowed in the rest of the notebook
max_output = 1e6

# .copy() makes df an independent frame, so the columns that later cells add to df are not
# written to a slice of `data` (avoids SettingWithCopyWarning)
df = data[(data['expressing']) & (data['output']<max_output)].copy()
display(df)

In [None]:
# Make sure both measurement columns are float before aggregating
df['marker'] = df['marker'].astype(float)
df['output'] = df['output'].astype(float)

# Group and compute stats
# One group per condition / construct pair / biorep / experiment; every function in stat_list is
# applied to both 'marker' and 'output'. Groups with any NaN statistic are dropped.
stat_list = [np.mean, np.std, sp.stats.gmean, sp.stats.gstd, sp.stats.variation]
grouped = df.groupby(by=['condition','construct','construct2','biorep','exp'])
stats = grouped[['marker','output']].agg(stat_list).reset_index().dropna()

# Rename columns as 'col_stat'
stats.columns = stats.columns.map(lambda i: base.rename_multilevel_cols(i))
# Events per group; the assignment aligns on the positional index that both frames got from reset_index()
stats['count'] = grouped['marker'].count().reset_index()['marker']
stats = stats.merge(metadata, how='left', on='construct')
stats.sort_values(['design','ts_kind','ts_num'], inplace=True)
stats = stats.merge(metadata_construct2, how='left', on='construct2')

display(stats)

# 'TS' duplicates 'ts_kind' under a second name; stat_name maps stat columns to multi-line labels
metadata['TS'] = metadata['ts_kind']
stat_name = {'output_gmean': 'Geometric\nmean', 'output_std': 'Standard\ndeviation', 'output_variation': 'Coefficient\nof variation'}

In [None]:
# Bin by transfection marker
# np.logspace(2,6,15) gives 15 edges, i.e. 14 log-spaced bins from 1e2 to 1e6; pd.cut returns NaN
# for markers outside that range.
min_count = 100
df['bin_marker'] = df.groupby(['condition','construct','construct2','exp'])['marker'].transform(lambda x: pd.cut(x, np.logspace(2,6,15)))
# Flag bins holding fewer than min_count events.
# NOTE(review): this grouping omits 'biorep', so events are counted across bioreps although the
# stats below are computed per biorep — confirm this is intended.
df['remove_bin'] = df.groupby(['condition','construct','construct2','exp','bin_marker'])['bin_marker'].transform(lambda x: x.count() < min_count)
df_binned = df[~df['remove_bin']].copy()
df_binned['marker'] = df_binned['marker'].astype(float)
df_binned['output'] = df_binned['output'].astype(float)

# Group and compute stats
stat_list = [np.mean, np.std, sp.stats.gmean, sp.stats.gstd, sp.stats.variation]
grouped = df_binned.groupby(by=['condition','construct','construct2','biorep','exp','bin_marker'])
stats_bin = grouped[['marker','output']].agg(stat_list).reset_index().dropna()

# Rename columns as 'col_stat'
stats_bin.columns = stats_bin.columns.map(lambda i: base.rename_multilevel_cols(i))
stats_bin['count'] = grouped['marker'].count().reset_index()['marker']
stats_bin = stats_bin.merge(metadata, how='left', on='construct')

# Compute mean/median on bin span
# With only the two bin edges as input, mean and median are the same number (the arithmetic midpoint)
df_binned['bin_marker_mean'] = df_binned['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
df_binned['bin_marker_median'] = df_binned['bin_marker'].map(lambda x: np.median([x.left, x.right]))
stats_bin['bin_marker_mean'] = stats_bin['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
stats_bin['bin_marker_median'] = stats_bin['bin_marker'].map(lambda x: np.median([x.left, x.right]))

display(stats_bin)

In [None]:
# Combine bioreps
# Same binning as the per-biorep analysis, but bins are filtered and statistics computed with
# all bioreps pooled (neither 'biorep' nor 'exp' is a grouping key here).
df['remove_bin_combined'] = df.groupby(['condition','construct','construct2','bin_marker'])['bin_marker'].transform(lambda x: x.count() < min_count)
df_binned_combined = df[~df['remove_bin_combined']].copy()

stat_list = [np.mean, np.std, sp.stats.gmean, sp.stats.gstd, sp.stats.variation]
grouped = df_binned_combined.groupby(by=['condition','construct','construct2','bin_marker'])
stats_bin_combined = grouped[['marker','output']].agg(stat_list).reset_index().dropna()

# Flatten the (column, stat) MultiIndex, add the per-group event count, then attach both metadata tables
stats_bin_combined.columns = stats_bin_combined.columns.map(lambda i: base.rename_multilevel_cols(i))
stats_bin_combined['count'] = grouped['marker'].count().reset_index()['marker']
stats_bin_combined = stats_bin_combined.merge(metadata, how='left', on='construct')
stats_bin_combined = stats_bin_combined.merge(metadata_construct2, how='left', on='construct2')

# Midpoint of each bin; mean and median of the two edges are the same value
df_binned_combined['bin_marker_mean'] = df_binned_combined['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
df_binned_combined['bin_marker_median'] = df_binned_combined['bin_marker'].map(lambda x: np.median([x.left, x.right]))
stats_bin_combined['bin_marker_mean'] = stats_bin_combined['bin_marker'].map(lambda x: np.mean([x.left, x.right]))
stats_bin_combined['bin_marker_median'] = stats_bin_combined['bin_marker'].map(lambda x: np.median([x.left, x.right]))

In [None]:
# Normalize output to gmean of output in smallest bin, and normalize marker bin by smallest bin
def normalize_output(df):
    """Return a copy of `df` with output and marker bin rescaled to the lowest marker bin.

    Two columns are added:
    - 'output_norm': 'output' divided by the geometric mean of 'output' over the rows whose
      'bin_marker_median' equals the smallest 'bin_marker_median' in `df`.
    - 'bin_marker_median_norm': 'bin_marker_median' divided by that smallest value.

    The input frame itself is left untouched.
    """
    normalized = df.copy()
    lowest_bin = normalized['bin_marker_median'].min()
    in_lowest_bin = normalized['bin_marker_median'] == lowest_bin
    reference = sp.stats.gmean(normalized.loc[in_lowest_bin, 'output'])
    normalized['output_norm'] = normalized['output'].astype(float) / reference
    normalized['bin_marker_median_norm'] = normalized['bin_marker_median'].astype(float) / lowest_bin
    return normalized

# Normalise within each condition / construct pair / biorep / experiment, then flatten the result
by = ['condition','construct','construct2','biorep','exp']
df_binned = df_binned.groupby(by)[df_binned.columns].apply(normalize_output).reset_index(drop=True)
display(df_binned)

# Cache data
# Every column except 'color' and 'bin_marker' is written (parquet format despite the .gzip suffix)
df_binned.loc[:, ~df_binned.columns.isin(['color','bin_marker'])].to_parquet(rd.outfile(output_path/('df_binned.gzip')))

In [None]:
# mGL vs. marker bin, one panel per biorep (rows) and condition (columns), leaving out events whose
# construct2 is 'none' or 'JX00'.
# Line: geometric mean of y at each x; band: gmean divided / multiplied by the geometric std.
plot_df = df_binned[~(df_binned['construct2'].isin(['none','JX00']))]
g = sns.relplot(data=plot_df, row='biorep', col='condition', facet_kws=dict(sharex=True, sharey=True, margin_titles=True,), kind='line',
                    height=3, aspect=1.3, x='bin_marker_median', y='mGL-A', hue='construct',
                    palette=construct_palette, legend=False, dashes=False, style='construct', markers=construct_markers,
                    estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
g.set(xscale='log', yscale='log',)
sns.despine()
for name, ax in g.axes_dict.items(): ax.grid()
g.figure.savefig(rd.outfile(output_path/(f'line_iRFP-mGL.svg')), bbox_inches='tight')

mGL expression tracks closely with iRFP expression, indicating that the transfection marker can be used as a proxy for miR amount (as expected for co-transfections).

In [None]:
# Output vs. marker bin, one panel per biorep (rows) and metadata 'group' (columns), leaving out
# events whose construct2 is 'none' or 'JX00'.
# Line: geometric mean of y at each x; band: gmean divided / multiplied by the geometric std.
plot_df = df_binned[~(df_binned['construct2'].isin(['none','JX00']))]
g = sns.relplot(data=plot_df, row='biorep', col='group', facet_kws=dict(sharex=True, sharey=True, margin_titles=True,), kind='line',
                    height=3, aspect=1.3, x='bin_marker_median', y='output', hue='construct',
                    palette=construct_palette, legend=False, dashes=False, style='construct', markers=construct_markers,
                    estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
g.set(xscale='log', yscale='log',)
sns.despine()
for name, ax in g.axes_dict.items(): ax.grid()
g.figure.savefig(rd.outfile(output_path/(f'line_marker-output.svg')), bbox_inches='tight')

In [None]:
# Output vs. marker bin for biorep 2 only, one panel per construct2 (rows) and metadata 'group' (columns).
# Line: geometric mean of y at each x; band: gmean divided / multiplied by the geometric std.
plot_df = df_binned[df_binned['biorep']==2]
g = sns.relplot(data=plot_df, row='construct2', col='group', facet_kws=dict(sharex=True, sharey=True, margin_titles=True,), kind='line',
                    height=3, aspect=1.3, x='bin_marker_median', y='output', hue='construct',
                    palette=construct_palette, legend=False, dashes=False, style='construct', markers=construct_markers,
                    estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
g.set(xscale='log', yscale='log',)
sns.despine()
for name, ax in g.axes_dict.items(): ax.grid()
g.figure.savefig(rd.outfile(output_path/(f'line_marker-output_biorep2.svg')), bbox_inches='tight')

In [None]:
# Output vs. marker bin for all binned events, one panel per biorep (rows) and construct2 (columns).
# `label` translates each construct2 name into the implementation name shown as the panel title.
plot_df = df_binned
label = {'none': 'dual-transcript', 'RC239': 'single-transcript', 'RC227': 'dual-vector (EF1a)', 'JX00': 'dual-vector (U6)'}
g = sns.relplot(data=plot_df, row='biorep', col='construct2', facet_kws=dict(sharex=True, sharey=True, margin_titles=True,), kind='line',
                    height=4, aspect=1, x='bin_marker_median', y='output', hue='construct',
                    palette=construct_palette, legend=False, dashes=False, style='construct', markers=construct_markers,
                    estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
g.set(xscale='log', yscale='log',)
sns.despine()
# axes_dict is keyed by (row value, column value); only the biorep 1 panels get the readable title
for (biorep, construct2), ax in g.axes_dict.items(): 
    ax.grid()
    if biorep==1: ax.set(title=label[construct2])
g.figure.savefig(rd.outfile(output_path/(f'line_marker-output_construct2.svg')), bbox_inches='tight')

In [None]:
# Population-level statistics per condition, leaving out rows whose group is 'marker'.
# sort_values returns a new, sorted frame, so nothing is modified in place on a slice of `stats`
# (the in-place sort on the filtered frame triggered SettingWithCopyWarning).
plot_df = stats[stats['group']!='marker'].sort_values(['construct2', 'design','ts_kind'])
stat_list = ['output_gmean', 'output_std', 'output_variation']
fig, axes = plt.subplots(3,1, figsize=(12,12), gridspec_kw=dict(hspace=0.6))
for i,stat in enumerate(stat_list):
    ax = axes[i]
    sns.scatterplot(data=plot_df, x='condition', y=stat, hue='construct', palette=construct_palette,
                legend=False, s=100, ax=ax, style='construct', markers=construct_markers)
    ax.set(xlabel='')
    # gmean and std are shown on a log axis; the coefficient of variation on a linear axis starting at 0
    if stat != 'output_variation': ax.set(yscale='log')
    else: ax.set(ylim=(0,ax.get_ylim()[1]))
    rd.plot.generate_xticklabels(plot_df.drop_duplicates('condition'), 'condition', ['gene', 'ts_kind', 'ts_loc'], ax=ax)
    sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path/(f'stat_controller.svg')), bbox_inches='tight')

### Try making plots for figures

In [None]:
# Split each condition / biorep's events into 20 equal-count marker bins (pd.qcut with q=20)
df['bin_marker_quantiles'] = df.groupby(['condition','construct','construct2','exp','biorep'])['marker'].transform(lambda x: pd.qcut(x, q=20))
# Median marker value within each quantile bin; used as the x position of that bin
quantiles = df.groupby(['condition','construct','construct2','exp','biorep','bin_marker_quantiles'])['marker'].median().rename('bin_marker_quantiles_median').reset_index()
# Attach the per-bin median to every event
df_quantiles = df.merge(quantiles, how='left', on=['condition','construct','construct2','exp','biorep','bin_marker_quantiles'])
display(df_quantiles)

In [None]:
# Group and compute stats
# One group per quantile bin (identified by its median marker value) within each
# gene / condition / construct pair / biorep / experiment
stat_list = [np.mean, np.std, sp.stats.gmean, sp.stats.gstd, sp.stats.variation]
grouped = df_quantiles.groupby(by=['gene','condition','construct','construct2','biorep','exp','bin_marker_quantiles_median'])
stats_quantiles = grouped[['marker','output']].agg(stat_list).reset_index().dropna()

# Rename columns as 'col_stat'
stats_quantiles.columns = stats_quantiles.columns.map(lambda i: base.rename_multilevel_cols(i))
stats_quantiles['count'] = grouped['marker'].count().reset_index()['marker']
stats_quantiles = stats_quantiles.merge(metadata, how='left', on='construct')
# 'gene2' combines gene and design number as '<gene>_<design>'
stats_quantiles['gene2'] = stats_quantiles['gene'] + '_' + stats_quantiles['design'].astype(str)
display(stats_quantiles)

# Same combined key on the population-level stats table
stats['gene2'] = stats['gene'] + '_' + stats['design'].astype(str)

In [None]:
# Output vs. marker quantile bin for biorep 2, one panel per implementation:
# top row = single-transcript designs 1-3; bottom row = dual-transcript, dual-vector EF1a, dual-vector U6
fig, axes = plt.subplots(2,3, figsize=(10,7), gridspec_kw=dict(hspace=0.3, wspace=0.2))
for i, ax in enumerate(axes.flatten()):
    plot_df = df_quantiles[df_quantiles['biorep']==2]
    if i<3: 
        title = 'single-transcript\ndesign '+str(i+1)
        plot_df = plot_df[(plot_df['gene']=='1T') & (plot_df['design']==i+1)]
    elif i==3: 
        title = 'dual-transcript'
        plot_df = plot_df[(plot_df['gene']=='2T') & (plot_df['ts_loc']=='3\'')]
    elif i==4: 
        title = 'dual-vector, EF1a'
        plot_df = plot_df[(plot_df['gene']=='2V') & (plot_df['construct2_promoter']=='EF1a') & (plot_df['ts_loc']=='3\'')]
    else: 
        title = 'dual-vector, U6'
        plot_df = plot_df[(plot_df['gene']=='2V') & (plot_df['construct2_promoter']=='U6') & (plot_df['ts_loc']=='3\'')]

    # Line: geometric mean of output per bin; band: gmean divided / multiplied by the geometric std
    sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=construct_palette, 
                 legend=False, dashes=False, style='construct', markers=construct_markers, ax=ax,
                 estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
    ax.set(xscale='log', yscale='log',xlim=(2e2,2e4), ylim=(1e1,1e5), xlabel='marker', title=title)
    sns.despine(ax=ax)
    # Reference line: geometric mean output of the GEEC555 control in the same biorep
    ax.axhline(sp.stats.gmean(df_quantiles.loc[(df_quantiles['construct']=='GEEC555') & (df_quantiles['biorep']==2), 'output']), color='black')
    ax.minorticks_on()

    # Only the left column keeps its y labels and only the bottom row keeps its x labels.
    # tick_params hides the labels directly; the earlier version built blank label lists and sized
    # the x list from the y tick labels.
    if not(i==0 or i==3):
        ax.set(ylabel='')
        ax.tick_params(axis='y', which='both', labelleft=False)
    if i<3:
        ax.set(xlabel='')
        ax.tick_params(axis='x', which='both', labelbottom=False)

fig.savefig(rd.outfile(output_path/(f'line_compare.svg')), bbox_inches='tight')

In [None]:
# Create new marker palette for comparing designs
metadata3 = metadata.copy()
metadata3.loc[(metadata3['ts_kind']=='T') & (metadata3['design']==1), 'color'] = base.colors['red']
metadata3.loc[(metadata3['ts_kind']=='T') & (metadata3['design']==2), 'color'] = base.colors['orange']
metadata3.loc[(metadata3['ts_kind']=='T') & (metadata3['design']==3), 'color'] = base.colors['teal']
metadata3.loc[(metadata3['design']==3) & (metadata3['ts_kind']=='NT'), 'color'] = metadata3.loc[(metadata3['design']==3) & (metadata['ts_kind']=='NT'), 'color'].apply(base.get_light_color).apply(base.get_light_color)
metadata3.loc[(metadata3['design']==2) & (metadata3['ts_kind']=='NT'), 'color'] = metadata3.loc[(metadata3['design']==2) & (metadata['ts_kind']=='NT'), 'color'].apply(base.get_light_color)
metadata3.loc[(metadata3['design']==1) & (metadata3['ts_kind']=='NT'), 'color'] = base.colors['gray']
metadata3.loc[(metadata3['design']==1) & (metadata3['ts_kind']=='NT'), 'markers'] = 'X'
metadata3.loc[(metadata3['design']==2) & (metadata3['ts_kind']=='NT'), 'markers'] = 'P'
metadata3.loc[(metadata3['design']==3) & (metadata3['ts_kind']=='NT'), 'markers'] = 'v'
metadata3.loc[(metadata3['design']==1) & (metadata3['ts_kind']=='T'), 'markers'] = 'o'
metadata3.loc[(metadata3['design']==2) & (metadata3['ts_kind']=='T'), 'markers'] = 'D'
metadata3.loc[(metadata3['design']==3) & (metadata3['ts_kind']=='T'), 'markers'] = '^'
metadata_dict3 = metadata3.set_index('construct').to_dict('dict')
designs_palette = metadata_dict3['color']
designs_markers = metadata_dict3['markers']

In [None]:
# Population-level stats
plot_df = stats[stats['ts_kind'].isin(['T','NT']) & ((stats['ts_loc']=='3\'') | (stats['design']>0)) & (stats['construct2_promoter']!='U6')]
stat_list = ['output_gmean', 'output_std', 'output_variation']
fig, axes = plt.subplots(1,3, figsize=(10,4), gridspec_kw=dict(wspace=0.6))
for i,stat in enumerate(stat_list):
    ax = axes[i]
    sns.scatterplot(data=plot_df, x='gene2', y=stat, hue='construct', palette=construct_palette,
                legend=False, s=100, ax=ax, style='construct', markers=construct_markers)
    ax.set(xlabel='')
    rd.plot.generate_xticklabels(plot_df.drop_duplicates('gene2'), 'gene2', ['gene', 'design'], ax=ax)
    sns.despine(ax=ax)
    ax.axhline(stats.loc[stats['construct']=='GEEC555', stat].mean(), color='black', zorder=0)
    ax.axhline(stats.loc[(stats['gene']=='1T') & (stats['design']==0), stat].mean(), color=base.colors['gray'], zorder=0)
    if stat != 'output_variation': ax.set(yscale='log')
    else: ax.set(ylim=(0,ax.get_ylim()[1]))

fig.savefig(rd.outfile(output_path/(f'stat_population-compare.svg')), bbox_inches='tight')

In [None]:
# Compute fold change T:NT (T divided by NT) of highest bin
# Step 1: for every group, find the largest bin median, then pull that bin's full stats row back in
stat_list = ['output_gmean', 'output_std', 'output_variation']
by = ['gene','condition','construct','construct2','ts_kind','biorep','exp']
stats_quantiles_highest = stats_quantiles.groupby(by=by)['bin_marker_quantiles_median'].max().reset_index()
stats_quantiles_highest = stats_quantiles_highest.merge(stats_quantiles, how='left', on=by+['bin_marker_quantiles_median'])
display(stats_quantiles_highest)

def get_fc_T_NT(df):
    """Fold change of every column between the 'T' row and the 'NT' row (T divided by NT).

    `df` must have a 'ts_kind' column containing the labels 'T' and 'NT'; every other column
    is treated as a quantity to compare. Returns a one-row frame (index 0) in which each of
    those columns appears as '<column>_fc'.
    """
    # Rows become quantities and columns become ts_kind labels, so one division covers every quantity
    by_kind = df.set_index('ts_kind').T
    by_kind['fc'] = by_kind['T'] / by_kind['NT']

    # Back to a single row, one column per quantity
    fold_change = by_kind[['fc']].T
    fold_change = fold_change.rename(columns=lambda name: name + '_fc')
    return fold_change.reset_index(drop=True)

# Fold change T:NT within each gene / design / construct2 / target-site location / biorep,
# using the highest marker bin only.
# The helper defined in this notebook is get_fc_T_NT; the former call to `get_fc` referred to a
# name that is never defined here and raised NameError on a fresh kernel.
by = ['gene','design','gene2','construct2','ts_loc','biorep','exp']
fc_quantiles_highest = stats_quantiles_highest[stats_quantiles_highest['ts_kind'].isin(['NT','T'])].groupby(by=by)[stat_list+['ts_kind']].apply(get_fc_T_NT).reset_index()
fc_quantiles_highest = fc_quantiles_highest.merge(metadata_construct2, how='left', on='construct2')
display(fc_quantiles_highest)

In [None]:
# Compute fold change between the lowest and the highest marker bin (high divided by low)
def get_fc_low_high_bin(df):
    """Fold change of every column between the highest and the lowest marker bin.

    `df` must have a 'bin_marker_quantiles_median' column. The rows holding its minimum and
    maximum are labelled 'low' and 'high'; all other rows (and rows with any NaN) are dropped.
    Returns a one-row frame (index 0) with each column, including the bin median itself,
    as '<column>_fc' = high / low.
    """
    table = df.copy()
    bin_medians = table['bin_marker_quantiles_median']
    labels = {bin_medians.min(): 'low', bin_medians.max(): 'high'}
    table['bin'] = bin_medians.map(labels)

    # Rows become quantities and columns become 'low' / 'high'
    by_bin = table.dropna().set_index('bin').T
    by_bin['fc'] = by_bin['high'] / by_bin['low']

    fold_change = by_bin[['fc']].T
    fold_change = fold_change.rename(columns=lambda name: name + '_fc')
    return fold_change.reset_index(drop=True)

# Fold change between lowest and highest marker bin, computed per construct and biorep
stat_list = ['output_gmean', 'output_std', 'output_variation']
by = ['gene','gene2','condition','construct','construct2','ts_kind','biorep','exp']
fc_quantiles_bin = stats_quantiles.groupby(by=by)[stat_list+['bin_marker_quantiles_median']].apply(get_fc_low_high_bin).reset_index()

# Attach both metadata tables; columns already present as grouping keys come back with _x / _y suffixes
fc_quantiles_bin = fc_quantiles_bin.merge(metadata, how='left', on='construct')
fc_quantiles_bin = fc_quantiles_bin.merge(metadata_construct2, how='left', on='construct2')
display(fc_quantiles_bin)

In [None]:
# Compute slope in logspace
def get_slope(df, x, y):
    """Slope of log10(y) against log10(x) between the lowest and the highest marker bin.

    `df` must have a 'bin_marker_quantiles_median' column; the rows holding its minimum and
    maximum are labelled 'low' and 'high', and all other rows (and rows with any NaN) are dropped.
    `x` and `y` name the columns to compare. Returns
    (log10(y_high) - log10(y_low)) / (log10(x_high) - log10(x_low)).
    """
    table = df.copy()
    bin_medians = table['bin_marker_quantiles_median']
    labels = {bin_medians.min(): 'low', bin_medians.max(): 'high'}
    table['bin'] = bin_medians.map(labels)
    table = table.dropna()
    table['x_log'] = table[x].apply(np.log10)
    table['y_log'] = table[y].apply(np.log10)

    # Rows become quantities and columns become 'low' / 'high', so one subtraction yields every difference
    by_quantity = table.set_index('bin').T
    by_quantity['difference'] = by_quantity['high'] - by_quantity['low']

    # Back to one row per label ('low', 'high', 'difference'); rise over run on the
    # 'difference' row is the slope
    by_label = by_quantity.T
    by_label['slope'] = by_label['y_log'] / by_label['x_log']
    return by_label.loc['difference', 'slope']
    
# Log-log slope of output gmean vs. marker bin median for each group.
# `by` is whatever grouping list is currently bound to that name (an earlier cell sets it).
slope = stats_quantiles.groupby(by=by)[['output_gmean','bin_marker_quantiles_median']].apply(get_slope, 'bin_marker_quantiles_median', 'output_gmean').rename('slope').reset_index()

# Attach both metadata tables; columns already present as grouping keys come back with _x / _y suffixes
slope = slope.merge(metadata, how='left', on='construct')
slope = slope.merge(metadata_construct2, how='left', on='construct2')
display(slope)

In [None]:
# Color per gene2 key ('<gene>_<design>').
# NOTE(review): designs 1 and 3 are teal and red here, whereas the per-construct designs palette and
# gene2_palette_alt use red for design 1 and teal for design 3 — confirm which assignment is intended.
gene2_palette = {
    '1T_1': base.colors['teal'],
    '1T_2': base.colors['orange'],
    '1T_3': base.colors['red'],
    '2T_0': base.colors['green'],
    '2V_0': base.colors['purple'],
}

In [None]:
# Fold-change stat
# NOTE(review): df_list and this stat_list are not read anywhere in this cell.
df_list = [fc_quantiles_highest, fc_quantiles_bin, slope]
stat_list = ['output_gmean_fc', 'output_gmean_fc', 'slope']
fig, axes = plt.subplots(1,3, figsize=(12,4), gridspec_kw=dict(wspace=0.6))

# first panel
# Fold change T/NT of output gmean in the highest marker bin, one point per biorep
ax = axes[0]
plot_df = fc_quantiles_highest
stat = 'output_gmean_fc'

plot_df = plot_df[((plot_df['ts_loc']=='3\'') | (plot_df['design']>0)) & (plot_df['construct2_promoter']!='U6')]
sns.scatterplot(data=plot_df, x='gene2', y=stat, hue='gene2', palette=gene2_palette,
                legend=False, s=100, ax=ax, )
ax.set(xlabel='', yscale='log', title='Fold change of output gmean\nT:NT constructs')
sns.despine(ax=ax)
rd.plot.generate_xticklabels(plot_df.drop_duplicates('gene2'), 'gene2', ['gene_x', 'design'], ax=ax)

# second panel
# Fold change of output gmean between marker bins.
# NOTE(review): get_fc_low_high_bin divides high by low, while the title below reads 'low:high' — confirm wording.
ax = axes[1]
plot_df = fc_quantiles_bin
stat = 'output_gmean_fc'

plot_df = plot_df[plot_df['ts_kind_x'].isin(['T','NT']) & ((plot_df['ts_loc']=='3\'') | (plot_df['design']>0)) & (plot_df['construct2_promoter']!='U6')]
sns.scatterplot(data=plot_df, x='gene2', y=stat, hue='construct', palette=construct_palette,
                legend=False, s=100, ax=ax, style='construct', markers=construct_markers)
ax.set(xlabel='', yscale='log',title='Fold change of output gmean\nlow:high marker bin')
sns.despine(ax=ax)
rd.plot.generate_xticklabels(plot_df.drop_duplicates('gene2'), 'gene2', ['gene_x', 'design'], ax=ax)

# third panel
# Log-log slope between lowest and highest marker bin.
# NOTE(review): the y axis is logarithmic, so zero or negative slopes cannot be displayed — confirm this is acceptable.
ax = axes[2]
plot_df = slope
stat = 'slope'

plot_df = plot_df[plot_df['ts_kind_x'].isin(['T','NT']) & ((plot_df['ts_loc']=='3\'') | (plot_df['design']>0)) & (plot_df['construct2_promoter']!='U6')]
sns.scatterplot(data=plot_df, x='gene2', y=stat, hue='construct', palette=construct_palette,
                legend=False, s=100, ax=ax, style='construct', markers=construct_markers)
ax.set(xlabel='', yscale='log',title='Slope in logspace\nfrom low to high bin')
sns.despine(ax=ax)
rd.plot.generate_xticklabels(plot_df.drop_duplicates('gene2'), 'gene2', ['gene_x', 'design'], ax=ax)

fig.savefig(rd.outfile(output_path/(f'stat_fc-compare.svg')), bbox_inches='tight')

In [None]:
# Combined '<gene>_<design>' key on the event-level table, used to facet the distribution plots
df_quantiles['gene2'] = df_quantiles['gene'] + '_' + df_quantiles['design'].astype(str)

In [None]:
# CDF
# Cumulative KDE of output on a log axis, one curve per construct, each normalised on its own
# (common_norm=False). Note that sns.kdeplot returns a matplotlib Axes, which is what `g` holds here.
g = sns.kdeplot(data=df_quantiles, x='output', hue='construct', palette=construct_palette,
                cumulative=True, common_norm=False, legend=False, log_scale=True)

In [None]:
def gene2_to_title(name):
    """Turn a '<gene>_<design>' key into a panel title.

    The gene code ('1T', '2T' or '2V') is spelled out; the design number is appended as
    ', design N' unless it is '0'. For example '1T_2' -> 'single-transcript, design 2'
    and '2V_0' -> 'dual-vector'.
    """
    kind_label = {'1T': 'single-transcript', '2T': 'dual-transcript', '2V': 'dual-vector'}
    kind, design = name.split('_')
    title = kind_label[kind]
    if design != '0':
        title = title + ', design ' + design
    return title

# Cumulative output distributions, one panel per gene2 key, for every group except 'base' and 'marker'
plot_df = df_quantiles[~df_quantiles['group'].isin(['base','marker'])].sort_values('gene2')
g = sns.displot(data=plot_df, x='output', col='gene2', col_wrap=3,
                hue='construct', palette=construct_palette, kind='kde',
                cumulative=True, common_norm=False, legend=False, log_scale=True)
for gene2, ax in g.axes_dict.items():
    # Overlay two references in every panel: the GEEC555 control (dotted) and the 'base' group (solid)
    sns.kdeplot(data=df_quantiles[df_quantiles['construct']=='GEEC555'], x='output', hue='construct', palette=construct_palette,
                cumulative=True, common_norm=False, legend=False, log_scale=True, ax=ax, ls=':', zorder=0)
    sns.kdeplot(data=df_quantiles[df_quantiles['group']=='base'], x='output', hue='construct', palette=construct_palette,
                cumulative=True, common_norm=False, legend=False, log_scale=True, ax=ax, zorder=0)
    ax.set(title=gene2_to_title(gene2))
    ax.grid(zorder=0)

In [None]:
# Output gmean of each marker quantile bin (sorted by bin median) for the 1T_1 / T rows of biorep 2,
# plotted against 0.05, 0.10, ..., 1.00.
# The y values are hard-coded to 20 entries (one per bin from q=20); plt.scatter raises if the
# selection does not contain exactly 20 rows.
x = stats_quantiles.loc[(stats_quantiles['gene2']=='1T_1') & (stats_quantiles['ts_kind']=='T') & (stats_quantiles['biorep']==2)].sort_values('bin_marker_quantiles_median')
f = plt.scatter(x['output_gmean'], [float(y)/100 for y in range(5,101,5)])
plt.xscale('log')

### For Katie poster

In [None]:
# Create new marker palette for comparing designs
# This rebinds metadata3, designs_palette and designs_markers, replacing any values an earlier cell
# assigned to those names.
metadata3 = metadata.copy()
# Targeting (T) constructs: color by design number, then by group for 'dual' and 'ts3' / 'ts5'
metadata3.loc[(metadata3['ts_kind']=='T') & (metadata3['design']==1), 'color'] = base.colors['red']
metadata3.loc[(metadata3['ts_kind']=='T') & (metadata3['design']==2), 'color'] = base.colors['orange']
metadata3.loc[(metadata3['ts_kind']=='T') & (metadata3['design']==3), 'color'] = base.colors['teal']
metadata3.loc[(metadata3['ts_kind']=='T') & (metadata3['group']=='dual'), 'color'] = '#78AF56' # green
metadata3.loc[(metadata3['ts_kind']=='T') & (metadata3['group'].isin(['ts3','ts5'])), 'color'] = '#7A1378' # magenta
# Every non-targeting (NT) construct is gray
metadata3.loc[(metadata3['ts_kind']=='NT'), 'color'] = base.colors['gray']
# Constructs in these groups with a 5' target site get a lightened version of the color assigned above
metadata3.loc[(metadata3['group'].isin(['dual','ts3','ts5'])) & (metadata3['ts_loc']=='5\''), 'color'] = metadata3.loc[(metadata3['group'].isin(['dual','ts3','ts5'])) & (metadata3['ts_loc']=='5\''), 'color'].apply(base.get_light_color)
#metadata3.loc[(metadata3['design']==3) & (metadata3['ts_kind']=='NT'), 'color'] = metadata3.loc[(metadata3['design']==3) & (metadata['ts_kind']=='NT'), 'color'].apply(base.get_light_color).apply(base.get_light_color)

# Marker shapes by group and ts_kind
metadata3.loc[(metadata3['group']=='controller') & (metadata3['ts_kind']=='NT'), 'markers'] = 'X'
metadata3.loc[(metadata3['group'].isin(['dual','ts3','ts5'])) & (metadata3['ts_kind']=='NT'), 'markers'] = 'P'
metadata3.loc[(metadata3['group']=='dual') & (metadata3['ts_kind']=='T'), 'markers'] = 'D'
metadata3.loc[(metadata3['group'].isin(['ts3','ts5'])) & (metadata3['ts_kind']=='T'), 'markers'] = '^'

# construct -> color / marker lookups for seaborn
metadata_dict3 = metadata3.set_index('construct').to_dict('dict')
designs_palette = metadata_dict3['color']
designs_markers = metadata_dict3['markers']

In [None]:
# Alternative color per gene2 key: designs 1 / 2 / 3 are red / orange / teal, matching the
# per-design colors given to targeting constructs in the designs palette
gene2_palette_alt = {
    '1T_1': base.colors['red'],
    '1T_2': base.colors['orange'],
    '1T_3': base.colors['teal'],
    '2T_0': base.colors['green'],
    '2V_0': base.colors['purple'],
}

# (Re)compute the combined '<gene>_<design>' key on the event-level table
df_quantiles['gene2'] = df_quantiles['gene'] + '_' + df_quantiles['design'].astype(str)

In [None]:
# Output vs. marker quantile bin for biorep 2 with the designs palette, one panel per implementation:
# top row = single-transcript designs 1-3; bottom row = dual-transcript, dual-vector EF1a, dual-vector U6
fig, axes = plt.subplots(2,3, figsize=(15,10), gridspec_kw=dict(hspace=0.3, wspace=0.2))
for i, ax in enumerate(axes.flatten()):
    plot_df = df_quantiles[df_quantiles['biorep']==2]
    # NOTE(review): the "no target site" value is 'na' for the dual-transcript panel but 'none'
    # for the dual-vector panels — confirm both spellings exist in the metadata.
    if i<3: 
        title = 'single-transcript\ndesign '+str(i+1)
        plot_df = plot_df[(plot_df['gene']=='1T') & (plot_df['design']==i+1)]
    elif i==3: 
        title = 'dual-transcript'
        plot_df = plot_df[(plot_df['gene']=='2T') & (plot_df['ts']!='na')]
    elif i==4: 
        title = 'dual-vector, EF1a'
        plot_df = plot_df[(plot_df['gene']=='2V') & (plot_df['construct2_promoter']=='EF1a') & (plot_df['ts']!='none')]
        #display(plot_df.drop_duplicates('construct')[['construct','ts','ts_kind','ts_loc']])
    else: 
        title = 'dual-vector, U6'
        plot_df = plot_df[(plot_df['gene']=='2V') & (plot_df['construct2_promoter']=='U6') & (plot_df['ts']!='none')]

    # Line: geometric mean of output per bin; band: gmean divided / multiplied by the geometric std
    sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=designs_palette, 
                 legend=False, dashes=False, style='construct', markers=designs_markers, ax=ax,
                 estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
    ax.set(xscale='log', yscale='log',xlim=(2e2,2e4), ylim=(1e1,1e5), xlabel='marker', title=title)
    sns.despine(ax=ax)

    # Reference line: geometric mean output of the GEEC555 control in the same biorep
    marker_baseline = sp.stats.gmean(df_quantiles.loc[(df_quantiles['construct']=='GEEC555') & (df_quantiles['biorep']==2), 'output'])
    ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
    ax.annotate('marker only', (ax.get_xlim()[1], marker_baseline), ha='right', va='bottom', fontsize=sns.plotting_context('talk')['legend.fontsize'])

    ax.minorticks_on()

    # Only the left column keeps its y labels and only the bottom row keeps its x labels.
    # tick_params hides the labels directly; the earlier version built blank label lists and sized
    # the x list from the y tick labels.
    if not(i==0 or i==3):
        ax.set(ylabel='')
        ax.tick_params(axis='y', which='both', labelleft=False)
    if i<3:
        ax.set(xlabel='')
        ax.tick_params(axis='x', which='both', labelbottom=False)

fig.savefig(rd.outfile(output_path/(f'line_compare-with-ref-lines.svg')), bbox_inches='tight')

In [None]:
# Designs -- for Katie poster
def _plot_poster_panel(panel_df, line_df, title, hist_title, line_ax, hist_ax, marker_baseline):
    """Draw one output-vs-marker line panel together with its marginal output density panel.

    panel_df: events of one implementation; drawn in the density panel.
    line_df: events drawn in the line panel (panel_df itself or a filtered subset of it).
    title, hist_title: titles of the line panel and of the density panel.
    line_ax, hist_ax: matplotlib Axes to draw on.
    marker_baseline: y position of the dotted 'marker only' reference line.
    """
    # Line: geometric mean of output per bin; band: gmean divided / multiplied by the geometric std
    sns.lineplot(data=line_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=designs_palette,
                 legend=False, dashes=False, style='construct', markers=designs_markers, ax=line_ax,
                 estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
    line_ax.set(xscale='log', yscale='log', xlim=(2e2,2e4), ylim=(2e1,1e5), xlabel='marker', title=title)
    sns.despine(ax=line_ax)
    line_ax.axhline(marker_baseline, color='black', ls=':')
    line_ax.minorticks_on()
    line_ax.annotate('marker only', (line_ax.get_xlim()[1], marker_baseline), ha='right', va='bottom', fontsize=sns.plotting_context('talk')['legend.fontsize'])

    # Marginal density of output, drawn along the y axis that is shared with the line panels
    sns.kdeplot(data=panel_df, y='output', hue='construct', palette=designs_palette,
                legend=False, log_scale=True, common_norm=False, ax=hist_ax)
    sns.despine(ax=hist_ax)
    hist_ax.set(xlabel='density', title=hist_title, xlim=(0,1.2))


fig, axes = plt.subplots(1,6, figsize=(24,5), sharey=True, gridspec_kw=dict(width_ratios=(1,1,1,0.3,0.3,0.3)))

# Everything in this figure uses biorep 2; the reference line is the geometric mean output of the
# 'marker' group and is identical for all panels, so it is computed once
biorep2 = df_quantiles[df_quantiles['biorep']==2]
marker_baseline = sp.stats.gmean(biorep2.loc[biorep2['group']=='marker', 'output'])

# (line-panel title, density-panel title, row selection, drop events with output <= 1 from the line panel)
# NOTE(review): the "no target site" value is 'na' for dual-transcript but 'none' for dual-vector —
# confirm both spellings exist in the metadata.
panels = [
    ('dual-transcript', 'dual-transcript\n',
     (biorep2['gene']=='2T') & (biorep2['ts']!='na'), False),
    ('dual-vector, EF1a', 'dual-vector,\nEF1a',
     (biorep2['gene']=='2V') & (biorep2['construct2_promoter']=='EF1a') & (biorep2['ts']!='none'), True),
    ('dual-vector, U6', 'dual-vector,\nU6',
     (biorep2['gene']=='2V') & (biorep2['construct2_promoter']=='U6') & (biorep2['ts']!='none'), True),
]

# Line panels occupy axes 0-2; the matching density panels occupy axes 3-5
for i, (title, hist_title, mask, drop_low_output) in enumerate(panels):
    plot_df = biorep2[mask]
    line_df = plot_df[(plot_df['output']>1)] if drop_low_output else plot_df
    _plot_poster_panel(plot_df, line_df, title, hist_title, axes[i], axes[i+3], marker_baseline)

fig.savefig(rd.outfile(output_path/(f'line-marg-hist_two-gene.svg')), bbox_inches='tight')

In [None]:
# 'pair' is the categorical x position shared by the T / NT constructs of one design;
# 'label' is the human-readable tick label for that position
stats['pair'] = stats['group'] + '_' + stats['design'].astype(str) + '_' + stats['ts_loc'] + '_' + stats['construct2_promoter']

# Start from the design number; object dtype lets the string labels below be written into the
# same column without a dtype conflict
stats['label'] = stats['design'].astype(object)
stats.loc[stats['design']>0, 'label'] = stats.loc[stats['design']>0, 'label'].map(lambda x: 'design '+str(x))
stats.loc[(stats['design']==0) & (stats['ts_loc']=='3\''), 'label'] = '3\'UTR ts'
stats.loc[(stats['design']==0) & (stats['ts_loc']=='5\''), 'label'] = '5\'UTR ts'

# Append the construct2 promoter on a second line. Each assignment takes its right-hand side from the
# same rows it writes to; the U6 line used to read the EF1a rows, so index alignment left every
# U6 label as NaN.
stats.loc[stats['construct2_promoter']=='EF1a', 'label'] = stats.loc[stats['construct2_promoter']=='EF1a', 'label'].map(lambda x: str(x)+'\nEF1a')
stats.loc[stats['construct2_promoter']=='U6', 'label'] = stats.loc[stats['construct2_promoter']=='U6', 'label'].map(lambda x: str(x)+'\nU6')
display(stats.drop_duplicates('label'))

In [None]:
# Population-level stats
# Geometric mean and standard deviation of output for every T / NT construct, positioned by 'pair'
labels = {'output_gmean': 'Geometric mean', 'output_std': 'Standard deviation'}
plot_df = stats[stats['ts_kind'].isin(['T','NT'])].copy()
plot_df.sort_values(['gene','design'], inplace=True)
stat_list = ['output_gmean', 'output_std']
fig, axes = plt.subplots(1,2, figsize=(15,5), gridspec_kw=dict(wspace=0.3))
for i,stat in enumerate(stat_list):
    ax = axes[i]
    sns.scatterplot(data=plot_df, x='pair', y=stat, hue='construct', palette=designs_palette,
                legend=False, s=100, ax=ax, style='construct', markers=designs_markers)
    # Blank the x tick labels and widen the x range by 3 units on the right, where the
    # reference-line annotations are anchored
    ax.set(xlabel='', xticklabels=['']*(len(ax.get_xticklabels())-1),ylabel=labels[stat], xlim=(ax.get_xlim()[0], ax.get_xlim()[1]+3), yscale='log')
    sns.despine(ax=ax)

    # Reference lines (mean over the matching rows of `stats`): black = GEEC555 control,
    # gray = rows with neither a miR nor a target site
    marker_baseline = stats.loc[stats['construct']=='GEEC555', stat].mean()
    ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
    ax.annotate('marker only', (ax.get_xlim()[1], marker_baseline), ha='right', va='bottom', fontsize=sns.plotting_context('talk')['legend.fontsize'])
    standard_highline = stats.loc[(stats['miR']=='none') & (stats['ts']=='none'), stat].mean()
    ax.axhline(standard_highline, color=base.colors['gray'], ls=':', zorder=0)
    ax.annotate('output only', (ax.get_xlim()[1], standard_highline), ha='right', va='bottom', fontsize=sns.plotting_context('talk')['legend.fontsize'], color=base.colors['gray'])

fig.savefig(rd.outfile(output_path/(f'stat_population-compare_poster.svg')), bbox_inches='tight')