# Example experimental analysis

This is how Kasey analyzed several biological replicates for one experiment, from start to finish. In this experiment, base gene, OL circuit, and CL circuit ComMAND constructs were transfected with varying plasmid amounts.

In [None]:
# Import our favorite packages
#import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# Enable concurrent editing of `base.py`, a helper script
# (to be added to the tutorial)
# from importlib import reload
# reload(base)

# Configure seaborn once for the whole notebook: 'ticks' axes style and the
# 'talk' scaling context, with Helvetica Neue as the sans-serif typeface
font_settings = {
    'font.family': 'sans-serif',
    'font.sans-serif': ['Helvetica Neue'],
}
sns.set_style('ticks')
sns.set_context('talk', rc=font_settings)

### Load data

Load plates from multiple biological replicates into a single DataFrame with associated metadata.

In [None]:
# Plate-level metadata: one row per biological replicate, each pointing at
# its exported data folder and the plate map (yaml) describing its wells
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'
exp_folders = ['2024.07.18_exp100', '2024.09.11_exp100.3', '2024.09.28_exp100.5']

# All bioreps share the plate map stored with the first experiment
shared_plate_map = base_path/'2024.07.18_exp100'/'export'/'wells.yaml'

plates = pd.DataFrame({
    'data_path': [base_path/folder/'export' for folder in exp_folders],
    'yaml_path': [shared_plate_map] * len(exp_folders),
    'biorep': [1, 2, 3],
    'exp': ['exp100.' + str(n) for n in (1, 3, 5)],
})

# Where the data cache and the figures are written
output_path = rd.rootdir/'output'/'example_plasmid-titration'
cache_path = output_path/'data.gzip'

# Render each distinct plate map so the layout can be checked by eye
for plate_map in plates['yaml_path'].unique():
    rd.plot.plot_well_metadata(plate_map)

In [None]:
# Read the raw plates (from the SharePoint) only when no cache file exists
# yet; otherwise reuse the cached parquet file
if not cache_path.is_file():

    # Load relevant channels from all plates
    channel_list = ['mRuby2-A','tagBFP-A','mGL-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every loaded channel
    positive_in_all_channels = (data[channel_list] > 0).all(axis=1)
    data = data[positive_in_all_channels]

    # Write the filtered events to the cache for the next run
    data.to_parquet(rd.outfile(cache_path))

else:
    data = pd.read_parquet(cache_path)

display(data)

In [None]:
# Annotate every event with its plasmid information by left-joining the
# construct metadata spreadsheet on the shared 'construct' column
metadata_file = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'
metadata = pd.read_excel(metadata_file)
data = pd.merge(data, metadata, how='left', on='construct')
display(data)

In [None]:
# Copy the raw Attune channels into columns with interpretable names
# (new column name -> raw channel name)
readable_names = {
    'marker': 'mGL-A',     # transfection marker
    'output': 'mRuby2-A',  # circuit output
    'filler': 'tagBFP-A',  # "filler" plasmid to keep DNA amount constant when changing amount of output plasmid
}
for alias, channel in readable_names.items():
    data[alias] = data[channel]

In [None]:
# Categorical palette used for the circuit comparison plots
main_palette = {
    'na': 'black',  # base gene, untransfected
    'NT': 'grey',   # OL circuit
    'T': 'teal',    # CL circuit
}

# Continuous palette: viridis resampled over [0, 0.82] in 256 steps, which
# drops the hard-to-see yellow end of the original colormap
viridis_positions = np.linspace(0,0.82,256)
viridis_colors = matplotlib.colormaps['viridis'](viridis_positions)
no_yellow_viridis = matplotlib.colors.ListedColormap(viridis_colors)

### Gate transfected cells

Draw gates based on the untransfected population, then gate transfected cells and save in a new DataFrame.

In [None]:
# One marker gate per experiment: the 99.9th percentile of the marker
# signal among untransfected ('UT') events
untransfected = data[data['construct']=='UT']
gates = (
    untransfected
    .groupby(['exp'])['marker']
    .agg(lambda events: events.quantile(0.999))
    .reset_index()
)

# exp100.1 has no untransfected condition (it was forgotten), so append a
# row for it that uses the mean of the other experiments' gates
fallback_gate = gates['marker'].mean()
gates.loc[len(gates.index)] = ['exp100.1', fallback_gate]

display(gates)

In [None]:
# 1D marker distributions per experiment (rows) and construct (columns),
# colored by plasmid amount on a log color scale, to check the gates
g = sns.displot(
    data=data, x='marker', kind='kde',
    row='exp', col='construct',
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
    log_scale=True, common_norm=False,
    facet_kws={'margin_titles': True},
)

# Draw each experiment's gate as a vertical line on all of its facets
for (exp_name, _construct), facet_ax in g.axes_dict.items():
    exp_gate = gates.loc[gates.exp==exp_name, 'marker'].values[0]
    facet_ax.axvline(exp_gate, color='black', zorder=0)

In [None]:
# Also plot 2D distributions
# Subsample 1000 events per (exp, construct, amount) condition before the
# 2D KDE. The fixed seed makes the subsample, and therefore the figure,
# identical on every Restart & Run All.
# NOTE: `sample` raises a ValueError if a condition has fewer than 1000 events.
plot_df = data.groupby(['exp','construct','amount']).sample(1000, random_state=0)
g = sns.displot(data=plot_df, x='marker', y='output', hue='amount', palette=no_yellow_viridis, kind='kde',
                hue_norm=matplotlib.colors.LogNorm(), row='exp', col='construct', facet_kws=dict(margin_titles=True),
                log_scale=True, common_norm=False, levels=5)

# Add reference lines corresponding to gates (one vertical line per facet,
# looked up by the facet's experiment)
for (exp, construct), ax in g.axes_dict.items():
    ax.axvline(gates.loc[gates.exp==exp, 'marker'].values[0], color='black', zorder=0)

In [None]:
# Gate on marker expression
def gate_data(df, gate_table=None):
    """Return the transfected events of a single experiment.

    Parameters
    ----------
    df : pandas.DataFrame
        Events from one experiment. Must contain the columns 'exp' (the
        value in the first row selects the gate), 'mGL-A' and 'construct'.
    gate_table : pandas.DataFrame, optional
        Table with an 'exp' column and a 'marker' column holding one gate
        per experiment. Defaults to the notebook-level `gates` table.

    Returns
    -------
    pandas.DataFrame
        The rows of `df` whose 'mGL-A' value is strictly above the
        experiment's gate and whose construct is not 'UT'.

    Raises
    ------
    KeyError
        If `gate_table` holds no gate for the experiment found in `df`.
    """
    if gate_table is None:
        gate_table = gates

    exp_name = df.exp.values[0]
    matching_gates = gate_table.loc[gate_table.exp==exp_name, 'marker'].values
    if len(matching_gates) == 0:
        raise KeyError(f'no marker gate defined for experiment {exp_name!r}')
    gate = matching_gates[0]

    # Filter the group that was passed in, not the notebook-level `data`:
    # each experiment must be gated exactly once, with its own threshold
    return df[(df['mGL-A']>gate) & (df.construct!='UT')]

# Apply gate_data to each experiment's events (all columns are selected so
# the grouping column 'exp' is passed along), then stack the results into
# one frame with a fresh index
events_by_exp = data.groupby('exp')[data.columns]
transfected = events_by_exp.apply(gate_data).reset_index(drop=True)

### Explore data distributions

Plot 2D distributions across several facets to explore data.

In [None]:
# TODO

### Calculate summary statistics

Calculate geometric mean, etc. for each condition to visualize trends succinctly.

In [None]:
# TODO

# Old

In [None]:
# Marker vs. filler 2D distributions, faceted by experiment and construct.
# Subsample 1000 events per (exp, construct, amount) condition; the fixed
# seed makes the subsample, and therefore the figure, identical on every run.
# NOTE: `sample` raises a ValueError if a condition has fewer than 1000 events.
plot_df = data.groupby(['exp','construct','amount']).sample(1000, random_state=0)
g = sns.displot(data=plot_df, x='marker', y='filler', hue='amount', palette=no_yellow_viridis, kind='kde',
                hue_norm=matplotlib.colors.LogNorm(), row='exp', col='construct', facet_kws=dict(margin_titles=True),
                log_scale=True, common_norm=False, levels=5)


In [None]:
# Marker distributions of all events, faceted by experiment and construct
plot_df = data
g = sns.displot(
    data=plot_df, x='marker', kind='kde',
    row='exp', col='construct',
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
    log_scale=True, common_norm=False,
    facet_kws={'margin_titles': True},
)
g.set(xlim=(2e0,1e5))

In [None]:
# Filler distributions of all events, faceted by experiment and construct
plot_df = data
g = sns.displot(
    data=plot_df, x='filler', kind='kde',
    row='exp', col='construct',
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
    log_scale=True, common_norm=False,
    facet_kws={'margin_titles': True},
)
g.set(xlim=(2e0,1e6))

In [None]:
# Output distributions of all events, faceted by experiment and construct
plot_df = data
g = sns.displot(
    data=plot_df, x='output', kind='kde',
    row='exp', col='construct',
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
    log_scale=True, common_norm=False,
    facet_kws={'margin_titles': True},
)
g.set(xlim=(2e0,1e6))

In [None]:
# Assign every cell to a quadrant defined by one gate per axis
def get_quadrant(x,y,gate_x,gate_y):
    """Encode, per cell, which of two gates it exceeds.

    A value counts as positive only when it is strictly greater than its
    gate. The returned Series is named 'quadrant' and holds:
      0 = double negative
      1 = x-positive
      2 = y-positive
      3 = double positive
    """
    above_gate = pd.DataFrame({'x': x > gate_x})
    above_gate['y'] = y > gate_y
    # x contributes bit 0 and y contributes bit 1 of the quadrant code
    code = above_gate['x'].astype(int) + 2 * above_gate['y'].astype(int)
    return code.rename('quadrant')

# The quadrant call needs a 'marker' and a 'filler' threshold for each
# experiment, but the `gates` table built earlier in this notebook only has
# a 'marker' column. When 'filler' is missing, derive it here the same way
# as the marker gate: the 99.9th percentile of untransfected ('UT') events,
# falling back to the mean of the other experiments for any experiment
# without an untransfected condition.
# NOTE(review): confirm that this is the intended filler threshold.
quadrant_gates = gates.copy()
if 'filler' not in quadrant_gates.columns:
    filler_gates = data[data['construct']=='UT'].groupby('exp')['filler'].quantile(0.999)
    quadrant_gates['filler'] = (
        quadrant_gates['exp'].map(filler_gates).fillna(filler_gates.mean())
    )

# Label every event with its marker (x) / filler (y) quadrant, one
# experiment at a time so that each experiment uses its own gates
df_list = []
for exp, group in data.groupby('exp'):
    exp_gates = quadrant_gates[quadrant_gates['exp']==exp]
    quadrant = get_quadrant(group['marker'], group['filler'],
                            exp_gates['marker'].values[0],
                            exp_gates['filler'].values[0])
    df_list.append(group.assign(quadrant=quadrant))
df = pd.concat(df_list)
display(df)

In [None]:
# Both subsets exclude the untransfected ('UT') wells
not_untransfected = df['construct']!='UT'

# Any non-zero quadrant: positive for marker, filler, or both
df_transfected = df[(df['quadrant']>0) & not_untransfected]

# Quadrants 1 and 3: marker-positive, with or without filler
df_gated = df[df['quadrant'].isin([1,3]) & not_untransfected]

In [None]:
# Output distributions of the transfected events, faceted by experiment
# and construct
plot_df = df_transfected
g = sns.displot(
    data=plot_df, x='output', kind='kde',
    row='exp', col='construct',
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
    log_scale=True, common_norm=False,
    facet_kws={'margin_titles': True},
)
g.set(xlim=(2e0,1e6))

In [None]:
# Marker vs. output 2D distributions of the transfected events.
# Subsample 1000 events per (exp, construct, amount) condition; the fixed
# seed makes the subsample, and therefore the figure, identical on every run.
# NOTE: `sample` raises a ValueError if a condition has fewer than 1000 events.
plot_df = df_transfected.groupby(['exp','construct','amount']).sample(1000, random_state=0)
g = sns.displot(data=plot_df, x='marker', y='output', hue='amount', palette=no_yellow_viridis, kind='kde',
                hue_norm=matplotlib.colors.LogNorm(), row='exp', col='construct', facet_kws=dict(margin_titles=True),
                log_scale=True, common_norm=False, levels=5)

In [None]:
# Marker vs. output 2D distributions of the marker-gated events, faceted
# by experiment and construct
plot_df = df_gated
g = sns.displot(
    data=plot_df, x='marker', y='output', kind='kde', levels=5,
    row='exp', col='construct',
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
    log_scale=True, common_norm=False,
    facet_kws={'margin_titles': True},
)

### Plot data for experiments with constant marker
(exp100.1, 100.3, 100.5)

In [None]:
# Bin data and calculate statistics
df_quantiles, stats, _, fits = base.calculate_bins_stats(df_gated, by=['construct','amount','exp','biorep'])
stats = stats.merge(metadata, how='left', on='construct')
fits = fits.merge(metadata, how='left', on='construct')

In [None]:
# exp100.1: output vs. binned marker level, one panel per plasmid amount
# NOTE(review): `main_markers` is not defined anywhere in the visible notebook.
not_aio039 = df_quantiles['construct']!='AIO.039'
in_experiment = df_quantiles['exp']=='exp100.1'
plot_df = df_quantiles[not_aio039 & in_experiment]
g = sns.relplot(
    data=plot_df, kind='line', col='amount',
    x='bin_marker_quantiles_median', y='output',
    hue='construct', palette=main_palette,
    style='construct', markers=main_markers, dashes=False,
    markersize=10, markeredgewidth=1, legend=False,
    # Line = geometric mean; band = geometric mean divided/multiplied by the geometric SD
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log', xlabel='marker', ylim=(2e1,1e5))

# Dotted reference line: mean `output_gmean` of construct AIO.039 in this experiment
baseline_rows = (stats['exp']=='exp100.1') & (stats['construct']=='AIO.039')
marker_baseline = stats.loc[baseline_rows, 'output_gmean'].mean()
for facet_ax in g.axes_dict.values():
    facet_ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
g.figure.savefig(rd.outfile(output_path/'line_exp100_by-amount.svg'))

In [None]:
# exp100.3: output vs. binned marker level, one panel per plasmid amount
# NOTE(review): `main_markers` is not defined anywhere in the visible notebook.
not_aio039 = df_quantiles['construct']!='AIO.039'
in_experiment = df_quantiles['exp']=='exp100.3'
plot_df = df_quantiles[not_aio039 & in_experiment]
g = sns.relplot(
    data=plot_df, kind='line', col='amount',
    x='bin_marker_quantiles_median', y='output',
    hue='construct', palette=main_palette,
    style='construct', markers=main_markers, dashes=False,
    markersize=10, markeredgewidth=1, legend=False,
    # Line = geometric mean; band = geometric mean divided/multiplied by the geometric SD
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log', xlabel='marker', ylim=(2e1,1e5))

# Dotted reference line: mean `output_gmean` of construct AIO.039 in this experiment
baseline_rows = (stats['exp']=='exp100.3') & (stats['construct']=='AIO.039')
marker_baseline = stats.loc[baseline_rows, 'output_gmean'].mean()
for facet_ax in g.axes_dict.values():
    facet_ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
g.figure.savefig(rd.outfile(output_path/'line_exp100.3_by-amount.svg'))

In [None]:
# exp100.5: output vs. binned marker level, one panel per plasmid amount
# NOTE(review): `main_markers` is not defined anywhere in the visible notebook.
not_aio039 = df_quantiles['construct']!='AIO.039'
in_experiment = df_quantiles['exp']=='exp100.5'
plot_df = df_quantiles[not_aio039 & in_experiment]
g = sns.relplot(
    data=plot_df, kind='line', col='amount',
    x='bin_marker_quantiles_median', y='output',
    hue='construct', palette=main_palette,
    style='construct', markers=main_markers, dashes=False,
    markersize=10, markeredgewidth=1, legend=False,
    # Line = geometric mean; band = geometric mean divided/multiplied by the geometric SD
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log', xlabel='marker', ylim=(2e1,1e5))

# Dotted reference line: mean `output_gmean` of construct AIO.039 in this experiment
baseline_rows = (stats['exp']=='exp100.5') & (stats['construct']=='AIO.039')
marker_baseline = stats.loc[baseline_rows, 'output_gmean'].mean()
for facet_ax in g.axes_dict.values():
    facet_ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
g.figure.savefig(rd.outfile(output_path/'line_exp100.5_by-amount.svg'))

In [None]:
# Human-readable names for the constructs; constructs missing from this
# mapping get NaN in the new 'label' column
label = {
    'RC140': 'base',
    'RC142': 'OL circuit',
    'RC143': 'CL circuit',
}

# Add the same 'label' column to each summary table
for table in (df_quantiles, stats, fits):
    table['label'] = table['construct'].map(label)

In [None]:
# exp100.1: output vs. binned marker level, one panel per construct label,
# colored by plasmid amount on a log color scale
# NOTE(review): `main_markers` is not defined anywhere in the visible notebook.
not_aio039 = df_quantiles['construct']!='AIO.039'
in_experiment = df_quantiles['exp']=='exp100.1'
plot_df = df_quantiles[not_aio039 & in_experiment].sort_values('construct')
g = sns.relplot(
    data=plot_df, kind='line', col='label',
    x='bin_marker_quantiles_median', y='output',
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
    style='construct', markers=main_markers, dashes=False,
    markersize=10, markeredgewidth=1, legend=False,
    # Line = geometric mean; band = geometric mean divided/multiplied by the geometric SD
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log', xlabel='marker', ylim=(2e1,1e5))
g.set_titles('{col_name}')

# Dotted reference line: mean `output_gmean` of construct AIO.039 in this experiment
baseline_rows = (stats['exp']=='exp100.1') & (stats['construct']=='AIO.039')
marker_baseline = stats.loc[baseline_rows, 'output_gmean'].mean()
for facet_ax in g.axes_dict.values():
    facet_ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
g.figure.savefig(rd.outfile(output_path/'line_exp100_by-construct.svg'))

In [None]:
# exp100.3: output vs. binned marker level, one panel per construct label,
# colored by plasmid amount on a log color scale
# NOTE(review): `main_markers` is not defined anywhere in the visible notebook.
not_aio039 = df_quantiles['construct']!='AIO.039'
in_experiment = df_quantiles['exp']=='exp100.3'
plot_df = df_quantiles[not_aio039 & in_experiment].sort_values('construct')
g = sns.relplot(
    data=plot_df, kind='line', col='label',
    x='bin_marker_quantiles_median', y='output',
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
    style='construct', markers=main_markers, dashes=False,
    markersize=10, markeredgewidth=1, legend=False,
    # Line = geometric mean; band = geometric mean divided/multiplied by the geometric SD
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log', xlabel='marker', ylim=(2e1,1e5))
g.set_titles('{col_name}')

# Dotted reference line: mean `output_gmean` of construct AIO.039 in this experiment
baseline_rows = (stats['exp']=='exp100.3') & (stats['construct']=='AIO.039')
marker_baseline = stats.loc[baseline_rows, 'output_gmean'].mean()
for facet_ax in g.axes_dict.values():
    facet_ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
g.figure.savefig(rd.outfile(output_path/'line_exp100.3_by-construct.svg'))

In [None]:
# exp100.5: output vs. binned marker level, one panel per construct label,
# colored by plasmid amount on a log color scale
# NOTE(review): `main_markers` is not defined anywhere in the visible notebook.
not_aio039 = df_quantiles['construct']!='AIO.039'
in_experiment = df_quantiles['exp']=='exp100.5'
plot_df = df_quantiles[not_aio039 & in_experiment].sort_values('construct')
g = sns.relplot(
    data=plot_df, kind='line', col='label',
    x='bin_marker_quantiles_median', y='output',
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
    style='construct', markers=main_markers, dashes=False,
    markersize=10, markeredgewidth=1, legend=False,
    # Line = geometric mean; band = geometric mean divided/multiplied by the geometric SD
    estimator=sp.stats.gmean,
    errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)),
)
g.set(xscale='log', yscale='log', xlabel='marker', ylim=(2e1,1e5))
g.set_titles('{col_name}')

# Dotted reference line: mean `output_gmean` of construct AIO.039 in this experiment
baseline_rows = (stats['exp']=='exp100.5') & (stats['construct']=='AIO.039')
marker_baseline = stats.loc[baseline_rows, 'output_gmean'].mean()
for facet_ax in g.axes_dict.values():
    facet_ax.axhline(marker_baseline, color='black', ls=':', zorder=0)
g.figure.savefig(rd.outfile(output_path/'line_exp100.5_by-construct.svg'))

In [None]:
# exp100.1 fitted slopes, shown two ways on a shared y-axis
# NOTE(review): `main_markers` is not defined anywhere in the visible notebook.
not_aio039 = fits['construct']!='AIO.039'
in_experiment = fits['exp']=='exp100.1'
plot_df = fits[not_aio039 & in_experiment].sort_values('construct')

fig, (ax_by_label, ax_by_amount) = plt.subplots(1,2, figsize=(10,5), sharey=True)

# Left panel: slope per construct label, colored by plasmid amount
sns.scatterplot(
    data=plot_df, x='label', y='slope', ax=ax_by_label, legend=False,
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
)
ax_by_label.set(xlabel='', ylim=(0,1.2), xlim=(-0.5,2.5))

# Right panel: slope vs. plasmid amount, colored and styled by construct
sns.scatterplot(
    data=plot_df, x='amount', y='slope', ax=ax_by_amount, legend=False,
    hue='construct', palette=main_palette,
    style='construct', markers=main_markers,
)
ax_by_amount.set(ylim=(0,1.2))
sns.despine()

fig.savefig(rd.outfile(output_path/'scatter_exp100_slopes.svg'), bbox_inches='tight')

In [None]:
# Fitted slopes for all three experiments, shown two ways on a shared y-axis
# NOTE(review): `main_markers` is not defined anywhere in the visible notebook.
not_aio039 = fits['construct']!='AIO.039'
in_experiments = fits['exp'].isin(['exp100.1', 'exp100.3', 'exp100.5'])
plot_df = fits[not_aio039 & in_experiments].sort_values('construct')

fig, (ax_by_label, ax_by_amount) = plt.subplots(1,2, figsize=(10,5), sharey=True)

# Left panel: slope per construct label, with the amounts dodged side by side
sns.stripplot(
    data=plot_df, x='label', y='slope', ax=ax_by_label, legend=False,
    hue='amount', hue_norm=matplotlib.colors.LogNorm(), palette=no_yellow_viridis,
    dodge=True, jitter=False,
    size=9, linewidth=1, edgecolor='white',
)
ax_by_label.set(xlabel='', ylim=(0,1.2), xlim=(-0.5,2.5))

# Right panel: slope vs. plasmid amount, colored and styled by construct
sns.scatterplot(
    data=plot_df, x='amount', y='slope', ax=ax_by_amount, legend=False,
    hue='construct', palette=main_palette,
    style='construct', markers=main_markers,
)
ax_by_amount.set(ylim=(0,1.2))
sns.despine()

fig.savefig(rd.outfile(output_path/'scatter_slopes.svg'), bbox_inches='tight')