In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns
from statannotations.Annotator import Annotator

# enables concurrent editing of base.py
from importlib import reload
reload(base)

In [None]:
# Set plotting context
# Seaborn 'ticks' style and 'paper' context; the rc overrides come from base.py.
sns.set_style('ticks')
sns.set_context('paper', font_scale=1.0, rc=base.rc_context)
# Applied last, so for any key set twice the value from base.rc_params wins.
plt.rcParams.update(base.rc_params)

In [None]:
# Setup data loading: one row per plate (experiment), with its export folder,
# well-metadata file, experiment name and biological replicate number.
base_path = rd.datadir/'instruments'/'data'/'attune'/'Albert'
exp_names = ['Exp14', 'Exp15', 'Exp16']

plates = pd.DataFrame({
    'data_path': [base_path/exp_name/'export' for exp_name in exp_names],
    # Every plate points at the Exp14 well map.
    # NOTE(review): presumably all three plates share one layout - confirm.
    'yaml_path': [base_path/'Exp14'/'export'/'wells.yaml'] * len(exp_names),
    'exp': exp_names,
    'biorep': [1, 2, 3],
})

# Output folder for this analysis and the location of the cached event table
output_path = rd.rootdir/'output'/'straight-in'
cache_path = output_path/'data.gzip'

# Draw each distinct well-metadata file once
for yaml_file in plates['yaml_path'].unique():
    rd.plot.plot_well_metadata(yaml_file)

In [None]:
# Load data: reuse the cached table when it exists, otherwise read the raw
# exports, filter them, and write the cache.
channel_list = ['mScarlet-A','mScarlet2-A','mGreenLantern-A']

if not cache_path.exists():
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)
    # Keep only events that are strictly positive in every channel
    positive_in_all = (data[channel_list] > 0).all(axis=1)
    data = data[positive_in_all]
    data.to_parquet(rd.outfile(cache_path))
else:
    data = pd.read_parquet(cache_path)
display(data)

In [None]:
# Subsample 1000 events per (exp, construct) to keep the KDE fast.
# random_state is fixed so the subsample, and therefore the contours, are the
# same on every run. Note that sample() raises if a group has fewer than 1000 rows.
plot_df = data.groupby(['exp','construct']).sample(1000, random_state=0)

# Bivariate KDE of mScarlet vs. mGreenLantern, one panel per construct,
# coloured by experiment
g = sns.displot(data=plot_df, x='mGreenLantern-A', y='mScarlet-A', hue='exp',
                kind='kde', col='construct', col_wrap=5,
                common_norm=False, levels=5, log_scale=True)

# Dotted reference lines on both channels, drawn behind the contours
for ax in g.axes_dict.values():
    ax.axvline(1e4, c='black', ls=':', zorder=0)
    ax.axhline(2e3, c='black', ls=':', zorder=0)

In [None]:
# Bivariate KDE of mScarlet2 vs. mGreenLantern on the full data set:
# one column per construct, one row per biological replicate.
plot_df = data
g = sns.displot(data=plot_df, x='mGreenLantern-A', y='mScarlet2-A', hue='construct',
                kind='kde', col='construct', row='biorep',
                common_norm=False, levels=8, log_scale=True)

for ax in g.axes_dict.values():
    # `ylim` is not a displot parameter: passed there it is forwarded to the
    # contour call and never applied, so the limits are set on the axes instead.
    ax.set(ylim=(1e2,1e6))
    # Dotted reference lines on both channels, drawn behind the contours
    ax.axvline(3e4, c='black', ls=':', zorder=0)
    ax.axhline(2e3, c='black', ls=':', zorder=0)

In [None]:
# Subsample 1000 events per (exp, construct) to keep the KDE fast.
# random_state is fixed so the subsample, and therefore the contours, are the
# same on every run. Note that sample() raises if a group has fewer than 1000 rows.
plot_df = data.groupby(['exp','construct']).sample(1000, random_state=0)

# Bivariate KDE of mScarlet2 vs. mGreenLantern, one panel per construct,
# coloured by experiment
g = sns.displot(data=plot_df, x='mGreenLantern-A', y='mScarlet2-A', hue='exp',
                kind='kde', col='construct', col_wrap=5,
                common_norm=False, levels=5, log_scale=True)

# Dotted reference lines on both channels, drawn behind the contours
for ax in g.axes_dict.values():
    ax.axvline(1e4, c='black', ls=':', zorder=0)
    ax.axhline(3e3, c='black', ls=':', zorder=0)

In [None]:
# Manually draw gates: one row per experiment, holding a threshold for each
# channel plus the names of the channels used as marker and as output.
gates = pd.DataFrame({
    'mScarlet2-A': 3e3,
    'mGreenLantern-A': 3e4,
    'exp': ['Exp14', 'Exp15', 'Exp16'],
}).assign(
    # Indicate which channels are relevant for each experiment
    marker='mGreenLantern-A',
    output='mScarlet2-A',
)

In [None]:
# Gate data by marker expression: base.gate_data is applied to each
# experiment's events together with the gates table, and the pieces are
# re-assembled under a fresh 0..n-1 index.
data = (
    data
    .groupby('exp')[data.columns]
    .apply(lambda exp_events: base.gate_data(exp_events, gates))
    .reset_index(drop=True)
)
# 'expressing' is presumably added by base.gate_data - confirm in base.py
data['gated'] = data['expressing']

In [None]:
# Bin data and calculate statistics (gated events only)
df_quantiles, df_stats = base.calculate_bins_stats(data[data['gated']])

# Add metadata: left-join the construct annotations onto all three tables so
# that every existing row is kept even if its construct has no metadata entry.
metadata = base.get_metadata(rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx')
data, df_quantiles, df_stats = [
    table.merge(metadata, how='left', on='construct')
    for table in (data, df_quantiles, df_stats)
]

In [None]:
# One point per row of df_stats: 'output_gmean' split by 'ts_label'
g = sns.stripplot(x='ts_label', y='output_gmean', data=df_stats)

In [None]:
# One point per row of df_stats: 'slope' split by 'ts_label'
g = sns.stripplot(x='ts_label', y='slope', data=df_stats)

In [None]:
def plot_biorep_output(quantiles, biorep):
    """Plot output against binned marker level for one biological replicate.

    The figure has two panels: a log-log line plot of 'output' versus
    'bin_marker_quantiles_median' coloured by construct, and next to it a
    narrow KDE of 'output' that shares the line plot's y-limits.

    Parameters
    ----------
    quantiles : pandas.DataFrame
        Binned data. The columns 'biorep', 'bin_marker_quantiles_median',
        'output' and 'construct' are read.
    biorep : int
        Value of the 'biorep' column selecting the rows to plot.

    Returns
    -------
    tuple
        ``(fig, axes)`` as returned by ``plt.subplots``; ``axes[0]`` is the
        line plot and ``axes[1]`` the marginal KDE.
    """
    fig, axes = plt.subplots(1, 2, gridspec_kw=dict(width_ratios=(1, 0.3)))
    line_ax, kde_ax = axes
    subset = quantiles[quantiles['biorep'] == biorep]

    # line plot
    sns.lineplot(data=subset, x='bin_marker_quantiles_median', y='output',
                 hue='construct', ax=line_ax)
    line_ax.set(xscale='log', yscale='log', xlabel='marker')

    # marginal distribution of the output, drawn along the y axis
    sns.kdeplot(data=subset, y='output', hue='construct',
                legend=False, log_scale=True, common_norm=False, ax=kde_ax)
    sns.despine(ax=kde_ax, bottom=True)
    # Reuse the line plot's y-limits so the two panels line up; the marginal
    # panel carries no labels or x axis of its own.
    kde_ax.set(xlabel='', ylim=line_ax.get_ylim(), ylabel='', yticklabels=[])
    kde_ax.get_xaxis().set_visible(False)
    return fig, axes


# One figure per biological replicate (previously three copy-pasted cells)
for biorep in (1, 2, 3):
    fig, axes = plot_biorep_output(df_quantiles, biorep)

In [None]:
def get_base_slope(df):
    """Return a copy of ``df`` with a constant 'base_slope' column.

    'base_slope' holds the 'slope' of the first row whose 'ts_label' equals
    'base'. If there is no such row the column is filled with NaN, which keeps
    it a float column so later arithmetic on it stays numeric.

    The input frame is not modified. This matters because the function is
    used with ``groupby(...).apply``, where mutating the group passed in can
    give unexpected results.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'ts_label' and 'slope'.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows and index plus 'base_slope'.
    """
    base_slope = df.loc[df['ts_label'] == 'base', 'slope']
    result = np.nan if base_slope.empty else base_slope.iloc[0]
    return df.assign(base_slope=result)

# Attach each experiment's base slope to its rows, then express every slope
# relative to that baseline.
df_stats = (
    df_stats
    .groupby('exp')[df_stats.columns]
    .apply(get_base_slope)
    .reset_index(drop=True)
)
df_stats['norm_slope'] = df_stats['slope'].div(df_stats['base_slope'])

In [None]:
# One point per row of df_stats: 'norm_slope' split by 'ts_label', coloured by biorep
g = sns.stripplot(x='ts_label', y='norm_slope', hue='biorep', data=df_stats)

In [None]:
# perform statistical tests
# One panel per summary statistic; the listed pairs of 'ts_label' groups are
# compared and annotated on each panel using the settings from base.py.
annotate_kwargs = base.annotate_kwargs
pairs = [('base', 'CL'), ('OL', 'CL')]
stat_list = ['output_gmean', 'output_std', 'slope', 'output_variation']
plot_df = df_stats
f, axes = plt.subplots(nrows=1, ncols=4, figsize=(5, 2))

for ax, stat in zip(axes, stat_list):
    sns.stripplot(data=plot_df, x='ts_label', y=stat,
                  hue='construct', legend=False, ax=ax)
    # Every statistic except the slope is shown on a log axis; the scale is
    # set before the annotations are drawn.
    if stat != 'slope':
        ax.set(yscale='log')
    annotator = Annotator(ax, pairs, data=plot_df, x='ts_label', y=stat)
    annotator.configure(**annotate_kwargs, line_offset=2).apply_and_annotate()

In [None]:
# Create dicts to specify colors/markers
# Column-oriented dict of the metadata keyed by construct:
# {column name: {construct: value}}
metadata_by_construct = metadata.set_index('construct')
metadata_dict = metadata_by_construct.to_dict(orient='dict')
main_palette = metadata_dict['color']      # construct -> color
main_markers = metadata_dict['markers']    # construct -> marker

In [None]:
# Joint marker/output densities: one column per construct, one row per
# biological replicate, coloured with the construct palette.
plot_df = data
g = sns.displot(
    data=plot_df, x='marker', y='output',
    kind='kde', hue='construct', palette=main_palette, legend=False,
    row='biorep', col='construct', facet_kws=dict(margin_titles=True),
    common_norm=False, levels=8, log_scale=True, height=2,
)

for ax in g.axes_dict.values():
    # Same limits on every panel, plus a dotted vertical reference line at 3e4
    ax.set(ylim=(1e2,1e6), xlim=(1e1,2e6))
    ax.axvline(3e4, c='black', ls=':', zorder=0)
    ax.minorticks_off()
    #ax.axhline(2e3, c='black', ls=':', zorder=0)

# Save the figure to the output folder
joints_file = rd.outfile(output_path/'joints.png')
g.figure.savefig(joints_file)

### Load ddPCR integration validation data

In [None]:
# Read the ddPCR workbook; header=1 takes the column names from the second
# row of the sheet.
ddpcr_path = rd.datadir/'projects'/'miR-iFFL'/'STRAIGHT-IN lines'
ddpcr_file = ddpcr_path/'ddPCR.xlsx'
ddpcr = pd.read_excel(ddpcr_file, header=1)
display(ddpcr)