In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import rushd as rd
import scipy as sp
import seaborn as sns

from importlib import reload
# Re-execute the local `base` helper module so edits to it are picked up in this kernel
reload(base)

# Global plot appearance: tick-style axes, 'talk'-sized elements, Helvetica Neue sans-serif text
font_settings = {'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']}
sns.set_style('ticks')
sns.set_context('talk', rc=font_settings)

In [None]:
# Setup data loading
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'
run_folders = ['2024.11.07_exp119', '2024.11.10_exp119_2', '2024.11.12_exp119.2', '2024.11.14_exp119.3']
data_paths = [base_path/folder/'export' for folder in run_folders]

# One table per pair of runs: export folder, its wells.yaml, and the experiment label.
# The first two runs and the last two runs are kept separate because they are loaded
# with different channel lists in the data-loading cell.
plates1, plates2 = (
    pd.DataFrame({
        'data_path': paths,
        'yaml_path': [path/'wells.yaml' for path in paths],
        'exp': names,
    })
    for paths, names in [
        (data_paths[:2], ['exp119', 'exp119_2']),
        (data_paths[2:], ['exp119.2', 'exp119.3']),
    ]
)

# Where figures and the parquet cache of the loaded data are written
output_path = rd.rootdir/'output'/'new_piggybac'
cache_path = output_path/'new_piggybac.gzip'

# Draw the well-metadata layout of every plate definition file
for plates in (plates1, plates2):
    for yaml_file in plates['yaml_path'].unique():
        rd.plot.plot_well_metadata(yaml_file)

In [None]:
# Load data
channel_list = ['mRuby2-A','mGL-A']

if cache_path.exists():
    # Reuse the previously saved table instead of re-reading the raw exports
    data = pd.read_parquet(cache_path)
else:
    data1 = rd.flow.load_groups_with_metadata(plates1, columns=channel_list)

    # The second set of plates is loaded with an extra 'mCherry-A' column; after the
    # rename, 'mCherry-A' takes the 'mRuby2-A' name and the original 'mRuby2-A'
    # column is kept as 'YL1-A'.
    data2 = rd.flow.load_groups_with_metadata(plates2, columns=channel_list+['mCherry-A'])
    data2 = data2.rename(columns={'mRuby2-A': 'YL1-A', 'mCherry-A': 'mRuby2-A'})

    data = pd.concat([data1,data2], ignore_index=True)

    # Keep only rows where every channel of interest is strictly positive
    all_positive = (data[channel_list] > 0).all(axis=1)
    data = data[all_positive]
    data.to_parquet(rd.outfile(cache_path))
display(data)

In [None]:
# Add metadata for constructs
metadata_file = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'
metadata = base.get_metadata(metadata_file)
# Left join keeps every row of `data`, even for constructs missing from the metadata table
data = pd.merge(data, metadata, how='left', on='construct')

# Create dicts to specify colors/markers
# ({column: {construct: value}}; the 'color' and 'markers' columns are used as plot lookups)
metadata_dict = metadata.set_index('construct').to_dict('dict')
main_palette, main_markers = metadata_dict['color'], metadata_dict['markers']

In [None]:
# Copy the two fluorescence channels into generic 'marker' / 'output' columns,
# which are the column names the plotting cells below refer to
channel_aliases = {'marker': 'mGL-A', 'output': 'mRuby2-A'}
for alias, channel in channel_aliases.items():
    data[alias] = data[channel]

In [None]:
# Down-sample every (sort, biorep, construct) group before drawing the KDEs.
# `groupby(...).sample(1000)` raises ValueError as soon as one group has fewer than
# 1000 rows, so instead shuffle within each group (frac=1) and keep at most
# `max_cells` rows per group. The fixed seed makes the figure identical on every re-run.
group_keys = ['sort','biorep','construct']
max_cells = 1000
plot_df = (
    data.groupby(group_keys)
        .sample(frac=1, random_state=0)
        .groupby(group_keys)
        .head(max_cells)
)

# Marker vs. output density, one facet per (sort, construct), one hue per biorep
g = sns.displot(data=plot_df, x='marker', y='output', hue='biorep', kind='kde',
                row='sort', col='construct', facet_kws=dict(margin_titles=True),
                log_scale=True, common_norm=False, levels=8)

# Dotted vertical line at marker = 2e2 (the same value used for the gates table) on every facet.
# Facets are keyed by (row, col) = (sort, construct); only the axes are needed here.
marker_gate = 2e2
for ax in g.axes_dict.values():
    ax.axvline(marker_gate, c='black', ls=':', zorder=0)

In [None]:
# Gate table: one row per experiment with the same threshold on both channels,
# plus the names of the channels that act as 'marker' and 'output'
gate_threshold = 2e2
experiments = ['exp119', 'exp119_2', 'exp119.2', 'exp119.3']

gates = pd.DataFrame({
    'mRuby2-A': [gate_threshold] * len(experiments),
    'mGL-A': [gate_threshold] * len(experiments),
    'exp': experiments,
}).assign(marker='mGL-A', output='mRuby2-A')
display(gates)

In [None]:
# Apply the gates one experiment at a time, then flatten the index that groupby/apply adds
gated = data.groupby('exp')[data.columns].apply(lambda exp_df: base.gate_data(exp_df, gates))
data = gated.reset_index(drop=True)

# Keep only rows flagged as expressing
# (presumably `base.gate_data` adds the boolean 'expressing' column -- confirm in base)
df = data[data['expressing']]
display(df)

In [None]:
# Down-sample every (sort, biorep, construct) group of the gated data before drawing the KDEs.
# Gating can leave a group with fewer than 1000 rows, in which case
# `groupby(...).sample(1000)` raises ValueError; instead shuffle within each group (frac=1)
# and keep at most `max_cells` rows per group. The fixed seed makes the figure reproducible.
group_keys = ['sort','biorep','construct']
max_cells = 1000
plot_df = (
    df.groupby(group_keys)
      .sample(frac=1, random_state=0)
      .groupby(group_keys)
      .head(max_cells)
)

# Marker vs. output density, one facet per (sort, construct), one hue per biorep
g = sns.displot(data=plot_df, x='marker', y='output', hue='biorep', kind='kde',
                row='sort', col='construct', facet_kws=dict(margin_titles=True),
                log_scale=True, common_norm=False, levels=8)

# Dotted vertical line at marker = 2e2 (the same value used for the gates table) on every facet.
# Facets are keyed by (row, col) = (sort, construct); only the axes are needed here.
marker_gate = 2e2
for ax in g.axes_dict.values():
    ax.axvline(marker_gate, c='black', ls=':', zorder=0)

In [None]:
# Bin data and calculate statistics
summary_stats = [sp.stats.gmean, np.std, sp.stats.variation]
grouping = ['construct','exp','sort','biorep']
# The third returned value is not used in this notebook
df_quantiles, stats, _, fits = base.calculate_bins_stats(df, stat_list=summary_stats, by=grouping)

# Attach the per-construct metadata to both summary tables
stats, fits = (
    table.merge(metadata, how='left', on='construct')
    for table in (stats, fits)
)

In [None]:
# Which sort / biological replicate to show
sort_num = 2
biorep = 1
in_sort = df_quantiles['sort']==sort_num
in_biorep = df_quantiles['biorep']==biorep
plot_df = df_quantiles[in_sort & in_biorep]

# Wide panel for the binned line plot, narrow panel for the marginal output density
fig, axes = plt.subplots(1,2, gridspec_kw=dict(width_ratios=(1,0.3)))
line_ax, kde_ax = axes


def gmean_band(values):
    """Return (gmean / gstd, gmean * gstd) of `values`, used as the error-bar limits."""
    center = sp.stats.gmean(values)
    spread = sp.stats.gstd(values)
    return (center / spread, center * spread)


# line plot
ax = line_ax
sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=main_palette,
             legend=False, dashes=False, style='construct', markers=main_markers, ax=ax, markersize=9, markeredgewidth=1,
             estimator=sp.stats.gmean, errorbar=gmean_band)
ax.set(xscale='log', yscale='log', xlabel='marker',)
sns.despine(ax=ax)

# Reference line: geometric mean output of the 'marker' group for this biorep
# (computed from all rows of df_quantiles for the biorep, not only the selected sort)
is_marker_group = df_quantiles['group']=='marker'
marker_baseline = sp.stats.gmean(df_quantiles.loc[is_marker_group & in_biorep, 'output'])
ax.axhline(marker_baseline, color='black', ls=':')
ax.annotate('marker only', (ax.get_xlim()[1], marker_baseline), ha='right', va='bottom')

# histogram
ax = kde_ax
sns.kdeplot(data=plot_df, y='output', hue='construct', palette=main_palette,
            legend=False, log_scale=True, common_norm=False, ax=ax)
sns.despine(ax=ax, bottom=True)
# Share the y-range with the line plot and strip all labels/ticks from the density panel
ax.set(xlabel='', ylim=line_ax.get_ylim(), ylabel='', yticklabels=[])
ax.xaxis.set_visible(False)

In [None]:
# Display names for the x tick labels of the summary plots (keyed by tick text)
ts_label = {
    'na': 'base',
    'NT': 'OL',
    'T': 'CL',
    'none': '–',
}

# Point markers for the summary plots, looked up by sort number (sort 1 -> first entry)
marker_list = [
    'o',
    'v',
    'D',
]

In [None]:
# Summary figure: one strip-plot panel per statistic, x = ts_kind, points colored by
# construct and drawn with a different marker shape for each sort.
fig, axes = plt.subplots(1,3, figsize=(10,4), gridspec_kw=dict(wspace=0.5,))

plot_df = stats
plot_df2 = fits

# One entry per panel: (source table, y column, panel title, extra Axes.set() options).
# The original commented-out fourth panel plotted y='output_variation' with title 'CV';
# to restore it, add an entry here and a fourth subplot above.
panels = [
    (plot_df,  'output_gmean', 'Mean',  dict(yscale='log')),
    (plot_df,  'output_std',   'Std.',  dict(yscale='log')),
    (plot_df2, 'slope',        'Slope', dict()),
]

for ax, (table, y_col, title, extra_settings) in zip(axes, panels):
    for num, group in table.groupby('sort'):
        # Sort numbers 1, 2, 3 map to marker_list[0], [1], [2] as before; the int() and
        # modulo only prevent a TypeError/IndexError for float or larger sort numbers.
        marker = marker_list[(int(num) - 1) % len(marker_list)]
        sns.stripplot(data=group, x='ts_kind', y=y_col, hue='construct', palette=main_palette,
                      legend=False, ax=ax, marker=marker, s=8, edgecolor='white', linewidth=1)
    ax.set(title=title, xlabel='', ylabel='', **extra_settings)

    # Replace the raw ts_kind tick text with its display name; a value that has no
    # entry in ts_label keeps its own text instead of raising KeyError.
    tick_texts = [tick.get_text() for tick in ax.get_xticklabels()]
    ax.set_xticklabels([ts_label.get(text, text) for text in tick_texts], rotation=45, ha='right',)
    sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path/'stats.png'))