In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import rushd as rd
import scipy as sp
import seaborn as sns

from importlib import reload
# Re-execute the local `base` helper module so that edits to it take effect in this kernel
reload(base)

# Global plot appearance: 'ticks' axes style, 'talk' context, Helvetica Neue as the sans-serif font
sns.set_style('ticks')
sns.set_context(
    'talk',
    rc={
        'font.family': 'sans-serif',
        'font.sans-serif': ['Helvetica Neue'],
    },
)

In [None]:
# Locations of the raw data, of this notebook's outputs, and of the cached dataset
base_path = rd.datadir/'instruments'/'data'/'collaborators'/'birnbaum_steph'
output_path = rd.rootdir/'output'/'lenti_tcell'
cache_path = output_path/'data.gzip'

# One row per experiment: the folder holding the exported data and the YAML file with its metadata
plates = pd.DataFrame([
    {
        'data_path': base_path/'2024-06-10 Galloway Exp 1'/'export',
        'yaml_path': base_path/'2024-06-10 Galloway Exp 1'/'metadata.yaml',
    },
    {
        'data_path': base_path/'2024-10-25 Galloway 2'/'export',
        'yaml_path': base_path/'2024-10-25 Galloway 2'/'export'/'metadata.yaml',
    },
    {
        'data_path': base_path/'2024-11-06 Galloway 3'/'export',
        'yaml_path': base_path/'2024-11-06 Galloway 3'/'export'/'wells.yaml',
    },
])

# Draw the well metadata of every distinct YAML file
for p in pd.unique(plates['yaml_path']):
    rd.plot.plot_well_metadata(p)

In [None]:
# Load data (from the parquet cache when it exists, otherwise from the raw exports)
data = pd.DataFrame()
channel_list = ['FITC-A', 'PE-A', 'APC-A750-A', 'PB450-A']

if cache_path.exists():
    data = pd.read_parquet(cache_path)
else:
    # Channel renamed to 'livedead-A' for each plate, in the row order of `plates`
    livedead_channels = ['APC-A750-A', 'PB450-A', 'APC-A750-A']
    # Only the first two plates get a biorep number assigned here; the third is left untouched
    # NOTE(review): presumably the third plate's metadata already provides 'biorep' — confirm
    assigned_bioreps = {0: 1, 1: 2}

    loaded = []
    for i, livedead_channel in enumerate(livedead_channels):
        plate_row = plates.iloc[i].to_frame().T
        plate_data = rd.flow.load_groups_with_metadata(plate_row, columns=channel_list)
        plate_data = plate_data.rename(
            columns={'FITC-A': 'mGL-A', 'PE-A': 'mRuby2-A', livedead_channel: 'livedead-A'}
        )
        if i in assigned_bioreps:
            plate_data['biorep'] = assigned_bioreps[i]
        loaded.append(plate_data)

    data = pd.concat(loaded, ignore_index=True)

    # Keep only events that are strictly positive in all three renamed channels
    strictly_positive = (data[['mGL-A', 'mRuby2-A', 'livedead-A']] > 0).all(axis=1)
    data = data[strictly_positive]

    # Write the cache so later runs take the fast branch above
    data.to_parquet(rd.outfile(cache_path))
display(data)

In [None]:
# Add metadata for constructs (left join on 'construct', so every event row is kept)
metadata = base.get_metadata(
    rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'
)
data = pd.merge(data, metadata, how='left', on='construct')
display(data)

# Create dicts to specify colors/markers: {column -> {construct -> value}}
metadata_dict = metadata.set_index('construct').to_dict('dict')
main_palette = metadata_dict['color']
main_markers = metadata_dict['markers']

In [None]:
# Per-biorep gates: the 99.99th percentile of each channel in the untransduced ('UT') cells
gates = pd.DataFrame()
channel_list = ['mGL-A', 'mRuby2-A',]
ut_by_biorep = data[data['construct']=='UT'].groupby(['biorep'])
for channel in channel_list:
    gates[channel] = ut_by_biorep[channel].apply(lambda x: x.quantile(0.9999))
gates = gates.reset_index()

# Add metadata: which channel is the marker, which is the output, and an experiment label
gates = gates.assign(
    marker='mGL-A',
    output='mRuby2-A',
    exp='steph_' + gates['biorep'].astype(str),
)

# Give the event table generic 'marker'/'output' value columns and the same experiment label
data['marker'] = data['mGL-A']
data['output'] = data['mRuby2-A']
data['exp'] = 'steph_' + data['biorep'].astype(str)

display(gates)

In [None]:
# Manually adjust marker gate for bioreps 3&4 (overrides the quantile-derived value)
late_bioreps = gates['biorep'].isin([3,4])
gates.loc[late_bioreps, 'mGL-A'] = 2e5
display(gates)

In [None]:
# Look at data to draw livedead gates: untransduced cells of bioreps 1 and 2, one panel per biorep
plot_df = data[(data['construct']=='UT') & (data['biorep']<3)]
g = sns.displot(
    data=plot_df, x='livedead-A', col='biorep',
    kind='kde', log_scale=True, common_norm=False,
)
for biorep, ax in g.axes_dict.items():
    # Candidate gate drawn as a vertical line: 3e3 for biorep 1, 5e4 otherwise
    gate = 3e3 if biorep==1 else 5e4
    ax.axvline(gate, c='black')

In [None]:
# Look at data to draw livedead gates: untransduced cells of bioreps > 2, split by 'stain'
plot_df = data[(data['construct']=='UT') & (data['biorep']>2)]
g = sns.displot(
    data=plot_df, x='livedead-A', col='biorep', hue='stain',
    kind='kde', log_scale=True, common_norm=False,
)
# The same candidate gate (1e4) is drawn on every panel
for _, ax in g.axes_dict.items():
    ax.axvline(1e4, c='black')

In [None]:
# Look at gates: marker vs. output density per biorep with the manual marker gate overlaid
manual_gates = [4e4, 4e4, 2e5, 2e5]  # marker gate for bioreps 1..4, in order
# Subsample 3000 events per construct to keep the 2D KDE tractable; the fixed seed makes
# the figure identical on every run (sampling raises if a construct has fewer than 3000 events)
plot_df = data.groupby('construct').sample(3000, random_state=0)
g = sns.displot(data=plot_df, x='mGL-A', y='mRuby2-A', col='biorep', hue='construct',
                kind='kde', log_scale=True, common_norm=False)
for biorep, ax in g.axes_dict.items():
    # biorep numbers are 1-based; cast to int so the list lookup works for any integer-like key
    ax.axvline(manual_gates[int(biorep)-1], c='black')

In [None]:
# Gate data by marker expression
#data = data.groupby(['exp','biorep'])[data.columns].apply(lambda x: base.gate_data(x,gates))
#data.reset_index(inplace=True, drop=True)
# Marker (mGL-A) threshold per biorep; bioreps 3 and 4 previously used 3e4 and 4e4.
marker_gates = {1: 4e4, 2: 4e4, 3: 2e5, 4: 2e5}
# Look up each event's threshold from its biorep and compare in one vectorised step. This gives
# a real boolean column; a biorep without a threshold maps to NaN and compares as False.
data['expressing'] = data['mGL-A'] > data['biorep'].map(marker_gates)

# Gate live cells (livedead-A < gate): 3e3 for biorep 1, 5e4 for biorep 2, 1e4 for every biorep > 2
livedead_gates = data['biorep'].map({1: 3e3, 2: 5e4}).where(data['biorep']<=2, 1e4)
data['live'] = data['livedead-A'] < livedead_gates
display(data)

# Events used downstream: marker-expressing, live, and not from the untransduced control
df = data[data['expressing'] & data['live'] & (data['construct']!='UT')]

In [None]:
# Marker vs. output density for events with moi == 1: rows are bioreps, columns are constructs,
# colour is the dox value
plot_df = df.loc[df['moi']==1]
g = sns.displot(
    data=plot_df, x='mGL-A', y='mRuby2-A',
    row='biorep', col='construct', hue='dox',
    kind='kde', log_scale=True, common_norm=False, legend=False,
)

In [None]:
# Bin data and calculate statistics (the third value returned by the helper is not used here)
df_quantiles, stats, _, fits = base.calculate_bins_stats(
    df,
    stat_list=[sp.stats.gmean, np.std, sp.stats.variation],
    by=['construct','moi','dox','exp','biorep'],
)

# Attach the construct metadata to both summary tables
stats = pd.merge(stats, metadata, how='left', on='construct')
fits = pd.merge(fits, metadata, how='left', on='construct')

In [None]:
def _gmean_gstd_interval(x):
    """Return (gmean / gstd, gmean * gstd) of x, used as the error band of the line plot."""
    return (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x))


# One figure per biorep: binned marker-vs-output lines (left) next to the output distribution (right)
for biorep in range(1,5):
    in_biorep = df_quantiles['biorep']==biorep
    plot_df = df_quantiles[in_biorep & (df_quantiles['dox']==1000) & (df_quantiles['moi']==1)]
    fig, axes = plt.subplots(1,2, gridspec_kw=dict(width_ratios=(1,0.3)))

    # line plot
    ax = axes[0]
    sns.lineplot(
        data=plot_df, x='bin_marker_quantiles_median', y='output',
        hue='construct', palette=main_palette, legend=False,
        dashes=False, style='construct', markers=main_markers,
        ax=ax, markersize=9, markeredgewidth=1,
        estimator=sp.stats.gmean, errorbar=_gmean_gstd_interval,
    )
    ax.set(xscale='log', yscale='log', xlabel='marker',)
    sns.despine(ax=ax)

    # Reference line: geometric mean output of the rows whose 'group' is 'marker' in this biorep
    marker_only_output = df_quantiles.loc[(df_quantiles['group']=='marker') & in_biorep, 'output']
    marker_baseline = sp.stats.gmean(marker_only_output)
    ax.axhline(marker_baseline, color='black', ls=':')
    ax.annotate('marker only', (ax.get_xlim()[1], marker_baseline), ha='right', va='bottom')

    # histogram (KDE of the output, sharing the y-range of the line plot)
    ax = axes[1]
    sns.kdeplot(
        data=plot_df, y='output', hue='construct', palette=main_palette,
        legend=False, log_scale=True, common_norm=False, ax=ax,
    )
    sns.despine(ax=ax, bottom=True)
    ax.set(xlabel='', ylim=axes[0].get_ylim(), ylabel='', yticklabels=[])
    ax.get_xaxis().set_visible(False)

    fig.savefig(rd.outfile(output_path/f'joint_biorep{biorep}.png'))

In [None]:
# Tick labels shown in place of the raw 'ts_kind' values
ts_label = dict(na='base', NT='OL', T='CL', none='–')
# One marker shape per biorep (biorep 1 -> first entry, and so on)
marker_list = list('ovDX')

In [None]:
# Summary figure: mean, standard deviation and slope per 'ts_kind', one marker shape per biorep
fig, axes = plt.subplots(1,3, figsize=(10,4), gridspec_kw=dict(wspace=0.5,))

plot_df = stats[(stats['dox']==1000) & (stats['moi']==1)]
plot_df2 = fits[(fits['dox']==1000) & (fits['moi']==1)]

# (source table, y column, panel title, log-scaled y axis?) for each panel, left to right
panels = [
    (plot_df, 'output_gmean', 'Mean', True),
    (plot_df, 'output_std', 'Std.', True),
    (plot_df2, 'slope', 'Slope', False),
]
for ax, (panel_df, y_col, title, log_y) in zip(axes, panels):
    # Draw each biorep separately so that it can get its own marker shape
    for biorep, group in panel_df.groupby('biorep'):
        sns.stripplot(data=group, x='ts_kind', y=y_col, hue='construct', palette=main_palette,
                      legend=False, ax=ax, marker=marker_list[biorep-1], s=8, edgecolor='white', linewidth=1)
    axis_settings = dict(title=title, xlabel='', ylabel='')
    if log_y:
        axis_settings['yscale'] = 'log'  #ylim=(1e3,2e4),
    ax.set(**axis_settings)

# ax = axes[3]
# for num, group in plot_df.groupby('sort'):
#     sns.stripplot(data=group, x='ts_kind', y='output_variation', hue='construct', palette=main_palette,
#                   legend=False, ax=ax, marker=marker_list[num-1], s=8, edgecolor='white', linewidth=1)
# ax.set(title='CV', xlabel='', ylabel='',)

# Replace the raw 'ts_kind' tick labels with their display names
for ax in axes:
    display_labels = [ts_label[x.get_text()] for x in ax.get_xticklabels()]
    ax.set_xticklabels(display_labels, rotation=45, ha='right',)
    sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path/'stats.png'))

In [None]:
# Geometric mean of 'output' in the untransduced ('UT') cells, per experiment and biorep
baseline_df = (
    data[data['construct']=='UT']
    .groupby(['exp','biorep'])['output']
    .apply(sp.stats.gmean)
    .rename('output_gmean')
    .reset_index()
)

### Test stat annotator

In [None]:
from statannotations.Annotator import Annotator

In [None]:
# Summary figure with per-biorep marker shapes and significance annotations on the 'Mean' panel
fig, axes = plt.subplots(1,3, figsize=(10,4), gridspec_kw=dict(wspace=0.5,))

plot_df = stats[(stats['dox']==1000) & (stats['moi']==1)]
plot_df2 = fits[(fits['dox']==1000) & (fits['moi']==1)]

# Appearance shared by every strip plot in this figure
plot_settings = dict(x='ts_kind', hue='construct', palette=main_palette,
                     legend=False,  s=8, edgecolor='white', linewidth=1)
# Pairs of 'ts_kind' levels to compare
pairs = [("na","T"), ("NT","T"), ("na","NT")]

ax = axes[0]
for biorep, group in plot_df.groupby('biorep'):
    sns.stripplot(data=group, y='output_gmean', ax=ax, marker=marker_list[biorep-1], **plot_settings)
ax.set(title='Mean', xlabel='', ylabel='', yscale='log', )#ylim=(1e3,2e4),)
# The annotator only gets x and y. Passing the strip-plot settings would also pass
# hue='construct', and with a hue statannotations expects each pair member to be an
# (x, hue) tuple rather than a bare 'ts_kind' label.
annotator = Annotator(ax, pairs, data=plot_df, x='ts_kind', y='output_gmean')
annotator.configure(test='t-test_ind', text_format='star', loc='outside')
# configure() alone draws nothing; the test has to be run and the brackets added explicitly
annotator.apply_and_annotate()

ax = axes[1]
for biorep, group in plot_df.groupby('biorep'):
    sns.stripplot(data=group, y='output_std', ax=ax, marker=marker_list[biorep-1], **plot_settings)
ax.set(title='Std.', xlabel='', ylabel='', yscale='log', )#ylim=(1e3,2e4),)

ax = axes[2]
for biorep, group in plot_df2.groupby('biorep'):
    sns.stripplot(data=group, y='slope', ax=ax, marker=marker_list[biorep-1], **plot_settings)
ax.set(title='Slope', xlabel='', ylabel='',)

# ax = axes[3]
# for num, group in plot_df.groupby('sort'):
#     sns.stripplot(data=group, x='ts_kind', y='output_variation', hue='construct', palette=main_palette,
#                   legend=False, ax=ax, marker=marker_list[num-1], s=8, edgecolor='white', linewidth=1)
# ax.set(title='CV', xlabel='', ylabel='',)

# Replace the raw 'ts_kind' tick labels with their display names
for ax in axes:
    ax.set_xticklabels([ts_label[x.get_text()] for x in ax.get_xticklabels()], rotation=45, ha='right',)
    sns.despine(ax=ax)

In [None]:
# Summary figure with significance annotations on the 'Mean' and 'Slope' panels
fig, axes = plt.subplots(1,3, figsize=(10,4), gridspec_kw=dict(wspace=0.5,))

plot_df = stats[(stats['dox']==1000) & (stats['moi']==1)]
plot_df2 = fits[(fits['dox']==1000) & (fits['moi']==1)]

# Settings passed to both the strip plots and the annotator (no hue here)
plot_settings = dict(x='ts_kind', legend=False,)
# Pairs of 'ts_kind' levels to compare
pairs = [('na','T'), ('NT','T'), ('na','NT')]

# Mean: all bioreps drawn together, annotated with an independent t-test
ax = axes[0]
sns.stripplot(data=plot_df, y='output_gmean', ax=ax, hue='construct', palette=main_palette, **plot_settings)
ax.set(title='Mean', xlabel='', ylabel='', yscale='log', )#ylim=(1e3,2e4),)
annotator = Annotator(ax, pairs, data=plot_df, y='output_gmean', **plot_settings)
annotator.configure(test='t-test_ind', text_format='star', loc='outside')
annotator.apply_and_annotate()

# Std.: one marker shape per biorep, no annotation
ax = axes[1]
for biorep, group in plot_df.groupby('biorep'):
    sns.stripplot(
        data=group, x='ts_kind', y='output_std', hue='construct', palette=main_palette,
        legend=False, ax=ax, marker=marker_list[biorep-1], s=8, edgecolor='white', linewidth=1,
    )
ax.set(title='Std.', xlabel='', ylabel='', yscale='log', )#ylim=(1e3,2e4),)

# Slope: all bioreps drawn together, annotated with a Mann-Whitney test
ax = axes[2]
sns.stripplot(data=plot_df2, y='slope', ax=ax, hue='construct', palette=main_palette, **plot_settings)
ax.set(title='Slope', xlabel='', ylabel='', )
annotator = Annotator(ax, pairs, data=plot_df2, y='slope', **plot_settings)
annotator.configure(test='Mann-Whitney', text_format='star', loc='outside')
annotator.apply_and_annotate()

# ax = axes[3]
# for num, group in plot_df.groupby('sort'):
#     sns.stripplot(data=group, x='ts_kind', y='output_variation', hue='construct', palette=main_palette,
#                   legend=False, ax=ax, marker=marker_list[num-1], s=8, edgecolor='white', linewidth=1)
# ax.set(title='CV', xlabel='', ylabel='',)

# Replace the raw 'ts_kind' tick labels with their display names
for ax in axes:
    display_labels = [ts_label[x.get_text()] for x in ax.get_xticklabels()]
    ax.set_xticklabels(display_labels, rotation=45, ha='right',)
    sns.despine(ax=ax)

In [None]:
# Show the 'ts_kind' levels present in the plotted table next to the pairs being compared
display(plot_df['ts_kind'].unique(), pairs)

In [None]:
# Row of annotated 'Mean' panels, one panel per entry of `values`
values = [0,0.1,1,10,100]
num = len(values)

fig, axes = plt.subplots(1,num, figsize=(10,4), gridspec_kw=dict(wspace=0.5,))

plot_df = stats[(stats['dox']==1000) & (stats['moi']==1)]
plot_df2 = fits[(fits['dox']==1000) & (fits['moi']==1)]

plot_settings = dict(x='ts_kind', legend=False,)
pairs = [('na','T'), ('NT','T')]

# NOTE(review): `val` is never used inside the loop, so every panel shows the same plot;
# presumably it was meant to feed one of the annotator settings below — confirm
for ax, val in zip(axes, values):
    sns.stripplot(data=plot_df, y='output_gmean', ax=ax, hue='construct', palette=main_palette, **plot_settings)
    ax.set(title='Mean', xlabel='', ylabel='', yscale='log', )#ylim=(1e3,2e4),)
    sns.despine(ax=ax)

    annotator = Annotator(ax, pairs, data=plot_df, y='output_gmean', **plot_settings)
    annotator.configure(
        test='t-test_ind',
        text_format='star',
        loc='inside',
        verbose=0,
        line_height=0,  #line_width=0.5,
        text_offset=-5,
    )
    annotator.apply_and_annotate()