In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import rushd as rd
import scipy as sp
import seaborn as sns

# Reload the local `base` module so that changes made to it since the first
# import are picked up by this kernel
from importlib import reload
reload(base)

# Global plot styling: 'ticks' axes, 'talk'-sized elements, Helvetica Neue sans-serif font
font_rc = {'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']}
sns.set_style('ticks')
sns.set_context('talk', rc=font_rc)

In [None]:
# Setup data loading
# Each plate is described by the folder holding its exported data and the
# YAML file (inside that same folder) holding its well metadata.
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'
export_dirs = [
    base_path/'2024.10.03_exp116'/'export'/'plate2',
    base_path/'2024.10.21_exp116.2'/'export',
]
yaml_names = ['exp116_wells.yaml', 'wells.yaml']
plates = pd.DataFrame({
    'data_path': export_dirs,
    'yaml_path': [folder/name for folder, name in zip(export_dirs, yaml_names)],
})

# Output folder and the parquet cache of the loaded data
output_path = rd.rootdir/'output'/'lenti_iPS11'
cache_path = output_path/'lenti_iPS11.gzip'

# Draw the well metadata of every distinct YAML file
for yaml_file in plates['yaml_path'].unique():
    rd.plot.plot_well_metadata(yaml_file)

In [None]:
# Load data
# Read the parquet cache when it exists; otherwise load the plates, filter the
# events and write the cache for the next run.
channel_list = ['mRuby2-A','mGL-A']

if not cache_path.exists():
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)
    # Keep only events with a strictly positive value in every listed channel
    positive_in_all_channels = (data[channel_list] > 0).all(axis=1)
    data = data[positive_in_all_channels]
    data.to_parquet(rd.outfile(cache_path))
else:
    data = pd.read_parquet(cache_path)
display(data)

In [None]:
# Add metadata for constructs
# Left-join on 'construct' so that every event is kept even when its construct
# has no row in the metadata table.
# NOTE(review): re-running this cell merges the metadata columns a second time.
metadata_file = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'
metadata = base.get_metadata(metadata_file)
data = pd.merge(data, metadata, how='left', on='construct')
display(data)

In [None]:
# Create dicts to specify colors/markers
# metadata_dict has the layout: column -> {construct: value}
metadata_dict = metadata.set_index('construct').to_dict(orient='dict')
main_palette, main_markers = metadata_dict['color'], metadata_dict['markers']

In [None]:
# Per-biorep gates: the 99.99th percentile of each channel among the events of
# the 'UI' construct (presumably the un-infected control -- TODO confirm).
channel_list = ['mGL-A', 'mRuby2-A',]
ui_by_biorep = data[data['construct']=='UI'].groupby('biorep')
gates = ui_by_biorep[channel_list].quantile(0.9999).reset_index()

# Indicate which channels are relevant for each experiment:
# mGL-A is the marker channel and mRuby2-A the output channel, and the
# experiment key 'exp' is the biorep.
gates['marker'] = gates['mGL-A']
gates['output'] = gates['mRuby2-A']
gates['exp'] = gates['biorep']

display(gates)

In [None]:
# Expose the channels and the replicate under generic column names:
# marker <- mGL-A, output <- mRuby2-A, exp <- biorep
data = data.assign(**{
    'marker': data['mGL-A'],
    'output': data['mRuby2-A'],
    'exp': data['biorep'],
})

In [None]:
# Joint marker/output densities at dox == 1000 (excluding the 'UI' construct),
# one panel per biorep (rows) and group (columns).
EVENTS_PER_CONSTRUCT = 1000   # events drawn per (biorep, construct)
MANUAL_MARKER_GATE = 2e3      # hand-picked marker threshold shown next to the stored gate

# A fixed seed makes the subsample, and therefore the figure, reproducible on re-run.
# NOTE: sample() raises ValueError if any (biorep, construct) group has fewer
# rows than EVENTS_PER_CONSTRUCT.
plot_df = (
    data[(data['dox']==1000) & (data['construct']!='UI')]
    .groupby(['biorep','construct'])
    .sample(EVENTS_PER_CONSTRUCT, random_state=0)
)
g = sns.displot(data=plot_df, x='marker', y='output', hue='construct', palette=main_palette, kind='kde',
                row='biorep', col='group', facet_kws=dict(margin_titles=True),
                log_scale=True, common_norm=False, levels=8)

# axes_dict is keyed by (row, col), i.e. (biorep, group) for this grid
for (biorep, _group), ax in g.axes_dict.items():
    biorep_gates = gates.loc[gates['biorep']==biorep]
    # Dotted guides: stored marker gate, manual marker gate, stored output gate
    ax.axvline(biorep_gates['marker'].values[0], c='black', ls=':', zorder=0)
    ax.axvline(MANUAL_MARKER_GATE, c='black', ls=':', zorder=0)
    ax.axhline(biorep_gates['output'].values[0], c='black', ls=':', zorder=0)

In [None]:
# Replace the marker gate of every biorep with a fixed manual threshold.
# A scalar is broadcast to all rows, so this works for any number of bioreps
# (a hard-coded list of length 3 fails when `gates` has a different row count).
gates['marker'] = 2e3

In [None]:
# Gate data by marker expression
def gate_data(df, gates):
    """Flag the events whose marker value exceeds the gate of their experiment.

    Args:
        df: events of a single experiment (e.g. one group of
            ``data.groupby('exp')``) with 'exp' and 'marker' columns. The 'exp'
            value of the first row is used for the whole frame.
        gates: table with an 'exp' column identifying each experiment and a
            'marker' column holding that experiment's marker threshold.

    Returns:
        A copy of ``df`` with an added boolean 'expressing' column that is True
        where ``marker`` is strictly greater than the threshold. An empty
        ``df`` yields an empty copy with an empty 'expressing' column.

    Raises:
        KeyError: if the experiment of ``df`` has no row in ``gates``.
    """
    df = df.copy()
    if df.empty:
        # No rows means no experiment to look up
        df['expressing'] = pd.Series(index=df.index, dtype=bool)
        return df
    exp = df['exp'].values[0] # the same for entire df, assuming df = data.groupby('exp')
    marker_gates = gates.set_index('exp')['marker'].to_dict() # format: {exp: threshold}
    df['expressing'] = df['marker'] > marker_gates[exp]
    return df

# Gate each experiment separately, then flatten the result back to a plain
# integer index
data = (
    data.groupby('exp')[data.columns]
    .apply(lambda exp_events: gate_data(exp_events, gates))
    .reset_index(drop=True)
)

# Keep expressing events of real constructs with strictly positive marker and output
is_construct = data['construct']!='UI'
is_positive = (data['marker']>0) & (data['output']>0)
df = data[data['expressing'] & is_construct & is_positive]

In [None]:
# Inspect the gated events of biorep 3
display(df.loc[df['biorep']==3])

In [None]:
# Bin data and calculate statistics
# calculate_bins_stats returns four objects; the third one is not used here.
df_quantiles, stats, _, fits = base.calculate_bins_stats(df)

# Attach the construct metadata to both summary tables
stats, fits = (
    table.merge(metadata, how='left', on='construct')
    for table in (stats, fits)
)

In [None]:
biorep = 2
plot_df = df_quantiles[(df_quantiles['biorep']==biorep)]
fig, axes = plt.subplots(1,2, gridspec_kw=dict(width_ratios=(1,0.3)))

# line plot
ax = axes[0]
sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=main_palette, 
             legend=False, dashes=False, style='construct', markers=main_markers, ax=ax, markersize=9, markeredgewidth=1,
             estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
ax.set(xscale='log', yscale='log', xlabel='marker', ylim=(2e2,1e6), )#xlim=(2e3,3e4))
sns.despine(ax=ax)
marker_baseline = sp.stats.gmean(df_quantiles.loc[(df_quantiles['group']=='marker') & (df_quantiles['biorep']==biorep), 'output'])
ax.axhline(marker_baseline, color='black', ls=':')
ax.annotate('marker only', (ax.get_xlim()[1], marker_baseline), ha='right', va='bottom')

# histogram
ax = axes[1]
sns.kdeplot(data=plot_df, y='output', hue='construct', palette=main_palette, 
             legend=False, log_scale=True, common_norm=False, ax=ax)
sns.despine(ax=ax, bottom=True)
ax.set(xlabel='', ylim=axes[0].get_ylim(), ylabel='', yticklabels=[])
ax.get_xaxis().set_visible(False)

In [None]:
biorep = 1
plot_df = df_quantiles[(df_quantiles['biorep']==biorep)]
fig, axes = plt.subplots(1,2, gridspec_kw=dict(width_ratios=(1,0.3)))

# line plot
ax = axes[0]
sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=main_palette, 
             legend=False, dashes=False, style='construct', markers=main_markers, ax=ax, markersize=9, markeredgewidth=1,
             estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
ax.set(xscale='log', yscale='log', xlabel='marker', ylim=(2e2,1e6))
sns.despine(ax=ax)
marker_baseline = sp.stats.gmean(df_quantiles.loc[(df_quantiles['group']=='marker') & (df_quantiles['biorep']==biorep), 'output'])
ax.axhline(marker_baseline, color='black', ls=':')
ax.annotate('marker only', (ax.get_xlim()[1], marker_baseline), ha='right', va='bottom')

# histogram
ax = axes[1]
sns.kdeplot(data=plot_df, y='output', hue='construct', palette=main_palette, 
             legend=False, log_scale=True, common_norm=False, ax=ax)
sns.despine(ax=ax, bottom=True)
ax.set(xlabel='', ylim=axes[0].get_ylim(), ylabel='', yticklabels=[])
ax.get_xaxis().set_visible(False)

In [None]:
biorep = 3
plot_df = df_quantiles[(df_quantiles['biorep']==biorep)]
fig, axes = plt.subplots(1,2, gridspec_kw=dict(width_ratios=(1,0.3)))

# line plot
ax = axes[0]
sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', palette=main_palette, 
             legend=False, dashes=False, style='construct', markers=main_markers, ax=ax, markersize=9, markeredgewidth=1,
             estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
ax.set(xscale='log', yscale='log', xlabel='marker', ylim=(2e2,1e6))
sns.despine(ax=ax)
marker_baseline = sp.stats.gmean(df_quantiles.loc[(df_quantiles['group']=='marker') & (df_quantiles['biorep']==biorep), 'output'])
ax.axhline(marker_baseline, color='black', ls=':')
ax.annotate('marker only', (ax.get_xlim()[1], marker_baseline), ha='right', va='bottom')

# histogram
ax = axes[1]
sns.kdeplot(data=plot_df, y='output', hue='construct', palette=main_palette, 
             legend=False, log_scale=True, common_norm=False, ax=ax)
sns.despine(ax=ax, bottom=True)
ax.set(xlabel='', ylim=axes[0].get_ylim(), ylabel='', yticklabels=[])
ax.get_xaxis().set_visible(False)