In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import rushd as rd
import scipy as sp
import seaborn as sns

from importlib import reload
# Re-import the local `base` helper module so that edits made to it are
# picked up without restarting the kernel
reload(base)

# Global plot styling: seaborn 'ticks' style and 'talk' context, with
# Helvetica Neue as the sans-serif font
sns.set_style('ticks')
sns.set_context('talk',rc={'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']})

In [None]:
# Setup data loading
# Each plate export (plate1..plate3) is paired with the biorep of the same number;
# all plates share one well-metadata YAML.
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'
export_path = base_path/'2024.11.07_exp117.4'/'export'
biorep_ids = range(1,4)
plates = pd.DataFrame({
    'data_path': [export_path/f'plate{n}' for n in biorep_ids],
    'yaml_path': [export_path/'wells.yaml' for _ in biorep_ids],
    'biorep': biorep_ids,
})

# Output locations: figure directory and the parquet cache of the loaded events
output_path = rd.rootdir/'output'/'lenti_therapeutic-titer'
cache_path = output_path/'lenti_therapeutic-titer.gzip'

# Render the well layout once per distinct metadata file
for yaml_file in plates['yaml_path'].unique():
    rd.plot.plot_well_metadata(yaml_file)

In [None]:
# Load data: reuse the parquet cache when it exists, otherwise read the raw
# plate exports, filter them, and write the cache for next time
channel_list = ['mRuby2-A','mGL-A']

if not cache_path.exists():
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)
    # Keep only events with a strictly positive value in every channel
    data = data[(data[channel_list] > 0).all(axis=1)]
    data.to_parquet(rd.outfile(cache_path))
else:
    data = pd.read_parquet(cache_path)
display(data)

### Calculate titer
`rd.flow.moi` takes:

1. A DataFrame with the following columns:

    - condition
    - replicate
    - starting_cell_count
    - scaling (dilution factor relative to max_virus)
    - max_virus

2. Information to gate infected cells

    - color_column_name (channel to gate on)
    - color_column_cutoff (gate)

3. Optional parameters

    - output_path (where to save the plots)
    - summary_method (mean/median of replicates)

In [None]:
# Create columns for rd.flow.moi
# 'condition' mirrors the construct name; the cell count and max virus amount
# are the same constant for every event.
data = data.assign(
    condition=data['construct'],
    starting_cell_count=2e4,
    max_virus=4,
)

In [None]:
# Draw gates on uninfected population
# One gate per biorep and channel: the 99.5th percentile of the 'UI' events
channel_list = ['mGL-A', 'mRuby2-A']
ui_by_biorep = data[data['condition']=='UI'].groupby(['biorep'])
gates = pd.DataFrame({
    ch: ui_by_biorep[ch].apply(lambda values: values.quantile(0.995))
    for ch in channel_list
}).reset_index()

In [None]:
# Show the gate table (one row per biorep, one column per channel) for inspection
display(gates)

In [None]:
# 2D KDE of the uninfected ('UI') events, one facet per biorep, with the gates overlaid
x = 'mGL-A'
y = 'mRuby2-A'
plot_df = data[data['condition']=='UI']
g = sns.displot(data=plot_df, x=x, y=y, col='biorep', kind='kde',
                hue='construct', legend=False, color=base.colors['gray'],
                log_scale=True, common_norm=False, fill=False, levels=7,
                facet_kws=dict(margin_titles=True))

# Draw each biorep's gate as a vertical (x channel) and horizontal (y channel) line;
# facets whose biorep has no gate row are left unannotated
for biorep, ax in g.axes_dict.items():
    gate = gates[gates['biorep']==biorep]
    if not gate.empty:
        ax.axvline(gate[x].values[0], color='black', zorder=0)
        ax.axhline(gate[y].values[0], color='black', zorder=0)

g.figure.savefig(rd.outfile(output_path/'kde_mGL-mRuby2_UI-gates.png'))

In [None]:
# Per-biorep 2D KDEs of every condition at scaling == 1, with that biorep's gates overlaid
for biorep, group in data.groupby('biorep'):

    # Look the gates up first: a biorep without gates is skipped entirely, so no
    # figure is built only to be left unannotated and unsaved.
    gate = gates[(gates['biorep']==biorep)]
    if gate.empty: continue

    # Subsample 1000 events per condition. The seed is fixed so that re-running the
    # notebook reproduces the same figure.
    # NOTE: sampling is without replacement, so pandas raises ValueError if any
    # condition has fewer than 1000 events at scaling == 1.
    plot_df = group[(group['scaling']==1)].groupby('condition').sample(1000, random_state=0)

    g = sns.displot(data=plot_df, x='mGL-A', y='mRuby2-A', col='condition', col_wrap=4, kind='kde',
                    log_scale=True, common_norm=False, fill=False, levels=7,
                    hue='construct')

    # Use the channel names directly (matching the displot call above) rather than
    # relying on `x`/`y` variables left over from another cell.
    for _, ax in g.axes_dict.items():
        ax.axvline(gate['mGL-A'].values[0], color='black', zorder=0)
        ax.axhline(gate['mRuby2-A'].values[0], color='black', zorder=0)

    g.figure.savefig(rd.outfile(output_path/f'kde_mGL-mRuby2_by-construct-with-gates_biorep{biorep}.png'))

In [None]:
# Add marker/output metadata
# By default the marker is read on mGL and the output on mRuby2;
# for RC809-RC811 the two channels are swapped.
swapped = data['construct'].isin(['RC809','RC810','RC811'])

data['marker'] = data['mGL-A']
data.loc[swapped, 'marker'] = data.loc[swapped, 'mRuby2-A']

data['output'] = data['mRuby2-A']
data.loc[swapped, 'output'] = data.loc[swapped, 'mGL-A']

In [None]:
# Categorize cells into quadrants based on two gates
# Possible values:
#   0 = double negative
#   1 = x-positive
#   2 = y-positive
#   3 = double positive
def get_quadrant(df, x, y, gates):
    """Classify each event of one biorep into a quadrant defined by two gates.

    Parameters
    ----------
    df : pandas.DataFrame
        Events belonging to a single biorep; must contain a 'biorep' column
        and the columns named by `x` and `y`. Only the first row's 'biorep'
        value is used to select the gates.
    x, y : str
        Names of the columns to threshold.
    gates : pandas.DataFrame
        Table with a 'biorep' column plus one gate-value column per channel
        (including `x` and `y`).

    Returns
    -------
    pandas.DataFrame
        A copy of `df` with three added columns: boolean 'x' and 'y'
        (value strictly above the gate) and integer 'quadrant', where
        0 = double negative, 1 = x-positive, 2 = y-positive, 3 = double positive.

    Raises
    ------
    IndexError
        If `df` is empty or `gates` has no row for this biorep.
    """
    # Work on a copy so the caller's frame (e.g. a groupby group) is not mutated
    df = df.copy()
    biorep = df['biorep'].values[0]
    gate_row = gates.loc[gates['biorep']==biorep]
    # Threshold the rows of `df` itself rather than a notebook-level global frame
    df['x'] = df[x] > gate_row[x].values[0]
    df['y'] = df[y] > gate_row[y].values[0]
    df['quadrant'] = df['x'].astype(int) + df['y'].astype(int)*2
    return df

# Assign quadrants biorep by biorep, with mGL on the x axis and mRuby2 on the y axis
x, y = 'mGL-A', 'mRuby2-A'
data = (
    data.groupby(['biorep'])[data.columns]
    .apply(lambda biorep_df: get_quadrant(biorep_df, x, y, gates))
    .reset_index(drop=True)
)
display(data)

In [None]:
# Gate on marker-positive cells to calculate titer
#   RC806-8 on mGL: x-positive (1) or double-positive (3)
#   RC809-11 on mRuby2: y-positive (2) or double-positive (3)

# Default for every construct: y-positive quadrants (2 or 3)
data['gated'] = data['quadrant'].isin([2,3])
# RC806-8 are gated on the x channel instead
on_mgl = data['construct'].isin(['RC806','RC807','RC808'])
data.loc[on_mgl, 'gated'] = data.loc[on_mgl, 'quadrant'].isin([1,3])
display(data)

In [None]:
# Calculate titer
# rd.flow.moi is run separately on each biorep, gating on the boolean 'gated'
# column with a cutoff of 0.5
df_titer = (
    data.groupby(['biorep'])[data.columns]
    .apply(lambda biorep_df: rd.flow.moi(biorep_df, 'gated', 0.5))
    .reset_index()
)
display(df_titer)

### Analyze highest virus amount

In [None]:
# Add metadata for constructs
# Left merge keeps every event, including constructs absent from the metadata sheet
metadata = base.get_metadata(rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx')
data = pd.merge(data, metadata, how='left', on='construct')
display(data)

# Create dicts to specify colors/markers
# metadata_dict maps column name -> {construct: value}
metadata_dict = metadata.set_index('construct').to_dict(orient='dict')
main_palette = metadata_dict['color']
main_markers = metadata_dict['markers']

In [None]:
# Constructs in fxn_list use mGL as marker and mRuby2 as output;
# every other construct uses mRuby2 as marker and mGL as output
fxn_list = ['RC806','RC807','RC808']
is_fxn = data['construct'].isin(fxn_list)
data.loc[is_fxn, 'marker'] = data.loc[is_fxn, 'mGL-A']
data.loc[~is_fxn, 'marker'] = data.loc[~is_fxn, 'mRuby2-A']
data.loc[is_fxn, 'output'] = data.loc[is_fxn, 'mRuby2-A']
data.loc[~is_fxn, 'output'] = data.loc[~is_fxn, 'mGL-A']

# Redraw the gates: 99.9th percentile of the 'UI' events per biorep and channel
channel_list = ['mGL-A', 'mRuby2-A',]
ui_by_biorep = data[data['construct']=='UI'].groupby(['biorep'])
gates = pd.DataFrame({
    ch: ui_by_biorep[ch].apply(lambda values: values.quantile(0.999))
    for ch in channel_list
}).reset_index()

# Manually adjust gates
gates.loc[gates['biorep'].isin([2,3]), 'mRuby2-A'] = 2e2
display(gates)

In [None]:
# 2D KDE grid (rows: biorep, columns: construct) at scaling == 1, with gates overlaid.
# Subsample 1000 events per (biorep, construct); the seed is fixed so that the figure
# is reproducible across runs.
# NOTE: sampling is without replacement, so pandas raises ValueError if any group
# has fewer than 1000 events.
plot_df = data[(data['scaling']==1)].groupby(['biorep','construct']).sample(1000, random_state=0)

g = sns.displot(data=plot_df, x='mGL-A', y='mRuby2-A', hue='construct', kind='kde',
                row='biorep', col='construct', facet_kws=dict(margin_titles=True),
                log_scale=True, common_norm=False, levels=8)

for (biorep, construct), ax in g.axes_dict.items():
    # Gates depend only on the biorep, so look the row up once per facet
    gate = gates.loc[gates['biorep']==biorep]
    ax.axvline(gate['mGL-A'].values[0], c='black', ls=':', zorder=0)
    ax.axhline(gate['mRuby2-A'].values[0], c='black', ls=':', zorder=0)

In [None]:
# Gate data by marker expression
def gate_data(df, gates):
    """Flag events whose marker signal exceeds the gate of their marker channel.

    Constructs listed in the module-level `fxn_list` are compared against the
    'mGL-A' gate; all other constructs are compared against the 'mRuby2-A' gate.

    Parameters
    ----------
    df : pandas.DataFrame
        Events from a single biorep (e.g. one group of ``data.groupby('biorep')``);
        must contain 'biorep', 'construct' and 'marker' columns. Only the first
        row's 'biorep' value is used to select the gates.
    gates : pandas.DataFrame
        Table with a 'biorep' column and 'mGL-A' / 'mRuby2-A' gate-value columns.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` with a boolean 'expressing' column added.

    Raises
    ------
    IndexError
        If `df` is empty.
    KeyError
        If `gates` has no entry for this biorep.
    """
    df = df.copy()
    biorep = df['biorep'].values[0] # the same for entire df, assuming df = data.groupby('biorep')
    gates_dict = gates.set_index('biorep').to_dict('dict') # format: column -> {index: value}
    marker_is_mgl = df['construct'].isin(fxn_list)
    above_mgl_gate = df['marker'] > gates_dict['mGL-A'][biorep]
    above_mruby2_gate = df['marker'] > gates_dict['mRuby2-A'][biorep]
    # Build the column in a single step so it has a true boolean dtype; filling it with
    # two partial .loc assignments leaves an object-dtype column behind.
    df['expressing'] = np.where(marker_is_mgl, above_mgl_gate, above_mruby2_gate)
    return df

# Gate each biorep with its own thresholds, then keep only the expressing
# events of infected (non-'UI') constructs for downstream analysis
data = (
    data.groupby('biorep')[data.columns]
    .apply(lambda biorep_df: gate_data(biorep_df, gates))
    .reset_index(drop=True)
)
df = data[(data['expressing']) & (data['construct'].ne('UI'))]

In [None]:
# Marker-vs-output 2D KDE grid (rows: biorep, columns: construct) for the gated
# events at scaling == 1.
# Subsample 1000 events per (biorep, construct); the seed is fixed so that the figure
# is reproducible across runs.
# NOTE: sampling is without replacement, so pandas raises ValueError if any group
# has fewer than 1000 gated events.
plot_df = df[(df['scaling']==1)].groupby(['biorep','construct']).sample(1000, random_state=0)

g = sns.displot(data=plot_df, x='marker', y='output', hue='construct', kind='kde',
                row='biorep', col='construct', facet_kws=dict(margin_titles=True),
                log_scale=True, common_norm=False, levels=8)

# for (biorep, construct), ax in g.axes_dict.items():
#     ax.axvline(gates.loc[gates['biorep']==biorep, 'mGL-A'].values[0], c='black', ls=':', zorder=0)
#     ax.axhline(gates.loc[gates['biorep']==biorep, 'mRuby2-A'].values[0], c='black', ls=':', zorder=0)

In [None]:
# Bin data and calculate statistics
# The project helper is run on the gated events, grouped by construct, scaling and
# biorep, with geometric mean, standard deviation and scipy's `variation` as the
# requested statistics. It returns four objects; the third is discarded here.
# NOTE(review): the exact binning and fit performed live in base.calculate_bins_stats
# and are not visible in this notebook - confirm there.
df_quantiles, stats, _, fits = base.calculate_bins_stats(df, stat_list=[sp.stats.gmean, np.std, sp.stats.variation], by=['construct','scaling','biorep'])
# Attach the construct metadata to both summary tables (left merge keeps every row)
stats = stats.merge(metadata, how='left', on='construct')
fits = fits.merge(metadata, how='left', on='construct')

In [None]:
# One two-panel figure per biorep (hard-coded as 1..3): output vs. binned marker on
# the left, marginal output distribution on the right
for biorep in range(1,4):
    # Binned events of this biorep at scaling == 1
    plot_df = df_quantiles[(df_quantiles['biorep']==biorep) & (df_quantiles['scaling']==1)]
    fig, axes = plt.subplots(1,2, gridspec_kw=dict(width_ratios=(1,0.3)))

    # line plot
    # Geometric mean of output per marker-bin median, one line per construct;
    # the error band spans gmean / gstd to gmean * gstd
    ax = axes[0]
    sns.lineplot(data=plot_df, x='bin_marker_quantiles_median', y='output', hue='construct', #palette=main_palette, 
                legend=False, dashes=False, style='construct', ax=ax, #markers=main_markers, markersize=9, markeredgewidth=1,
                estimator=sp.stats.gmean, errorbar=lambda x: (sp.stats.gmean(x) / sp.stats.gstd(x), sp.stats.gmean(x) * sp.stats.gstd(x)))
    ax.set(xscale='log', yscale='log', xlabel='marker',)
    sns.despine(ax=ax)
    # Reference line: geometric mean output of the rows whose 'group' is 'marker' in this
    # biorep. Note that this uses all of df_quantiles, i.e. it is not restricted to scaling == 1.
    marker_baseline = sp.stats.gmean(df_quantiles.loc[(df_quantiles['group']=='marker') & (df_quantiles['biorep']==biorep), 'output'])
    ax.axhline(marker_baseline, color='black', ls=':')
    # Label is anchored at the right edge of the current x-limits, so it must come
    # after the log scale has been set
    ax.annotate('marker only', (ax.get_xlim()[1], marker_baseline), ha='right', va='bottom')

    # histogram
    # Marginal KDE of output per construct, drawn along the y axis
    ax = axes[1]
    sns.kdeplot(data=plot_df, y='output', hue='construct', #palette=main_palette, 
                legend=False, log_scale=True, common_norm=False, ax=ax)
    sns.despine(ax=ax, bottom=True)
    # Reuse the line plot's y-limits so the two panels line up; hide the duplicate tick labels
    ax.set(xlabel='', ylim=axes[0].get_ylim(), ylabel='', yticklabels=[])
    ax.get_xaxis().set_visible(False)

    fig.savefig(rd.outfile(output_path/f'joint_biorep{biorep}.png'))

In [None]:
# Short display labels keyed by 'na' / 'NT' / 'T' / 'none'.
# NOTE(review): not referenced by any active code visible in this notebook.
ts_label = {'na': 'base', 'NT': 'OL', 'T': 'CL', 'none': '–'}
# Matplotlib marker shapes.
# NOTE(review): presumably one per replicate; not referenced by any active code visible here.
marker_list = ['o', 'v', 'D', 'X']

In [None]:
# Summary figure at scaling == 1: geometric mean, standard deviation and fitted slope
# of the output, one strip per construct with every biorep overlaid
fig, axes = plt.subplots(1,3, figsize=(10,4), gridspec_kw=dict(wspace=0.5,))

plot_df = stats[(stats['scaling']==1)]
plot_df2 = fits[(fits['scaling']==1)]

# One entry per panel: (source table, y column, axis settings)
panels = [
    (plot_df, 'output_gmean', dict(title='Mean', xlabel='', ylabel='', yscale='log')),
    (plot_df, 'output_std', dict(title='Std.', xlabel='', ylabel='', yscale='log')),
    (plot_df2, 'slope', dict(title='Slope', xlabel='', ylabel='')),
]

# Custom palette and per-biorep marker shapes are not applied here; seaborn defaults are used
for ax, (panel_df, y_col, ax_settings) in zip(axes, panels):
    for biorep, group in panel_df.groupby('biorep'):
        sns.stripplot(data=group, x='construct', y=y_col, hue='construct',
                      legend=False, ax=ax)
    ax.set(**ax_settings)

# Slant the construct names so they do not overlap
for ax in axes:
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right',)
    sns.despine(ax=ax)

fig.savefig(rd.outfile(output_path/'stats.png'))