In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# enables concurrent editing of base.py
from importlib import reload
reload(base)

In [None]:
# Export folders of the two Attune runs combined in this notebook
attune_dir = rd.datadir/'instruments'/'data'/'attune'
base_path_1 = attune_dir/'kasey'/'2024.04.05_exp89'/'export'
base_path_2 = attune_dir/'chris'/'2024.06.02-exp95-lenti-miR-iFFL'/'export'

# Second run: plate folders are named 'plate<N>_<cells>_<batch>', N = 1..9
second_run_cells = ['293T']*3 + ['MEF2A']*3 + ['MEF8A']*3
second_run_batches = ['P9', 'P14', 'P15']*3
plate_list = [
    f'plate{number}_{cells}_{batch}'
    for number, (cells, batch) in enumerate(zip(second_run_cells, second_run_batches), start=1)
]

# First run: (data folder, yaml file) pairs; the yaml files live in 'kasey_yaml2'
first_run_plates = [
    ('293T_control',   'plate_control.yaml'),
    ('293T_plate1',    'plate01.yaml'),
    ('293T_plate2',    'plate02.yaml'),
    ('293T_plate3',    'plate03.yaml'),
    ('MEF_3_plate1',   'mef_3_plate01.yaml'),
    ('MEF_4-1_plate1', 'mef_4-1_plate01.yaml'),
    ('MEF_4-1_plate2', 'mef_4-1_plate02.yaml'),
    ('MEF_4-1_plate3', 'mef_4-1_plate03.yaml'),
]
first_run_yaml_dir = base_path_1/'kasey_yaml2'

plates = pd.DataFrame({
    'data_path': [base_path_1/folder for folder, yaml_name in first_run_plates]
                 + [base_path_2/p for p in plate_list],
    'yaml_path': [first_run_yaml_dir/yaml_name for folder, yaml_name in first_run_plates]
                 + [base_path_2/(p+'_metadata.yaml') for p in plate_list],
})

output_path = rd.rootdir/'output'/'lenti_combined'
cache_path = output_path/'data.gzip'
metadata_path = rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'

# Load data: reuse the parquet cache when it exists, otherwise read the raw
# exports and write the cache for next time
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every channel
    all_channels_positive = (data[channel_list] > 0).all(axis=1)
    data = data[all_channels_positive]

    data.to_parquet(rd.outfile(cache_path))

# Add metadata for constructs (left join keeps every event)
metadata = base.get_metadata(metadata_path)
data = data.merge(metadata, how='left', on='construct')
display(data)

In [None]:
# 'cell' is the part of cell_type before its first '-';
# 'exp' identifies one experiment as '<cell_type>_<virus_batch>'
data['cell'] = data['cell_type'].map(lambda cell_type: cell_type.partition('-')[0])
data['exp'] = data['cell_type'].add('_').add(data['virus_batch'])

def map_biorep(df):
    """Number the experiments found in `df` by order of first appearance.

    Returns a copy of `df` with a `biorep` column: for each row, the 0-based
    position of its `exp` value within `df['exp'].unique()`. The frame passed
    in is not modified.
    """
    experiments = df['exp'].unique()
    index_of_exp = dict(zip(experiments, range(len(experiments))))
    numbered = df.copy()
    numbered['biorep'] = numbered['exp'].map(index_of_exp)
    return numbered

# Number bioreps separately within each cell line, then flatten the index again
events_by_cell = data.groupby('cell')[data.columns]
data = events_by_cell.apply(map_biorep).reset_index(drop=True)
display(data)

In [None]:
# Lookup tables keyed by construct, used to pick colors/markers when plotting
metadata_dict = metadata.set_index('construct').to_dict(orient='dict')
designs_palette, designs_markers = (metadata_dict[column] for column in ('color', 'markers'))

In [None]:
# Gate cells
# For every experiment, the gate of each channel is the 99.9th percentile of
# the events with virus_dilution == 0 (presumably the uninfected controls).
channel_list = ['mGL-A', 'mCherry-A', 'mRuby2-A']
zero_dilution = data[data['virus_dilution']==0]
gates = pd.DataFrame()
for channel in channel_list:
    gates[channel] = zero_dilution.groupby(['exp'])[channel].apply(lambda x: x.quantile(0.999))
gates = gates.reset_index()

# Add missing gates
# Experiments listed here get a gate equal to the mean of reference gates:
# the '293T_na' row for 293T entries, every row whose name contains 'MEF' for
# MEF entries. The mean is recomputed for each appended row, exactly as the
# original one-line-per-experiment version did.
# NOTE(review): '293T_P14_' carries a trailing underscore -- confirm it matches
# the 'exp' value present in the data and is not a typo.
missing_gates = [
    ('293T_P10',    '293T'),
    ('293T_P14_',   '293T'),
    ('293T_P16',    '293T'),
    ('MEF-3_P10',   'MEF'),
    ('MEF-4-1_P10', 'MEF'),
    ('MEF-4-1_P14', 'MEF'),
    ('MEF-4-1_P16', 'MEF'),
]
for exp_name, reference_kind in missing_gates:
    if reference_kind == '293T':
        reference_rows = gates['exp']=='293T_na'
    else:
        reference_rows = gates['exp'].str.contains('MEF')
    gates.loc[len(gates.index)] = [exp_name] + list(gates.loc[reference_rows, channel_list].mean().values)

# Indicate which channels are relevant for each experiment
gates = gates.sort_values(['exp'])
gates['marker'] = 'mGL-A'
gates['output'] = 'mRuby2-A'

# Gate data by marker expression
data = (
    data.groupby(['cell_type','virus_batch'])[data.columns]
        .apply(lambda x: base.gate_data(x,gates))
        .reset_index(drop=True)
)

# Explicit copy: `df` gets new columns later on, and writing into a
# boolean-indexed slice of `data` triggers pandas' SettingWithCopyWarning.
df = data[(data['expressing']) & (data['virus_dilution']!=0)].copy()

In [None]:
# Bin by marker quantiles
# Within each condition (`by`), events are cut into up to 20 equal-count marker
# bins (fewer when duplicate bin edges are dropped); each bin is then labelled
# with the median marker value of its events.
by = ['exp','cell','biorep','construct','dox','virus_dilution']
# assign() builds a new frame instead of writing a column into `df`, which is a
# filtered view of `data` (avoids pandas' SettingWithCopyWarning).
df = df.assign(
    bin_marker_quantiles=df.groupby(by)['marker'].transform(lambda x: pd.qcut(x, q=20, duplicates='drop'))
)
quantiles = df.groupby(by+['bin_marker_quantiles'])['marker'].median().rename('bin_marker_quantiles_median').reset_index()
df_quantiles = df.merge(quantiles, how='left', on=by+['bin_marker_quantiles'])

df_quantiles = df_quantiles.astype({'marker': float, 'output': float})

# Population stats
# 'std' is passed as a string alias on purpose: pandas already substitutes its
# own (ddof=1) std when handed np.std, warns about it since 2.1, and will call
# np.std (ddof=0) directly in a future version. The alias pins today's result.
stat_list = ['std', sp.stats.gmean, sp.stats.variation]
grouped = df_quantiles.groupby(by=by)
stats = grouped[['marker','output']].agg(stat_list).reset_index().dropna()

# Rename columns as 'col_stat'
stats.columns = stats.columns.map(base.rename_multilevel_cols)
# The counts come back with a fresh 0..n-1 index in the same group order as
# `stats` had before dropna(); column assignment aligns on that index, so the
# rows removed by dropna() are simply skipped.
stats['count'] = grouped['output'].count().reset_index()['output']
stats = stats.merge(metadata, how='left', on='construct')
display(stats)

# Quantile stats & slope
df_quantiles['bin_marker_quantiles_median_log'] = np.log10(df_quantiles['bin_marker_quantiles_median'])
df_quantiles['output_log'] = np.log10(df_quantiles['output'])

stat_list = [sp.stats.gmean]
grouped = df_quantiles.groupby(by=by+['bin_marker_quantiles_median'])
stats_quantiles = grouped[['marker','output']].agg(stat_list).reset_index().dropna()

# Rename columns as 'col_stat'
stats_quantiles.columns = stats_quantiles.columns.map(base.rename_multilevel_cols)
# Same index-aligned assignment as for `stats`
stats_quantiles['count'] = grouped['output'].count().reset_index()['output']
stats_quantiles = stats_quantiles.merge(metadata, how='left', on='construct')

stats_quantiles['bin_marker_quantiles_median_log'] = np.log10(stats_quantiles['bin_marker_quantiles_median'])
stats_quantiles['output_gmean_log'] = np.log10(stats_quantiles['output_gmean'])

# Compute slope for all constructs
def get_slope(df):
    """Least-squares line through (log marker bin median, log output gmean).

    Reads the columns `bin_marker_quantiles_median_log` (x) and
    `output_gmean_log` (y) of `df` and passes them to `scipy.stats.linregress`.

    Returns a one-row DataFrame (index 0) with the columns `slope`,
    `intercept_log`, `r_value`, `p_value` and `stderr`.
    """
    fit = sp.stats.linregress(df['bin_marker_quantiles_median_log'], df['output_gmean_log'])
    return pd.DataFrame(
        [[fit.slope, fit.intercept, fit.rvalue, fit.pvalue, fit.stderr]],
        columns=['slope', 'intercept_log', 'r_value', 'p_value', 'stderr'],
        index=[0],
    )

# One regression per condition; the fitted intercept is in log10 units, so
# 'intercept' stores it converted back to linear scale
quantiles_by_condition = stats_quantiles.groupby(by)[stats_quantiles.columns]
fits = quantiles_by_condition.apply(get_slope).reset_index()
fits['intercept'] = fits['intercept_log'].map(lambda log_value: 10**log_value)
fits = fits.merge(metadata, how='left', on='construct')
display(fits)

In [None]:
# Marker-channel density curves for the 'controller' and 'base' groups:
# one row of panels per virus dilution, one column per experiment
shown_groups = ['controller', 'base']
plot_df = data.loc[data['group'].isin(shown_groups)]
kde_options = {
    'kind': 'kde',
    'hue': 'construct',
    'palette': designs_palette,
    'row': 'virus_dilution',
    'col': 'exp',
    'log_scale': True,
    'fill': False,
    'common_norm': False,
    'height': 2,
    'facet_kws': {'margin_titles': True},
}
g = sns.displot(data=plot_df, x='marker', **kde_options)
marker_hist_file = output_path/'hist_marker.svg'
g.figure.savefig(rd.outfile(marker_hist_file), bbox_inches='tight')

In [None]:
# Output-channel density curves of the gated events (`df`) for the
# 'controller' and 'base' groups, faceted by virus dilution and experiment
shown_groups = ['controller', 'base']
plot_df = df.loc[df['group'].isin(shown_groups)]
kde_options = {
    'kind': 'kde',
    'hue': 'construct',
    'palette': designs_palette,
    'row': 'virus_dilution',
    'col': 'exp',
    'log_scale': True,
    'fill': False,
    'common_norm': False,
    'height': 2,
    'facet_kws': {'margin_titles': True},
}
g = sns.displot(data=plot_df, x='output', **kde_options)
output_hist_file = output_path/'hist_output.svg'
g.figure.savefig(rd.outfile(output_hist_file), bbox_inches='tight')

In [None]:
# Summary strip plots for dox == 1000 and virus_dilution == 1, restricted to
# the 'controller' and 'base' groups. One row of panels per cell line;
# columns show output gmean, output std and the fitted slope.
plot_df = stats[stats['group'].isin(['controller','base']) & (stats['dox']==1000) & (stats['virus_dilution']==1)]
plot_df2 = fits[fits['group'].isin(['controller','base']) & (fits['dox']==1000) & (fits['virus_dilution']==1)]

# (source frame, y column, log-scale the y axis?) for each panel column
panels = [
    (plot_df,  'output_gmean', True),
    (plot_df,  'output_std',   True),
    (plot_df2, 'slope',        False),
]

# Size the grid from the data so an additional cell line cannot index past the
# last row; never fewer than the two rows this figure was laid out with.
n_rows = max(2, plot_df['cell'].nunique(), plot_df2['cell'].nunique())
# squeeze=False keeps `axes` two-dimensional for any row count
fig, axes = plt.subplots(n_rows, 3, squeeze=False, gridspec_kw=dict(wspace=0.5))

for col, (frame, y_column, log_y) in enumerate(panels):
    for row, (_cell, group) in enumerate(frame.groupby('cell')):
        ax = axes[row, col]
        sns.stripplot(data=group, x='design', y=y_column, ax=ax, hue='construct',
                      palette=designs_palette, legend=False,)
        if log_y:
            ax.set(yscale='log')

for ax in axes.flatten():
    sns.despine(ax=ax)

Plot titrations

In [None]:
# Output gmean vs. marker bin median at dox == 1000, colored by virus dilution,
# with one panel per (cell line, construct).
# NOTE(review): design 3 is excluded for bioreps < 4; the reason is not stated
# in this notebook -- confirm and document it.
plot_df = stats_quantiles[stats_quantiles['group'].isin(['controller','base']) & (stats_quantiles['dox']==1000) 
                          & ~((stats_quantiles['biorep']<4) & (stats_quantiles['design']==3))]
g = sns.FacetGrid(data=plot_df, hue='virus_dilution', palette='viridis', row='cell', col='construct', 
                  margin_titles=True, height=3)
g.map(sns.scatterplot, 'bin_marker_quantiles_median', 'output_gmean')
# Log-log axes on every panel
g.set(xscale='log', yscale='log')
# The hue encodes the virus dilution, so the figure needs a legend to be readable
g.add_legend()
g.figure.savefig(rd.outfile(output_path/'scatter_virus-dilutions.svg'), bbox_inches='tight')

Output expression from lentiviral infections does not change as a function of viral titer. It seems that titer changes the fraction of infected cells but does not lead to multiple infections per cell.