In [None]:
import base
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy.stats
import seaborn as sns

# Re-import base on every run of this cell so that edits made to base.py
# while the kernel is alive take effect without a restart.
from importlib import reload
reload(base)

# Plot styling applied through seaborn for the figures drawn below.
sns.set_style('ticks')
sns.set_context(
    'talk',
    rc={
        'font.family': 'sans-serif',
        'font.sans-serif': ['Helvetica Neue'],
    },
)

## Setup

- Load data
- Add metadata
- Draw gates
- Gate transfected cells

Result from this section: DataFrame `df` representing transfected cells.

Load all lenti collection data collected as of 2024.03.27

In [None]:
# Folder holding the exported data for this experiment.
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'/'2024.04.05_exp89'/'export'

# Each plate pairs a data folder with the YAML file describing its wells.
# The two lists are matched by position.
plate_dirs = [
    '293T_control', '293T_plate1', '293T_plate2', '293T_plate3',
    'MEF_3_plate1', 'MEF_4-1_plate1', 'MEF_4-1_plate2', 'MEF_4-1_plate3',
]
plate_yamls = [
    'plate_control.yaml', 'plate01.yaml', 'plate02.yaml', 'plate03.yaml',
    'mef_3_plate01.yaml', 'mef_4-1_plate01.yaml', 'mef_4-1_plate02.yaml', 'mef_4-1_plate03.yaml',
]
plates = pd.DataFrame({
    'data_path': [base_path/name for name in plate_dirs],
    'yaml_path': [base_path/name for name in plate_yamls],
})

output_path = rd.rootdir/'output'/'lenti-exp89'
cache_path = output_path/'data.gzip'

# Plot every plate layout and collect the union of metadata keys they define.
metadata_keys = set()
for yaml_path in plates['yaml_path'].unique():
    print(yaml_path)
    rd.plot.plot_well_metadata(yaml_path)
    metadata_keys.update(rd.flow.load_well_metadata(yaml_path).keys())
display(metadata_keys)

In [None]:
# Load data: read the parquet cache when it exists, otherwise load the raw
# exports, filter them, and write the cache for the next run.
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']
    data = rd.flow.load_groups_with_metadata(plates, columns=channel_list)

    # Keep only events that are strictly positive in every channel
    # (needed for the log scales and geometric means used below).
    positive_in_all_channels = (data[channel_list] > 0).all(axis=1)
    data = data[positive_in_all_channels]

    data.to_parquet(rd.outfile(cache_path))

# Per-construct lookup tables (construct -> color / marker) for plotting.
metadata = base.get_metadata(
    rd.datadir/'projects'/'miR-iFFL'/'plasmids'/'construct-metadata.xlsx'
)
metadata['TS'] = metadata['ts_kind']
metadata_dict = metadata.set_index('construct').to_dict('dict')
construct_palette = metadata_dict['color']
construct_markers = metadata_dict['markers']
display(data)

In [None]:
# Gate per cell type: the 99.98th percentile of the untransfected population
# in each gated channel.
untransfected = data[data['construct']=='untransfected']
gates = pd.DataFrame({
    channel: untransfected.groupby(['cell'])[channel].apply(lambda x: x.quantile(0.9998))
    for channel in ['mGL-A', 'mRuby2-A']
}).reset_index()

# Geometric mean of mRuby2 in untransfected cells, drawn later as a reference line.
untransfected_mR2_gmean = scipy.stats.gmean(untransfected['mRuby2-A'])

# Indicate which channels are relevant for each experiment
#gates.sort_values(['exp'], inplace=True)
#gates['marker'] = ['tagBFP-A']*6 + ['mGL-A']*3
#gates['output'] = ['mCherry-A']*6 + ['mRuby2-A']*3

display(gates)

Gate data based on the transfection marker

n.b. we have to reuse the 293T gate for the MEFs because we didn't have untransfected MEFs :(

In [None]:
# mGL-A distributions per construct (rows) and cell type (columns), with the
# gate taken from the first row of `gates` drawn as a dotted line in every panel.
mGL_gate = gates.iloc[0]['mGL-A']
g = sns.FacetGrid(data=data, row='construct', col='cell', hue='dox')
g.map(sns.kdeplot, 'mGL-A', log_scale=True, common_norm=False)
for ax in g.axes.flat:
    ax.axvline(mGL_gate, ls=':', color='k')

In [None]:
# Keep events above the mGL-A gate (first row of `gates`) and drop the
# untransfected control wells.
above_mGL_gate = data['mGL-A'] > gates.iloc[0]['mGL-A']
not_untransfected = data.construct != 'untransfected'
gated = data[above_mGL_gate & not_untransfected].copy()
display(gated)

In [None]:
# mRuby2-A distributions of the gated cells, one panel per construct x cell type.
g = sns.FacetGrid(data=gated, row='construct', col='cell', hue='dox')
g.map(sns.kdeplot, 'mRuby2-A', log_scale=True, common_norm=False)

### Binning
For each cell type, dox condition, and construct, split the cells into 20 quantile bins of mGL-A (`n_quantiles = 20`) and assign each cell to its bin.
Then group by condition and bin, and compute the geometric mean (gmean) of each bin.

In [None]:
n_quantiles = 20
# Each bin is labelled by its lower quantile edge: 0, 1/n, ..., (n-1)/n.
bin_labels = np.linspace(0, 1, n_quantiles, endpoint=False)

# Quantile-bin mGL-A separately within every (cell, dox, construct) group.
binned = gated.groupby(['cell', 'dox', 'construct']).apply(
    lambda group: pd.qcut(group['mGL-A'], n_quantiles, labels=bin_labels),
    include_groups=False,
)
# After reset_index the three group keys become columns and the remaining,
# unnamed index level is auto-named 'level_3'; re-indexing on it lets the
# assignment below align with the rows of `gated`.
transfection_bin = binned.reset_index().set_index('level_3')['mGL-A']
gated['transfection_bin'] = transfection_bin

In [None]:
def compute_bin_gmeans(df):
    """Return the geometric means of the 'mGL-A' and 'mRuby2-A' columns of `df`.

    The result is a Series indexed by those two column names; any other
    columns of `df` are ignored.
    """
    channels = ['mGL-A', 'mRuby2-A']
    return df[channels].apply(scipy.stats.gmean)
# One row per (cell, dox, construct, passage, bin) holding the bin's gmeans.
bin_keys = ['cell', 'dox', 'construct', 'lenti_293T_passage', 'transfection_bin']
per_bin = gated.groupby(bin_keys, observed=True).apply(compute_bin_gmeans, include_groups=False)
bin_gmeans = per_bin.rename(
    columns={"mGL-A": "mGL-bin-gmean", "mRuby2-A": "mRuby2-bin-gmean"}
).reset_index()

# Attach each bin's gmeans back onto its cells; validate= makes the merge fail
# if a key combination matches more than one summary row.
gated_with_bins = pd.merge(gated, bin_gmeans, on=bin_keys, validate="many_to_one")

In [None]:
# Inspect the bin labels carried by the summary table.
bin_gmeans['transfection_bin']

### Example of the aspect ratio and area under the curve
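1. Choose the quantile range to analyze. Here, we use the bins between the 5% and 95% quantiles.
2. Compute the log fold change of mRuby2 across this range and divide it by the log fold change of mGL to get the aspect ratio.
3. Log-normalize both axes to [0, 1] over this range and integrate to get the area under the curve (AOC).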

In [None]:
# np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever this NumPy provides.
trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz

# Positional slice of bins that enter the metrics (drops the first and last of
# the 20 bins). Same slice as used in compute_aspect_ratio_aoc.
kept_bins = slice(1, 19)

for construct, color in zip(['RC130', 'RC131'], ['gray', 'teal']):
    palette={0: 'k', 1: color}
    example_df = bin_gmeans[(bin_gmeans.cell == '293T') & (bin_gmeans.construct == construct) & (bin_gmeans.lenti_293T_passage == 10) & (bin_gmeans.dox == 1000)].copy()

    # Mark exactly the rows used for the metrics. Deriving this from the same
    # slice (rather than comparing float bin labels against 0.05 / 0.95) keeps
    # the highlighted points and the computed numbers in agreement.
    in_range = np.zeros(len(example_df), dtype=bool)
    in_range[kept_bins] = True
    example_df['in_range'] = in_range
    ax = sns.scatterplot(data=example_df, x='mGL-bin-gmean', y='mRuby2-bin-gmean', hue='in_range', palette=palette, legend=None)

    mGL_vals = example_df.loc[example_df['in_range'], 'mGL-bin-gmean'].values
    mRuby2_vals = example_df.loc[example_df['in_range'], 'mRuby2-bin-gmean'].values
    # Log-normalize every bin so that the kept range maps onto [0, 1] on both axes.
    example_df['lognorm-mGL'] = (np.log(example_df['mGL-bin-gmean']) - np.log(np.min(mGL_vals))) / (np.log(np.max(mGL_vals)) - np.log(np.min(mGL_vals)))
    example_df['lognorm-mRuby2'] = (np.log(example_df['mRuby2-bin-gmean']) - np.log(np.min(mRuby2_vals))) / (np.log(np.max(mRuby2_vals)) - np.log(np.min(mRuby2_vals)))
    # Shade the bounding box of the kept bins; its log-height / log-width is the aspect ratio.
    ax.add_patch(matplotlib.patches.Rectangle(
        (np.min(mGL_vals), np.min(mRuby2_vals)),
        np.max(mGL_vals) - np.min(mGL_vals),
        np.max(mRuby2_vals) - np.min(mRuby2_vals),
        facecolor='0.7',
        edgecolor=None,
        zorder=-10
    ))
    aspect_ratio =  (
        (np.log(np.max(mRuby2_vals)) - np.log(np.min(mRuby2_vals))) /
        (np.log(np.max(mGL_vals)) - np.log(np.min(mGL_vals)))
    )
    ax.text(0.02, 0.93, f'aspect ratio = {aspect_ratio:0.2f}', transform=ax.transAxes)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlim(3e2, 6e4)
    ax.set_ylim(2e1, 3e3)
    ax.set_aspect('equal')
    plt.savefig(rd.outfile(rd.rootdir/'output'/'lenti-exp89'/f'aspect_explanation_{construct}.svg'), bbox_inches='tight')
    plt.show()

    # calculate the normalized plots
    ax = sns.scatterplot(data=example_df, x='lognorm-mGL', y='lognorm-mRuby2', hue='in_range', palette=palette, legend=None)
    sns.lineplot(data=example_df, x='lognorm-mGL', y='lognorm-mRuby2', color=color, ax=ax, legend=None)
    ax.fill_between(example_df['lognorm-mGL'], example_df['lognorm-mRuby2'], color=color, alpha=0.2)
    # Integrate over the kept bins only, so the printed value matches both the
    # area visible inside the 0-1 axes and what compute_aspect_ratio_aoc reports.
    kept_df = example_df[example_df['in_range']]
    aoc = trapezoid(kept_df['lognorm-mRuby2'].values, kept_df['lognorm-mGL'].values)
    ax.text(0.02, 0.93, f'AOC = {aoc:0.2f}', transform=ax.transAxes)
    ax.set_xlim(0,1)
    ax.set_ylim(0,1)
    ax.set_aspect('equal')
    plt.savefig(rd.outfile(rd.rootdir/'output'/'lenti-exp89'/f'aoc_explanation_{construct}.svg'), bbox_inches='tight')
    plt.show()
    

In [None]:
# NOTE(review): scratch expression; its value is only displayed, never assigned.
# Presumably a quick check of how np.ma.masked combines with an ndarray (the
# masking trick used in adjust_heatmap). Consider deleting this cell.
np.ones((2,2)) * np.ma.masked

In [None]:
def compute_aspect_ratio_aoc(df):
    """Calculates both the "aspect ratio" and (normed) area under curve given summary gmean data

    The aspect ratio is the log fold change of mRuby2 divided by the log fold
    change of mGL across the kept bins.

    The area under curve is computed after log-normalizing both axes to [0, 1]
    over the kept bins, and tells us something about the concavity / convexity.

    We drop the lower and upper 5% bins to make this more robust: rows are
    ordered by 'transfection_bin' and positions 1..18 are kept, which assumes
    20 bins per group.

    Parameters
    ----------
    df : DataFrame with columns 'transfection_bin', 'mGL-bin-gmean' and
        'mRuby2-bin-gmean', one row per bin.

    Returns
    -------
    One-row DataFrame with columns 'aspect' and 'aoc'.
    """
    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever this NumPy provides.
    trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz

    # The positional slice and the integration below both rely on bins being in
    # ascending order, so sort explicitly instead of trusting the caller's row order.
    trimmed = df.sort_values('transfection_bin').iloc[1:19]

    mGL_vals = trimmed['mGL-bin-gmean'].values
    mRuby2_vals = trimmed['mRuby2-bin-gmean'].values

    # Log-range covered by each channel over the kept bins.
    log_mGL_min = np.log(np.min(mGL_vals))
    log_mGL_span = np.log(np.max(mGL_vals)) - log_mGL_min
    log_mRuby2_min = np.log(np.min(mRuby2_vals))
    log_mRuby2_span = np.log(np.max(mRuby2_vals)) - log_mRuby2_min

    lognorm_mGL = (np.log(mGL_vals) - log_mGL_min) / log_mGL_span
    lognorm_mRuby2 = (np.log(mRuby2_vals) - log_mRuby2_min) / log_mRuby2_span

    aspect_ratio = log_mRuby2_span / log_mGL_span

    # Integrate normalized mRuby2 (y) over normalized mGL (x).
    aoc = trapezoid(lognorm_mRuby2, lognorm_mGL)
    return pd.DataFrame({"aspect": [aspect_ratio], "aoc": [aoc]})
# One (aspect, aoc) pair per cell / construct / passage at dox == 1000.
dox_1000 = bin_gmeans[bin_gmeans.dox == 1000]
aspect_aoc = (
    dox_1000
    .groupby(['cell', 'dox', 'construct', 'lenti_293T_passage'])
    .apply(compute_aspect_ratio_aoc, include_groups=False)
    .reset_index()
    # 'level_4' is the leftover index of the one-row frames returned per group.
    .drop(columns=["level_4"])
)
display(aspect_aoc)

In [None]:
def adjust_heatmap(ax, xspaces, yspaces, spacewidth=0.25):
    """
    Adjust a generated heatmap, to add blank row/column spaces at specific locations.

    First, to add spaces, we adjust the coordinate / plotted array to insert blank empty rows/columns.
    Then, we mark those blank rectangles as masked, so they aren't drawn.
    Finally, we update the axis limits and tick locations to match

    Parameters
    ----------
    ax : axes holding the heatmap. The first entry of ``ax.get_children()`` is
        taken to be the mesh to modify (assumes it is the heatmap's QuadMesh;
        TODO confirm for other plot types).
    xspaces : iterable of column positions, in the numbering of the original
        heatmap, before which a blank column is inserted. Each position is
        offset by the number of gaps already inserted, so the values are
        presumably expected in ascending order.
    yspaces : same as ``xspaces`` but for rows.
    spacewidth : width (for columns) or height (for rows) of each blank gap,
        in the mesh's coordinate units.

    Returns
    -------
    None. The mesh coordinates and array, the axis limits and the tick
    positions of ``ax`` are modified in place.

    Notes
    -----
    Reads and writes the private ``_coordinates`` attribute of the mesh, so
    this depends on matplotlib internals.
    """
    quadmesh = ax.get_children()[0]
    coords = quadmesh._coordinates.astype('float64')
    array = quadmesh.get_array()
    # Column gaps: duplicate the vertex column at the gap position and shift
    # everything from there on by `spacewidth` in the first coordinate
    # component; insert a matching placeholder column of zeros into the values.
    for idx, loc in enumerate(xspaces):
        coords = np.concatenate((coords[:,:(loc + idx),:], coords[:,[loc+idx],:], coords[:,(loc+idx):,:] + np.array([spacewidth,0.0]).reshape((1,1,2))), axis=1)
        array = np.concatenate((array[:,:(loc + idx)], np.zeros(array[:,[loc+idx]].shape), array[:,(loc+idx):]), axis=1)
    # Row gaps: same operation along axis 0, shifting the second coordinate component.
    for idx, loc in enumerate(yspaces):
        coords = np.concatenate((coords[:(loc + idx),:,:], coords[[loc+idx],:,:], coords[(loc+idx):,:,:] + np.array([0.0, spacewidth]).reshape((1,1,2))), axis=0)
        array = np.concatenate((array[:(loc + idx),:], np.zeros(array[[loc+idx],:].shape), array[(loc+idx):,:]), axis=0)

    # Mask the placeholder columns/rows so they are not drawn.
    # NOTE(review): relies on `array` being a masked array at this point - confirm.
    for idx, loc in enumerate(xspaces):
        array[:,loc+idx] = np.ma.masked
    for idx, loc in enumerate(yspaces):
        array[loc+idx,:] = np.ma.masked
    
    # Write the modified geometry and values back onto the mesh.
    quadmesh._coordinates = coords
    quadmesh.set_array(array)

    # Axis limits span the new coordinates, extended by 0.5 at the upper end.
    ax.set_xlim(np.min(coords[:,:,0]), np.max(coords[:,:,0])+0.5)
    ax.set_ylim(np.min(coords[:,:,1]), np.max(coords[:,:,1])+0.5)
    # Ticks: lower edge of every unmasked column (taken from the first row of
    # the mask) plus 0.5, reusing the existing tick labels in order.
    xtick_locations = coords[0,:-1,0][~array.mask[0,:]] + 0.5
    ax.set_xticks(xtick_locations, labels=ax.get_xticklabels())
    # Same for rows, using the first column of the mask.
    ytick_locations = coords[:-1,0,1][~array.mask[:,0]] + 0.5
    ax.set_yticks(ytick_locations, labels=ax.get_yticklabels())

plot_df = aspect_aoc.copy()
# One heatmap row per cell type / passage combination.
plot_df['condition'] = plot_df.cell + '_' + plot_df.lenti_293T_passage.map(str)

# (metric column, colormap, title, output file) for each heatmap.
heatmap_specs = [
    ('aspect', 'viridis_r', 'Aspect ratio (lower is better)', 'aspect_ratio.svg'),
    ('aoc', 'RdYlGn', 'AOC (greater than 0.5 is better)', 'aoc.svg'),
]
for metric, cmap, title, filename in heatmap_specs:
    pivoted = plot_df.pivot(index="condition", columns="construct", values=metric)
    ax = sns.heatmap(pivoted, cmap=cmap, vmin=0, vmax=1, square=True)
    # Insert visual gaps between groups of constructs / conditions.
    adjust_heatmap(ax, [6,8,10], [3,4])
    ax.set_title(title)
    rd.plot.generate_xticklabels(metadata, 'construct', ['ts_kind', 'design'], linespacing=1.4)
    ax.tick_params(axis='x', labelrotation=0)
    plt.savefig(rd.outfile(rd.rootdir/'output'/'lenti-exp89'/filename), bbox_inches='tight')
    plt.show()

In [None]:
# Bin gmeans at dox == 1000, one panel per cell type (rows) x construct (columns).
dox_1000_gmeans = bin_gmeans[bin_gmeans.dox == 1000]

# Linear axes.
g = sns.FacetGrid(data=dox_1000_gmeans, row='cell', col='construct', hue='lenti_293T_passage', margin_titles=True)
g.map(sns.scatterplot, 'mGL-bin-gmean', 'mRuby2-bin-gmean')
plt.savefig(rd.outfile(rd.rootdir/'output'/'lenti-exp89'/'raw_gmeans_linear.svg'), bbox_inches='tight')
plt.show()

# Log axes, with the untransfected mRuby2 level as a reference line and each
# panel tinted with its construct's color.
g = sns.FacetGrid(data=dox_1000_gmeans, row='cell', col='construct', hue='lenti_293T_passage', margin_titles=True)
g.map(sns.scatterplot, 'mGL-bin-gmean', 'mRuby2-bin-gmean')
metadata_indexed = metadata.set_index('construct')
# axes_dict is keyed by (row value, col value), so every panel is colored by the
# construct it actually shows. This does not depend on the number of rows or on
# the order in which the grid lays out the constructs.
for (_row_value, construct), ax in g.axes_dict.items():
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.axhline(untransfected_mR2_gmean, color='k', ls=':', zorder=5)
    ax.set_facecolor((metadata_indexed.loc[construct, 'color'], 0.2))
plt.savefig(rd.outfile(rd.rootdir/'output'/'lenti-exp89'/'raw_gmeans_log.svg'), bbox_inches='tight')
plt.show()

In [None]:
# Per-bin gmean of mRuby2-A versus the bin's mGL gmean, from the single-cell
# table, with columns fixed to RC124..RC135.
construct_order = [f'RC{i}' for i in range(124,136)]
g = sns.FacetGrid(
    data=gated_with_bins[gated_with_bins.dox == 1000],
    row='cell',
    col='construct',
    hue='lenti_293T_passage',
    margin_titles=True,
    col_order=construct_order,
)
g.map(sns.lineplot, 'mGL-bin-gmean', 'mRuby2-A', estimator=scipy.stats.gmean)
for ax in g.axes.flat:
    ax.set_xscale('log')
    ax.set_yscale('log')
    # Reference: gmean of mRuby2 in untransfected cells.
    ax.axhline(untransfected_mR2_gmean, color='k', ls=':', zorder=5)

In [None]:
# Metadata rows for constructs RC124 through RC135 (inclusive, string comparison).
metadata[metadata.construct.between('RC124', 'RC135')]