In [None]:
import base
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rushd as rd
import scipy as sp
import seaborn as sns

# enables concurrent editing of base.py: re-import the module so that edits
# saved since the kernel first imported it take effect without a restart
from importlib import reload
reload(base)

# Global plot appearance: seaborn 'ticks' style and 'talk' context, with the
# font overridden to sans-serif / Helvetica Neue.
sns.set_style('ticks')
sns.set_context('talk',rc={'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']})

## Setup

- Load data
- Add metadata
- Draw gates
- Gate transfected cells

Result from this section: DataFrame `df` representing transfected cells.

Load all transfection data collected as of 2024.03.27

In [None]:
# Root folder for the Attune data, located under the rushd data directory.
base_path = rd.datadir/'instruments'/'data'/'attune'/'kasey'

# One path per experiment run; folder names are '<date>_<experiment id>'.
# exp45_1_path is the run folder itself (no 'export' suffix): when `plates` is
# built, its data is read from '293T/export' beneath it and its well-layout
# YAML from the run folder directly.
exp45_1_path = base_path/'2022.07.15_exp45'
exp45_2_path = base_path/'2023.01.14_exp45.2'/'export'
exp45_3_path = base_path/'2023.02.10_exp45.3'/'export'

# exp55 runs: each path points at the run's 'export' folder.
exp55_1_path = base_path/'2023.01.14_exp55'/'export'
exp55_2_path = base_path/'2023.02.10_exp55.2'/'export'
exp55_3_path = base_path/'2024.03.18_exp55.3'/'export'

# exp67 runs: each 'export' folder is further split into 'plate1'/'plate2'
# subfolders when `plates` is built.
exp67_1_path = base_path/'2023.06.15_exp67'/'export'
exp67_2_path = base_path/'2023.06.17_exp67.2'/'export'
exp67_3_path = base_path/'2024.03.18_exp67.3'/'export'

# Plate manifest, written as one record per plate so that a plate's data folder,
# well-layout YAML, biological replicate and experiment label sit side by side.
# NOTE(review): several plates reference a layout file from a different run
# (e.g. exp45.3 uses the exp45.2 YAML, all exp55 runs use the exp55 YAML) —
# presumably the layouts are shared between those runs; confirm.
plates = pd.DataFrame(
    [
        # exp45: one plate per biological replicate
        (exp45_1_path/'293T'/'export', exp45_1_path/'exp45_wells2.yaml', 1, 'exp45'),
        (exp45_2_path, exp45_2_path/'exp45.2_wells2.yaml', 2, 'exp45.2'),
        (exp45_3_path, exp45_2_path/'exp45.2_wells2.yaml', 3, 'exp45.3'),
        # exp55: one plate per biological replicate, common layout file
        (exp55_1_path, exp55_1_path/'exp55_wells2.yaml', 1, 'exp55'),
        (exp55_2_path, exp55_1_path/'exp55_wells2.yaml', 2, 'exp55.2'),
        (exp55_3_path, exp55_1_path/'exp55_wells2.yaml', 3, 'exp55.3'),
        # exp67: two plates per biological replicate
        (exp67_1_path/'plate1', exp67_1_path/'exp67_plate1_wells2.yaml', 1, 'exp67'),
        (exp67_1_path/'plate2', exp67_1_path/'exp67_plate2_wells2.yaml', 1, 'exp67'),
        (exp67_2_path/'plate1', exp67_1_path/'exp67_plate1_wells2.yaml', 2, 'exp67.2'),
        (exp67_2_path/'plate2', exp67_1_path/'exp67_plate2_wells2.yaml', 2, 'exp67.2'),
        (exp67_3_path/'plate1', exp67_3_path/'exp67.3_plate1_wells.yaml', 3, 'exp67.3'),
        (exp67_3_path/'plate2', exp67_3_path/'exp67.3_plate2_wells.yaml', 3, 'exp67.3'),
    ],
    columns=['data_path', 'yaml_path', 'biorep', 'exp'],
)

# Folder for everything this notebook writes. The cached event table lives
# inside it; it is written and read as parquet despite the '.gzip' suffix.
output_path = rd.rootdir/'output'/'controller-tuning-transfections'
cache_path = output_path/'data.gzip'

# Plot every distinct well layout and collect the union of the metadata field
# names found across all of them.
metadata_keys = set()
for yaml_file in plates['yaml_path'].unique():
    rd.plot.plot_well_metadata(yaml_file)
    well_metadata = rd.flow.load_well_metadata(yaml_file)
    metadata_keys.update(well_metadata.keys())
display(metadata_keys)

In [None]:
# Load data
# Read the cached event table when it exists; otherwise load every plate listed
# in `plates`, clean the events and write the cache. The cache is never
# invalidated here: delete the file at `cache_path` after changing `plates` or
# the cleaning steps below, or stale data will be loaded.
if cache_path.is_file():
    data = pd.read_parquet(cache_path)
else:
    data = rd.flow.load_groups_with_metadata(plates)

    # Remove unneeded columns and negative channel values
    channel_list = ['mCherry-A','mRuby2-A','FSC-A','SSC-A','tagBFP-A','mGL-A']

    # Keep only plate-level columns, the channels of interest and the well
    # metadata fields; the original column order is preserved.
    keep_columns = set(plates.columns) | set(channel_list) | set(metadata_keys)
    data = data.loc[:, data.columns.isin(keep_columns)]

    # Keep events that are strictly positive in every channel. One combined mask
    # avoids re-filtering (and copying) the whole frame once per channel.
    data = data[(data[channel_list] > 0).all(axis=1)]

    data = data.dropna()
    data.to_parquet(rd.outfile(cache_path))
display(data)

In [None]:
# Add metadata for constructs
# Left join on `construct`: every event row is kept, and events whose construct
# is not listed in the metadata table get NaN in the added columns.
# `data` is overwritten with the merged frame, so re-run the loading cell before
# re-running this one; otherwise the metadata columns are merged in a second time.
metadata = base.get_metadata('construct-metadata.csv')
data = pd.merge(data, metadata, how='left', on='construct')
display(data)

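Draw gates from the untransfected (`UT`) population — the 99.9th percentile of each fluorescence channel, computed separately for each experiment — then gate transfected cells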

In [None]:
# Gate threshold for each experiment and channel: the 99.9th percentile of the
# untransfected ('UT') events. The UT subset is selected once and every channel
# is aggregated in a single groupby.
gate_channels = ['tagBFP-A', 'mGL-A', 'mCherry-A', 'mRuby2-A']
untransfected = data[data['construct']=='UT']
gates = (
    untransfected.groupby('exp')[gate_channels]
                 .agg(lambda events: events.quantile(0.999))
                 .reset_index()
)

# Add missing gates
# exp67.2 takes its mGL-A and mRuby2-A thresholds from exp67.
for channel in ['mGL-A', 'mRuby2-A']:
    gates.loc[gates['exp']=='exp67.2', channel] = gates.loc[gates['exp']=='exp67', channel].values[0]

# Indicate which channels are relevant for each experiment
# Channels are looked up by experiment series (the part of `exp` before any '.')
# instead of being assigned by row position, so the labels stay correct when an
# experiment is added or removed. An unrecognised series raises immediately
# rather than being mislabelled.
relevant_channels = {
    'exp45': ('tagBFP-A', 'mCherry-A'),
    'exp55': ('tagBFP-A', 'mCherry-A'),
    'exp67': ('mGL-A', 'mRuby2-A'),
}
gates = gates.sort_values(['exp'])
exp_series = gates['exp'].astype(str).map(lambda exp: exp.split('.')[0])
unknown_series = sorted(set(exp_series) - set(relevant_channels))
if unknown_series:
    raise ValueError(f'No marker/output channels defined for experiment series: {unknown_series}')
gates['marker'] = exp_series.map(lambda series: relevant_channels[series][0])
gates['output'] = exp_series.map(lambda series: relevant_channels[series][1])

display(gates)

Gate data per experiment based on transfection marker expression

In [None]:
# Gate each experiment separately: every experiment's events are handed to
# base.gate_data together with the full `gates` table, and the index of the
# combined result is then replaced by a fresh 0..n-1 range.
data = (
    data.groupby('exp')[data.columns]
        .apply(lambda exp_events: base.gate_data(exp_events, gates))
        .reset_index(drop=True)
)

# Transfected cells: the rows where `expressing` is True
# (presumably a boolean column added by base.gate_data — confirm in base.py).
df = data.loc[data['expressing']]
display(df)

## Summarize

Create summary plots of common metrics across all the data.

Note that these plots are mainly FacetGrids, not artisanal plots suitable for figures/presentations.