### Setup

In [None]:
from dataclasses import dataclass
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
import pickle
import rushd as rd
import scipy as sp
import seaborn as sns
from typing import Optional

# Global seaborn styling shared by every figure in this notebook
sns.set_style('ticks')
sns.set_context(
    'talk',
    rc={'font.family': 'sans-serif', 'font.sans-serif': ['Helvetica Neue']},
)

# Root data directory, read from a small text file one level up.
# Strip surrounding whitespace so that a trailing newline in the file
# does not become part of the path.
datadir = Path(Path('../datadir.txt').read_text().strip())

In [None]:
# Load data
file_path = datadir/'instruments'/'data'/'attune'/'Emma'/'2022.10.11_EXP11'/'Data'
yaml_path = file_path/'wells.yaml'
data = rd.flow.load_csv_with_metadata(file_path, yaml_path)
x = 'mGL-A'
y = 'mRuby2-A'

# Keep only events that are strictly positive in both channels.
# Take an explicit copy: get_mapping() later adds columns to `d` in place,
# and writing into a slice of `data` triggers pandas' SettingWithCopyWarning.
d = data.loc[(data[y]>0) & (data[x]>0)].copy()
display(d)

# Define path to output folder
# NOTE(review): the data folder is dated 2022.10.11 but the output folder is
#   dated 2022.10.15 -- confirm this is intentional.
output_path = rd.rootdir/'output'/'2022.10.15_EXP11'

# indicate which channels should have linear-scale axes when plotting
# other channels will be plotted with log-scale by default
lin = {'FSC-A','FSC-H','FSC-W','SSC-A','SSC-H','SSC-W'}

### Attribute Mapping (from KL exp42)

In [None]:
# Condition -> colour mapping helpers.
# Conditions are tied to a colour (base colour plus shade) and to a readable
# name (label), so that plots get consistent colours and tidy legends
# without per-figure bookkeeping.
# This is a pragmatic first implementation, not necessarily the optimal one;
# a more robust colour-mapping scheme may replace it later.
@dataclass
class Attr:
    """Display attributes attached to one condition value.

    label is the text used when the long condition name is assembled,
    color is a generic colour name that get_mapping() looks up in its
    palette, and shade is the index into that colour's shade ramp
    (0 is the original colour, higher indices are lighter).
    """
    label: str
    color: str = ''
    shade: int = 0

def get_mapping(
    df: pd.DataFrame,
    col: list, # column names
    attr_color: dict,
    attr_shade: dict,
    n_shades: int,
    delim: Optional[tuple] = ('',' ')
    ):
    """Attach condition labels to ``df`` and build a matching colour palette.

    Every (key of ``attr_color``, key of ``attr_shade``) pair defines one
    condition. Its shorthand name is ``key_c + delim[0] + key_s`` and its
    long label is ``label_c + delim[1] + label_s``. Its colour is entry
    ``attr_s.shade`` of an ``n_shades``-step seaborn light palette built
    from ``attr_c.color``.

    Two columns are added to ``df`` IN PLACE:
      * 'Condition'      -- ``col[0]`` and ``col[1]`` (as strings) joined by
                            ``delim[0]``
      * 'ConditionLabel' -- 'Condition' translated through the labeler;
                            values without an entry are left unchanged

    Args:
        df: dataframe to annotate (mutated).
        col: names of the two columns holding the colour-level and the
            shade-level value, in that order.
        attr_color: maps values of ``col[0]`` to objects with ``label`` and
            ``color`` attributes.
        attr_shade: maps values of ``col[1]`` to objects with ``label`` and
            ``shade`` attributes.
        n_shades: number of steps in each colour's shade ramp.
        delim: (shorthand delimiter, label delimiter). ``None`` selects the
            default ``('', ' ')``.

    Returns:
        (labeler, palette): ``labeler`` maps 'Condition' values to
        'ConditionLabel' values; ``palette`` maps 'ConditionLabel' values to
        colours and can be passed to seaborn together with
        ``hue='ConditionLabel'``.

    Raises:
        KeyError: if an ``attr_color`` entry names a colour that is not in
            the built-in palette below (this includes the empty default).
    """
    # The annotation advertises Optional, so treat None as "use the default"
    # instead of failing on delim[0].
    if delim is None:
        delim = ('', ' ')
    cond_delim, label_delim = delim[0], delim[1]

    labeler = {}
    palette = {}

    # Custom color palette (maps generic color names to specific hues)
    colors = {
        'red': 'crimson', 'orange': 'darkorange', 'yellow': '#ccb804', 'green': 'olive', 'teal': 'teal',
        'blue': '#1650a1', 'purple': 'purple', 'pink': 'hotpink', 'brown': 'black', 'gray': 'grey'
    }
    # nice color groupings with this palette:
    #   teal   red   orange  (pink)
    #   purple blue  green   (yellow)

    for val_c, attr_c in attr_color.items():
        # Default way to map shades using the built-in seaborn light palette
        #   Maps from original color (index 0) to white (index -1)
        # The ramp depends only on the colour, so build it once per colour
        # rather than once per (colour, shade) pair.
        color_map = sns.light_palette(colors[attr_c.color], n_shades, reverse=True)

        for val_s, attr_s in attr_shade.items():
            condition = val_c + cond_delim + val_s
            condition_label = attr_c.label + label_delim + attr_s.label

            labeler[condition] = condition_label
            palette[condition_label] = color_map[attr_s.shade]

    # Add columns to dataframe with condition labels
    df['Condition'] = df[col[0]].astype(str) + cond_delim + df[col[1]].astype(str)
    # Exact-match lookup; conditions that have no label keep their shorthand
    df['ConditionLabel'] = df['Condition'].map(lambda c: labeler.get(c, c))

    return labeler, palette

In [None]:

# Constructs (values of the 'Construct' column) -> full legend name and
#   base colour. The CMV and PGK variants of a miRNA share one colour.
construct_attr = {
    'CMV-miR-FF3': Attr(label='CMV-mRuby2(miR-FF3)', color='yellow'),
    'CMV-miR-FF4': Attr(label='CMV-mRuby2(miR-FF4)', color='purple'),
    'CMV-miR-FF5': Attr(label='CMV-mRuby2(miR-FF5)', color='blue'),
    'CMV-miR-FF6': Attr(label='CMV-mRuby2(miR-FF6)', color='green'),
    'PGK-miR-FF3': Attr(label='PGK-mRuby2(miR-FF3)', color='yellow'),
    'PGK-miR-FF4': Attr(label='PGK-mRuby2(miR-FF4)', color='purple'),
    'PGK-miR-FF5': Attr(label='PGK-mRuby2(miR-FF5)', color='blue'),
    'PGK-miR-FF6': Attr(label='PGK-mRuby2(miR-FF6)', color='green'),
}

# Reporter variants (values of the 'TS' column) -> label suffix appended to
#   the construct name, and the shade index within the construct's colour
reporter_attr = {
    '0': Attr(label='PGK-mGL', shade=0),
    '1': Attr(label='PGK-mGL-TSx1', shade=1),
    '2': Attr(label='PGK-mGL-TSx2', shade=2),
    '4': Attr(label='PGK-mGL-TSx4', shade=3),
}

# Annotate the events with 'Condition'/'ConditionLabel' and keep the
#   lookup table and palette for the plotting cells
n_shades = 5
labeler, condition_palette = get_mapping(
    d,
    ['Construct','TS'],
    construct_attr,
    reporter_attr,
    n_shades,
    delim=('_',' + '),
)
display(d)
#display(labeler)
'''
For a dox titration, one way to use this shade parameter would be the following:
n_shades = (number of dox concentrations) + 1
dox_attr = {
    '1000': Attr(' + 1000 ng/uL dox', shade=0),
    '100': Attr(' + 100 ng/uL dox', shade=1),
    '0': Attr(' - dox', shade=2)
}
'''

### Histograms

In [None]:
# Histogram plots

def plot_hists(d,param,sample_list,name,xlim=None, fig_title='',gate=None):
    """Draw overlaid KDE histograms of one channel and save them as an SVG.

    One filled KDE curve is drawn per entry of ``sample_list`` (matched
    against the 'ConditionLabel' column), coloured with the module-level
    ``condition_palette``. The x axis is log-scaled unless ``param`` is
    listed in the module-level ``lin`` set. The figure is written to
    ``<output_path>/hist-<param><name>.svg``.

    Args:
        d: dataframe with a 'ConditionLabel' column and a ``param`` column.
        param: name of the channel (column) to plot.
        sample_list: condition labels to plot, in legend order.
        name: suffix appended to the output file name.
        xlim: optional (min, max) x-axis limits.
        fig_title: axes title.
        gate: optional x position of a dashed vertical reference line.
    """
    plt.figure(figsize=(6,5))

    # kde plots
    # Use the `param` argument (not the notebook-level `x`, which is rebound
    # to a list further down) to decide between log and linear scaling.
    g = sns.kdeplot(data=d, x=param, hue='ConditionLabel', hue_order=sample_list,
                    palette=condition_palette, fill=True, log_scale=(param not in lin),
                    clip=(0.1,1e7), alpha = 0.1, legend=True)
    sns.move_legend(g, title='Condition', frameon=True,
                        loc='upper center',bbox_to_anchor=(0.5,-0.2))

    g.set_title(fig_title)
    if gate is not None: g.axvline(gate,ls='--',c='gray',alpha=0.5)

    # Format axes
    sns.despine(ax=g)
    if xlim is not None: g.set_xlim(xlim)
    g.minorticks_off()

    # Save figure as .svg (file name is built from the plotted channel)
    plt.savefig(str(output_path)+'/hist-'+param+name+'.svg',bbox_inches='tight')

# From EXP10 gating:
#   gate_mGL    -- passed as `gate` to the plotting functions below, which
#                  draw it as a dashed reference line
#   gate_mRuby2 -- cutoff applied to the 'mRuby2-A' column before plotting
gate_mGL = 5e2
gate_mRuby2 = 2e2

In [None]:
# Keep only events above the mRuby2 gate
d = d[d['mRuby2-A'] > gate_mRuby2]

# Figure name -> shorthand conditions overlaid in that figure (CMV constructs)
plot_list = {
    'CMV-FF3': ['CMV-miR-FF3_0','CMV-miR-FF3_1','CMV-miR-FF3_2','CMV-miR-FF3_4'],
    'CMV-FF4': ['CMV-miR-FF4_0','CMV-miR-FF4_1','CMV-miR-FF4_2','CMV-miR-FF4_4'],
    'CMV-FF5': ['CMV-miR-FF5_0','CMV-miR-FF5_1','CMV-miR-FF5_2','CMV-miR-FF5_4'],
    'CMV-FF6': ['CMV-miR-FF6_0','CMV-miR-FF6_1','CMV-miR-FF6_2','CMV-miR-FF6_4'],
}

group_keys = ['Condition','Replicate']
for name in plot_list:
    # Translate shorthand conditions into the legend labels used for hue
    sample_list = [labeler[s] for s in plot_list[name]]
    dd = d[d['ConditionLabel'].isin(sample_list)]

    # Downsample every (condition, replicate) group to the size of the
    # smallest one so the curves are built from equal numbers of events
    num_cells = dd.groupby(group_keys)[x].count().min()
    print(num_cells)
    dd = dd.groupby(group_keys).sample(n=num_cells, random_state=1)

    plot_hists(dd, x, sample_list, name, xlim=(1e0,1e6), fig_title=name, gate=gate_mGL)

In [None]:
# Figure name -> shorthand conditions overlaid in that figure (PGK constructs)
plot_list = {
    'PGK-FF3': ['PGK-miR-FF3_0','PGK-miR-FF3_1','PGK-miR-FF3_2','PGK-miR-FF3_4'],
    'PGK-FF4': ['PGK-miR-FF4_0','PGK-miR-FF4_1','PGK-miR-FF4_2','PGK-miR-FF4_4'],
    'PGK-FF5': ['PGK-miR-FF5_0','PGK-miR-FF5_1','PGK-miR-FF5_2','PGK-miR-FF5_4'],
    'PGK-FF6': ['PGK-miR-FF6_0','PGK-miR-FF6_1','PGK-miR-FF6_2','PGK-miR-FF6_4'],
}

group_keys = ['Condition','Replicate']
for name in plot_list:
    # Translate shorthand conditions into the legend labels used for hue
    sample_list = [labeler[s] for s in plot_list[name]]
    dd = d[d['ConditionLabel'].isin(sample_list)]

    # Downsample every (condition, replicate) group to the size of the
    # smallest one so the curves are built from equal numbers of events
    num_cells = dd.groupby(group_keys)[x].count().min()
    print(num_cells)
    dd = dd.groupby(group_keys).sample(n=num_cells, random_state=1)

    plot_hists(dd, x, sample_list, name, xlim=(1e0,1e6), fig_title=name, gate=gate_mGL)

### Fold Change

In [None]:
# Functions related to calculating summary statistics for conditions
def rename(index):
    """Flatten a two-level column key into a single name.

    Returns the first element alone when the second element is the empty
    string, otherwise the two elements joined with an underscore.
    """
    base, suffix = index[0], index[1]
    return base if suffix == '' else base + '_' + suffix

def calc_stats(df,by,x,stat):
    """Compute per-group summary statistics plus group size information.

    Rows in which any column of ``x`` is not strictly positive are dropped
    first. The remaining rows are grouped by ``by`` and every function in
    ``stat`` is applied to every column in ``x``; result columns are named
    '<column>_<statistic>' via rename().

    Two bookkeeping columns are appended:
      * 'Fraction' (only when more than one grouping column is given) --
        the group's count divided by the summed count of all groups that
        share the same values of ``by[:-1]``
      * 'Count' -- number of non-null ``x[0]`` values in the group

    Args:
        df: input dataframe (not modified).
        by: grouping column name, or list of names.
        x: value column name, or list of names.
        stat: aggregation function(s) handed to ``GroupBy.agg``.

    Returns:
        A new dataframe with one row per group.
    """
    # Accept a single column name as well as a list of names
    by = [by] if isinstance(by, str) else list(by)
    x = [x] if isinstance(x, str) else list(x)

    # Filter data to remove log-unfriendly values
    # (one combined mask; no need to copy the whole input first)
    d = df.loc[(df[x] > 0).all(axis=1)]

    # Group and compute stats
    grouped = d.groupby(by=by)
    stats = grouped[x].agg(stat).reset_index()

    # Rename columns as 'col_stat' (only needed when agg produced two levels)
    if isinstance(stats.columns, pd.MultiIndex):
        stats.columns = stats.columns.map(rename)

    # Add columns for count and fraction (of total one grouping level up).
    # `counts` comes from the same groupby as `stats`, so both list the
    # groups in the same order and can be combined by position.
    counts = grouped[x[0]].count()
    if len(by) > 1:
        totals = counts.groupby(level=by[:-1]).transform('sum')
        stats = stats.assign(Fraction=(counts / totals).to_numpy())
    stats = stats.assign(Count=counts.to_numpy())

    return stats

# Calculate statistics on samples
# NOTE: from here on `x` is a list of channel names; it was a single
#   channel name (string) in the cells above.
x = ['mGL-A','mRuby2-A']
stat = [sp.stats.gmean, np.median, np.std]
# One row per (Construct, TS, Replicate) with '<channel>_<stat>' columns
stats = calc_stats(d,['Construct','TS','Replicate'],x,stat)
display(stats)

# Add condition labels for plotting (i.e. 'Condition' and 'ConditionLabel' columns)
# The columns are added to `stats` in place; the returned labeler/palette
#   are not needed again here and are discarded.
get_mapping(stats, ['Construct','TS'], construct_attr, reporter_attr, n_shades, delim=('_',' + '))

# Compute avg & sd across replicates for relevant stats
#  (more useful for biological replicates)
# Result columns are named e.g. 'mGL-A_gmean_mean' and 'mGL-A_gmean_std'.
x = ['mGL-A_gmean','mRuby2-A_gmean']
stat = [np.mean, np.std]
stats2 = calc_stats(stats,['Construct','TS'],x,stat)
display(stats2)

In [None]:
# Function to calculate fold-change between two groups
#  (Uses construct label info from above)
def calc_fc(df, by, var, var_norm, var_val, skip_list=None):
    """Compute per-group fold changes of the mean gmean columns.

    For every group of ``df`` (grouped by ``by``) the row where
    ``var == var_val`` is divided by the row where ``var == var_norm`` for
    the columns 'mGL-A_gmean_mean' and 'mRuby2-A_gmean_mean'. When several
    rows match, the first one is used.

    Args:
        df: dataframe holding ``by``, ``var`` and the two '*_gmean_mean'
            columns.
        by: name of the grouping column.
        var: name of the column that distinguishes the two rows to compare.
        var_norm: value of ``var`` identifying the reference (denominator).
        var_val: value of ``var`` identifying the numerator.
        skip_list: group names to leave out; ``None`` skips nothing.

    Returns:
        DataFrame with columns [by, 'mGL-A_gmean_FC', 'mRuby2-A_gmean_FC'],
        one row per processed group, indexed 0..n-1.

    Raises:
        IndexError: if a processed group has no row for ``var_norm`` or
            ``var_val``.
    """
    # None instead of a mutable [] default, which would be shared by all calls
    skipped = () if skip_list is None else skip_list
    fc_columns = [by, 'mGL-A_gmean_FC', 'mRuby2-A_gmean_FC']

    # Collect plain records and build the frame once at the end, instead of
    # growing a DataFrame with pd.concat on every iteration.
    records = []
    for name, group in df.groupby(by):
        if name in skipped:
            continue
        norm_to = group.loc[group[var] == var_norm]
        stat = group.loc[group[var] == var_val]
        records.append({
            by: name,
            'mGL-A_gmean_FC':
                stat['mGL-A_gmean_mean'].iloc[0] / norm_to['mGL-A_gmean_mean'].iloc[0],
            'mRuby2-A_gmean_FC':
                stat['mRuby2-A_gmean_mean'].iloc[0] / norm_to['mRuby2-A_gmean_mean'].iloc[0],
        })

    return pd.DataFrame(records, columns=fc_columns)

In [None]:
# Fold change of each target-site reporter (TSx1, TSx2, TSx4) relative to
#   the reporter without target sites (TS == 0), per construct
var_norm = 0
skip_list = []
var_list = [1, 2, 4]
for var_val in var_list:
    fc = calc_fc(stats2, 'Construct', 'TS', var_norm, var_val, skip_list)
    print('Fold change no TS -> TSx'+str(var_val))
    display(fc)


In [None]:
## Make grouped bar plot

def plot_grpbar(d,x_param,y_param,hue_param,colors,plot_title='', extra_name='',ylimit=None,logaxis=False,hue_order=None, gate=None):
    """Draw a dodged strip plot of per-replicate values and save it.

    The figure is written to ``<output_path>/grpbar-<extra_name>.svg`` and
    ``.tiff`` (700 dpi), where ``output_path`` is the module-level output
    folder.

    Args:
        d: dataframe holding the columns named by the next three arguments.
        x_param: column shown on the x axis.
        y_param: column shown on the y axis.
        hue_param: column used for colouring and dodging.
        colors: palette passed to seaborn.
        plot_title: currently unused; kept for call compatibility.
        extra_name: suffix of the output file names.
        ylimit: optional (min, max) y-axis limits.
        logaxis: when True the y axis is log-scaled.
        hue_order: optional order of the hue levels.
        gate: optional y position of a dashed horizontal reference line.

    The x tick labels ('FF3'..'FF6') and the y label are hard-coded for the
    mGL figures of this experiment.
    """
    plt.figure(figsize=(6,3))
    #sns.set_theme(style='whitegrid')
    g = sns.stripplot(data=d, x=x_param, y=y_param, hue=hue_param, palette=colors, hue_order=hue_order, dodge=True)

    if ylimit is not None: g.set(ylim=ylimit)
    if gate is not None: g.axhline(gate,ls='--',c='gray',alpha=0.5)

    plt.legend(fontsize='16', title_fontsize='18',title='Target\n Sites',bbox_to_anchor=(1.05, 0.95), loc='upper left', borderaxespad=0,columnspacing=0.5,labelspacing=0.25)
    plt.xticks(fontsize='16',va='center')
    g.set_xticklabels(['FF3','FF4','FF5','FF6'])
    sns.despine(ax=g)
    g.set_ylabel('mGL MFI (a.u.)',fontsize=18)
    g.set_yticklabels([])
    g.set_xlabel(None)
    plt.xticks(rotation=0,va='top')

    # Honour the `logaxis` flag; previously the axis was forced to log scale
    # no matter what the caller asked for.
    # NOTE(review): on a linear axis the y tick labels blanked above
    #   presumably stay blank -- confirm that is the desired look.
    if logaxis:
        plt.gca().set_yscale('log')

    # Save figure as .svg
    plt.savefig(str(output_path)+'/grpbar-'+extra_name+'.svg',bbox_inches='tight')
    plt.savefig(str(output_path)+'/grpbar-'+extra_name+'.tiff',bbox_inches='tight',dpi=700)

# Hue levels (values of the 'TS' column) in legend order
order = [0, 1, 2, 4]

# Colours: one entry of the 'muted' palette for the first hue level,
#   followed by the first three steps of a 4-step light ramp built from an
#   entry of the 'dark' palette
cp_dark = sns.color_palette('dark')
cp_norm = sns.color_palette('muted')
color_map = sns.light_palette(cp_dark[2], 4, reverse=True)

# Split the per-replicate statistics by promoter
cmv_stats = stats.loc[stats['Construct'].isin(
    ['CMV-miR-FF3', 'CMV-miR-FF4', 'CMV-miR-FF5', 'CMV-miR-FF6'])]
pgk_stats = stats.loc[stats['Construct'].isin(
    ['PGK-miR-FF3', 'PGK-miR-FF4', 'PGK-miR-FF5', 'PGK-miR-FF6'])]

colors = [cp_norm[7]] + list(color_map[:3])
x = 'Construct'
y = 'mGL-A_gmean'
hue = 'TS'

# Same settings for both figures; only the data and the file name differ
shared_opts = dict(plot_title='GMean mGL-A', logaxis=True, ylimit=(1e2,4e4),
                   hue_order=order, gate=gate_mGL)
plot_grpbar(cmv_stats, x, y, hue, colors, extra_name='gmean-mGL-A-log-cmv-poster', **shared_opts)
plot_grpbar(pgk_stats, x, y, hue, colors, extra_name='gmean-mGL-A-log-pgk-poster', **shared_opts)
#plot_grpbar(quad1,x,y,hue,plot_title='GMean EGFP-A',extra_name='gmean-EGFP-A-lin', logaxis=False)
