### Setup

In [None]:
# Import packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import rushd as rd
import seaborn as sns
import sys
import yaml
import itertools

from pathlib import Path
from typing import Any, Dict, List, Set, Tuple, Union, Optional

# Set formatting style for Seaborn visualizations
sns.set_style('ticks')
sns.set_context('talk',rc={'font.family': 'sans-serif', 'font.sans-serif':['Helvetica Neue']})

# Read in path to the main directory with this code, where the path is specified in 'datadir.txt'.
# Surrounding whitespace is stripped: a trailing newline in the file (added by most editors)
# would otherwise become part of the last path component and break every path built from datadir.
datadir = Path(Path('../datadir.txt').read_text().strip())

In [None]:
# Load data from .csv files into one Pandas Dataframe, with each file (sample) annotated
# with metadata from a .yaml file

### EDIT these each time ###
file_path1 = str(datadir/'instruments'/'data'/'attune'/'Emma'/'2022.10.04_EXP9'/'Data')
file_path2 = str(datadir/'instruments'/'data'/'attune'/'Emma'/'2023.01.16_EXP12'/'Data')
file_path3 = str(datadir/'instruments'/'data'/'attune'/'Emma'/'2023.02.09_EXP13'/'Data')
yaml_path1 = file_path1 + '/wells.yaml'
yaml_path2 = file_path2 + '/wells.yaml'
yaml_path3 = file_path3 + '/wells.yaml'
### END EDIT ###

# One frame per experiment, then stacked into a single frame with a fresh 0..n-1 index
data1 = rd.flow.load_csv_with_metadata(file_path1, yaml_path1)
data2 = rd.flow.load_csv_with_metadata(file_path2, yaml_path2)
data3 = rd.flow.load_csv_with_metadata(file_path3, yaml_path3)
data = pd.concat([data1, data2, data3], ignore_index=True, sort=False)

# Alternatively, if you didn't export from FlowJo with the default file naming scheme,
# you can specify file name format and include the metadata from the file name:
#
#     regex = r"^.*export_exp29_plate3_(?P<well>[A-G0-9]+)_(?P<population>.+)\.csv" ### EDIT
#     data = rd.flow.load_csv_with_metadata(file_path, yaml_path, regex)
#
# See regex101.com for help creating and checking your custom regex.
#
# (This note is written as '#' comments on purpose: inside a plain ''' string the '\.'
#  of the example regex is an invalid escape sequence, which Python warns about.)

#display(data1)
#display(data2)
#display(data3)
display(data)

In [None]:
# Folder that receives the figures produced by this notebook
output_path = rd.rootdir/'output'/'2023.02.15_EXP9_EXP12_EXP13'

# Seaborn palettes used for sample colors. Index order within each palette:
# 0 blue, 1 orange, 2 green, 3 red, 4 purple, 5 brown, 6 pink, 7 gray, 8 yellow, 9 teal
cp_dark, cp_norm, cp_light = (sns.color_palette(name) for name in ('dark', 'muted', 'pastel'))

# Plotting metadata, one row per construct:
# [construct, full_name, color, linestyle, markerstyle]
# Each target-site (TS) variant keeps the same color for every miR.
ts_colors = {
    'none': cp_norm[7],
    'FF3x2': cp_norm[2],
    'FF4x2': cp_norm[0],
    'FF5x2': cp_norm[6],
    'FF6x2': cp_dark[1],
}
mir_names = ['FF3', 'FF4', 'FF5', 'FF6', 'none']

# Every miR x TS combination, miR-major (all TS variants of FF3 first, then FF4, ...)
extra_info = [
    [f'PGK-miR-{mir}.TS-{ts}',
     f'miR-{mir} + ' + ('no TS' if ts == 'none' else f'TS-{ts}'),
     ts_color, '-', 'o']
    for mir, (ts, ts_color) in itertools.product(mir_names, ts_colors.items())
]
# Single-plasmid controls and the untransfected sample come last
extra_info += [
    ['PGK-miR-none', 'mRuby2 control', cp_light[3], '-', 'o'],
    ['TS-none', 'mGL Control', cp_light[9], '-', 'o'],
    ['NT', 'Not Transfected', cp_norm[7], '--', 'o'],
]

### END EDIT ###

# Channels drawn with linear-scale axes; every other channel defaults to log-scale
lin = {'FSC-A','FSC-H','FSC-W','SSC-A','SSC-H','SSC-W'}

# Turn the rows into a table, then into lookup dicts keyed by construct name
extra_info = pd.DataFrame(extra_info, columns=['Sample','Full Name','color','ls','ms'])
full_names, sample_colors, sample_ls, sample_ms = (
    dict(zip(extra_info['Sample'], extra_info[column]))
    for column in ('Full Name', 'color', 'ls', 'ms')
)
# Same colors, keyed by the human-readable name instead of the construct name
sample_colors2 = dict(zip(extra_info['Full Name'], extra_info['color']))

# Human-readable label for every event; constructs without an entry keep their raw name
data['ConstructLabels'] = data['Construct'].replace(full_names)
display(extra_info, data)

### Quadrant Analysis

In [None]:
# Categorize cells into quadrants based on two gates
# Possible values:
#   0 = double negative
#   1 = x-positive
#   2 = y-positive
#   3 = double positive
def get_quadrant(x,y,gate_x,gate_y):
    """Assign each (x, y) pair a quadrant code relative to two gates.

    A value counts as positive only when it is strictly greater than its gate.
    Codes: 0 = double negative, 1 = x-positive only, 2 = y-positive only,
    3 = double positive.

    Returns a Series named 'quadrant' holding one integer code per element.
    """
    flags = pd.DataFrame()
    flags['x'] = x > gate_x
    flags['y'] = y > gate_y
    # Two-bit encoding: the y flag is the high bit, the x flag the low bit
    codes = flags['y'].astype(int)*2 + flags['x'].astype(int)
    return codes.rename('quadrant')

# NOTE(review): consider moving this import to the import cell at the top of the notebook
from scipy.stats import gmean

# Gates were determined from single replicate analysis
gate_mGL = 5e2
gate_mRuby2 = 2e2

# Keep cells with a positive mGL value that also exceed the mRuby2 gate.
# .copy() makes `d` an independent frame, so adding the 'Quadrant' column below neither
# raises SettingWithCopyWarning nor depends on whether .loc handed back a view of `data`.
d = data.loc[(data['mGL-A']>0) & (data['mRuby2-A']>gate_mRuby2)].copy()
gate_FSC = 0
# Doing quadrant analysis with FSC=0 separates into mGL+ and mGL- populations
d['Quadrant'] = get_quadrant(d['FSC-A'],d['mGL-A'],gate_FSC,gate_mGL)

# Compute counts and fractions for the quadrants:
# fraction = cells in this quadrant / cells in all quadrants of the same sample
sample_keys = ['Construct','miR','TS','Plate']
quadrant_keys = sample_keys + ['Quadrant']
grouped = d.groupby(by=quadrant_keys)
quadrants = grouped['FSC-A'].count().rename('Count')
quadrants = (quadrants/quadrants.groupby(sample_keys).transform('sum')).dropna().reset_index(name='Fraction')

# Arithmetic and geometric mean of mGL-A per sample and quadrant
mean_mGL = grouped['mGL-A'].mean().rename('Mean-mGL-A').reset_index()
gmean_mGL = grouped['mGL-A'].apply(gmean).rename('GMean-mGL-A').reset_index()
# Attach the means by group keys rather than by row position, so each row keeps its own
# statistics even if dropna() above removed a group from `quadrants`
quadrants = quadrants.merge(mean_mGL, on=quadrant_keys, how='left')
quadrants = quadrants.merge(gmean_mGL, on=quadrant_keys, how='left')

# Add labels for making nicer plots
quadrant_labels = {0:'None', 1:'mGL Negative', 2:'None', 3:'mGL Positive'}
quadrants['QuadrantLabels'] = quadrants['Quadrant'].replace(quadrant_labels)
quadrants['OppositeFraction'] = 1-quadrants['Fraction']

# Unfiltered copy, taken before sparse populations are dropped
quadrants_main = quadrants.copy()

# Ignore sparsely populated quadrants.
# NOTE(review): the threshold is a FRACTION (50/1e4 = 0.5% of the gated cells of a sample),
# which equals "fewer than 50 cells" only if every sample contributes 10,000 gated cells.
# Confirm whether a filter on the absolute cell count was intended.
min_fraction = 50/1e4
quadrants = quadrants.loc[quadrants['Fraction'] > min_fraction]

# mGL-positive populations of samples that have both miR and TS annotated
quad1 = quadrants.loc[(quadrants['QuadrantLabels']=='mGL Positive')&(quadrants['miR']!='NA')&(quadrants['TS']!='NA')]
display(quad1)
# Same selection, additionally excluding the samples whose miR is 'None'
quad2 = quad1.loc[quad1['miR']!='None']


In [None]:
# Function to calculate fold-change between two groups
#  (Uses construct label info from above)
def calc_fc(df, by, var, var_norm, var_val, skip_list=None):
    """Compute the fold change of 'GMean-mGL-A' between two conditions, per group.

    `df` is grouped by the column `by`. Within each group, the 'GMean-mGL-A' value of the
    first row where `df[var] == var_val` is divided by that of the first row where
    `df[var] == var_norm`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns `by`, `var` and 'GMean-mGL-A'.
    by : str
        Name of the column to group by.
    var : str
        Name of the column that distinguishes the two conditions.
    var_norm : object
        Value of `var` identifying the reference (denominator) condition.
    var_val : object
        Value of `var` identifying the condition of interest (numerator).
    skip_list : collection, optional
        Group names to leave out of the result. Defaults to skipping nothing.

    Returns
    -------
    pandas.DataFrame
        Columns [`by`, 'mGL-A_gmean_FC'], one row per processed group in groupby order,
        with a unique 0..n-1 index.

    Raises
    ------
    IndexError
        If a processed group has no row for `var_norm` or for `var_val`.
    """
    skipped = () if skip_list is None else skip_list
    records = []
    for name, group in df.groupby(by):
        if name in skipped:
            continue
        norm_to = group.loc[group[var] == var_norm, 'GMean-mGL-A']
        stat = group.loc[group[var] == var_val, 'GMean-mGL-A']
        if norm_to.empty or stat.empty:
            raise IndexError(
                f"group {name!r} of {by!r} has no row with {var} == {var_norm!r} "
                f"and/or {var} == {var_val!r}"
            )
        # NOTE(review): if a group holds several matching rows (e.g. replicates),
        # only the first of each is used -- confirm that this is intended
        records.append({by: name, 'mGL-A_gmean_FC': stat.iloc[0] / norm_to.iloc[0]})

    # Build the frame once instead of concatenating inside the loop
    return pd.DataFrame(records, columns=[by, 'mGL-A_gmean_FC'])

In [None]:
# Fold change No TS -> TS (with miR)
# fcs_rel collects one column per target site, in var_list order. Rows follow the order of
# the rows returned by calc_fc (one per miR group found in quad2).
var_list = ['FF3x2','FF4x2','FF5x2','FF6x2']
var_norm = 'None'
skip_list = []
# Assumes quad2 holds exactly four miR groups; the column assignment below raises otherwise
fcs_rel = np.zeros(shape=(4, len(var_list)))
for i, var_val in enumerate(var_list):
    fc = calc_fc(quad2, 'miR', 'TS', var_norm, var_val, skip_list)
    fcs_rel[:, i] = fc['mGL-A_gmean_FC']
    print('Fold change no TS -> '+var_val)
    display(fc)



In [None]:
def plot_heatmap(d,xlabels=None, ylabels=None, plot_title='', extra_name='',max=None,min=None,color=None,vals=False):
    """Draw a heatmap of `d` and save it as .svg and .tiff in the output folder.

    Parameters
    ----------
    d : 2D array-like
        Values to plot; passed to seaborn.heatmap as `data`.
    xlabels, ylabels : list, optional
        Tick labels for the columns / rows.
    plot_title : str
        Title shown above the heatmap.
    extra_name : str
        Suffix of the output file names: 'heatmap-<extra_name>.svg' and '.tiff'.
    max, min : float, optional
        Upper / lower limit of the color scale (seaborn's vmax / vmin). The names shadow
        the builtins inside this function but are kept so existing keyword calls still work.
    color : colormap, optional
        Passed to seaborn.heatmap as `cmap`.
    vals : bool
        If True, write each value into its cell (two decimals) and hide the colorbar;
        if False, show the colorbar and no annotations.

    Side effects: sets matplotlib's global 'font.size' to 16, creates the folder
    `output_path` if it does not exist, and writes the two image files into it.
    """
    plt.figure(figsize=(4,3))
    plt.rcParams.update({'font.size': 16})
    # The colorbar is only drawn when the cells are not annotated with their values
    g = sns.heatmap(data=d,annot=vals,fmt='.2f',xticklabels=xlabels,yticklabels=ylabels,
                    vmax=max,vmin=min,cmap=color,cbar=not vals)
    g.set_title(plot_title,fontsize=18)
    g.set_xlabel('Target Sites',fontsize=18)
    g.set_ylabel('miRNA',fontsize=18)
    g.tick_params(axis='both', which='major', labelsize=16)

    # Save figure as vector (.svg) and high-resolution raster (.tiff).
    # Create the output folder first: savefig fails if the folder is missing.
    out_dir = Path(output_path)
    out_dir.mkdir(parents=True, exist_ok=True)
    file_stem = 'heatmap-'+extra_name
    plt.savefig(str(out_dir/(file_stem+'.svg')),bbox_inches='tight')
    plt.savefig(str(out_dir/(file_stem+'.tiff')),bbox_inches='tight',dpi=700)

# Axis labels for the fold-change matrix (rows: miRNAs, columns: target sites)
miRs = ['FF3','FF4','FF5','FF6']
sites = ['FF3','FF4','FF5','FF6']

# Settings shared by both versions of the figure
heatmap_kwargs = dict(
    xlabels=sites,
    ylabels=miRs,
    plot_title='Relative mGL Expression',
    max=1.2,
    min=0,
    color=sns.color_palette('Greens',as_cmap=True),
)

# Plain version (with colorbar), then the version annotated with the values
plot_heatmap(fcs_rel,extra_name='PGK-rel-miR-Activity_poster',**heatmap_kwargs)
plot_heatmap(fcs_rel,extra_name='PGK-rel-miR-Activity-Annot_poster',vals=True,**heatmap_kwargs)