In [3]:
import glob
import os
from math import log2

import matplotlib.pyplot as plt
import pandas as pd

from helpers import general_helpers as gh
from helpers import mpl_plotting_helpers as mph
from helpers import stats_helpers as sh

In [9]:
# Treatment condition -> sample IDs. The values are just the sample IDs
# from my notes; the parsing code only uses the keys.
conditions = {"DMSO unstim" : [1,2,3],
              "DMSO stim" : [4,5,6],
              "0.1 uM RDN unstim" : [7,8,9],
              "0.1 uM RDN stim" : [10,11,12],
              "1 uM RDN unstim" : [13,14,15],
              "1 uM RDN stim" : [16,17,18],
              "10 uM RDN unstim" : [19,20,21],
              "10 uM RDN stim" : [22,23,24],
              "0.1 uM Soq unstim" : [25,26,27],
              "0.1 uM Soq stim" : [28,29,30],
              "1 uM Soq unstim" : [31,32,33],
              "1 uM Soq stim" : [34,35,36],
              "10 uM Soq unstim" : [37,38,39],
              "10 uM Soq stim" : [40,41,42],}


def file_stem(path):
    """
    Return the file name of `path` without its directory and without its
    final extension, e.g. "./ab_elisa.xls" -> "ab_elisa".
    """
    return os.path.splitext(os.path.basename(path))[0]


# glob.glob returns matches in arbitrary, OS-dependent order. Sort them so
# that anything paired with `files` by position (output names, per-file axis
# limits) lines up with the same file on every run.
files = sorted(f for f in glob.glob("./*.xls") if "aggregate" not in f)
print(files)
# Build the output names from the file stem rather than splitting on ".",
# which truncates names containing extra dots.
outfile_pdfs = ["figs/" + file_stem(f) + "_all.pdf" for f in files]
outfile_stat = ["stats/" + file_stem(f) + "_all_stat" for f in files]

['./ab_elisa.xls', './car_elisa.xls', './t2kb_elisa.xls']


In [5]:
# Most of the work is already done by Floreada, so I just need
# to parse out the conditions from each file

def parse_conditions(a_flow_xlsx, 
                     condition_dict = None, # only need the keys here
                     flow_col = "GFP+ CD69+ %",
                     id_col = "Sample ID"):
    """
    Read one Excel sheet and group the values of one column by condition.

    The condition of a row is its sample ID with the last space-separated
    token removed, e.g. "DMSO unstim 1" -> "DMSO unstim".

    a_flow_xlsx:    path of the Excel file to read
    condition_dict: dictionary whose keys are the expected condition names
                    (the values are ignored). Defaults to the notebook-level
                    `conditions`, looked up when the function is called.
    flow_col:       name of the column holding the measured values
    id_col:         name of the column holding the sample IDs

    returns: data_dict with the same keys (and key order) as condition_dict,
             each mapped to the list of values found for that condition, in
             row order. Conditions with no rows map to an empty list.
    raises:  KeyError if a sample ID does not reduce to one of the keys of
             condition_dict.
    """
    if condition_dict is None:
        # Resolve the default at call time, so that re-running the cell that
        # defines `conditions` is picked up without re-defining this function
        condition_dict = conditions
    # Read the Excel sheet as a dataframe and keep only the two cols of interest
    flow_df = pd.read_excel(a_flow_xlsx)[[id_col, flow_col]]
    # Make the empty data dictionary
    data_dict = {key : [] for key in condition_dict}
    # And populate it, one (sample ID, value) pair per row
    for sample_id, value in zip(flow_df[id_col], flow_df[flow_col]):
        # Make the key from the ID by dropping its last token. str() lets a
        # blank/non-text ID reach the explicit error below instead of failing
        # on .split
        dd_key = " ".join(str(sample_id).split(" ")[:-1])
        if dd_key not in data_dict:
            raise KeyError(f"Sample ID {sample_id!r} in {a_flow_xlsx!r} gives "
                           f"condition {dd_key!r}, which is not one of the "
                           f"expected conditions: {list(data_dict)}")
        data_dict[dd_key].append(value)
    return data_dict
    

In [12]:
# Floreada has already done most of the backend work, so each file only needs
# to be parsed into a {condition: [values]} dictionary
data = [parse_conditions(f, 
                         condition_dict = conditions,
                         flow_col = "IL2 pg/mL",
                         id_col = "Sample ID") for f in files]

# Upper y-axis limit for each file, in the same order as `files`
ymaxs = [20, 1100, 200]

# The loop below pairs these lists by position. Check the lengths up front so
# a changed set of input files fails here, before any output is written,
# rather than part-way through or with limits silently left unused.
if not (len(data) == len(ymaxs) == len(outfile_stat) == len(outfile_pdfs)):
    raise ValueError(f"Per-file settings are out of sync: {len(data)} data sets, "
                     f"{len(ymaxs)} ymaxs, {len(outfile_stat)} stat outputs, "
                     f"{len(outfile_pdfs)} pdf outputs")

# One colour per treatment, listed twice so that the two consecutive groups of
# each treatment (unstim, then stim, following the order of `conditions`)
# share a colour
treatment_colours = ["hotpink", "lightskyblue", "steelblue", "blue",
                     "lavender", "mediumpurple", "indigo"]
dot_colours = [colour for colour in treatment_colours for _ in range(2)]

# now we can perform the stats and plot, one file at a time
for d, stat_path, pdf_path, ymax in zip(data, outfile_stat, outfile_pdfs, ymaxs):
    # This is a dict, we need a list of (ID, [data])
    labelled_groups = [[key, value] for key, value in d.items()]
    # HolmSidak multiple comparison correction
    stats = sh.HolmSidak(*labelled_groups, override = True) # labels = True, alpha = 0.05, no_comp = []
    stats.write_output(stat_path)
    # Then we should just be able to plot. We're gonna do one big graph with
    # everything on it, which should be fine (in theory, we'll see)
    mph.dotplot(labelled_groups, 
                rotation = 90,
                colours = dot_colours,
                ylabel = "IL2 (pg/mL)",
                title = "Activation induced IL2 expression",
                filename = pdf_path,
                ymin = 0,
                ymax = ymax,
                markersize = 30,
                figsize = (5,4),
                tick_fontsize = 10,
                label_fontsize = 12,
                title_fontsize = 12)

In [None]:
# Scratch cell: print the built-in help text for mph.dotplot (interactive reference only)
help(mph.dotplot)

In [None]:
# Scratch cell: display the parsed data (one {condition: [values]} dict per input file)
data

In [None]:
# Scratch cell: display the stats output paths derived from the input file names
outfile_stat