In [22]:
import itertools as itt
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import os
import pandas as pd
import pickle
import scipy.optimize as spo
import scipy.special as spsp
import scipy.stats as sps
import seaborn as sns

from matplotlib.ticker import FixedLocator

# Global matplotlib styling: font sizes, tick direction and legend frame
mpl.rcParams.update({
    'axes.titlesize': 'xx-large',
    'axes.labelsize': 'xx-large',
    'xtick.labelsize': 'x-large',
    'ytick.labelsize': 'x-large',
    'xtick.direction': 'out',
    'ytick.direction': 'out',
    'legend.frameon': True,
    'legend.framealpha': 0.5,
    'legend.fontsize': 'large',
})

# Input folders are resolved relative to the directory the kernel was started in
path_proj = os.getcwd() + '/'
path_sc, path_sort = (path_proj + subdir for subdir in ('csv/Single_cell/', 'csv/Sort/'))
print(path_proj)

#LOAD THE PROCESSED DATA
# The CSV uses ';' as field separator and ',' as decimal mark
df = pd.read_csv(path_sc + 'Pooled_data.csv', sep=';', decimal=',')

#GLOBAL VARIABLES FOR ITERATION
or_cells = ['SLAM-HSC', 'ST-HSC', 'MPP']
conds = ['P', 'P+ILs']
times = ['24h', '48h']

#SET A RANDOM SEED TO REPLICATE THE P-VALUES
#FROM STATISTICAL TESTS BASED ON RANDOM PERMUTATIONS
# NOTE: the seed fixes the global NumPy stream, so the p-values are only
# replicated when the cells are executed in the same order
np.random.seed(123)

/Users/quanti/Desktop/M_HSC/


In [25]:
#MAP THE VECTORS OF counts OF classes TO A DIFFERENT SUPPORT (USUALLY CONTAINING classes)
def spread_to_support(classes, counts, support):
    """Re-express a vector of per-class counts on another support.

    Parameters
    ----------
    classes : array of the classes that were observed
    counts : array of counts, aligned with ``classes``
    support : iterable of classes defining the order of the output

    Returns
    -------
    Array with one entry per element of ``support``: the matching count when
    the class appears in ``classes``, zero otherwise. When ``counts`` sums to
    zero, an all-zero vector of length ``len(support)`` is returned.
    """
    if counts.sum() == 0:
        return np.zeros(len(support))

    pieces = []
    for cl in support:
        hit = classes == cl
        # classes missing from the observations contribute a single zero
        pieces.append(counts[hit] if any(hit) else np.zeros(1))
    return np.hstack(pieces)

#CALCULATE THE G-TEST STATISTIC ON data WITH labels,
#ON THE CONTINGENCY TABLE OF support (DATA SUPPORT) X all_perm_labels (LABELS SUPPORT)
def chi_squared_stat(data, labels, support, all_perm_labels, bool_stack=True):
    """Return the G-test statistic of the contingency table labels x support.

    Despite the function name, the statistic is the log-likelihood ratio
    (G-test) one, because ``chi2_contingency`` is called with
    ``lambda_='log-likelihood'`` and without continuity correction.

    Parameters
    ----------
    data : array of observations, indexable with a boolean mask
    labels : array of labels aligned with ``data``
    support : classes defining the columns of the table
    all_perm_labels : labels defining the rows of the table
    bool_stack : if True, the entries of ``data`` selected for a label are
        concatenated with ``np.hstack`` before being counted (each entry is
        then itself a collection of observations)
    """
    rows = []
    for k in all_perm_labels:
        group = data[labels==k]
        if bool_stack:
            group = np.hstack(group)
        observed_classes, observed_counts = np.unique(group, return_counts=True)
        rows.append(spread_to_support(observed_classes, observed_counts, support=support))
    table = np.array(rows)

    # element 0 of the result is the test statistic
    g_stat = sps.chi2_contingency(table, correction=False, lambda_='log-likelihood')[0]
    return g_stat

####PERMUTATION TESTING
#INPUT
#    stat_func: the statistic to be calculated on data and permuted data
#    args: fixed input variables for stat_func
#    data: array of data to be permuted 
#    perm_label: array of labels, with same length as data, used to permute data
#    n_iter: number of permuted data to be drawn
#    boold_p_dist: if True, also return the observed statistic and the stat_func values
#        calculated on the permuted data. Useful to plot the statistic's distribution.
#OUTPUT: left, right and two-sided pvalues;
#        if boold_p_dist=True, also return the permuted versions of the statistics

def permutation_testing(stat_func, args, data, perm_label, n_iter=250000, boold_p_dist=False):
    """Monte Carlo permutation test of ``stat_func`` under label shuffling.

    The statistic is first evaluated on ``(data, perm_label)``; it is then
    re-evaluated ``n_iter`` times with ``perm_label`` randomly reordered
    (permutations are drawn from the global ``np.random`` stream).

    Returns
    -------
    dict with keys 'left', 'right' and 'two-sided' holding the p-values.
    The one-sided p-values use the (count + 1) / (n_iter + 1) estimate; the
    two-sided one is twice the smaller one-sided value, capped at 1.
    If ``boold_p_dist`` is True, the tuple (dict, observed statistic, array of
    permuted statistics) is returned instead.
    """
    stat = stat_func(data, perm_label, *args)

    n_labels = len(perm_label)
    draws = []
    for _ in np.arange(n_iter):
        shuffled = perm_label[np.random.permutation(n_labels)]
        draws.append(stat_func(data, shuffled, *args))
    perm_stat = np.array(draws)

    # the +1 terms count the observed statistic as one of the permutations
    denom = n_iter+1.
    left_pval = ((perm_stat <= stat).sum()+1.)/denom
    right_pval = ((perm_stat >= stat).sum()+1.)/denom
    two_sided_pval = min(2 * min(left_pval, right_pval), 1)

    pvals = {'left':left_pval, 'right':right_pval, 'two-sided':two_sided_pval}
    if not boold_p_dist:
        return pvals
    return pvals, stat, perm_stat

In [5]:
#FIG 2B
# Permutation test (G-test statistic) of the effect of the culture condition on
# the cell-class composition, for each progenitor type. Families are the units
# being permuted; all the cells of a family contribute to the contingency table.

# Create the output folder before the long permutation run, so that a missing
# directory cannot make the final dump fail and discard the results
os.makedirs('./pickled_data', exist_ok=True)

pvals_dct = {}
for oc in or_cells:
    print(oc)
    df_temp = df[(df.Original_cell==oc)]
    fam_rows = [df_temp[df_temp.Family==fam] for fam in np.unique(df_temp.Family)]

    # Families have different numbers of cells: fill an explicit object array,
    # since np.array() on ragged input is rejected by recent NumPy versions
    # (and would silently become 2-D if all families had the same size)
    data = np.empty(len(fam_rows), dtype=object)
    for i, rows in enumerate(fam_rows):
        data[i] = rows.Class.values
    # the culture condition is read from the first cell of each family
    perm_label = np.array([rows.Culture_condition.iloc[0] for rows in fam_rows])
    
    table_support = np.unique(np.hstack(data))
    all_perm_labels = np.unique(perm_label)
    bool_stack = True
    args = (table_support, all_perm_labels, bool_stack,)
    
    pvals_dct[oc] = permutation_testing(chi_squared_stat, args, data, perm_label)
print(pvals_dct)
with open('./pickled_data/Fig2B_pvals', 'wb') as fp:
    pickle.dump(pvals_dct, fp)

SLAM-HSC
ST-HSC
MPP
{'SLAM-HSC': {'left': 0.842688629245483, 'right': 0.15731537073851704, 'two-sided': 0.31463074147703407}, 'ST-HSC': {'left': 0.999996000016, 'right': 7.999968000127999e-06, 'two-sided': 1.5999936000255998e-05}, 'MPP': {'left': 1.0, 'right': 3.9999840000639995e-06, 'two-sided': 7.999968000127999e-06}}


In [6]:
###FIG 2C
# Permutation test (G-test statistic) of the effect of the culture condition on
# the maximal generation reached by each family, for each progenitor type.

# Create the output folder before the long permutation run, so that a missing
# directory cannot make the final dump fail and discard the results
os.makedirs('./pickled_data', exist_ok=True)

pvals_dct = {}
for oc in or_cells:
    print(oc)
    df_temp = df[(df.Original_cell==oc)]
    # select the rows of every family once and reuse them for data and labels
    fam_rows = [df_temp[df_temp.Family==fam] for fam in np.unique(df_temp.Family)]

    data = np.array([rows.Generation.max() for rows in fam_rows])
    # the culture condition is read from the first cell of each family
    perm_label = np.array([rows.Culture_condition.iloc[0] for rows in fam_rows])
    
    table_support = np.unique(data)
    all_perm_labels = np.unique(perm_label)
    bool_stack = False  # one scalar observation per family, nothing to concatenate
    args = (table_support, all_perm_labels, bool_stack,)
    
    pvals_dct[oc] = permutation_testing(chi_squared_stat, args, data, perm_label)
print(pvals_dct)
        
with open('./pickled_data/Fig2C_pvals', 'wb') as fp:
    pickle.dump(pvals_dct, fp)

SLAM-HSC
ST-HSC
MPP
{'SLAM-HSC': {'left': 0.9653961384154464, 'right': 0.03461186155255379, 'two-sided': 0.06922372310510758}, 'ST-HSC': {'left': 0.9923960304158783, 'right': 0.007607969568121728, 'two-sided': 0.015215939136243455}, 'MPP': {'left': 0.9810240759036963, 'right': 0.018983924064303742, 'two-sided': 0.037967848128607484}}


In [7]:
# Reload the Fig 2C p-values written by the previous cell and display them
# (the bare trailing expression is what the notebook renders as cell output)
with open('./pickled_data/Fig2C_pvals', 'rb') as fp:
    pvals_dct = pickle.load(fp)
pvals_dct

{'MPP': {'left': 0.9810240759036963,
  'right': 0.018983924064303742,
  'two-sided': 0.037967848128607484},
 'SLAM-HSC': {'left': 0.9653961384154464,
  'right': 0.03461186155255379,
  'two-sided': 0.06922372310510758},
 'ST-HSC': {'left': 0.9923960304158783,
  'right': 0.007607969568121728,
  'two-sided': 0.015215939136243455}}

In [8]:
###FIG 2D
# Permutation test (G-test statistic) of the effect of the culture condition on
# the class of the generation-0 cell of each family, for each progenitor type.

# Create the output folder before the long permutation run, so that a missing
# directory cannot make the final dump fail and discard the results
os.makedirs('./pickled_data', exist_ok=True)

pvals_dct = {}
for oc in or_cells:
    print(oc)
    df_temp = df[(df.Original_cell==oc)&(df.Generation==0)]
    # select the rows of every family once and reuse them for data and labels
    fam_rows = [df_temp[df_temp.Family==fam] for fam in np.unique(df_temp.Family)]

    # only the first generation-0 row of a family is used
    data = np.array([rows.Class.iloc[0] for rows in fam_rows])
    perm_label = np.array([rows.Culture_condition.iloc[0] for rows in fam_rows])
    
    table_support = np.unique(data)
    all_perm_labels = np.unique(perm_label)
    bool_stack = False  # one scalar observation per family, nothing to concatenate
    args = (table_support, all_perm_labels, bool_stack,)
    
    pvals_dct[oc] = permutation_testing(chi_squared_stat, args, data, perm_label)
print(pvals_dct)
        
with open('./pickled_data/Fig2D_pvals', 'wb') as fp:
    pickle.dump(pvals_dct, fp)

SLAM-HSC
ST-HSC
MPP
{'SLAM-HSC': {'left': 0.646501413994344, 'right': 0.35420258318966724, 'two-sided': 0.7084051663793345}, 'ST-HSC': {'left': 0.8933844264622941, 'right': 0.10661957352170591, 'two-sided': 0.21323914704341182}, 'MPP': {'left': 0.9981200075199699, 'right': 0.0018839924640301439, 'two-sided': 0.0037679849280602877}}


In [9]:
#FIG 2E
# Labels of the four first-division types, in the order used by first_div_class
sym_labs = ['SYM UNDIF', 'SYM DIF', 'ASYM UNDIF', 'ASYM DIF']

def first_div_class(df):
    """Classify a first division from the classes of its two daughter cells.

    ``df`` must hold exactly two rows (the two daughters); the progenitor class
    is read from the first row of ``Original_cell``.

    Returns one of the entries of ``sym_labs``:
      - 'SYM UNDIF'  : both daughters share the progenitor's class
      - 'SYM DIF'    : daughters share a class that differs from the progenitor's
      - 'ASYM UNDIF' : daughters differ and one of them has the progenitor's class
      - 'ASYM DIF'   : daughters differ and neither has the progenitor's class
    """
    daughter1, daughter2 = df.Class.values
    progenitor = df.Original_cell.values[0]

    same_fate = daughter1 == daughter2
    if not same_fate:
        keeps_progenitor = (daughter1 == progenitor) or (daughter2 == progenitor)
        return sym_labs[2] if keeps_progenitor else sym_labs[3]
    return sym_labs[0] if daughter1 == progenitor else sym_labs[1]
    
# Permutation test (G-test statistic) of the effect of the culture condition on
# the type of first division, for each progenitor type. Only families with
# exactly two generation-1 cells are kept.

# Create the output folder before the long permutation run, so that a missing
# directory cannot make the final dump fail and discard the results
os.makedirs('./pickled_data', exist_ok=True)

pvals_dct = {}
for oc in or_cells:
    print(oc)
    df_temp = df[(df.Original_cell==oc)&(df.Generation==1)]

    # apply the "two daughters" filter once, so that data and labels are
    # guaranteed to be built from the same families
    fam_rows = [df_temp[df_temp.Family==fam] for fam in np.unique(df_temp.Family)]
    fam_rows = [rows for rows in fam_rows if len(rows)==2]

    data = np.array([first_div_class(rows) for rows in fam_rows])
    perm_label = np.array([rows.Culture_condition.iloc[0] for rows in fam_rows])
    
    table_support = np.unique(data)
    all_perm_labels = np.unique(perm_label)
    bool_stack = False  # one label per family, nothing to concatenate
    args = (table_support, all_perm_labels, bool_stack,)
    
    pvals_dct[oc] = permutation_testing(chi_squared_stat, args, data, perm_label)
print(pvals_dct)
        
with open('./pickled_data/Fig2E_pvals', 'wb') as fp:
    pickle.dump(pvals_dct, fp)

SLAM-HSC
ST-HSC
MPP
{'SLAM-HSC': {'left': 0.23293906824372704, 'right': 0.7787488850044599, 'two-sided': 0.4658781364874541}, 'ST-HSC': {'left': 0.6389534441862232, 'right': 0.38085447658209365, 'two-sided': 0.7617089531641873}, 'MPP': {'left': 0.8721325114699541, 'right': 0.1326434694261223, 'two-sided': 0.2652869388522446}}


In [26]:
#SUPPLEMENTARY FIG 2B
# Permutation test (G-test statistic) of the effect of the culture condition on
# the class composition of generation-1 cells, for each progenitor type.
# Families are the units being permuted.

# Create the output folder before the long permutation run, so that a missing
# directory cannot make the final dump fail and discard the results
os.makedirs('./pickled_data', exist_ok=True)

pvals_dct = {}
for oc in or_cells:
    print(oc)
    df_temp = df[(df.Original_cell==oc)&(df.Generation==1)]
    fam_rows = [df_temp[df_temp.Family==fam] for fam in np.unique(df_temp.Family)]

    # Families may hold different numbers of generation-1 cells: fill an
    # explicit object array, since np.array() on ragged input is rejected by
    # recent NumPy versions (and would silently become 2-D if all families
    # had the same size)
    data = np.empty(len(fam_rows), dtype=object)
    for i, rows in enumerate(fam_rows):
        data[i] = rows.Class.values
    # the culture condition is read from the first cell of each family
    perm_label = np.array([rows.Culture_condition.iloc[0] for rows in fam_rows])
    
    table_support = np.unique(np.hstack(data))
    all_perm_labels = np.unique(perm_label)
    bool_stack = True
    args = (table_support, all_perm_labels, bool_stack,)
    
    pvals_dct[oc] = permutation_testing(chi_squared_stat, args, data, perm_label)
print(pvals_dct)
with open('./pickled_data/SuppFig2B_pvals', 'wb') as fp:
    pickle.dump(pvals_dct, fp)

SLAM-HSC
ST-HSC
MPP
{'SLAM-HSC': {'left': 0.885312458750165, 'right': 0.11469154123383507, 'two-sided': 0.22938308246767014}, 'ST-HSC': {'left': 0.9996360014559942, 'right': 0.00036799852800588795, 'two-sided': 0.0007359970560117759}, 'MPP': {'left': 0.9999880000479998, 'right': 1.5999936000255998e-05, 'two-sided': 3.1999872000511996e-05}}


In [27]:
#SANITY CHECK: TEST FOR WELL AND EXPERIMENT EFFECT ON FAMILIAL CELL CLASS DISTRIBUTION
#(IN FIXED CONDITION, TIME AND PROGENITOR CLASS)
#FAMILIES ARE PERMUTED ACROSS WELLS AND EXPERIMENTS BY SHUFFLING THEIR WELL_EXPERIMENT LABELS
#THE STATISTIC USED IS THE G-TEST STATISTIC ON THE CONTINGENCY TABLE OF CELL COUNTS
#SORTED BY CELL CLASS VS WELL_EXPERIMENT LABEL

# Create the output folder before the long permutation run, so that a missing
# directory cannot make the final dump fail and discard the results
os.makedirs('./pickled_data', exist_ok=True)

pvals_dct = {}
for oc in or_cells:
    pvals_dct[oc] = {}
    for cnd in conds:
        pvals_dct[oc][cnd] = {}
        for t in times:
            print(oc, cnd, t)
            df_temp = df[(df.Original_cell==oc)&(df.Culture_condition==cnd)&(df.Culture_time==t)]
            fam_rows = [df_temp[df_temp.Family==fam] for fam in np.unique(df_temp.Family)]

            # Families have different numbers of cells: fill an explicit object
            # array, since np.array() on ragged input is rejected by recent
            # NumPy versions (and would silently become 2-D if all families
            # had the same size)
            data = np.empty(len(fam_rows), dtype=object)
            for i, rows in enumerate(fam_rows):
                data[i] = rows.Class.values
            # the well/experiment label is read from the first cell of each family
            perm_label = np.array([rows.Well_experiment.iloc[0] for rows in fam_rows])

            table_support = np.unique(np.hstack(data))
            all_perm_labels = np.unique(perm_label)
            bool_stack = True
            args = (table_support, all_perm_labels, bool_stack,)

            pvals_dct[oc][cnd][t] = permutation_testing(chi_squared_stat, args, data, perm_label)
with open('./pickled_data/WellExperiment_test_pvals', 'wb') as fp:
    pickle.dump(pvals_dct, fp)

SLAM-HSC P 24h
SLAM-HSC P 48h
SLAM-HSC P+ILs 24h
SLAM-HSC P+ILs 48h
ST-HSC P 24h
ST-HSC P 48h
ST-HSC P+ILs 24h
ST-HSC P+ILs 48h
MPP P 24h
MPP P 48h
MPP P+ILs 24h
MPP P+ILs 48h


In [28]:
# Reload the well/experiment sanity-check results and print the right-tail
# p-value of every progenitor x condition x time combination
with open('./pickled_data/WellExperiment_test_pvals', 'rb') as fp:
    pvals_dct = pickle.load(fp)

# itt.product iterates in the same order as the three nested loops it replaces
for oc, cnd, t in itt.product(or_cells, conds, times):
    right_tail = pvals_dct[oc][cnd][t]['right']
    print(oc, cnd, t, '\t', right_tail)

SLAM-HSC P 24h 	 0.008683965264138943
SLAM-HSC P 48h 	 0.5123499506001976
SLAM-HSC P+ILs 24h 	 0.2304630781476874
SLAM-HSC P+ILs 48h 	 0.6260374958500166
ST-HSC P 24h 	 0.2593549625801497
ST-HSC P 48h 	 0.0967436130255479
ST-HSC P+ILs 24h 	 0.6066975732097072
ST-HSC P+ILs 48h 	 0.7582449670201319
MPP P 24h 	 0.24542301830792676
MPP P 48h 	 0.6022295910816357
MPP P+ILs 24h 	 0.37274650901396394
MPP P+ILs 48h 	 0.3910064359742561
