# T-test
example

In [67]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
from collections import Counter

In [None]:
from clustergrammer_widget import *
net = Network(clustergrammer_widget)

In [2]:
np.random.seed(99)

# Create sample data.
a = np.random.randn(10000)
b = np.random.randn(10000) + 0.1

# Use scipy.stats.ttest_ind.
t, p = ttest_ind(a, b, equal_var=False)

print('ttest_ind:' + str(p))

ttest_ind:2.81708330972e-07


# Cell-Type Populations in Plasma and PMA Treated

## Load Data

In [3]:
# Plasma Treated
net.load_file('../cytof_data/Plasma_UCT.txt')
net.random_sample(axis='row', num_samples=110000, random_state=99)
df_plasma = net.export_df()

print(df_plasma.shape)

# PMA Treated
net.load_file('../cytof_data/PMA_UCT.txt')
net.random_sample(axis='row', num_samples=110000, random_state=99)
df_pma = net.export_df()

print(df_pma.shape)

(110000, 28)
(110000, 28)


## Randomly Sample Data and Calc Cell-Type Populations

In [122]:
np.random.seed(99)
num_samples = 2000
num_runs = 100

sampled_counts = {}
sampled_counts['plasma'] = {}
sampled_counts['pma'] = {}

for inst_treatment in ['plasma', 'pma']:
    
    for i in range(num_runs):

        # randomly subsample 
        random_state = int(np.random.random()*100)    
        net.load_df(df_plasma)
        net.random_sample(axis='row', num_samples=num_samples, random_state=random_state)
        inst_df = net.export_df()
        inst_rows = inst_df.index.tolist()

        # get cell-types from rows
        all_types = [i[2] for i in inst_rows]

        types_list = sorted(list(set(all_types)))

        # get the counts of all cell types in the subsampled data
        inst_counts = Counter(all_types)

        for inst_type in inst_counts.keys():

            # initialize counts
            if inst_type not in sampled_counts[inst_treatment]:
                sampled_counts[inst_treatment][inst_type] = []

            # append new count
            sampled_counts[inst_treatment][inst_type].append(inst_counts[inst_type])
            
    # change to numpy array 
    for inst_type in sampled_counts[inst_treatment]:
        sampled_counts[inst_treatment][inst_type] = np.asarray(sampled_counts[inst_treatment][inst_type])
    
    
    
print('done')

done


In [124]:
print(sampled_counts['pma']['B cells'].mean())
print(sampled_counts['pma']['B cells'].std())

124.3
9.45991543303


In [125]:
print(sampled_counts['plasma']['B cells'].mean())
print(sampled_counts['plasma']['B cells'].std())

124.31
11.3821746604


## Calc Stats and Significant Differences Between Plasma and PMA

In [126]:
results = {}

for inst_type in sampled_counts['plasma']:
    
    results[inst_type] = {}
    
    a = sampled_counts['plasma'][inst_type]
    b = sampled_counts['pma'][inst_type]

    a_mean = a.mean()
    b_mean = b.mean()
    
    a_std = a.std()
    b_std = b.std()    
    
    results[inst_type]['plasma_mean'] = a_mean
    results[inst_type]['pma_mean'] = b_mean   

    results[inst_type]['plasma_std'] = a_std
    results[inst_type]['pma_std'] = b_std      
    
    print(inst_type)
    print('Plasma: ' + str(a_mean) + ' PMA: ' + str(b_mean))

    t, p = ttest_ind(a, b, equal_var=False)
    
    results[inst_type]['ttest_t'] = t
    results[inst_type]['ttest_pval'] = t

    print('ttest_ind: ' + str(p) + '\n')

NK cells_CD56hi
Plasma: 21.7 PMA: 22.17
ttest_ind: 0.427762153249

CD4 Tcells
Plasma: 472.61 PMA: 473.67
ttest_ind: 0.696218828586

Undefined
Plasma: 78.24 PMA: 77.55
ttest_ind: 0.528964347678

B cells
Plasma: 124.31 PMA: 124.3
ttest_ind: 0.994642994464

CD8 Tcells
Plasma: 277.63 PMA: 280.31
ttest_ind: 0.271429925343

CD1c DCs
Plasma: 46.42 PMA: 47.06
ttest_ind: 0.468327298808

NK cells_CD16hi_CD57hi
Plasma: 119.27 PMA: 117.07
ttest_ind: 0.108796687434

CD4 Tcells_CD161hi
Plasma: 72.15 PMA: 72.14
ttest_ind: 0.993150834628

NK cells_CD16hi
Plasma: 156.52 PMA: 157.14
ttest_ind: 0.728304150694

CD14hi monocytes
Plasma: 170.72 PMA: 168.98
ttest_ind: 0.333920750767

CD4 Tcells+CD27hi
Plasma: 218.17 PMA: 218.19
ttest_ind: 0.991280263886

CD14low monocytes
Plasma: 10.89 PMA: 11.28
ttest_ind: 0.475338973344

CD4 Tcells_CD127hi
Plasma: 86.55 PMA: 86.95
ttest_ind: 0.779181874194

Basophils
Plasma: 24.13 PMA: 23.06
ttest_ind: 0.124360203148

Neutrophils
Plasma: 2.88172043011 PMA: 2.69411764706
tt

In [129]:
# results

In [131]:
# these are two functions that save and load data in json

# save dict to json
def save_to_json(inst_dict, filename, indent='no-indent'):
  import json

  # save as a json
  fw = open(filename, 'w')
  if indent == 'indent':
    fw.write( json.dumps(inst_dict, indent=2) )
  else:
    fw.write( json.dumps(inst_dict) )
  fw.close()


# load json to dict
def load_to_dict( filename ):
  import json
  # load
  f = open(filename,'r')
  inst_dict = json.load(f)
  f.close()
  return inst_dict


In [132]:
save_to_json(results, 'PMA_cell_type_results.json')