# T-test
example

In [67]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
from collections import Counter

In [None]:
from clustergrammer_widget import *
net = Network(clustergrammer_widget)

In [2]:
np.random.seed(99)

# Create sample data.
a = np.random.randn(10000)
b = np.random.randn(10000) + 0.1

# Use scipy.stats.ttest_ind.
t, p = ttest_ind(a, b, equal_var=False)

print('ttest_ind:' + str(p))

ttest_ind:2.81708330972e-07


# Cell-Type Populations in Plasma and PMA Treated

## Load Data

In [3]:
# Plasma Treated
net.load_file('../cytof_data/Plasma_UCT.txt')
net.random_sample(axis='row', num_samples=110000, random_state=99)
df_plasma = net.export_df()

print(df_plasma.shape)

# PMA Treated
net.load_file('../cytof_data/PMA_UCT.txt')
net.random_sample(axis='row', num_samples=110000, random_state=99)
df_pma = net.export_df()

print(df_pma.shape)

(110000, 28)
(110000, 28)


## Randomly Sample Data and Calc Cell-Type Populations

In [133]:
np.random.seed(99)
num_samples = 2000
num_runs = 100

sampled_counts = {}
sampled_counts['plasma'] = {}
sampled_counts['pma'] = {}

for inst_treatment in ['plasma', 'pma']:
    
    for i in range(num_runs):

        # randomly subsample 
        random_state = int(np.random.random()*100)    
        net.load_df(df_plasma)
        net.random_sample(axis='row', num_samples=num_samples, random_state=random_state)
        inst_df = net.export_df()
        inst_rows = inst_df.index.tolist()

        # get cell-types from rows
        all_types = [i[2] for i in inst_rows]

        types_list = sorted(list(set(all_types)))

        # get the counts of all cell types in the subsampled data
        inst_counts = Counter(all_types)

        for inst_type in inst_counts.keys():

            # initialize counts
            if inst_type not in sampled_counts[inst_treatment]:
                sampled_counts[inst_treatment][inst_type] = []

            # append new count
            sampled_counts[inst_treatment][inst_type].append( (inst_counts[inst_type]/float(num_samples))*100 )
            
    # change to numpy array 
    for inst_type in sampled_counts[inst_treatment]:
        sampled_counts[inst_treatment][inst_type] = np.asarray(sampled_counts[inst_treatment][inst_type])
    
    
    
print('done')

done


In [134]:
print(sampled_counts['pma']['B cells'].mean())
print(sampled_counts['pma']['B cells'].std())

6.215
0.472995771651


In [135]:
print(sampled_counts['plasma']['B cells'].mean())
print(sampled_counts['plasma']['B cells'].std())

6.2155
0.56910873302


## Calc Stats and Significant Differences Between Plasma and PMA

In [146]:
results = {}

for inst_type in sampled_counts['plasma']:
    
    results[inst_type] = {}
    
    a = sampled_counts['plasma'][inst_type]
    b = sampled_counts['pma'][inst_type]

    a_mean = a.mean()
    b_mean = b.mean()
    
    a_std = a.std()
    b_std = b.std()    
    
    results[inst_type]['plasma_mean'] = a_mean
    results[inst_type]['pma_mean'] = b_mean   

    results[inst_type]['plasma_std'] = a_std
    results[inst_type]['pma_std'] = b_std      
    
    print(inst_type)
    print('Plasma: ' + str(a_mean) + ' PMA: ' + str(b_mean))

    t, p = ttest_ind(a, b, equal_var=False)
    
    results[inst_type]['ttest_t'] = t
    results[inst_type]['ttest_pval'] = p

    print('ttest_ind: ' + str(p) + '\n')

NK cells_CD56hi
Plasma: 1.085 PMA: 1.1085
ttest_ind: 0.427762153249

CD4 Tcells
Plasma: 23.6305 PMA: 23.6835
ttest_ind: 0.696218828586

Undefined
Plasma: 3.912 PMA: 3.8775
ttest_ind: 0.528964347678

B cells
Plasma: 6.2155 PMA: 6.215
ttest_ind: 0.994642994464

CD8 Tcells
Plasma: 13.8815 PMA: 14.0155
ttest_ind: 0.271429925343

CD1c DCs
Plasma: 2.321 PMA: 2.353
ttest_ind: 0.468327298808

NK cells_CD16hi_CD57hi
Plasma: 5.9635 PMA: 5.8535
ttest_ind: 0.108796687434

CD4 Tcells_CD161hi
Plasma: 3.6075 PMA: 3.607
ttest_ind: 0.993150834628

NK cells_CD16hi
Plasma: 7.826 PMA: 7.857
ttest_ind: 0.728304150694

CD14hi monocytes
Plasma: 8.536 PMA: 8.449
ttest_ind: 0.333920750767

CD4 Tcells+CD27hi
Plasma: 10.9085 PMA: 10.9095
ttest_ind: 0.991280263886

CD14low monocytes
Plasma: 0.5445 PMA: 0.564
ttest_ind: 0.475338973344

CD4 Tcells_CD127hi
Plasma: 4.3275 PMA: 4.3475
ttest_ind: 0.779181874194

Basophils
Plasma: 1.2065 PMA: 1.153
ttest_ind: 0.124360203148

Neutrophils
Plasma: 0.144086021505 PMA: 0.134

In [147]:
# results

In [148]:
# these are two functions that save and load data in json

# save dict to json
def save_to_json(inst_dict, filename, indent='no-indent'):
  import json

  # save as a json
  fw = open(filename, 'w')
  if indent == 'indent':
    fw.write( json.dumps(inst_dict, indent=2) )
  else:
    fw.write( json.dumps(inst_dict) )
  fw.close()


# load json to dict
def load_to_dict( filename ):
  import json
  # load
  f = open(filename,'r')
  inst_dict = json.load(f)
  f.close()
  return inst_dict


In [149]:
save_to_json(results, 'cell_type_D3_graph/PMA_cell_type_results.json')

In [150]:
results

{'B cells': {'plasma_mean': 6.2155000000000005,
  'plasma_std': 0.5691087330203255,
  'pma_mean': 6.2149999999999999,
  'pma_std': 0.47299577165129081,
  'ttest_pval': 0.9946429944637516,
  'ttest_t': 0.0067228282389649565},
 'Basophils': {'plasma_mean': 1.2064999999999999,
  'plasma_std': 0.24006821947104956,
  'pma_mean': 1.153,
  'pma_std': 0.24767115294276806,
  'ttest_pval': 0.12436020314846763,
  'ttest_t': 1.5432838236493891},
 'CD14hi monocytes': {'plasma_mean': 8.5359999999999996,
  'plasma_std': 0.60357601012631368,
  'pma_mean': 8.4490000000000016,
  'pma_std': 0.65905159130374613,
  'ttest_pval': 0.33392075076704331,
  'ttest_t': 0.96862971125326569},
 'CD14low monocytes': {'plasma_mean': 0.54449999999999998,
  'plasma_std': 0.18985454958994266,
  'pma_mean': 0.56400000000000017,
  'pma_std': 0.19378854455307723,
  'ttest_pval': 0.47533897334374109,
  'ttest_t': -0.71518268187146583},
 'CD1c DCs': {'plasma_mean': 2.3209999999999997,
  'plasma_std': 0.28505964288197655,
  'p