# T-test
example

In [67]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
from collections import Counter

In [None]:
from clustergrammer_widget import *
net = Network(clustergrammer_widget)

In [2]:
np.random.seed(99)

# Create sample data.
a = np.random.randn(10000)
b = np.random.randn(10000) + 0.1

# Use scipy.stats.ttest_ind.
t, p = ttest_ind(a, b, equal_var=False)

print('ttest_ind:' + str(p))

ttest_ind:2.81708330972e-07


# Cell-Type Populations in Plasma and PMA Treated

## Load Data

In [3]:
# Plasma Treated
net.load_file('../cytof_data/Plasma_UCT.txt')
net.random_sample(axis='row', num_samples=110000, random_state=99)
df_plasma = net.export_df()

print(df_plasma.shape)

# PMA Treated
net.load_file('../cytof_data/PMA_UCT.txt')
net.random_sample(axis='row', num_samples=110000, random_state=99)
df_pma = net.export_df()

print(df_pma.shape)

(110000, 28)
(110000, 28)


## Randomly Sample Data and Calc Cell-Type Populations

In [109]:
np.random.seed(99)
num_samples = 2000
num_runs = 5

sampled_counts = {}
sampled_counts['plasma'] = {}
sampled_counts['pma'] = {}

for inst_treatment in ['plasma', 'pma']:
    
    for i in range(num_runs):

        # randomly subsample 
        random_state = int(np.random.random()*100)    
        net.load_df(df_plasma)
        net.random_sample(axis='row', num_samples=num_samples, random_state=random_state)
        inst_df = net.export_df()
        inst_rows = inst_df.index.tolist()

        # get cell-types from rows
        all_types = [i[2] for i in inst_rows]

        types_list = sorted(list(set(all_types)))

        # get the counts of all cell types in the subsampled data
        inst_counts = Counter(all_types)

        for inst_type in inst_counts.keys():

            # initialize counts
            if inst_type not in sampled_counts[inst_treatment]:
                sampled_counts[inst_treatment][inst_type] = []

            # append new count
            sampled_counts[inst_treatment][inst_type].append(inst_counts[inst_type])
            
    # change to numpy array 
    for inst_type in sampled_counts[inst_treatment]:
        sampled_counts[inst_treatment][inst_type] = np.asarray(sampled_counts[inst_treatment][inst_type])
    
    
    


In [110]:
print(sampled_counts['pma']['B cells'].mean())
print(sampled_counts['pma']['B cells'].std())

118.6
11.9599331102


In [111]:
print(sampled_counts['plasma']['B cells'].mean())
print(sampled_counts['plasma']['B cells'].std())

126.6
9.32952303175


## Calc Stats and Significant Differences Between Plasma and PMA

In [119]:
results = {}

for inst_type in sampled_counts['plasma']:
    
    results[inst_type] = {}
    
    a = sampled_counts['plasma'][inst_type]
    b = sampled_counts['pma'][inst_type]

    a_mean = a.mean()
    b_mean = b.mean()
    
    a_std = a.std()
    b_std = b.std()    
    
    results[inst_type]['plasma_mean'] = a_mean
    results[inst_type]['pma_mean'] = b_mean   

    results[inst_type]['plasma_std'] = a_std
    results[inst_type]['pma_std'] = b_std      
    
    print(inst_type)
    print('Plasma: ' + str(a_mean) + ' PMA: ' + str(b_mean))

    t, p = ttest_ind(a, b, equal_var=False)
    
    results[inst_type]['ttest_t'] = t
    results[inst_type]['ttest_pval'] = t

    print('ttest_ind: ' + str(p) + '\n')

NK cells_CD56hi
Plasma: 22.8 PMA: 18.8
ttest_ind: 0.21888902798

CD4 Tcells
Plasma: 468.2 PMA: 455.2
ttest_ind: 0.397334309517

Undefined
Plasma: 75.4 PMA: 75.8
ttest_ind: 0.940799333083

B cells
Plasma: 126.6 PMA: 118.6
ttest_ind: 0.324076202332

CD8 Tcells
Plasma: 281.8 PMA: 291.0
ttest_ind: 0.332279668434

CD1c DCs
Plasma: 45.8 PMA: 41.4
ttest_ind: 0.211929166973

NK cells_CD16hi_CD57hi
Plasma: 118.6 PMA: 119.0
ttest_ind: 0.951993687058

CD4 Tcells_CD161hi
Plasma: 72.4 PMA: 75.6
ttest_ind: 0.488680775934

NK cells_CD16hi
Plasma: 159.0 PMA: 170.6
ttest_ind: 0.177854425695

CD14hi monocytes
Plasma: 168.0 PMA: 172.0
ttest_ind: 0.683538563096

CD4 Tcells+CD27hi
Plasma: 221.2 PMA: 220.4
ttest_ind: 0.944297701953

CD14low monocytes
Plasma: 11.2 PMA: 11.8
ttest_ind: 0.849221485021

CD4 Tcells_CD127hi
Plasma: 83.8 PMA: 82.0
ttest_ind: 0.787429810791

Basophils
Plasma: 25.8 PMA: 25.8
ttest_ind: 1.0

Neutrophils
Plasma: 3.0 PMA: 3.0
ttest_ind: 1.0

CD4 Tcells_Tregs
Plasma: 116.4 PMA: 119.0
tt

In [120]:
results

{'B cells': {'plasma_mean': 126.59999999999999,
  'plasma_std': 9.3295230317524798,
  'pma_mean': 118.59999999999999,
  'pma_std': 11.959933110180843,
  'ttest_pval': 1.0548253254997797,
  'ttest_t': 1.0548253254997797},
 'Basophils': {'plasma_mean': 25.800000000000001,
  'plasma_std': 4.9558046773455473,
  'pma_mean': 25.800000000000001,
  'pma_std': 4.4899888641287298,
  'ttest_pval': 0.0,
  'ttest_t': 0.0},
 'CD14hi monocytes': {'plasma_mean': 168.0,
  'plasma_std': 16.780941570722426,
  'pma_mean': 172.0,
  'pma_std': 8.1486195149853451,
  'ttest_pval': -0.42884501393511792,
  'ttest_t': -0.42884501393511792},
 'CD14low monocytes': {'plasma_mean': 11.199999999999999,
  'plasma_std': 2.9933259094191529,
  'pma_mean': 11.800000000000001,
  'pma_std': 5.2687759489277965,
  'ttest_pval': -0.19802950859533533,
  'ttest_t': -0.19802950859533533},
 'CD1c DCs': {'plasma_mean': 45.799999999999997,
  'plasma_std': 5.2687759489277965,
  'pma_mean': 41.399999999999999,
  'pma_std': 3.666060555