In [2]:
import numpy as np
import pandas as pd
from __future__ import division
from scipy import stats
from statsmodels.stats.descriptivestats import sign_test
from statsmodels.stats.weightstats import zconfint

In [57]:
def get_bootstrap_samples(data, n_samples):
    """Draw bootstrap resamples (sampling with replacement) from ``data``.

    Parameters
    ----------
    data : array-like
        Observations to resample. Converted with ``np.asarray`` so that
        lists and pandas Series are accepted alongside NumPy arrays
        (a Series would otherwise be indexed by label, not by position).
    n_samples : int
        Number of resamples to draw.

    Returns
    -------
    numpy.ndarray
        For 1-D ``data``, an array of shape ``(n_samples, len(data))``
        in which every row is one resample.

    Notes
    -----
    Indices come from the global NumPy random state
    (``np.random.randint``); call ``np.random.seed`` beforehand for
    reproducible draws.
    """
    data = np.asarray(data)
    indices = np.random.randint(0, len(data), (n_samples, len(data)))
    # Fancy indexing with a 2-D index array yields one resample per row.
    return data[indices]
def stat_intervals(stat, alpha):
    """Return the two-sided percentile interval of ``stat`` at level ``alpha``.

    The bounds are the ``100 * alpha / 2`` and ``100 * (1 - alpha / 2)``
    percentiles of ``stat`` as computed by ``np.percentile``; they are
    returned as a two-element array ``[lower, upper]``.
    """
    lower_pct = 100 * alpha / 2.
    upper_pct = 100 * (1 - alpha / 2.)
    return np.percentile(stat, [lower_pct, upper_pct])

def permutation_t_stat_ind(sample1, sample2):
    """Test statistic for two independent samples: mean(sample1) - mean(sample2)."""
    mean_first = np.mean(sample1)
    mean_second = np.mean(sample2)
    return mean_first - mean_second

def get_random_combinations(n1, n2, max_combinations):
    """Generate random splits of the indices ``0 .. n1 + n2 - 1`` into two groups.

    Parameters
    ----------
    n1 : int
        Size of the first group.
    n2 : int
        Size of the second group.
    max_combinations : int
        Upper bound on the number of splits returned. The identity ordering
        is always included; ``max_combinations - 1`` further orderings are
        drawn by shuffling.

    Returns
    -------
    list of (tuple, tuple)
        Each element is ``(first_n1_indices, remaining_n2_indices)`` of one
        ordering. Repeated orderings are collapsed through a set, so fewer
        than ``max_combinations`` items may be returned. Deduplication is
        by full ordering, not by group membership, and the order of the
        returned list is the set's iteration order.

    Notes
    -----
    Uses the global NumPy random state (``np.random.shuffle``).
    """
    # np.random.shuffle permutes in place, so it needs a mutable list:
    # a bare Python 3 ``range`` is immutable and would raise TypeError.
    order = list(range(n1 + n2))
    seen = {tuple(order)}
    for _ in range(max_combinations - 1):
        # Each shuffle starts from the previous ordering, not from identity.
        np.random.shuffle(order)
        seen.add(tuple(order))
    return [(perm[:n1], perm[n1:]) for perm in seen]

def permutation_test(sample, mean, max_permutations = None, alternative = 'two-sided'):
    """Permutation-test p-value for a difference in means of two samples.

    Parameters
    ----------
    sample, mean :
        The two samples being compared. Despite its name, ``mean`` is passed
        on as the second sample to both ``permutation_t_stat_ind`` and
        ``permutation_zero_dist_ind``.
    max_permutations :
        Forwarded unchanged to ``permutation_zero_dist_ind``.
    alternative : {'two-sided', 'less', 'greater'}
        Which tail(s) of the null distribution count as "at least as extreme"
        as the observed statistic.

    Returns
    -------
    float
        Fraction of null-distribution values at least as extreme as the
        observed statistic.

    Raises
    ------
    ValueError
        If ``alternative`` is not one of the three recognised values.

    NOTE(review): ``permutation_zero_dist_ind`` is not defined in the visible
    part of this notebook; it must exist in the namespace before this
    function is called -- confirm where it comes from.
    """
    if alternative not in ('two-sided', 'less', 'greater'):
        raise ValueError("alternative not recognized\n"
                         "should be 'two-sided', 'less' or 'greater'")

    observed = permutation_t_stat_ind(sample, mean)
    null_distr = permutation_zero_dist_ind(sample, mean, max_permutations)

    if alternative == 'two-sided':
        threshold = abs(observed)
        extreme = [abs(value) >= threshold for value in null_distr]
    elif alternative == 'less':
        extreme = [value <= observed for value in null_distr]
    else:
        # Only 'greater' is left after the validation above.
        extreme = [value >= observed for value in null_distr]

    # float() keeps true division regardless of the interpreter's defaults.
    return float(sum(extreme)) / len(null_distr)

In [66]:
# Ten observations, listed in ascending order.
data = np.array([
    49, 58, 75, 110, 112,
    132, 151, 276, 281, 362,
])
# Reference value the observations are compared against.
med = 200

In [6]:
# Wilcoxon signed-rank test on the deviations of `data` from `med`.
# `data` is already a NumPy array, so the subtraction broadcasts directly.
stats.wilcoxon(data - med)

WilcoxonResult(statistic=17.0, pvalue=0.2845026979112075)

In [7]:
# Two independent groups of measurements (12 and 9 values respectively).
data_2_neg = [
    22, 22, 15, 13, 19, 19,
    18, 20, 21, 13, 13, 15,
]
data_2_pos = [
    17, 18, 18, 15, 12,
    4, 14, 15, 10,
]

In [8]:
# One-sided Mann-Whitney U test; with alternative="less" the first sample
# (data_2_pos) is the one hypothesised to be shifted towards smaller values.
stats.mannwhitneyu(
    data_2_pos,
    data_2_neg,
    alternative="less",
)

MannwhitneyuResult(statistic=27.0, pvalue=0.02900499272087373)

In [9]:
# Load the tab-separated Challenger table from the working directory.
challenger = pd.read_csv(
    'challenger.txt',
    sep="\t",
)
# Cell output: (number of rows, number of columns).
challenger.shape

(23, 3)

In [45]:
# Fix the global NumPy seed so the bootstrap draws below are reproducible.
np.random.seed(0)
# Temperatures split by the value of the Incident column (1 vs 0).
days_damage = challenger[challenger.Incident==1].Temperature
days_no_damage = challenger[challenger.Incident==0].Temperature

# Mean temperature of each of 1000 bootstrap resamples, per group.
# List comprehensions are used instead of `map` so these are real lists on
# both Python 2 and Python 3 (a lazy Python 3 `map` object cannot be fed to
# np.percentile and would be exhausted after one pass).
data_damage = [np.mean(resample)
               for resample in get_bootstrap_samples(np.array(days_damage), 1000)]
data_no_damage = [np.mean(resample)
                  for resample in get_bootstrap_samples(np.array(days_no_damage), 1000)]

# Pairwise difference of resample means: Incident == 0 minus Incident == 1.
delta = [no_damage_mean - damage_mean
         for damage_mean, no_damage_mean in zip(data_damage, data_no_damage)]

# 95% percentile interval of the difference, rounded to 4 decimals.
# This last expression is the cell's displayed output.
[round(float(bound), 4) for bound in stat_intervals(delta, 0.05)]

[1.4504, 8.0646]

In [63]:
# Re-seed so the randomly drawn permutations are reproducible.
np.random.seed(0)
# Two-sided permutation test on the two temperature groups, using at most
# 10000 random permutations. The single-argument parenthesized print is
# valid (and prints the same text) on both Python 2 and Python 3.
print("p-value: %f" % permutation_test(days_damage, days_no_damage,
                                       max_permutations = 10000, alternative="two-sided"))

p-value: 0.005700


p-value: 0.000000
