In [13]:
import numpy as np
import pandas as pd

from scipy import stats

from statsmodels.stats.descriptivestats import sign_test
from statsmodels.stats.weightstats import zconfint

import matplotlib.pyplot as plt

%matplotlib inline


## Data: Verizon repair time

Verizon was an Incumbent Local Exchange Carrier (ILEC), responsible for maintaining land-line phone service in certain areas. Verizon also sold long-distance service, as did a number of competitors, termed Competitive Local Exchange Carriers (CLEC). When something went wrong, Verizon was responsible for repairs, and was supposed to make repairs as quickly for CLEC long-distance customers as for their own. The New York Public Utilities Commission (PUC) monitored fairness by comparing repair times for Verizon and different CLECs, for different classes of repairs and time periods.

The data is for one combination of CLEC, class of service, and period.

In [2]:
# Read the tab-separated Verizon repair-time table and preview its first rows.
verizon_data = pd.read_csv(
    'work/hse/applied_stats/week3/verizon.csv',
    delimiter='\t',
)
verizon_data.head(5)

Unnamed: 0,Time,Group
0,17.5,ILEC
1,2.4,ILEC
2,0.0,ILEC
3,0.65,ILEC
4,22.23,ILEC


In [3]:
# Repair times per carrier group, extracted as plain NumPy arrays.
ilec_time = verizon_data.loc[verizon_data['Group'] == 'ILEC', 'Time'].values
clec_time = verizon_data.loc[verizon_data['Group'] == 'CLEC', 'Time'].values

In [5]:
# Difference of sample means, ILEC minus CLEC (negative => CLEC repairs took longer on average).
np.mean(ilec_time) - np.mean(clec_time)

-8.097519857859533

In [11]:
# Sign test of the CLEC repair times against a hypothesised median of 8.
sign_test(clec_time, mu0=8)

(3.5, 0.21003961563110352)

In [12]:
# One-sided sign test of H1: median(CLEC repair time) > 8.
# sign_test returns (statistic, p-value); only the p-value is needed here.
# The result is indexed rather than unpacked into `_`, because `_` is the name
# IPython uses for the previous cell output and IPython stops refreshing it
# once user code assigns to it.
p = sign_test(clec_time, 8)[1]
# Halving the two-sided p-value gives the one-sided value here because the
# sign-test statistic is positive, i.e. the data fall on the side of the
# alternative.
p / 2

0.10501980781555176

In [30]:
# Exact binomial cross-check of the one-sided sign test: number of CLEC repair
# times above 8 out of all CLEC observations, against a success probability of 0.5.
stats.binomtest(
    k=clec_time[clec_time > 8].size,
    n=clec_time.size,
    p=0.5,
    alternative='greater',
)

BinomTestResult(k=15, n=23, alternative='greater', proportion_estimate=0.6521739130434783, pvalue=0.10501980781555176)

In [17]:
# Sample mean repair time for each group: (ILEC, CLEC).
np.mean(ilec_time), np.mean(clec_time)

(8.411610576923076, 16.50913043478261)

In [21]:
# Welch two-sample t-test (equal_var=False: group variances are not pooled).
# alternative='greater' sets H1 to mean(ilec_time) > mean(clec_time).
# NOTE(review): the ILEC sample mean is the smaller of the two, so this
# one-sided p-value lands near 1. If the question is whether CLEC repairs are
# slower, alternative='less' is presumably what was intended — confirm.
stats.ttest_ind(
    ilec_time,
    clec_time,
    equal_var=False,
    alternative='greater',
)

Ttest_indResult(statistic=-1.9833785404229538, pvalue=0.9701269392926206)

In [31]:
# Interactive IPython help lookup (signature + docstring) for the paired t-test.
# NOTE(review): scratch lookup with no effect on the analysis; consider removing
# this cell before sharing the notebook.
stats.ttest_rel?

Signature:
stats.ttest_rel(
    a,
    b,
    axis=0,
    nan_policy='propagate',
    alternative='two-sided',
)
Docstring:
Calculate the t-test on TWO RELATED samples of scores, a and b.

This is a test for the null hypothesis that two related or
repeated samples have identical average (expected) values.

Parameters
----------
a, b : array_like
    The arrays must have the same shape.
axis : int or None, optional
    Axis along which to compute test. If None, compute over the whole
    arrays, `a`, and `b`.
nan_policy : {'propagate', 'raise', 'omit'}, optional
    Defines how to handle when input conta

In [29]:
# One-sample Wilcoxon signed-rank test on the ILEC repair times after
# subtracting the CLEC sample mean; alternative='greater' is one-sided towards
# positive differences.
# NOTE(review): this treats the CLEC mean as a fixed constant, and the two
# groups are separate samples rather than pairs; a two-sample rank test such
# as stats.mannwhitneyu may be the better fit. The alternative also points
# away from the observed difference (ILEC mean is lower) — confirm the intent.
stats.wilcoxon(
    ilec_time - clec_time.mean(),
    alternative='greater',
)

WilcoxonResult(statistic=165073.0, pvalue=1.0)

In [28]:
# Interactive IPython help lookup (signature + docstring) for the signed-rank test.
# NOTE(review): scratch lookup with no effect on the analysis; consider removing
# this cell before sharing the notebook.
stats.wilcoxon?

Signature:
stats.wilcoxon(
    x,
    y=None,
    zero_method='wilcox',
    correction=False,
    alternative='two-sided',
    mode='auto',
    *,
    axis=0,
    nan_policy='propagate',
)
Docstring:
Calculate the Wilcoxon signed-rank test.

The Wilcoxon signed-rank test tests the null hypothesis that two
related paired samples come from the same distri