### Z-Test

In [16]:
import math
import numpy as np
from numpy.random import randn
from statsmodels.stats.weightstats import ztest

# Simulated sample for the one-sample z-test below.
mean_iq = 110      # mean used to generate the sample
n_samples = 50     # number of simulated observations
# NOTE(review): 15/sqrt(n) is the standard error of a mean for a population
# sd of 15, yet it is used here as the spread of the individual observations.
# Kept as in the original; confirm that this is the intended design.
sd_iq = 15/math.sqrt(n_samples)
alpha = 0.05       # significance level used by the decision rule
mu0 = 100          # mean under the null hypothesis

# A locally seeded generator makes the sample identical on every run without
# touching NumPy's global random state.
rng = np.random.default_rng(42)
data = sd_iq*rng.standard_normal(n_samples)+mean_iq

# np.std defaults to ddof=0, i.e. the population formula.
print("mean = %.2f standard deviation = %.2f" % (np.mean(data), np.std(data)))

mean = 109.92 standard deviation = 2.24


In [17]:
# One-sample z-test of the sample mean against mu0 with the "larger"
# (one-sided) alternative, followed by the decision at level alpha.
ztest_score, pvalue = ztest(data, value=mu0, alternative="larger")
decision = "Reject Null Hypothesis" if pvalue < alpha else "Fail to Reject Null Hypothesis"
print(decision)

Reject Null Hypothesis


### One sample t-test

In [None]:
import numpy as np
# Observed sample and the mean hypothesized under H0.
# Note: this rebinds mu0, which the z-test cells above also use.
x = [1.83, 1.83, 1.73, 1.82, 1.83, 1.73, 1.99, 1.85, 1.68, 1.87]
mu0 = 1.75

# Sample summaries; ddof=1 gives the unbiased (n-1) standard deviation.
n = len(x)
x_bar = np.mean(x)
s = np.std(x, ddof=1)

# t statistic: distance of the sample mean from mu0 in standard-error units.
standard_error = s / np.sqrt(n)
tcal = (x_bar - mu0) / standard_error
print(tcal)

In [None]:
import scipy.stats as stats
import matplotlib.pyplot as plt

# Plot the Student t density with n-1 degrees of freedom and shade the upper
# tail beyond the observed statistic tcal (the one-sided p-value region).
# Relies on n and tcal computed in the one-sample t-test cell.
fig, ax = plt.subplots()

tvalues = np.linspace(-10, 10, 100)
ax.plot(tvalues, stats.t.pdf(tvalues, n-1), 'b-', label="T(n-1)")

# Only the grid points strictly above tcal are shaded.
upper_tval_tvalues = tvalues[tvalues > tcal]
ax.fill_between(upper_tval_tvalues, 0, stats.t.pdf(upper_tval_tvalues, n-1), alpha=0.8, label="p-value")

# Labels so the figure can be read on its own.
ax.set_xlabel("t")
ax.set_ylabel("Probability density")
ax.set_title("Student t distribution and upper-tail p-value")
ax.legend()
# Explicit show() keeps the Legend repr out of the cell output.
plt.show()

### Testing pairwise associations

##### Pearson correlation test: test association between two quantitative variables

In [None]:
import numpy as np
import scipy.stats as stats
# Simulate two linearly related variables: y is x shifted by 2 plus
# independent standard-normal noise.
# Note: this rebinds n, which the one-sample t-test cells above also use.
n = 50
# A locally seeded generator keeps the example reproducible without touching
# NumPy's global random state.
rng = np.random.default_rng(42)
x = rng.normal(size=n)
y = 2 + x + rng.normal(size=n)

# Compute with scipy
cor, pval = stats.pearsonr(x, y)
# Show the result; the correlation and p-value were previously computed and
# then discarded without being displayed.
print("Pearson r = %.3f, p-value = %.3g" % (cor, pval))

##### Two sample (Student) 𝑡-test: compare two means

In [None]:
import scipy.stats as stats

# Twenty height measurements; the first ten are labelled "M", the last ten "F".
height = np.array([
    1.83, 1.83, 1.73, 1.82, 1.83, 1.73, 1.99, 1.85, 1.68, 1.87,
    1.66, 1.71, 1.73, 1.64, 1.70, 1.60, 1.79, 1.73, 1.62, 1.77,
])
grp = np.repeat(["M", "F"], 10)

# Compute with scipy: two-sample t-test with equal_var=True (pooled variance).
is_male = grp == "M"
result = stats.ttest_ind(height[is_male], height[~is_male], equal_var=True)
print(result)

##### Chi-square, $\chi^2$ (categorical ~ categorical)

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats

# 15 samples: the first 10 have a canalar tumor (1), the last 5 do not (0)
canalar_tumor = np.array([1] * 10 + [0] * 5)
# The first 8 have metastasis (1), the next 6 do not (0), the last one does
meta = np.array([1] * 8 + [0] * 6 + [1])

# Observed 2x2 contingency table: rows = tumor status, columns = metastasis.
crosstab = pd.crosstab(canalar_tumor, meta, rownames=["canalar_tumor"], colnames=["meta"])
print("Observed table:")
print("---------------")
print(crosstab)

# Chi-square test of independence. With one degree of freedom (a 2x2 table)
# scipy applies Yates' continuity correction by default (correction=True).
chi2, pval, dof, expected = stats.chi2_contingency(crosstab)
print("Statistics:")
print("-----------")
print("Chi2 = %f, pval = %f" % (chi2, pval))
print("Expected table:")
print("---------------")
print(expected)

In [None]:
# Compute expected cross-table based on proportion
# Column margins: counts and relative frequencies of each metastasis level.
meta_marg = crosstab.sum(axis=0)
meta_freq = meta_marg / meta_marg.sum()

# Row margins: counts and relative frequencies of each tumor level.
canalar_tumor_marg = crosstab.sum(axis=1)
canalar_tumor_freq = canalar_tumor_marg / canalar_tumor_marg.sum()

# The margins are indexed by the category codes: 1 = present ("Yes"),
# 0 = absent ("No"). Index by label so "Yes" reports the frequency of code 1;
# reading position 0 as "Yes" would swap the two values.
print("Canalar tumor frequency? Yes: %.2f" % canalar_tumor_freq.loc[1], "No: %.2f" % canalar_tumor_freq.loc[0])
print("Metastasis frequency? Yes: %.2f" % meta_freq.loc[1], "No: %.2f" % meta_freq.loc[0])

# Under independence each joint frequency is the product of its margins.
expected_freq = np.outer(canalar_tumor_freq, meta_freq)
print("Expected frequencies:")
print(expected_freq)

# Scale the frequencies by the sample size to get expected counts.
print("Expected cross-table (frequencies * N): ")
print(expected_freq * len(canalar_tumor))