## Import Libraries

In [None]:
from random import randrange
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind
from scipy.stats import ttest_1samp
from scipy.stats import chi2_contingency
from scipy.stats import f_oneway
from statsmodels.stats.power import TTestIndPower

## One-sided One Sample t-Test (team height example from the slides in topic 4 part 2)

In [None]:
## Simulate the heights of the 18 players in our team.
## The values are drawn at random (integers from 180 up to and including 204),
## so your numbers will differ from run to run.
team_heights = [randrange(180, 205) for _ in range(18)]

print(team_heights)

## np.std uses its default ddof=0 here, i.e. the population standard deviation.
mean_height = np.mean(team_heights)
std_height = np.std(team_heights)
print(f"Average player height in our team = {mean_height:.5f}")
print(f"STD of player height in our team = {std_height:.5f}")

In [None]:
## popmean is the mean player height in the league as was explained in the lecture.
## Ask SciPy for the one-sided p-value directly instead of halving the two-sided
## one: halving is only correct when the t statistic happens to fall on the side
## of the alternative hypothesis, otherwise the true value is 1 - p/2.
## NOTE(review): alternative='less' assumes the alternative hypothesis is "our
## team's mean height is below the league mean" -- confirm against the lecture
## and switch to alternative='greater' if the hypothesis points the other way.
tscore, pvalue = ttest_1samp(team_heights, popmean=201, alternative='less')
print("t Statistic: ", tscore)
print(f'p-value for single sided test: {pvalue:.5f}')

## One-sided Two Sample t-Test (Sleep apnoea events example from the slides in topic 4 part 2)

In [None]:
## Simulated values for the 13 people who followed the diet.
## The values are drawn at random (integers from 3 up to and including 8),
## so your numbers will differ from run to run.
diet = [randrange(3, 9) for _ in range(13)]
print(diet)

In [None]:
## Simulated values for the 14 people who did not follow the diet.
## The values are drawn at random (integers from 4 up to and including 9),
## so your numbers will differ from run to run.
nodiet = [randrange(4, 10) for _ in range(14)]
print(nodiet)

In [None]:
## Run the two-sample t-test and extract the p-value.
## equal_var=False selects Welch's t-test, which does not assume that both
## groups have the same variance.
## The one-sided p-value is requested through the `alternative` argument rather
## than by halving the two-sided p-value: halving is only correct when the
## t statistic falls on the side of the alternative hypothesis, which random
## data does not guarantee.
## NOTE(review): alternative='less' assumes the alternative hypothesis is
## "mean(diet) < mean(nodiet)" -- confirm against the lecture slides.
## Please read the documentation of the ttest_ind function
res = ttest_ind(diet, nodiet, equal_var=False, alternative='less')
print(f'p-value for single sided test: {res.pvalue:.5f}')

## One-way ANOVA for multiple groups
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.f_oneway.html

Data from: http://www.biostathandbook.com/onewayanova.html

In [None]:
## One list of measurements per location (five groups of unequal size);
## see the data source linked above for what was measured.
tillamook = [0.0571, 0.0813, 0.0831, 0.0976, 0.0817, 0.0859, 0.0735,
             0.0659, 0.0923, 0.0836]
newport = [0.0873, 0.0662, 0.0672, 0.0819, 0.0749, 0.0649, 0.0835,
           0.0725]
petersburg = [0.0974, 0.1352, 0.0817, 0.1016, 0.0968, 0.1064, 0.105]
magadan = [0.1033, 0.0915, 0.0781, 0.0685, 0.0677, 0.0697, 0.0764,
           0.0689]
tvarminne = [0.0703, 0.1026, 0.0956, 0.0973, 0.1039, 0.1045]

## f_oneway performs a one-way ANOVA across all five groups. The printed label
## says "one-way" (one factor); an ANOVA has no one-sided/two-sided variant.
res = f_oneway(tillamook, newport, petersburg, magadan, tvarminne)
print(f'p-value for one-way ANOVA: {res.pvalue:.5f}')

## Chi-square test

In [None]:
## Counts table for the dice data: a1..a6 are the rows labelled 'One'..'Six',
## and each row holds one value per experiment column ('Exp1'..'Exp4').
a1 = [8, 6, 6, 4]
a2 = [6, 6, 6, 6]
a3 = [4, 5, 5, 7]
a4 = [5, 6, 6, 7]
a5 = [7, 7, 5, 4]
a6 = [5, 5, 7, 7]
dice = np.array([a1, a2, a3, a4, a5, a6])

## Wrap the raw array in a labelled DataFrame for display.
df = pd.DataFrame(
    dice,
    index=['One', 'Two', 'Three', 'Four', 'Five', 'Six'],
    columns=['Exp1', 'Exp2', 'Exp3', 'Exp4'],
)
## Tip: df.sum(axis=1) gives the row totals.

In [None]:
## Display the labelled dice table (a notebook renders a cell's last expression)
df

In [None]:
## chi2_contingency returns, in order: the test statistic, the p-value,
## the degrees of freedom and the array of expected frequencies.
chi2_stat, p_val, dof, ct = chi2_contingency(dice)
print("Chi2 Stat = ", chi2_stat)

print("Degrees of Freedom =", dof)

print("p-value =", p_val)

## `ct` holds the frequencies expected under independence (computed from the
## table's marginal sums), not the observed contingency table, so it is
## labelled accordingly.
print("Expected Frequencies:")
print(ct)

## Power Analysis

* Effect Size: The minimum size of the effect that you hope to be able to detect in a statistical test, such as “a 20% improvement in click rates”.
* Statistical Power: The probability of detecting a given effect size with a given sample size (i.e. the probability of rejecting the null hypothesis when the alternative hypothesis is true).
* Sample Size: The number of observations in the sample.
* Significance level: The statistical significance level at which the test will be conducted (i.e. alpha, which is often set to 5% or 0.05).


In [None]:
# Use power analysis to estimate the sample size required for an
# independent two-sample t-test with the settings below.
effect = 0.88  # effect size we want to be able to detect
power = 0.85   # desired statistical power
alpha = 0.05   # significance level

# The sample size is the one parameter left unspecified, so solve_power
# solves for it.
pow_analysis = TTestIndPower()
result = pow_analysis.solve_power(effect_size=effect, alpha=alpha, power=power)

print(f'Sample Size: {result:.2f}')