#### Libraries

In [1]:
import numpy as np
import pandas as pd
from scipy import stats

# 1-sample z-test for mean

Sources:<br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=2GU_R7G5m-8">zedstatistics - Youtube</a><br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=zJ8e_wAWUzE">The Organic Chemistry Tutor - Youtube</a>

$$
z=\frac{\bar{x}-\mu}{\frac{\sigma}{\sqrt{n}}}
$$

$$
\begin{array}{l}
z - \text{z-statistic} \\
\bar{x} - \text{samples mean} \\
\mu - \text{population mean} \\
\sigma - \text{population standard deviation} \\
n - \text{number of samples}
\end{array}
$$

In [57]:
data = np.random.normal(loc=3.6, size=25)
populationMean = 4
populationStd = 2

In [22]:
z = (data.mean() - populationMean) / (populationStd / np.sqrt(len(data)))

pValue = stats.norm.sf(abs(z))

print("one-tailed: {:.1f}%".format(pValue*100))
print("two-tailed: {:.1f}%".format(pValue*2*100))

one-tailed: 7.1%
two-tailed: 14.3%


# 1-sample t-test for mean

Sources:<br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=Z8E3hi8INZk">zedstatistics - Youtube</a><br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=zJ8e_wAWUzE">The Organic Chemistry Tutor - Youtube</a>

$$
t=\frac{\bar{x}-\mu}{\frac{s}{\sqrt{n}}} \hspace{2em} df=n-1
$$

$$
\begin{array}{l}
t - \text{t-statistic} \\
\bar{x} - \text{samples mean} \\
\mu - \text{population mean} \\
s - \text{samples standard deviation} \\
n - \text{number of samples} \\
df - \text{degrees of freedom}
\end{array}
$$

In [None]:
data = np.random.normal(loc=3.6, size=25)
populationMean = 4

In [38]:
result = stats.ttest_1samp(data, populationMean)

print("one-tailed: {:.1f}%".format(result.pvalue*100))
print("two-tailed: {:.1f}%".format(result.pvalue*2*100))

one-tailed: 11.0%
two-tailed: 22.0%


# 1-sample z-test for proportions

Sources:<br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=bh_5Yr3YjO0">zedstatistics - Youtube</a><br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=76VruarGn2Q">The Organic Chemistry Tutor - Youtube</a>

$$
z=\frac{\hat{p}-p}{\sqrt{\frac{p(1-p)}{n}}}
$$

$$
\begin{array}{l}
z - \text{z-statistic} \\
\hat{p} - \text{proportion for samples} \\
p - \text{proportion for population} \\
n - \text{number of samples}
\end{array}
$$

In [61]:
from statsmodels.stats.proportion import proportions_ztest

In [157]:
data = np.random.normal(scale=1, size=100)
nSuccesses = (data < 1).sum()
nSamples = len(data)
popProportion = 0.9 # normally 0.84

In [158]:
result = proportions_ztest(count=nSuccesses, nobs=nSamples, value=popProportion)

print("p-value: {:.1f}%".format(result[1]*100))

p-value: 3.7%


# 2-sample independent test for mean

Sources:<br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=UcZwyzwWU7o">The Organic Chemistry Tutor - Youtube</a>

$$
t=\frac{\overline{x}_A-\overline{x}_B}{\sqrt{\frac{s_A^2}{n_A}+\frac{s_B^2}{n_B}}} \hspace{2em} df=n_A+n_B-2
$$

$$
\begin{array}{l}
t - \text{t-statistic} \\
\overline{x}_A - \text{mean of A group samples} \\
\overline{x}_B - \text{mean of B group samples} \\
s_A - \text{standard deviation of A group samples} \\
s_B - \text{standard deviation of B group samples} \\
n_A - \text{number of sample in A group} \\
n_B - \text{number of sample in B group} \\
df - \text{degrees of freedom}
\end{array}
$$

In [150]:
group1Data = np.random.normal(loc=3.8, size=25)
group2Data = np.random.normal(loc=4.2, size=30)

In [152]:
result = stats.ttest_ind(group1Data, group2Data)

print("p-value: {:.1f}%".format(result.pvalue*100))

p-value: 7.2%


# 2-sample independent test for proportions

Sources:<br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=pCbNUnZ98oE">The Organic Chemistry Tutor - Youtube</a>

$$
z=\frac{\hat{p}_1-\hat{p}_2}{\sqrt{\hat{p}(1-\hat{p})(\frac{1}{n_1}+\frac{1}{n_2})}} \hspace{2em} \hat{p}=\frac{x_1+x_2}{n_1+n_2}
$$

$$
\begin{array}{l}
z - \text{z-statistic} \\
x_1 - \text{number of sample in first group that meet the condition} \\
x_2 - \text{number of sample in second group that meet the condition} \\
n_1 - \text{number of sample in first group} \\
n_2 - \text{number of sample in second group} \\
\hat{p}_1 - \text{proportion for first group} \\
\hat{p}_2 - \text{proportion for second group} \\
\hat{p} - \text{pooled proportion}
\end{array}
$$

In [145]:
group1Data = np.random.normal(loc=0, size=25)
nSuccessesGroup1 = (group1Data < 1).sum()
nSamplesGroup1 = len(group1Data)

group2Data = np.random.normal(loc=0.5, size=30)
nSuccessesGroup2 = (group2Data < 1).sum()
nSamplesGroup2 = len(group2Data)

In [147]:
result = proportions_ztest(count=[nSuccessesGroup1, nSuccessesGroup2], nobs=[nSamplesGroup1, nSamplesGroup2])

print("p-value: {:.1f}%".format(result[1]*100))

p-value: 1.1%


# chi-squared test

Sources:<br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=NTHA9Qa81R8">zedstatistics - Youtube</a><br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=HKDqlYSLt68">The Organic Chemistry Tutor - Youtube</a><br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.geeksforgeeks.org/python-pearsons-chi-square-test/">Geeks for Geeks</a>

$$
\chi^2=\sum_{i=1}^k\frac{(o_i-e_i)^2}{e_i} \hspace{2em} df=(n_{cols}-1)(n_{rows}-1)
$$

$$
\begin{array}{l}
\chi^2 - \text{suared chi statistic} \\
o_i - \text{observed value on }i\text{-th position} \\
e_i - \text{expected value on }i\text{-th position} \\
df - \text{degrees of freedom}
\end{array}
$$

In [160]:
from scipy.stats import chi2_contingency

In [159]:
data = pd.DataFrame({'men':[207, 282, 241], 'women':[234, 242, 232]}, index=['dog', 'cat', 'bird'])
data

Unnamed: 0,men,women
dog,207,234
cat,282,242
bird,241,232


In [163]:
result = chi2_contingency(data)

print("p-value: {:.1f}%".format(result.pvalue*100))

p-value: 10.3%


# oneway ANOVA test

Sources:<br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://www.youtube.com/watch?v=9cnSWads6oo">zedstatistics</a><br/>
&nbsp;&nbsp;&nbsp;&nbsp;<a href="https://chat.openai.com/share/d6d40a73-3081-4483-8d92-2573c7d8630c">Chat GPT</a>

$$
SSB=\sum_{i=1}^kn_i(\bar{X}_i-\bar{X})^2
$$
$$
SSW=\sum_{i=1}^k\sum_{j=1}^{n_i}(X_{ij}-\bar{X}_i)^2
$$
$$
MSB=\frac{SSB}{df_B} \hspace{2em} df_B=k-1
$$
$$
MSW=\frac{SSW}{df_W} \hspace{2em} df_W=N-k
$$
$$
F = \frac{MSB}{MSW}
$$
$$
\begin{array}{rl}
\begin{array}{l}
SSB - \text{sum of squares between groups} \\
SSW - \text{sum of squares within groups} \\
df_B - \text{degrees of freedom between groups} \\
df_W - \text{degrees of freedom within groups} \\
MSB - \text{mean square between groups} \\
MSW - \text{mean square within groups} \\
F - \text{f-statistic}
\end{array} &
\begin{array}{l}
n_i - \text{number of sample in group} i \\
\bar{X}_i - \text{mean in group} i \\
\bar{X} - \text{mean for all groups} \\
X_{ij} - j\text{-th observation in group} i \\
k - \text{number of groups} \\
N - \text{total number of all observations.}
\end{array}
\end{array}
$$

In [174]:
data = pd.DataFrame({
    'group1': [1, 2, 3],
    'group2': [2, 3, 4],
    'group3': [3, 4, 5],
    'group4': [4, 5, 6]
})

In [175]:
result = stats.f_oneway(*data.to_numpy().T.tolist())

print("p-value: {:.1f}%".format(result.pvalue*100))

p-value: 3.1%
