# Statistical Tests - Topic 02: Parametric Statistical Tests

## Topic Objectives
Apply and interpret parametric statistical tests: the independent T-test, the paired T-test and one-way ANOVA.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
import pingouin as pg
import scipy

## T-test: Comparing two independent groups

In [None]:
# Simulate two independent, normally distributed samples whose means differ
# (7 vs 8), giving the t-test further down a real difference to detect.
np.random.seed(123)  # fixed seed keeps the notebook reproducible
size = 250
# Draw order matters for reproducibility: Col1 first, then Col2.
col1_values = np.random.normal(loc=7, scale=1, size=size)
col2_values = np.random.normal(loc=8, scale=1.2, size=size)
df = pd.DataFrame({'Col1': col1_values, "Col2": col2_values})
df.head()

In [None]:
# Test every column of df for normality at alpha = 0.05 before relying on a
# parametric test.
normality_table = pg.normality(data=df, alpha=0.05)
normality_table

In [None]:
# Left panel: overlaid histograms with KDE curves and a dashed red line at each
# column's mean. Right panel: box plots of the same columns.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
hist_ax, box_ax = axes
sns.histplot(data=df, kde=True, ax=hist_ax)
for column_name in df.columns:
    column_mean = df[column_name].mean()
    hist_ax.axvline(column_mean, color='r', linestyle='dashed', linewidth=1)
sns.boxplot(data=df, ax=box_ax)
plt.show()

In [None]:
# Independent two-sample t-test between Col1 and Col2: print the full result
# table, then display just the p-value as the cell output.
sample_x = df['Col1']
sample_y = df['Col2']
ttest_result = pg.ttest(x=sample_x, y=sample_y)
print(ttest_result)
ttest_result.loc['T-test', 'p-val']

## Another T-test example

In [None]:
# Second simulated pair of samples: identical spread, means only 0.2 apart
# (7 vs 7.2).
np.random.seed(3)  # fixed seed keeps the notebook reproducible
size = 250
# Draw order matters for reproducibility: Col3 first, then Col4.
col3_values = np.random.normal(loc=7, scale=1, size=size)
col4_values = np.random.normal(loc=7.2, scale=1, size=size)
df = pd.DataFrame({'Col3': col3_values, "Col4": col4_values})
# Normality check of both columns at alpha = 0.05.
pg.normality(df, alpha=0.05)

In [None]:
# Left panel: overlaid histograms with KDE curves and a dashed red line at each
# column's mean. Right panel: box plots of the same columns.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
hist_ax, box_ax = axes
sns.histplot(data=df, kde=True, ax=hist_ax)
for column_name in df.columns:
    column_mean = df[column_name].mean()
    hist_ax.axvline(column_mean, color='r', linestyle='dashed', linewidth=1)
sns.boxplot(data=df, ax=box_ax)
plt.show()

In [None]:
# Independent t-test on the second pair of samples; display only the p-value.
second_ttest = pg.ttest(df['Col3'], df['Col4'])
second_ttest.loc['T-test', 'p-val']

## Paired T-test: Comparing same group across two time points

In [None]:
# Keep only the Meditation group and drop the January measurements, leaving
# the two time points mapped below (June and August).
df = (pg.read_dataset('mixed_anova')
    .query("Group == 'Meditation' and Time != 'January'")
    .drop(['Group'], axis=1)
    .reset_index(drop=True))
# Use `map` rather than `replace` for the name -> number lookup: `replace`
# relies on implicit object -> int downcasting, which pandas has deprecated,
# whereas `map` always yields a numeric column. Any Time value missing from
# the dict would become NaN; the query above is expected to leave only these
# two months.
df['Month'] = df['Time'].map({"August": 8, "June": 6})
# Sort chronologically; reassigning avoids the in-place mutation idiom.
df = df.sort_values(by='Month', ascending=True)
# Normality of Scores within each month at alpha = 0.05.
pg.normality(data=df, dv='Scores', group='Month', alpha=0.05)

In [None]:
# Paired comparison of Scores between the two months for the same subjects.
# pingouin renamed `pairwise_ttests` to `pairwise_tests` and deprecated the old
# name, so prefer the new one and fall back only on older installs.
run_pairwise = pg.pairwise_tests if hasattr(pg, 'pairwise_tests') else pg.pairwise_ttests
paired_result = run_pairwise(data=df, dv='Scores', within='Month',
                             subject='Subject', effsize='cohen')
# Row 0 is the first contrast in the table; 'p-unc' is its uncorrected p-value.
paired_result.loc[0, 'p-unc']

In [None]:
# Build the axes with matplotlib and hand them to plot_paired through `ax`, so
# the resolution does not depend on plot_paired accepting a `dpi` keyword
# (NOTE(review): recent pingouin signatures appear to have dropped it - confirm).
# The 4x4 inch size is assumed to match plot_paired's former default; adjust if
# a different size is wanted.
fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
pg.plot_paired(data=df, dv='Scores', within='Month', subject='Subject', ax=ax)
plt.show()

## Paired T-test: Comparing the same group across three months

In [None]:
# Keep only the Meditation group, this time with all three time points
# (January, June and August per the mapping below).
df = (pg.read_dataset('mixed_anova')
    .query("Group == 'Meditation'")
    .drop(['Group'], axis=1))
# Use `map` rather than `replace` for the name -> number lookup: `replace`
# relies on implicit object -> int downcasting, which pandas has deprecated,
# whereas `map` always yields a numeric column. Any Time value missing from
# the dict would become NaN.
df['Month'] = df['Time'].map({"January": 1, "June": 6, "August": 8})
# Sort chronologically; reassigning avoids the in-place mutation idiom.
df = df.sort_values(by='Month', ascending=True)
# Normality of Scores within each month at alpha = 0.05.
pg.normality(data=df, dv='Scores', group='Month', alpha=0.05)

In [None]:
# Pairwise paired comparisons of Scores between every pair of months.
# pingouin renamed `pairwise_ttests` to `pairwise_tests` and deprecated the old
# name, so prefer the new one and fall back only on older installs.
run_pairwise = pg.pairwise_tests if hasattr(pg, 'pairwise_tests') else pg.pairwise_ttests
run_pairwise(data=df, dv='Scores', within='Month', subject='Subject', effsize='cohen')

In [None]:
# Build the axes with matplotlib and hand them to plot_paired through `ax`, so
# the resolution does not depend on plot_paired accepting a `dpi` keyword
# (NOTE(review): recent pingouin signatures appear to have dropped it - confirm).
# The 4x4 inch size is assumed to match plot_paired's former default; adjust if
# a different size is wanted.
fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
pg.plot_paired(data=df, dv='Scores', within='Month', subject='Subject', ax=ax)
plt.show()

## ANOVA: Comparing multiple independent groups

In [None]:
# Load pingouin's bundled 'anova' example dataset and check that the pain
# threshold is normally distributed within each hair-colour group (alpha = 0.05).
df = pg.read_dataset('anova')
group_normality = pg.normality(df, dv='Pain threshold', group='Hair color', alpha=0.05)
group_normality

In [None]:
# Side-by-side views of pain threshold per hair colour: box plot on the left,
# swarm plot of the individual observations on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))
box_ax, swarm_ax = axes
shared_args = dict(data=df, x="Hair color", y="Pain threshold")
sns.boxplot(ax=box_ax, **shared_args)
sns.swarmplot(dodge=True, ax=swarm_ax, **shared_args)
plt.show()

In [None]:
# One-way ANOVA of pain threshold across hair-colour groups; detailed=True
# requests the detailed output table.
anova_table = pg.anova(data=df, dv='Pain threshold', between='Hair color', detailed=True)
anova_table