In [None]:
'''
paired test on scale data

Event - workshop, training, magic


Person      Before          After
A           12              15
B           14              23
C           16              16

H0 - null hypothesis -    no effect of event
Ha - alternate hypothesis -  there is effect of event
'''

In [None]:
import numpy as np
import pandas
import matplotlib.pyplot as plt

# Reproducible synthetic data: paired weight measurements (kg) for one group.
np.random.seed(0)  # fixed seed so every run draws the same sample

num_individuals = 50

# Baseline weights: normal with mean 70 kg and standard deviation 10 kg.
weight_before = np.random.normal(loc=70, scale=10, size=num_individuals)

# Per-person change: normal with mean -2 kg (an average loss) and SD 3 kg,
# added to that person's baseline to get the paired "after" value.
weight_change = np.random.normal(loc=-2, scale=3, size=num_individuals)
weight_after = weight_before + weight_change

# Show both samples, each under its own heading.
print("Weight Before Intervention:", weight_before, sep="\n")
print("\nWeight After Intervention:", weight_after, sep="\n")


In [None]:
# Put the two paired samples side by side, one row per individual.
paired_columns = dict(before=weight_before, after=weight_after)
data = pandas.DataFrame(paired_columns)
data

In [None]:
# Summary statistics for the 'before' and 'after' columns, shown as the cell output.
data.describe()

In [None]:
# Draw the before/after weight distributions as two stacked histograms.
# sharex=True puts both panels on one weight axis so the shift between them
# can be read directly; titles and axis labels identify each panel.
fig, ax = plt.subplots(2, 1, figsize=(8, 8), sharex=True)
panels = (
    (ax[0], 'before', 'blue', 'Weight before intervention'),
    (ax[1], 'after', 'red', 'Weight after intervention'),
)
for axis, column, colour, title in panels:
    axis.hist(data[column], bins=10, color=colour, edgecolor='black')
    axis.set_title(title)
    axis.set_ylabel('Number of individuals')
# With a shared x-axis only the bottom panel needs the axis label.
ax[1].set_xlabel('Weight (kg)')
plt.tight_layout()
plt.show()

In [None]:
from scipy.stats import ttest_rel

# Paired-samples t-test: each individual's 'before' value is matched with the
# same individual's 'after' value. The result is shown as the cell output.
before_values = data['before']
after_values = data['after']
ttest_rel(before_values, after_values)

In [None]:
# Paired t-test result, taken from the test itself rather than pasted from an
# earlier output, so these numbers always match the current `data`.
# ttest_rel is imported in the cell above.
paired_result = ttest_rel(data['before'], data['after'])
statistic = paired_result.statistic
pvalue = paired_result.pvalue
print(f"statistic={statistic}")
print(f"pvalue={pvalue}")

ALPHA = 0.05  # significance level used for the decision

# H0: the event has no effect; Ha: the event has an effect.
if pvalue < ALPHA:
    print('null hypothesis is rejected')
    print('event is having effect')
else:
    print('null hypothesis is not rejected')
    print('no evidence that the event is having effect')

# Cohen's d (for paired samples t-test)
*By P. Stikker*<br>


## Introduction

After a paired samples t-test we might also like to know the effect size. There are many options, and different authors suggest different ones. Cohen's d is probably the most frequently used, but there are several variations on it. I'll use the one that is based on the differences (sometimes denoted as d<sub>z</sub>), but with a correction proposed by Hedges for small sample sizes.

For the interpretation, Cohen (1988, p. 40) gives rules of thumb for small, medium and large effects, and Sawilowsky (2009, p. 599) expanded these with very small, very large and huge:

|\|Cohen d\|| Interpretation|
|-------|---------------|
|0.00 < 0.01| Negligible|
|0.01 < 0.20| Very small|
|0.20 < 0.50 |Small|
|0.50 < 0.80| Medium|
|0.80 < 1.20| Large|
|1.20 < 2.00| Very large|
|2.00 or more| Huge|

These are however for Cohen's d, not d<sub>z</sub>. Cohen (1988, p. 48) notes that:
\begin{equation*}
d = d_z\times\sqrt{2}
\end{equation*}

So we will need to multiply our found d<sub>z</sub> with $\sqrt{2}$ to use the table.

Let's find out how to do this with Python.

In [None]:
!pip install researchpy

In [None]:
from researchpy import ttest as rpttest

In [None]:
# Paired t-test through researchpy's ttest (imported as rpttest in the cell above).
# The output of this call includes a "Cohen's d" figure.
# NOTE(review): equal_variances=True is passed together with paired=True —
# confirm against the researchpy docs whether it has any effect for paired tests.
rpttest(data['before'], data['after'], equal_variances=True, paired=True)

In [None]:
# Effect size for the paired design, computed from the data instead of a
# number pasted from an earlier output.
# d_z is the mean of the paired differences divided by their sample standard
# deviation. The 0.1803 previously hard-coded here cannot be d_z: for a paired
# t-test d_z = t / sqrt(n), and t = 5.55 with n = 50 gives about 0.785.
paired_diff = data['before'] - data['after']
d_z = paired_diff.mean() / paired_diff.std(ddof=1)

# Cohen (1988, p. 48): d = d_z * sqrt(2), which puts the value on the scale the
# rule-of-thumb table uses. The scaled value stays in `dz` because the
# classification cell reads that name.
dz = d_z * (2 ** 0.5)
dz

In [None]:
# Magnitude of the effect size; the sign only encodes direction.
cohenDnd = abs(dz)

# Exclusive upper bound of each band, in ascending order, with its label.
effect_size_bands = (
    (.01, 'Negligible'),
    (.20, 'Very small'),
    (.50, 'Small'),
    (.80, 'Medium'),
    (1.20, 'Large'),
    (2.00, 'Very large'),
)

# Report the first band whose bound exceeds the value; a value that is not
# below any bound is labelled 'Huge'.
print(next(
    (label for upper_bound, label in effect_size_bands if cohenDnd < upper_bound),
    'Huge',
))

# Homework

In [None]:
import pandas as pd

# Homework data: marks for the same students before and after.
before_marks = [18, 21, 16, 22, 19, 24, 17, 21, 23, 18, 14, 16, 16, 19, 18, 20, 12, 22, 15, 17]
after_marks = [22, 25, 17, 24, 16, 29, 20, 23, 19, 20, 15, 15, 18, 26, 18, 24, 18, 25, 19, 16]

# Students are numbered from 1, one per pair of marks.
data = {
    'Student': range(1, len(before_marks) + 1),
    'Before_mark': before_marks,
    'After_mark': after_marks,
}

# One row per student, columns in the order given above.
df = pd.DataFrame(data)

print(df)
