In [2]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_rel

# ----------------------------------------
# 1️⃣ Create a synthetic paired dataset
# ----------------------------------------

# Seed NumPy's legacy global generator so every run draws the same scores.
np.random.seed(42)
n = 300  # number of students (one pre/post pair per student)

# Pre-test scores: normal draws with mean 60 and standard deviation 10.
pre_scores = np.random.normal(loc=60, scale=10, size=n)

# Post-test scores: each student's pre-test score plus an individual gain
# drawn from a normal distribution with mean 5 and standard deviation 5.
score_gain = np.random.normal(loc=5, scale=5, size=n)
post_scores = pre_scores + score_gain

# Assemble the table column by column; scores are rounded to one decimal.
dataset_columns = {
    "Student_ID": np.arange(1, n + 1),
    "Pre_Test_Score": np.round(pre_scores, 1),
    "Post_Test_Score": np.round(post_scores, 1),
}
paired_df = pd.DataFrame(dataset_columns)

# Always writes the dataset to a CSV in the working directory (no index column).
paired_df.to_csv("PairedTestDataset.csv", index=False)

# Announce the dataset and show its first five rows.
print("Paired dataset created:", paired_df.head(), sep="\n")


# ----------------------------------------
# 2️⃣ Perform Paired T-test
# ----------------------------------------

# Compare the two score columns as related (paired) samples: row i of each
# column belongs to the same student. The pre-test column is passed first,
# so the statistic is based on (pre - post); a negative t-statistic therefore
# means post-test scores are higher on average.
t_stat, p_value = ttest_rel(paired_df["Pre_Test_Score"],
                            paired_df["Post_Test_Score"])

print("\n🔹 Paired T-test Results")
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")

# Interpretation: compare the p-value against the chosen significance level.
alpha = 0.05
if p_value < alpha:
    print("\n✅ Reject the null hypothesis:")
    print("   There is a significant difference between Pre & Post test scores.")
else:
    print("\n❌ Fail to reject the null hypothesis:")
    print("   No significant difference between Pre & Post test scores.")


Paired dataset created:
   Student_ID  Pre_Test_Score  Post_Test_Score
0           1            65.0             65.8
1           2            58.6             60.8
2           3            66.5             75.2
3           4            75.2             83.3
4           5            57.7             62.6

🔹 Paired T-test Results
T-statistic: -17.629415106089585
P-value: 3.4390488679875365e-48

✅ Reject the null hypothesis:
   There is a significant difference between Pre & Post test scores.
