In [17]:
import pandas as pd
import numpy as np
from scipy import stats

# Load the SOL data
data = pd.read_excel('State_Test_by_level_2023_2024.xlsx')

# subjects to analyze
subjects = ['English: Reading', 'Mathematics']
grades = ['Gr 3', 'Gr 4', 'Gr 5', 'Gr 6', 'Gr 7', 'Gr 8']  # Adjust based on your analysis

# Functions to analyze
def analyze_data(subject):
    # Filter data
    filtered_data = data[
        (data['Subject'] == subject) & 
        (data['Grade'].isin(grades))
    ]

    # Extract pass rates for each year
    yearly_pass_rates = filtered_data[['2021-2022 Pass Rate', '2022-2023 Pass Rate', '2023-2024 Pass Rate']]

    # Calculate mean pass rates
    mean_pass_rates = yearly_pass_rates.mean(axis=1)

    # Calculate mean and standard deviation
    mean = mean_pass_rates.mean()
    std_dev = mean_pass_rates.std()

    # Hypothesis test: H0: mean = overall_mean; H1: mean â‰  overall_mean
    overall_mean = data[
        (data['Subject'] == subject) & 
        (data['Grade'].isin(grades))
    ][['2021-2022 Pass Rate', '2022-2023 Pass Rate', '2023-2024 Pass Rate']].mean(axis=1).mean()

    t_stat, p_value = stats.ttest_1samp(mean_pass_rates, overall_mean)

    return {
        'Mean': mean,
        'Standard Deviation': std_dev,
        'T-statistic': t_stat,
        'P-value': p_value,
        'Yearly Pass Rates': yearly_pass_rates.mean().to_dict()  # Convert to dictionary for easy access
    }

# Analyze data for each subject
results_summary = {}
for subject in subjects:
    results_summary[subject] = analyze_data(subject)

# Print results
for subject, metrics in results_summary.items():
    print(f"Results for {subject}:")
    print(f"  Mean: {metrics['Mean']:.2f}")
    print(f"  Standard Deviation: {metrics['Standard Deviation']:.2f}")
    print(f"  T-statistic: {metrics['T-statistic']:.2f}")
    print(f"  P-value: {metrics['P-value']:.4f}")
    print(f"  Yearly Pass Rates: {metrics['Yearly Pass Rates']}")
    if metrics['P-value'] < 0.05:
        print("  Conclusion: Reject the null hypothesis.")
    else:
        print("  Conclusion: Fail to reject the null hypothesis.")
    print("\n")


Results for English: Reading:
  Mean: 70.83
  Standard Deviation: 1.99
  T-statistic: 0.00
  P-value: 1.0000
  Yearly Pass Rates: {'2021-2022 Pass Rate': 71.0, '2022-2023 Pass Rate': 70.33333333333333, '2023-2024 Pass Rate': 71.16666666666667}
  Conclusion: Fail to reject the null hypothesis.


Results for Mathematics:
  Mean: 63.78
  Standard Deviation: 4.76
  T-statistic: 0.00
  P-value: 1.0000
  Yearly Pass Rates: {'2021-2022 Pass Rate': 61.0, '2022-2023 Pass Rate': 64.33333333333333, '2023-2024 Pass Rate': 66.0}
  Conclusion: Fail to reject the null hypothesis.


