In [None]:
import numpy as np
from scipy import stats

# Step 1: Data (Rural Male Literacy Rate), one observation per survey year
groups = {
    "1983": [44.9],
    "1987-88": [48.4],
    "1993-94": [54.5],
    "1999-2000": [58.8],
    "2004-05": [63.6],
    "2009-10": [70.6],
}

data = [np.array(values) for values in groups.values()]

# Step 2: Compute one-way ANOVA (analysis of variance) manually
#
# ANOVA compares between-group variance with within-group variance, so every
# group needs more than one observation. With a single value per year the
# within-group degrees of freedom (N - k) are zero and MS Within is undefined.
k = len(data)                   # number of groups
N = sum(len(g) for g in data)   # total observations

if N - k == 0:
    print("Note: Only one observation per group — ANOVA requires replicates.")
    print("Adding small variation for demonstration purposes.\n")
    # CAUTION: the replicates below are synthetic. Each year's single value is
    # replaced by three draws of value + N(0, 0.5) noise, so the within-group
    # variance (and therefore the F-statistic and the conclusion) reflects the
    # chosen noise level, not real sampling variability. Treat the result as
    # an illustration of the arithmetic only.
    np.random.seed(42)  # fixed seed so the demonstration is reproducible
    data = [g + np.random.normal(0, 0.5, 3) for g in data]  # 3 samples each year

# Grand mean over every observation
all_values = np.concatenate(data)
grand_mean = np.mean(all_values)

# Between-group sum of squares (SSB)
ss_between = sum(len(g) * (np.mean(g) - grand_mean) ** 2 for g in data)

# Within-group sum of squares (SSW)
ss_within = sum(np.sum((g - np.mean(g)) ** 2) for g in data)

# Degrees of freedom
k = len(data)
N = len(all_values)
df_between = k - 1
df_within = N - k

# Mean squares
ms_between = ss_between / df_between
ms_within = ss_within / df_within

# F-statistic
F = ms_between / ms_within

# Step 3: Output Results
print("One-Way ANOVA (Manual Calculation)")
print("-" * 60)
print(f"Grand Mean: {grand_mean:.2f}")
print(f"SS Between: {ss_between:.3f}")
print(f"SS Within: {ss_within:.3f}")
print(f"DF Between: {df_between}, DF Within: {df_within}")
print(f"MS Between: {ms_between:.3f}, MS Within: {ms_within:.3f}")
print(f"F-statistic: {F:.3f}")

# Critical F-value at the 5% significance level, taken from the F distribution
# for the degrees of freedom actually used above. For (5, 12) this is ≈ 3.106;
# the previously hard-coded 2.996 is the critical value for (6, 12).
alpha = 0.05
F_critical = float(stats.f.ppf(1 - alpha, df_between, df_within))

if F > F_critical:
    print("\nConclusion: Reject H₀")
    print("→ There is a significant impact of time (year) on literacy rate.")
else:
    print("\nConclusion: Fail to reject H₀")
    print("→ No significant difference across years at 5% level.")


Note: Only one observation per group — ANOVA requires replicates.
Adding small variation for demonstration purposes.

One-Way ANOVA (Manual Calculation)
------------------------------------------------------------
Grand Mean: 56.77
SS Between: 1309.562
SS Within: 2.243
DF Between: 5, DF Within: 12
MS Between: 261.912, MS Within: 0.187
F-statistic: 1401.260

Conclusion: Reject H₀
→ There is a significant impact of time (year) on literacy rate.
