In [1]:
import numpy as np

# Step 1: Data – "Not Literate" percentages from Table 5
years = ["1983", "1987-88", "1993-94", "1999-2000", "2004-05", "2009-10"]

rural_males   = [52.9, 49.8, 44.4, 40.0, 34.2, 26.0]
rural_females = [87.0, 84.8, 79.9, 75.3, 67.8, 49.8]
urban_males   = [22.6, 20.5, 18.5, 16.1, 13.2, 10.4]
urban_females = [60.3, 54.5, 48.3, 43.7, 37.6, 23.6]

groups = [
    [rural_males[i], rural_females[i], urban_males[i], urban_females[i]]
    for i in range(len(years))
]


# Step 2: Rank all data together (for Kruskal–Wallis)
# Flatten all data
flat_data = [x for group in groups for x in group]
ranks = np.argsort(np.argsort(flat_data)) + 1  # rank from 1 upwards
print(ranks)

# Assign ranks back to each year’s group
ranked_groups = []
index = 0
for group in groups:
    ranked_groups.append(ranks[index:index + len(group)])
    index += len(group)


# Step 3: Compute H statistic manually
k = len(groups)               # number of groups (years)
N = len(flat_data)            # total observations
sum_Ri2_over_ni = sum((np.sum(g) ** 2) / len(g) for g in ranked_groups)
H = (12 / (N * (N + 1))) * sum_Ri2_over_ni - 3 * (N + 1)


# Step 4: Degrees of Freedom and Critical Value
df = k - 1
critical_value = 11.07   # Chi-square(0.05, df=5)

# Step 5: Output
print("Kruskal–Wallis Test (Manual Calculation)")
print("-" * 60)
for i, yr in enumerate(years):
    print(f"{yr}: Data = {groups[i]}, Ranks = {ranked_groups[i]}")
print("-" * 60)
print(f"H statistic = {H:.4f}")
print(f"Degrees of freedom = {df}")
print(f"Critical χ²(0.05, {df}) = {critical_value}")

if H > critical_value:
    print("\nConclusion: Reject H₀")
    print("→ There is a significant impact of time on non-literacy.")
else:
    print("\nConclusion: Fail to reject H₀")
    print("→ No significant impact of time on non-literacy at 5% level.")


[17 24  6 19 15 23  5 18 13 22  4 14 11 21  3 12  9 20  2 10  8 16  1  7]
Kruskal–Wallis Test (Manual Calculation)
------------------------------------------------------------
1983: Data = [52.9, 87.0, 22.6, 60.3], Ranks = [17 24  6 19]
1987-88: Data = [49.8, 84.8, 20.5, 54.5], Ranks = [15 23  5 18]
1993-94: Data = [44.4, 79.9, 18.5, 48.3], Ranks = [13 22  4 14]
1999-2000: Data = [40.0, 75.3, 16.1, 43.7], Ranks = [11 21  3 12]
2004-05: Data = [34.2, 67.8, 13.2, 37.6], Ranks = [ 9 20  2 10]
2009-10: Data = [26.0, 49.8, 10.4, 23.6], Ranks = [ 8 16  1  7]
------------------------------------------------------------
H statistic = 4.0000
Degrees of freedom = 5
Critical χ²(0.05, 5) = 11.07

Conclusion: Fail to reject H₀
→ No significant impact of time on non-literacy at 5% level.
