# Hypothesis testing

In [2]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency, ttest_ind, f_oneway



In [4]:
df = pd.read_csv('../data/MachineLearningRating_v3.txt', delimiter='|')

  df = pd.read_csv('../data/MachineLearningRating_v3.txt', delimiter='|')


In [5]:
df['HasClaim'] = df['TotalClaims'] > 0
df['Margin'] = df['TotalPremium'] - df['TotalClaims']


🔹 H₀₁: Provinces have equal risk (Claim Frequency)

In [6]:
province_claim_freq = df.groupby('Province')['HasClaim'].mean()
print(province_claim_freq)

# ANOVA test
groups = [group['HasClaim'].values for name, group in df.groupby('Province')]
f_stat, p_val = f_oneway(*groups)

print(f"ANOVA F-stat: {f_stat:.3f}, p-value: {p_val:.5f}")


Province
Eastern Cape     0.001648
Free State       0.001358
Gauteng          0.003356
KwaZulu-Natal    0.002845
Limpopo          0.002698
Mpumalanga       0.002428
North West       0.002436
Northern Cape    0.001254
Western Cape     0.002166
Name: HasClaim, dtype: float64
ANOVA F-stat: 13.025, p-value: 0.00000


🔹 H₀₂: Zipcodes have equal claim severity

In [7]:
# Only those who made a claim
claimed = df[df['HasClaim'] == True]

groups = [group['TotalClaims'].values for name, group in claimed.groupby('PostalCode') if len(group) > 10]
f_stat, p_val = f_oneway(*groups)

print(f"ANOVA F-stat (ZipCode Severity): {f_stat:.3f}, p-value: {p_val:.5f}")


ANOVA F-stat (ZipCode Severity): 2.562, p-value: 0.00000


H₀₃: Zipcodes have equal profit margin

In [8]:
groups = [group['Margin'].values for name, group in df.groupby('PostalCode') if len(group) > 10]
f_stat, p_val = f_oneway(*groups)

print(f"ANOVA F-stat (Margin by Zip): {f_stat:.3f}, p-value: {p_val:.5f}")


ANOVA F-stat (Margin by Zip): 0.910, p-value: 0.97031


 H₀₄: No difference in claim frequency between genders (Chi-squared)

In [9]:
contingency = pd.crosstab(df['Gender'], df['HasClaim'])
chi2, p_val, dof, expected = chi2_contingency(contingency)

print("Chi-squared test:")
print(f"Chi2 = {chi2:.3f}, p-value = {p_val:.5f}")


Chi-squared test:
Chi2 = 7.256, p-value = 0.02657
