In [3]:
import pandas as pd
from scipy.stats import chi2_contingency, ttest_ind, mannwhitneyu, pearsonr, spearmanr

# =====================================
# Sample dataset embedded in code
# =====================================
data = {
    "time_of_day": [
        "Morning","Morning","Afternoon","Afternoon","Evening","Evening","Night","Night",
        "Morning","Morning","Afternoon","Afternoon","Evening","Evening","Night","Night",
        "Morning","Morning","Afternoon","Afternoon","Evening","Evening","Night","Night"
    ],
    "weather_condition": [
        "Rain","Fog","Rain","Fog","Rain","Fog","Rain","Fog",
        "Rain","Fog","Rain","Fog","Rain","Fog","Rain","Fog",
        "Rain","Fog","Rain","Fog","Rain","Fog","Rain","Fog"
    ],
    "severity": [
        3,2,4,3,5,4,2,3,
        4,3,5,2,3,2,4,2,
        3,4,2,3,5,3,4,2
    ],
    "visibility": [
        220,120,250,140,180,90,300,100,
        210,130,270,150,200,110,280,80,
        230,100,240,160,190,95,260,85
    ]
}

df = pd.DataFrame(data)

# =====================================
# Q1: Time of Day vs Number of Accidents
# =====================================
# H0: Accident frequency is the same across all times of day
# H1: Accident frequency differs depending on the time of day
# Assumptions: Observations are independent, expected frequency > 5 for each category
time_counts = df['time_of_day'].value_counts().to_frame().T
chi2, p, dof, expected = chi2_contingency(time_counts)

print("\n--- Q1: Time of Day vs Accidents ---")
print(f"Chi-square statistic = {chi2:.3f}, p-value = {p:.5f}, dof = {dof}")
print("Expected counts:\n", expected)

# =====================================
# Q2: Severity of Accidents (Rain vs Fog)
# =====================================
# H0: No difference in severity between Rain and Fog
# H1: Severity differs between Rain and Fog
# Assumptions: Random samples, severity is continuous/ordinal,
# for T-test assume roughly normal distribution; Mann-Whitney does not require normality
rain = df[df['weather_condition'] == "Rain"]['severity']
fog = df[df['weather_condition'] == "Fog"]['severity']

# Welch’s t-test
t_stat, t_p = ttest_ind(rain, fog, equal_var=False, nan_policy='omit')
# Mann–Whitney U test
u_stat, u_p = mannwhitneyu(rain, fog, alternative="two-sided")

print("\n--- Q2: Severity in Rain vs Fog ---")
print(f"T-test: statistic = {t_stat:.3f}, p-value = {t_p:.5f}")
print(f"Mann–Whitney U: statistic = {u_stat:.3f}, p-value = {u_p:.5f}")

# =====================================
# Q3: Visibility vs Accident Severity
# =====================================
# H0: Visibility has no correlation with severity
# H1: Visibility and severity are correlated
# Assumptions: Variables are continuous, paired observations; linearity for Pearson
subset = df[['visibility', 'severity']].dropna()

pearson_r, pearson_p = pearsonr(subset['visibility'], subset['severity'])
spearman_r, spearman_p = spearmanr(subset['visibility'], subset['severity'])

print("\n--- Q3: Visibility vs Severity ---")
print(f"Pearson correlation = {pearson_r:.3f}, p-value = {pearson_p:.5f}")
print(f"Spearman correlation = {spearman_r:.3f}, p-value = {spearman_p:.5f}")


--- Q1: Time of Day vs Accidents ---
Chi-square statistic = 0.000, p-value = 1.00000, dof = 0
Expected counts:
 [[6. 6. 6. 6.]]

--- Q2: Severity in Rain vs Fog ---
T-test: statistic = 2.421, p-value = 0.02523
Mann–Whitney U: statistic = 107.500, p-value = 0.03536

--- Q3: Visibility vs Severity ---
Pearson correlation = 0.311, p-value = 0.13923
Spearman correlation = 0.311, p-value = 0.13954
