In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
DATA_PATH = '/content/drive/MyDrive/RoadSafety_Nov25/data/raw/US_Accidents_March23.csv'
import pandas as pd
df = pd.read_csv(DATA_PATH)

### Q6. Weekday vs Weekend Severity

- Does the average accident severity differ between weekdays and weekends in the US Accidents dataset?

In [4]:
import pandas as pd
from scipy import stats

# H0: The mean accident severity on weekdays equals the mean accident severity on weekends (μ_weekday = μ_weekend).
# H1: The mean accident severity on weekdays differs from the mean accident severity on weekends (μ_weekday ≠ μ_weekend).

df['Start_Time'] = pd.to_datetime(df['Start_Time'], errors='coerce')

df6 = df[['Severity', 'Start_Time']].dropna()
df6['is_weekend'] = df6['Start_Time'].dt.dayofweek >= 5  # Sat=5, Sun=6

weekday_sev = df6[~df6['is_weekend']]['Severity']
weekend_sev = df6[df6['is_weekend']]['Severity']

t_stat, p_val = stats.ttest_ind(weekday_sev, weekend_sev, equal_var=False)
alpha = 0.05
print(f"T-stat: {t_stat:.3f}, p-value: {p_val:.4g}")
print("Reject H0" if p_val < alpha else "Fail to reject H0")

T-stat: -103.655, p-value: 0
Reject H0


#### Q7. Urban Feature (Amenity) and Accident Occurrence

- Is the occurrence of accidents independent of the presence of nearby amenities (e.g., amenity flag Amenity = True/False) in the US Accidents dataset?

In [5]:
import numpy as np
from scipy.stats import chi2_contingency
import pandas as pd

# H0: Accident severity level (high vs low) is independent of amenity presence near the accident location.
# H1: Accident severity level (high vs low) is associated with amenity presence near the accident location.

df7 = df[['Severity', 'Amenity']].dropna().copy()
df7['high_sev'] = (df7['Severity'] >= 3).astype(int)

cont_table = pd.crosstab(df7['Amenity'], df7['high_sev'])
print(cont_table)

chi2, p_val, dof, expected = chi2_contingency(cont_table)
alpha = 0.05
print(f"Chi2: {chi2:.3f}, p-value: {p_val:.4g}")
print("Reject H0" if p_val < alpha else "Fail to reject H0")


high_sev        0        1
Amenity                   
False     6134076  1497984
True        90271     6063
Chi2: 10790.174, p-value: 0
Reject H0


#### Q8. Humidity and State Differences

- Does the mean humidity at the time of accidents differ between two states, for example, Florida (FL) and Arizona (AZ), in the US Accidents dataset?

In [6]:
from scipy import stats

# H0: The mean humidity for accidents in Florida equals the mean humidity for accidents in Arizona (μ_FL = μ_AZ).
# H1: The mean humidity for accidents in Florida differs from the mean humidity for accidents in Arizona (μ_FL ≠ μ_AZ).

df8 = df[['State', 'Humidity(%)']].dropna()

fl = df8[df8['State'] == 'FL']['Humidity(%)']
az = df8[df8['State'] == 'AZ']['Humidity(%)']

t_stat, p_val = stats.ttest_ind(fl, az, equal_var=False)
alpha = 0.05
print(f"T-stat: {t_stat:.3f}, p-value: {p_val:.4g}")
print("Reject H0" if p_val < alpha else "Fail to reject H0")


T-stat: 670.785, p-value: 0
Reject H0


#### Q9. Wind Speed Effect on Visibility

- Does higher wind speed during accidents correspond to different mean visibility compared to lower wind speed conditions in the US Accidents dataset?

In [9]:
from scipy import stats
import numpy as np

# H0: The mean visibility under high wind speed conditions equals the mean visibility under low wind speed conditions (μ_high_wind = μ_low_wind).
# H1: The mean visibility under high wind speed conditions differs from the mean visibility under low wind speed conditions (μ_high_wind ≠ μ_low_wind).

df10 = df[['Wind_Speed(mph)', 'Visibility(mi)']].dropna().copy()

threshold = df10['Wind_Speed(mph)'].median()
df10['high_wind'] = df10['Wind_Speed(mph)'] > threshold

vis_high = df10[df10['high_wind']]['Visibility(mi)']
vis_low  = df10[~df10['high_wind']]['Visibility(mi)']

t_stat, p_val = stats.ttest_ind(vis_high, vis_low, equal_var=False)
alpha = 0.05
print(f"T-stat: {t_stat:.3f}, p-value: {p_val:.4g}")
print("Reject H0" if p_val < alpha else "Fail to reject H0")


T-stat: 71.807, p-value: 0
Reject H0
