In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats

In [2]:
df = pd.read_csv("./data/accident_records_by_day.csv")

### The impact of the lockdown on number of traffic accidents

+ H0: total number of accidents in times without lockdowns = total number of accidents in times with lockdowns<br>
+ Ha: total number of accidents in times without lockdowns ≠ total number of accidents in times with lockdowns<br>

In [3]:
df.lockdown.value_counts()

0    2319
1     238
Name: lockdown, dtype: int64

In [4]:
df_no_lock = df[df.lockdown == 0]
df_lock = df[df.lockdown == 1]

In [5]:
sample1 = df_no_lock.sample(100, random_state=42)   # no lockdown
sample2 = df_lock.sample(100, random_state=42)      # lockdown

In [6]:
# sample mean
xbar1 = np.mean(sample1["combined_inj_seriousprop"]) 
xbar2 = np.mean(sample2["combined_inj_seriousprop"])

In [7]:
# S²
sample1["stdev"] = (xbar1 - sample1["combined_inj_seriousprop"])**2
sample2["stdev"] = (xbar2 - sample2["combined_inj_seriousprop"])**2

In [8]:
S1 = sum(sample1["stdev"]) / (len(sample1)-1)
S2 = sum(sample2["stdev"]) / (len(sample2)-1)

In [9]:
t = (xbar1 - xbar2) / ((S1/len(sample1)) + (S2/len(sample2)))**0.5
t

9.21369682262934

In [10]:
tc = stats.t.ppf(1-(0.05/2),df=100) 
tc

1.9839715184496334

### Conclusion

In [11]:
if abs(t) <= tc:
    print("As the calculated t is equal to or smaller than the tc (in absolutes), we fail to reject the H0.")
else:
    print("As the calculated t is larger than the tc (in absolutes), the H0 is rejected.")


As the calculated t is larger than the tc (in absolutes), the H0 is rejected.
