In [None]:
# Execute the shared setup notebook inside this kernel's namespace.
# NOTE(review): presumably this is what defines spark, sc, col, stateAbbrDict,
# gpd, plt and display used by the cells below -- confirm against that notebook.
%run ../ImportsConstantsSparkInit.ipynb
# Tag the Spark jobs launched from this notebook; the same string is passed as
# both arguments (group id and description in the PySpark API).
sc.setJobGroup("PLT_StateLicense2021", "PLT_StateLicense2021")

In [None]:
# Load the per-state licensed-driver counts. The CSV is read with Spark's
# default options (no header, no schema inference), so the columns arrive as
# the strings _c0 / _c1 and are renamed and cast below.
licenses_sdf = spark.read.csv("./Licenses_Drivers_in_US_2021.csv")

# Print the number of rows as a quick sanity check (this triggers a Spark job).
print(licenses_sdf.count())

licenses_sdf = (
    licenses_sdf
    .withColumnRenamed("_c0", "State")
    .withColumnRenamed("_c1", "NumLicenses")
    # Apply the stateAbbrDict mapping to the State column only; without
    # `subset` the replacement would also be applied to the (still string)
    # NumLicenses column.
    # NOTE(review): stateAbbrDict presumably maps full state names to their
    # two-letter abbreviations -- confirm in the setup notebook.
    .na.replace(stateAbbrDict, subset=["State"])
    .withColumn("NumLicenses", col("NumLicenses").cast("int"))
)

# The downstream merge is done in pandas, so collect the frame to the driver
# under the name the later cells expect.
driverLicense_df = licenses_sdf.toPandas()

In [None]:
# MongoDB aggregation: keep accidents whose Start_Time falls in 2021, then
# count them per state.
pipeline = "[{$match:{Start_Time:{$gte: ISODate('2021-01-01'),$lt: ISODate('2022-01-01')}}}, {$group:{_id: '$State',count: {$sum:1}}}]"

# Spark side: re-aggregate the per-state counts and give the columns readable
# names.
# NOTE(review): the second group-by/sum presumably merges partial counts in
# case the connector runs the pipeline per partition -- confirm.
accidents_sdf = (
    spark.read.format("mongo")
    .option("collection", "TrafficData")
    .option("pipeline", pipeline)
    .load()
    .groupby("_id")
    .sum()
    .withColumnRenamed("_id", "State")
    .withColumnRenamed("sum(count)", "AccidentCount")
    .orderBy("State")
    .drop("_id")
)

# pandas side: attach the license counts, derive the raw ratio and the rate
# per 100,000 licenses, and rank states from highest to lowest rate.
stateAcc_df = (
    accidents_sdf.toPandas()
    .merge(driverLicense_df, on="State")
    .assign(AccidentsByLicensesRaw=lambda f: f["AccidentCount"] / f["NumLicenses"])
    .assign(AccidentsPer100kLicenses=lambda f: f["AccidentsByLicensesRaw"] * 100000)
    .sort_values("AccidentsPer100kLicenses", ascending=False)
)
display(stateAcc_df)

In [None]:
# Read the state boundary shapefile and reproject it to EPSG:4326 so the map
# axes are in longitude/latitude degrees.
path = "./tl_2022_us_state/tl_2022_us_state.shp"
df = gpd.read_file(path)
df = df.to_crs("EPSG:4326")

# Attach the accident statistics by state abbreviation. merge() defaults to an
# inner join, so shapes without a matching row in stateAcc_df are dropped.
df = df.merge(stateAcc_df, left_on="STUSPS", right_on="State")

# "State" duplicates STUSPS after the merge. Use the `columns=` keyword: the
# positional axis form `drop("State", 1)` is rejected by pandas 2.x.
df = df.drop(columns="State")
display(df)

In [None]:
# Postal codes of the states/territories to leave off the map.
non_continental = ['HI','VI','MP','GU','AK','AS','PR']

# Keep every row of df whose STUSPS code is not in the exclusion list.
us49 = df[~df.STUSPS.isin(non_continental)]

In [None]:
# Choropleth of the 2021 accident rate per 100,000 licenses for the rows in us49.
fig, ax = plt.subplots(figsize=(25, 15))

# The figure size is set on the figure above; geopandas ignores `figsize`
# when an explicit `ax` is supplied, so it is not passed here.
us49.plot(ax=ax, column="AccidentsPer100kLicenses", cmap="plasma", legend=True, legend_kwds={"shrink": .7})
us49.boundary.plot(ax=ax, color='Black', linewidth=1)

# Label each shape with its postal code at the geometry centroid. The label is
# passed positionally because the keyword was renamed from `s` to `text` in
# Matplotlib 3.3 and `s=` is no longer accepted by current releases.
for abbr, geom in zip(us49.STUSPS, us49.geometry):
    ax.annotate(abbr, xy=geom.centroid.coords[0], ha='center', fontsize=14)

ax.set_title("Accident Rate Per 100,000 Licenses In 2021", fontsize=30)
ax.set_xlabel("Longitude", fontsize=20)
ax.set_ylabel("Latitude", fontsize=20)

# With legend=True the colorbar is drawn on a second axes of the figure;
# enlarge its tick labels to match the rest of the plot.
cb_ax = fig.axes[1]
cb_ax.tick_params(labelsize=15)