# Personal Injury Accidents vs Population


In [1]:
import altair as alt
import fetch_data as fd
import pandas as pd
import os

In [2]:
# Load the per-city reference table (regional key, city name, area in sq km,
# population) and preview its first five rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# CSV index that was read back as data -- confirm in fetch_data and drop it there.
city_info = fd.get_city_info()
city_info.head(5)

Unnamed: 0,regional key,city,sq km,population
0,11000000,Berlin,891.12,3685265
1,2000000,Hamburg,755.09,1862565
2,9162000,München,310.7,1505005
3,5315000,Köln,405.02,1024621
4,6412000,Frankfurt am Main,248.31,756021


In [3]:
# Request the accident tables for 2019 through 2024 (returned as a dict) and
# stack them into one frame with a fresh, continuous row index.
df_dict = fd.get_dfs(list(range(2019, 2025)))
df = pd.concat(list(df_dict.values()), ignore_index=True)

## Data Cleaning


### One-Hot Encoding


One-hot encode `UKATEGORIE` (the accident severity category) into one indicator column per severity level.


In [4]:
# Expand UKATEGORIE into integer 0/1 indicator columns (inj_1, inj_2, inj_3)
# and give the generated columns descriptive names.
severity_names = {
    "inj_1": "inj_fatal",
    "inj_2": "inj_serious",
    "inj_3": "inj_light",
}
df = pd.get_dummies(df, columns=["UKATEGORIE"], prefix="inj", dtype=int).rename(
    columns=severity_names
)

### Drop Columns


In [5]:
# Columns that the per-community aggregation below does not need.
cols_to_drop = [
    # coordinates
    "LINREFX", "LINREFY", "XGCSWGS84", "YGCSWGS84",
    # weekday and record identifiers
    "UWOCHENTAG", "OID_", "UID",
    # administrative sub-keys
    "UREGBEZ", "UKREIS", "UGEMEINDE",
]

# errors="ignore" skips any listed name that is absent from the frame, so the
# cell does not fail when a column is missing.
df = df.drop(columns=cols_to_drop, errors="ignore")

Aggregate the accident data per `Community_key` into a new dataframe; it is combined with `city_info` in the next step.


In [6]:
# Every indicator column is reduced the same way: summed within each community.
agg_methods = dict.fromkeys(
    [
        "inj_light",
        "inj_serious",
        "inj_fatal",
        "IstFuss",
        "IstRad",
        "IstKrad",
        "IstGkfz",
    ],
    "sum",
)

# One row per Community_key; reset_index turns the group key back into a column.
df_grouped = df.groupby("Community_key").agg(agg_methods).reset_index()

Rename `Community_key` to `"regional key"` so both frames share the key column, then perform an inner join of `df_grouped` and `city_info` on `"regional key"`.


In [7]:
# Align the key column name with city_info, then keep only the communities
# that appear in both frames.
df_grouped = df_grouped.rename(columns={"Community_key": "regional key"})
df_merged = pd.merge(df_grouped, city_info, on="regional key", how="inner")

Calculate total injuries.


In [8]:
# Total across the three severity columns, added element-wise per row.
df_merged["inj_total"] = (
    df_merged["inj_light"]
    .add(df_merged["inj_serious"])
    .add(df_merged["inj_fatal"])
)

## Exploring the Data with Altair


In [9]:
# Tooltip entries shared by every chart: the city name, followed by the
# population rendered with thousands separators.
alt_tooltip = [
    "city",
    alt.Tooltip(shorthand="population", title="Population", format=","),
]

### Plotting vs Population


In [10]:
# (column, label) pairs. The first four go into the "categories" figure, the
# last four into the "types" figure.
cols = [
    # injury category
    ("inj_total", "Total Injuries"),
    ("inj_light", "Light Injuries"),
    ("inj_serious", "Serious Injuries"),
    ("inj_fatal", "Fatal Injuries"),
    # participant type
    ("IstFuss", "Injuries (Pedestrians)"),
    ("IstRad", "Injuries (Cyclists)"),
    ("IstKrad", "Injuries (Motorcyclists)"),
    ("IstGkfz", "Injuries (Delivery Vehicles)"),
]

title_font_size = 20
axis_label_font_size = 16
r_2_font_size = 14

# Loop-invariant values, computed once instead of once per chart.
population_max = df_merged["population"].max()
regression_extent = [0, population_max * 1.2]


def _population_chart(col: str, title: str) -> alt.LayerChart:
    """Build a scatter plot of ``col`` against population for ``df_merged``.

    The returned layered chart contains the scatter points, a dashed red
    linear-regression line and a text label with the regression's r².

    The plotted values are sums over all loaded years (the grouping step
    aggregates with "sum"), so the labels deliberately do not say "Average".
    If per-year averages are wanted instead, divide the columns by the number
    of years before plotting and re-add the prefix.

    Args:
        col: Name of the ``df_merged`` column shown on the y axis.
        title: Human-readable label used for the axis, tooltip and chart title.

    Returns:
        The layered Altair chart (scatter + regression line + r² label).
    """
    scatter = (
        alt.Chart(df_merged)
        .mark_circle(size=30, fillOpacity=0.3)
        .encode(
            x=alt.X(
                "population",
                scale=alt.Scale(domain=(0, population_max * 1.05)),
                axis=alt.Axis(titleFontSize=axis_label_font_size),
            ).title("Population"),
            y=alt.Y(
                col,
                scale=alt.Scale(domain=(0, df_merged[col].max() * 1.05)),
                axis=alt.Axis(titleFontSize=axis_label_font_size),
            ).title(title),
            tooltip=[
                *alt_tooltip,
                alt.Tooltip(col, title=title, format=","),
            ],
        )
        .properties(
            width=500,
            height=300,
            title={
                "text": f"{title} vs Population (2019-2024)",
                "fontSize": title_font_size,
            },
        )
        .interactive()
    )
    regression = (
        alt.Chart(df_merged)
        .transform_regression("population", col, extent=regression_extent)
        # The line is generated beyond the axis domain so it is still there
        # after zooming out; clip=True keeps it inside the plot frame.
        .mark_line(color="red", strokeDash=[4, 4], clip=True)
        .encode(
            x="population",
            y=col,
        )
    )
    # https://stackoverflow.com/a/72901978
    regression_params = (
        alt.Chart(df_merged)
        .transform_regression(
            "population",
            col,
            extent=regression_extent,
            params=True,
        )
        .mark_text(
            align="left",
            lineBreak="\n",
            size=r_2_font_size,
        )
        .encode(
            x=alt.value(5),  # pixels from left
            y=alt.value(10),  # pixels from top
            text="params:N",
        )
        .transform_calculate(params='"r² = " + round(datum.rSquared * 100)/100')
    )
    return scatter + regression + regression_params


charts: list[alt.LayerChart] = [_population_chart(col, title) for col, title in cols]

# chart.save() does not create missing directories, so make sure the output
# folder exists before exporting.
os.makedirs("img", exist_ok=True)

# 2x2 grid of the injury-category charts.
chart = ((charts[0] | charts[1]) & (charts[2] | charts[3])).configure_concat(spacing=30)
chart.show()
chart.save(os.path.join("img", "acc_per_pop_categories.png"))

# 2x2 grid of the participant-type charts.
chart = ((charts[4] | charts[5]) & (charts[6] | charts[7])).configure_concat(spacing=30)
chart.show()
chart.save(os.path.join("img", "acc_per_pop_types.png"))