# 1. Data Load

In [2]:
# load required packages
import pandas as pd
import matplotlib

## 1.1 Charging Stations

In [40]:
# Charging station register, downloaded as CSV from the open data portal of
# Rhein-Kreis-Neuss (found via govdata.de).
# The file is semicolon-separated; malformed lines are skipped instead of
# aborting the whole import.
stations_raw = pd.read_csv(
    "https://opendata.rhein-kreis-neuss.de/explore/dataset/rhein-kreis-neuss-ladesaulen-in-deutschland/download/?format=csv",
    sep=";",
    on_bad_lines="skip",
    parse_dates=["inbetriebnahmedatum"],
    dtype={
        # text columns; note that postleitzahl and hausnummer are read as text too
        **dict.fromkeys(
            ["betreiber",
             "steckertypen1", "steckertypen2", "steckertypen3", "steckertypen4",
             "kreis_kreisfreie_stadt", "ort", "postleitzahl",
             "strasse", "hausnummer", "adresszusatz",
             "koordinaten", "normalladeeinrichtung"],
            str,
        ),
        # integer column
        "anzahl_ladepunkte": int,
        # floating point columns
        **dict.fromkeys(
            ["anschlussleistung", "p1_kw", "p2_kw", "p3_kw", "p4_kw"],
            float,
        ),
    },
)

## 1.2 Registered Vehicles

In [7]:
# Vehicle registration workbook published by the Kraftfahrtbundesamt (KBA);
# only the worksheet named "FZ 3.1" is read.
vehicles_raw = pd.read_excel(
    io="https://www.kba.de/SharedDocs/Downloads/DE/Statistik/Fahrzeuge/FZ3/fz3_2022.xlsx?__blob=publicationFile&v=3",
    sheet_name="FZ 3.1",
)

## 1.3 Mapping Data: PLZ to Administrative Region

In [38]:
# Lookup table linking each PLZ (zip code) to its administrative region.
# Every listed column is read as text.
mapping_raw = pd.read_csv(
    "https://downloads.suche-postleitzahl.org/v2/public/zuordnung_plz_ort.csv",
    sep=",",
    dtype=dict.fromkeys(
        ["osm_id", "ags", "ort", "plz", "landkreis", "bundesland"],
        str,
    ),
)

# 2. Data Cleaning

owner                  object
count                   int64
connected_load        float64
connector_1            object
connector_2            object
connector_3            object
connector_4            object
p1                    float64
p2                    float64
p3                    float64
p4                    float64
district               object
municipality           object
zip                     int64
street_name            object
street_number          object
additional_address     object
installation_date      object
type                   object
latitude               object
longitude              object
dtype: object

## 2.1 Charging Stations

In [82]:
# Work on a copy so the downloaded raw frame stays untouched
stations = stations_raw.copy()

# English column labels, assigned purely by position to the raw columns
stations.columns = [
    "owner", "count", "connected_load",
    "connector_1", "connector_2", "connector_3", "connector_4",
    "p1", "p2", "p3", "p4",
    "district", "municipality", "zip",
    "street_name", "street_number", "additional_address",
    "installation_date", "coordinates", "type",
]

# "coordinates" holds one comma-separated string per row; the first part is
# stored as latitude and the second as longitude (both remain strings)
coordinate_parts = stations["coordinates"].str.split(",", expand=True)
stations[["latitude", "longitude"]] = coordinate_parts

# Discard the columns that are not used any further
unused_columns = [
    "owner", "connected_load",
    "connector_1", "connector_2", "connector_3", "connector_4",
    "p1", "p2", "p3", "p4",
    "district", "additional_address", "coordinates",
]
stations = stations.drop(columns=unused_columns)

# stations_summary: sum of "count" for every zip code
stations_summary = stations.groupby("zip")[["count"]].sum()

## 2.2 Registered Vehicles

In [77]:
# Reduce the raw KBA sheet to one row per district: the result `vehicles`
# is indexed by district_id and has a single column, cars_total.

# copy raw data to working df
vehicles = vehicles_raw.copy()

# remove empty first column
vehicles = vehicles.drop(columns=vehicles.columns[0])

# drop first rows containing no usable data
vehicles = vehicles.drop(index=vehicles.index[0:8])

# drop last rows containing no usable data
# NOTE(review): these index labels are hard-coded for the downloaded file;
# a differently sized sheet will raise a KeyError here - confirm on update.
vehicles = vehicles.drop(index=list(range(11209, 11220)))

# reset index
vehicles = vehicles.reset_index(drop=True)

# assign proper column names (by position)
vehicles.columns = ["state",
                    "district_raw",
                    "municipality",
                    "motorcycles",
                    "cars_total",
                    "cars_business",
                    "trucks",
                    "tractors_total",
                    "tractors_agri_forest",
                    "other_vehicles_buses",
                    "vehicles_total",
                    "trailers"]

# drop irrelevant columns
vehicles = vehicles.drop(columns=["motorcycles",
                                  "cars_business",
                                  "trucks",
                                  "tractors_total",
                                  "tractors_agri_forest",
                                  "other_vehicles_buses",
                                  "vehicles_total",
                                  "trailers"])

# fill NaN values in state / district_raw with the value of the previous row.
# ffill() replaces the former row-by-row loop, which assigned through
# `vehicles.iloc[row][column]` (chained indexing: the write may land on a
# temporary copy and be lost) and, for row 0, read the *last* row.
fill_columns = ["state", "district_raw"]
vehicles[fill_columns] = vehicles[fill_columns].ffill()

# remove summary rows: municipality is exactly "ZUSAMMEN" or the state text
# contains "INSGESAMT". A single boolean mask drops each row once, even if it
# matches both conditions (the former drop list could contain the same row
# twice, which makes the second drop fail). na=False treats missing state
# values as "no match" instead of raising.
is_summary_row = (
    vehicles["municipality"].eq("ZUSAMMEN")
    | vehicles["state"].str.contains("INSGESAMT", regex=False, na=False)
)
vehicles = vehicles.loc[~is_summary_row].reset_index(drop=True)

# extract district id: characters 2..6 of the last 8 characters of district_raw
# (presumably the raw text ends in " (12345)" - TODO confirm against the sheet)
vehicles["district_id"] = vehicles["district_raw"].str[-8:].str[2:7]

# keep only the columns needed downstream; copy() so the assignment below
# writes to an independent frame rather than a slice
vehicles = vehicles[["district_id", "cars_total"]].copy()

# replace "." & "-" placeholder values with 0
vehicles.loc[vehicles["cars_total"].isin([".", "-"]), "cars_total"] = 0

# assign correct column types
vehicles = vehicles.astype({"district_id": str, "cars_total": int})

# summarize amount of cars per district_id
vehicles = vehicles.groupby("district_id").agg({"cars_total": "sum"})

## 2.3 Mapping Data: PLZ to Administrative Region

In [80]:
# Build the zip -> district lookup: the result `mapping` has the columns
# district_id, zip, district and state.

# copy raw data to working df
mapping = mapping_raw.copy()

# assign proper column names (by position)
mapping.columns = ["osm_id", "district_id", "city", "zip", "district", "state"]

# fill district column with city name if empty.
# The previous loop tested and wrote `district_id` instead of `district`,
# which would have put a city name into the id column (and truncated it in
# the last step below); it also assigned through chained indexing
# (`mapping.iloc[row][...] = ...`), which may write to a temporary copy.
# NOTE(review): rows without a district are presumably cities that form
# their own district - confirm against the source data.
mapping["district"] = mapping["district"].fillna(mapping["city"])

# drop irrelevant columns
mapping = mapping.drop(columns=["osm_id", "city"])

# strip last 3 digits of ags code
mapping["district_id"] = mapping["district_id"].str[:-3]

Unnamed: 0,count,municipality,zip,street_name,street_number,installation_date,type,latitude,longitude
0,2,Gößweinstein,91327,Viktor-von-Scheffel Straße,3,2018-10-08,Normalladeeinrichtung,49.76918,11.33907
1,2,Gräfenberg,91322,Bayreuther Str.,38,2021-11-10,Normalladeeinrichtung,49.645468,11.255904
2,2,Hallerndorf,91352,Von-Seckendorf-Str.,10,2019-01-29,Normalladeeinrichtung,49.761162,10.981606
3,2,Heroldsbach,91336,Hauptstraße,9,2016-12-16,Normalladeeinrichtung,49.692019,11.00026
4,2,Heroldsbach,91336,Untere Hauptstraße,2,2021-05-01,Normalladeeinrichtung,49.690454,11.004974
...,...,...,...,...,...,...,...,...,...
29728,2,Wutha-Farnroda,99848,Ruhlaer Straße,41,2018-05-01,Normalladeeinrichtung,50.951957,10.394761
29729,2,Apolda,99510,Parkplatz am Schloss,0,2018-06-18,Normalladeeinrichtung,51.021253,11.513847
29730,1,Apolda,99510,Adolf-Aber-Straße,0,2018-10-19,Schnellladeeinrichtung,51.020983,11.51048
29731,2,Bad Berka,99438,Weimarische Str.,1,2018-04-20,Normalladeeinrichtung,50.90074,11.2826


# 3. Data Merge

# 4. KPI Computation

# 5. Visualization

## 5.1 Visualization Preparation

## 5.2 Visualization Creation