In [1]:
import os
import pandas as pd
import geopandas as gpd

In [2]:
cwd = os.getcwd()

DATA_PATH = os.path.join(cwd, '..', '..', '..', 'data', 'input')

RAW_DATA_PATH = os.path.join(DATA_PATH, 'raw')
PROCESSED_DATA_PATH = os.path.join(DATA_PATH, 'processed')

In [3]:
def setup_nuts3_polygon() -> gpd.GeoDataFrame:
    """Get polygons geodataframe for Germany at NUTS3 territorial unit."""
    polygon_shp_path = os.path.join(
        PROCESSED_DATA_PATH, "shapefiles", "NUTS3.shp"
    )
    polygon_gdf = gpd.read_file(polygon_shp_path)
    polygon_gdf = polygon_gdf[polygon_gdf["prnt_code"].str.contains("DE")]
    polygon_gdf.drop(
        [
            col
            for col in polygon_gdf.columns
            if "geometry" not in col and "code" not in col
        ],
        axis=1,
        inplace=True,
    )
    polygon_gdf.drop(
        [col for col in polygon_gdf.columns if col.startswith("prnt")],
        axis=1,
        inplace=True,
    )
    polygon_gdf.rename(columns={"code": "NUTS3_region_code"}, inplace=True)
    polygon_gdf.reset_index(drop=True, inplace=True)
    return polygon_gdf
    

In [4]:
def setup_nuts_postalcodes():
    """Get pandas dataframe for postal codes of 2020 for each country"""
    postal_codes_nuts_path = os.path.join(
        PROCESSED_DATA_PATH, "pc2020_DE_NUTS-2021_v4.0", "pc2020_DE_NUTS-2021_v4.0.csv"
    )
    postal_codes_pd = pd.read_csv(postal_codes_nuts_path)
    postal_codes_pd.rename(columns={"CODE": "Postcode_municipality"}, inplace=True)
    return postal_codes_pd

In [5]:
def setup_vehicle_stock():
    """Get pandas dataframe for vehicle stcok for 2022 for each country"""
    vehicle_stock_path = os.path.join(
        RAW_DATA_PATH, 'Vehicle_Stock', 'countries', 'DE', 
        'Federal_Motor_Transport_Authority_KBA_Kraftfahrt_Bundesamt_KBA', 
        "number_of_vehicles_DE_2022_filtered_4.xlsx"  
    )
    vehicle_stock_pd = pd.read_excel(vehicle_stock_path)
    return vehicle_stock_pd

In [6]:
def join_vehicle_stock_by_postal_codes_and_nuts3():
    """Join vehicle stock distributed by postal code with nuts3 data."""
    setup_vehicle_stock_pd = setup_vehicle_stock()
    postal_codes_pd = setup_nuts_postalcodes()
    vehicle_stock_nuts3_destination = os.path.join(
        PROCESSED_DATA_PATH, 'Vehicle_Stock', 'countries', 'DE', 
        "vehicle_stock_by_NUTS3_df.csv"  
    )
    if not os.path.exists(vehicle_stock_nuts3_destination):
        vehicle_stock_pd = setup_vehicle_stock_pd.fillna(method='ffill')
        join_df = vehicle_stock_pd.set_index('Postcode_municipality').join(postal_codes_pd.set_index('Postcode_municipality'))
        print(join_df.head())
        join_df.reset_index(level=0, inplace=True)
        join_df.drop(
        [
        col
        for col in join_df.columns
        if "Motorcylces" not in col and "Agricultural Tractors" not in col
        and "Buses" not in col and "Passenger Vehicles" not in col 
        and "Motor Vehicles Other" not in col and "Motor Vehicles Total" not in col 
        and "Load Force Wagons" not in col and "Trailers Other" not in col 
        and "Trailers Total" not in col and "NUTS3_region_code" not in col
        ],
        axis=1,
        inplace=True,
        )
        # Errase all rows for column NUTS3 that has NaN values
        df = join_df.dropna(subset=['NUTS3'])
        # goup all rows by the same NUTS3 value applying each row sum
        vehicle_stock_nuts3_df = df.groupby(['NUTS3_region_code']).sum()
        vehicle_stock_nuts3_df.reset_index(drop=True, inplace=True)
        vehicle_stock_nuts3_df.to_csv(vehicle_stock_nuts3_destination)
    else:
        vehicle_stock_nuts3_df = pd.read_csv(vehicle_stock_nuts3_destination)
        vehicle_stock_nuts3_df.reset_index(drop=True, inplace=True)
        vehicle_stock_nuts3_df.drop(
        [
        col
        for col in vehicle_stock_nuts3_df.columns
        if "NUTS3_region_code" not in col and "Motorcylces" not in col 
        and "Agricultural Tractors" not in col and "Buses" not in col 
        and "Passenger Vehicles" not in col and "Motor Vehicles Other" not in col 
        and "Motor Vehicles Total" not in col and "Load Force Wagons" not in col 
        and "Trailers Other" not in col and "Trailers Total" not in col 
        ],
        axis=1,
        inplace=True,
        )
        return vehicle_stock_nuts3_df

In [7]:
def join_nuts3_polygn_gdf():
    """Join vehicle stock distributed by nuts3 data with polygon data."""
    vehicle_stock_gdf_nuts3_destination = os.path.join(
        PROCESSED_DATA_PATH, 'Vehicle_Stock', 'countries', 'DE', 
        "vehicle_stock_by_NUTS3_gdf.csv"  
    )
    vehicle_stock_nuts3_df = join_vehicle_stock_by_postal_codes_and_nuts3()
    polygon_gdf = setup_nuts3_polygon()
    vehicle_stock_nuts3_gdf = vehicle_stock_nuts3_df.merge(polygon_gdf, how='inner', on='NUTS3_region_code')
    vehicle_stock_nuts3_gdf.reset_index(drop=True, inplace=True)
    vehicle_stock_nuts3_gdf.to_csv(vehicle_stock_gdf_nuts3_destination)  
    return vehicle_stock_nuts3_gdf

In [8]:
vehicle_stock_nuts3_gdf = join_nuts3_polygn_gdf()
vehicle_stock_nuts3_gdf.sample(30)

                          Land   Registration district  \
Postcode_municipality                                    
1067                   SACHSEN  DRESDEN, STADT (14612)   
1445                   SACHSEN         MEISSEN (14627)   
1454                   SACHSEN         BAUTZEN (14625)   
1454                   SACHSEN         BAUTZEN (14625)   
1458                   SACHSEN         BAUTZEN (14625)   

                                         City  Motorcylces  \
Postcode_municipality                                        
1067                       DRESDEN, STADT            16760   
1445                      RADEBEUL, STADT             1635   
1454                      RADEBERG, STADT              844   
1454                              WACHAU               289   
1458                   OTTENDORF-OKRILLA               656   

                       Agricultural Tractors  Buses  Passenger Vehicles  \
Postcode_municipality                                                     
1067    

KeyError: ['NUTS3_region_code']