In [2]:
import os
import pandas as pd
import geopandas as gpd
from zoomin.data.constants import countries_dict, open_stack_list_NUTS3, open_stack_list_LAU, data_columns 
from zoomin.data.osmtags import networks_tags_dict, stations_tags_dict, name_tags_dict

In [3]:
# All data directories are resolved relative to the kernel's working directory:
# three levels up, then data/input.
cwd = os.getcwd()
DATA_PATH = os.path.join(cwd, *([os.pardir] * 3), 'data', 'input')
RAW_DATA_PATH, PROCESSED_DATA_PATH = (
    os.path.join(DATA_PATH, stage) for stage in ('raw', 'processed')
)

In [4]:
def setup_LAU_gdf() -> gpd.GeoDataFrame:
    """Load the LAU shapefile and reduce it to its region-code columns.

    Reads ``shapefiles/LAU.shp`` below ``PROCESSED_DATA_PATH``, removes every
    column whose name does not contain ``"code"`` and renames ``code`` to
    ``LAU_region_code`` and ``prnt_code`` to ``NUTS3_region_code``.

    Returns:
        The reduced frame with a fresh 0..n-1 index.
    """
    shapefile = os.path.join(PROCESSED_DATA_PATH, "shapefiles", "LAU.shp")
    # NOTE(review): `converters` is forwarded to the reader as-is; presumably meant
    # to keep the codes as strings - confirm the reader honours it.
    LAU_gdf = gpd.read_file(shapefile, converters={'code': str})
    # Only code columns survive; a column named "geometry" would be removed here too.
    non_code_columns = [name for name in LAU_gdf.columns if "code" not in name]
    LAU_gdf.drop(columns=non_code_columns, inplace=True)
    LAU_gdf.rename(
        columns={"code": "LAU_region_code", "prnt_code": "NUTS3_region_code"},
        inplace=True,
    )
    LAU_gdf.reset_index(drop=True, inplace=True)
    return LAU_gdf

In [5]:
def setup_NUTS3_gdf() -> gpd.GeoDataFrame:
    """Load the NUTS3 shapefile and reduce it to its region-code columns.

    Reads ``shapefiles/NUTS3.shp`` below ``PROCESSED_DATA_PATH``, removes every
    column whose name does not contain ``"code"`` and renames ``code`` to
    ``NUTS3_region_code`` and ``prnt_code`` to ``NUTS2_region_code``.

    Returns:
        The reduced frame with a fresh 0..n-1 index.
    """
    shapefile = os.path.join(PROCESSED_DATA_PATH, "shapefiles", "NUTS3.shp")
    # NOTE(review): `converters` is forwarded to the reader as-is; presumably meant
    # to keep the codes as strings - confirm the reader honours it.
    NUTS3_gdf = gpd.read_file(shapefile, converters={'code': str})
    # Only code columns survive; a column named "geometry" would be removed here too.
    non_code_columns = [name for name in NUTS3_gdf.columns if "code" not in name]
    NUTS3_gdf.drop(columns=non_code_columns, inplace=True)
    NUTS3_gdf.rename(
        columns={"code": "NUTS3_region_code", "prnt_code": "NUTS2_region_code"},
        inplace=True,
    )
    NUTS3_gdf.reset_index(drop=True, inplace=True)
    return NUTS3_gdf

In [6]:
def setup_NUTS2_gdf() -> gpd.GeoDataFrame:
    """Load the NUTS2 shapefile and reduce it to its region-code columns.

    Reads ``shapefiles/NUTS2.shp`` below ``PROCESSED_DATA_PATH``, removes every
    column whose name does not contain ``"code"`` and renames ``code`` to
    ``NUTS2_region_code`` and ``prnt_code`` to ``NUTS1_region_code``.

    Returns:
        The reduced frame with a fresh 0..n-1 index.
    """
    shapefile = os.path.join(PROCESSED_DATA_PATH, "shapefiles", "NUTS2.shp")
    # NOTE(review): `converters` is forwarded to the reader as-is; presumably meant
    # to keep the codes as strings - confirm the reader honours it.
    NUTS2_gdf = gpd.read_file(shapefile, converters={'code': str})
    # Only code columns survive; a column named "geometry" would be removed here too.
    non_code_columns = [name for name in NUTS2_gdf.columns if "code" not in name]
    NUTS2_gdf.drop(columns=non_code_columns, inplace=True)
    NUTS2_gdf.rename(
        columns={"code": "NUTS2_region_code", "prnt_code": "NUTS1_region_code"},
        inplace=True,
    )
    NUTS2_gdf.reset_index(drop=True, inplace=True)
    return NUTS2_gdf

In [7]:
def setup_NUTS1_gdf() -> gpd.GeoDataFrame:
    """Load the NUTS1 shapefile and reduce it to its region-code columns.

    Reads ``shapefiles/NUTS1.shp`` below ``PROCESSED_DATA_PATH``, removes every
    column whose name does not contain ``"code"`` and renames ``code`` to
    ``NUTS1_region_code`` and ``prnt_code`` to ``NUTS0_region_code``.

    Returns:
        The reduced frame with a fresh 0..n-1 index.
    """
    shapefile = os.path.join(PROCESSED_DATA_PATH, "shapefiles", "NUTS1.shp")
    # NOTE(review): `converters` is forwarded to the reader as-is; presumably meant
    # to keep the codes as strings - confirm the reader honours it.
    NUTS1_gdf = gpd.read_file(shapefile, converters={'code': str})
    # Only code columns survive; a column named "geometry" would be removed here too.
    non_code_columns = [name for name in NUTS1_gdf.columns if "code" not in name]
    NUTS1_gdf.drop(columns=non_code_columns, inplace=True)
    NUTS1_gdf.rename(
        columns={"code": "NUTS1_region_code", "prnt_code": "NUTS0_region_code"},
        inplace=True,
    )
    NUTS1_gdf.reset_index(drop=True, inplace=True)
    return NUTS1_gdf

In [8]:
def setup_NUTS0_gdf() -> gpd.GeoDataFrame:
    """Load the NUTS0 shapefile and reduce it to its region-code columns.

    Reads ``shapefiles/NUTS0.shp`` below ``PROCESSED_DATA_PATH``, removes every
    column whose name does not contain ``"code"`` and renames ``code`` to
    ``NUTS0_region_code`` and ``prnt_code`` to ``EU_region_code``.

    Returns:
        The reduced frame with a fresh 0..n-1 index.
    """
    shapefile = os.path.join(PROCESSED_DATA_PATH, "shapefiles", "NUTS0.shp")
    # NOTE(review): `converters` is forwarded to the reader as-is; presumably meant
    # to keep the codes as strings - confirm the reader honours it.
    NUTS0_gdf = gpd.read_file(shapefile, converters={'code': str})
    # Only code columns survive; a column named "geometry" would be removed here too.
    non_code_columns = [name for name in NUTS0_gdf.columns if "code" not in name]
    NUTS0_gdf.drop(columns=non_code_columns, inplace=True)
    NUTS0_gdf.rename(
        columns={"code": "NUTS0_region_code", "prnt_code": "EU_region_code"},
        inplace=True,
    )
    NUTS0_gdf.reset_index(drop=True, inplace=True)
    return NUTS0_gdf

# Join Territorial Unit Data Frame with the Network Data Sets

In [9]:
# Interactive choice of the territorial unit. The answer is interpolated into the
# overlap-CSV file names and region-code column names used by the merge functions.
# NOTE(review): the network/stations merge functions join on 'LAU_region_code',
# so presumably only "LAU" is a working answer for them - confirm.
territorial_unit_1 = input(
        'Please enter a character from: LAU, NUTS3, NUTS2, NUTS1, NUTS0, Europe')

In [10]:
def get_territorial_unit_dataframe():
    """Build the LAU-level table of region codes for all territorial levels.

    Loads the LAU, NUTS3, NUTS2, NUTS1 and NUTS0 code tables, prints their
    lengths, and left-joins each parent level onto the LAU table in turn. After
    every join the newly gained parent-code column is moved to the front, so
    the coarsest level ends up as the first column.

    Returns:
        One row per LAU row, carrying the region codes of every level. This is
        the base table used by the spatial disaggregation model.
    """
    # Target of the optional CSV export (currently disabled, see below).
    join_df_path_destination = os.path.join(
        PROCESSED_DATA_PATH, "ancilliary_data", "join_EU_df.csv"
    )
    LAU_gdf = setup_LAU_gdf()
    NUTS3_gdf = setup_NUTS3_gdf()
    NUTS2_gdf = setup_NUTS2_gdf()
    NUTS1_gdf = setup_NUTS1_gdf()
    NUTS0_gdf = setup_NUTS0_gdf()

    for message, frame in (
        ("The lenght of LAU is: ", LAU_gdf),
        ("The lenght of NUTS3_gdf is: ", NUTS3_gdf),
        ("The lenght of NUTS2_gdf is: ", NUTS2_gdf),
        ("The lenght of NUTS1_gdf is: ", NUTS1_gdf),
        ("The lenght of NUTS0_gdf is: ", NUTS0_gdf),
    ):
        print(message, len(frame))

    # (parent table, key shared with the table built so far, parent-code column gained)
    join_steps = (
        (NUTS3_gdf, "NUTS3_region_code", "NUTS2_region_code"),
        (NUTS2_gdf, "NUTS2_region_code", "NUTS1_region_code"),
        (NUTS1_gdf, "NUTS1_region_code", "NUTS0_region_code"),
        (NUTS0_gdf, "NUTS0_region_code", "EU_region_code"),
    )
    join_EU_df = LAU_gdf
    for parent_frame, shared_key, parent_key in join_steps:
        join_EU_df = join_EU_df.merge(parent_frame, how='left', on=shared_key)
        # Move the parent code to column position 0.
        parent_codes = join_EU_df.pop(parent_key)
        join_EU_df.insert(0, parent_key, parent_codes)
    # join_EU_df.to_csv(join_df_path_destination)
    return join_EU_df

In [11]:
def merge_network_data_with_territorial_unit_dataframe(territorial_unit_1, networks_tags_dict):
    """Attach the OSM network values to the territorial-unit table.

    For every key of ``networks_tags_dict`` the file
    ``<key>_overlap_df_<territorial_unit_1>.csv`` is read, its ``<key>_value``
    column is averaged per region code, and the averages are left-joined onto
    the table returned by ``get_territorial_unit_dataframe()``.

    Args:
        territorial_unit_1: Territorial-unit label used in the CSV file name and
            in the name of the CSV's region-code column.
        networks_tags_dict: Mapping whose keys are the network component names.

    Returns:
        The territorial-unit table with one ``<component>_value`` column
        appended per component; rows without a match hold NaN.
    """
    # Target of the optional CSV export (currently disabled, see below).
    join_df_path_destination = os.path.join(
        PROCESSED_DATA_PATH,
        "ancilliary_data",
        "data_network_LAU_df.csv")
    join_EU_df = get_territorial_unit_dataframe()
    region_key = f'{territorial_unit_1}_region_code'
    osm_data_list = []
    for component_name in networks_tags_dict.keys():
        value_column = f'{component_name}_value'
        data_set_df_path_source = os.path.join(
            PROCESSED_DATA_PATH,
            "osm_data",
            "countries",
            "networks_overlap_df_data",
            f"{component_name}_overlap_df_{territorial_unit_1}.csv")
        raw_data_set_df = pd.read_csv(
            data_set_df_path_source,
            converters={region_key: str, value_column: str},
        )
        raw_data_set_df[value_column] = raw_data_set_df[value_column].astype(float)
        # Average only the value column. Averaging the whole frame would also try
        # to aggregate any other column of the CSV, which fails (or warns) on
        # non-numeric ones, and only the value column is used afterwards.
        mean_per_region = raw_data_set_df.groupby(region_key, as_index=False)[value_column].mean()
        # NOTE(review): the join key is fixed to 'LAU_region_code', so this only
        # works when territorial_unit_1 == "LAU" - confirm whether other units
        # are meant to be supported.
        data_set = join_EU_df.merge(mean_per_region, how='left', on='LAU_region_code')
        osm_data_list.append(data_set[[value_column]])
    # The region keys are unique after the group-by, so every left join keeps the
    # row count and order of join_EU_df and the columns can be placed side by side.
    # A single concat also copes with an empty networks_tags_dict.
    value_frames = [frame.reset_index(drop=True) for frame in osm_data_list]
    data_network_LAU = pd.concat(
        [join_EU_df.reset_index(drop=True)] + value_frames, axis=1
    )
    # data_network_LAU.to_csv(join_df_path_destination)
    return data_network_LAU

In [12]:
def merge_network_data_with_territorial_unit_dataframe_2(territorial_unit_1, data_network_LAU):
    """Aggregate the LAU-level network table to one row per NUTS3 region.

    The numeric columns of ``data_network_LAU`` are summed per
    ``NUTS3_region_code`` and left-joined onto the territorial-unit table
    (without its LAU column); duplicate rows are then removed.

    Args:
        territorial_unit_1: Not used by this function; kept for interface
            compatibility.
        data_network_LAU: LAU-level table containing ``NUTS3_region_code`` and
            the network value columns.

    Returns:
        The de-duplicated NUTS3-level table. The index is the one left by
        ``drop_duplicates`` (first occurrence of each row), not 0..n-1.
    """
    # Target of the optional CSV export (currently disabled, see below).
    join_df_path_destination = os.path.join(
        PROCESSED_DATA_PATH,
        "ancilliary_data",
        "data_network_NUTS3_df.csv")
    join_EU_df = get_territorial_unit_dataframe().drop(columns=['LAU_region_code'])
    # numeric_only=True keeps the string code columns out of the sum explicitly.
    # Without it, newer pandas concatenates the strings and the merge below
    # would produce suffixed duplicate code columns.
    nuts3_totals = (
        data_network_LAU
        .groupby('NUTS3_region_code', as_index=False)
        .sum(numeric_only=True)
    )
    data_network_NUTS3 = join_EU_df.merge(nuts3_totals, how='left', on='NUTS3_region_code')
    data_network_NUTS3 = data_network_NUTS3.drop_duplicates()
    # data_network_NUTS3.to_csv(join_df_path_destination)
    return data_network_NUTS3

In [None]:
data_network_LAU = merge_network_data_with_territorial_unit_dataframe(territorial_unit_1, networks_tags_dict)
data_network_LAU.head()

The lenght of LAU is:  95743
The lenght of NUTS3_gdf is:  1169
The lenght of NUTS2_gdf is:  240
The lenght of NUTS1_gdf is:  92
The lenght of NUTS0_gdf is:  27


Unnamed: 0,EU_region_code,NUTS0_region_code,NUTS1_region_code,NUTS2_region_code,NUTS3_region_code,LAU_region_code,bicycle_network_value,bus_network_value,railways_network_value,road_major_network_value,shipping_network_value
0,EU,BE,BE3,BE35,BE353,93088,0.0,,27302.738793,119068.328943,0.0
1,EU,BE,BE3,BE35,BE353,93010,0.0,0.0,12278.122065,36670.353389,0.0
2,EU,BE,BE3,BE35,BE353,93014,2766.77525,0.0,3144.253705,61603.030326,0.0
3,EU,BE,BE3,BE35,BE353,93018,0.0,0.0,414.568806,31836.235776,0.0
4,EU,BE,BE3,BE35,BE353,93022,0.0,0.0,6541.637125,63358.953734,0.0


In [None]:
data_network_NUTS3 = merge_network_data_with_territorial_unit_dataframe_2(territorial_unit_1, data_network_LAU)

In [None]:
data_network_NUTS3[95:100]

Unnamed: 0,EU_region_code,NUTS0_region_code,NUTS1_region_code,NUTS2_region_code,NUTS3_region_code,bicycle_network_value,bus_network_value,railways_network_value,road_major_network_value,shipping_network_value
6712,EU,EL,EL3,EL30,EL303,0.0,0.0,12464.75588,479317.8,0.0
6722,,,,,ELZZZ,0.0,0.0,0.0,0.0,0.0
6723,EU,LT,LT0,LT02,LT026,309.322976,0.0,404955.496257,1726203.0,5815.234302
6730,EU,LT,LT0,LT02,LT027,0.0,0.0,93803.543328,911319.0,140.528679
6734,EU,LT,LT0,LT02,LT023,531.830155,0.0,262673.589476,1207962.0,10885.203103


In [None]:
len(data_network_NUTS3)

1170

In [None]:
# show rows where at least one value is NaN
print(data_network_NUTS3[data_network_NUTS3.isna().any(axis=1)])

     EU_region_code NUTS0_region_code NUTS1_region_code NUTS2_region_code  \
6722            NaN               NaN               NaN               NaN   

     NUTS3_region_code  bicycle_network_value  bus_network_value  \
6722             ELZZZ                    0.0                0.0   

      railways_network_value  road_major_network_value  shipping_network_value  
6722                     0.0                       0.0                     0.0  


# Join Territorial Unit Data Frame with the Stations Data Sets

In [None]:
def merge_stations_data_with_territorial_unit_dataframe(territorial_unit_1, stations_tags_dict):
    """Attach the OSM station values to the territorial-unit table.

    For every key of ``stations_tags_dict`` the file
    ``<key>_OverlapDf_<territorial_unit_1>.csv`` is read and its ``<key>_value``
    column is left-joined, on ``LAU_region_code``, onto the table returned by
    ``get_territorial_unit_dataframe()``.

    Args:
        territorial_unit_1: Territorial-unit label used in the CSV file name and
            in the converter key for the region-code column.
        stations_tags_dict: Mapping whose keys are the station component names.

    Returns:
        The territorial-unit table with one ``<component>_value`` column
        appended per component; rows without a match hold NaN.
    """
    # Target of the optional CSV export (currently disabled, see below).
    join_df_path_destination = os.path.join(
        PROCESSED_DATA_PATH, "ancilliary_data", "data_stations_LAU_df.csv"
    )
    join_EU_df = get_territorial_unit_dataframe()
    region_key = f'{territorial_unit_1}_region_code'
    osm_data_list = []
    for component_name in stations_tags_dict.keys():
        value_column = f'{component_name}_value'
        source_csv = os.path.join(
            PROCESSED_DATA_PATH,
            "osm_data",
            "countries",
            "stations_overlap_df_data",
            f"{component_name}_OverlapDf_{territorial_unit_1}.csv",
        )
        station_counts = pd.read_csv(
            source_csv, converters={region_key: str, value_column: str}
        )
        station_counts[value_column] = station_counts[value_column].astype(float)
        joined = join_EU_df.merge(station_counts, how='left', on='LAU_region_code')
        osm_data_list.append(joined[[value_column]])
    # Place the value columns next to the region codes, aligned by row position.
    osm_data_frame = pd.concat(osm_data_list, axis=1)
    osm_data_frame.reset_index(drop=True, inplace=True)
    join_EU_df.reset_index(drop=True, inplace=True)
    data_stations_LAU = pd.concat([join_EU_df, osm_data_frame], axis=1)
    # data_stations_LAU.to_csv(join_df_path_destination)
    return data_stations_LAU

In [None]:
def merge_stations_data_with_territorial_unit_dataframe_2(territorial_unit_1, stations_tags_dict):
    """Aggregate the LAU-level stations table to one row per NUTS3 region.

    Builds the LAU-level table with
    ``merge_stations_data_with_territorial_unit_dataframe``, sums its numeric
    columns per ``NUTS3_region_code`` and left-joins the totals onto the
    territorial-unit table (without its LAU column); duplicate rows are then
    removed.

    Args:
        territorial_unit_1: Forwarded to the LAU-level merge function.
        stations_tags_dict: Forwarded to the LAU-level merge function.

    Returns:
        The de-duplicated NUTS3-level table. The index is the one left by
        ``drop_duplicates`` (first occurrence of each row), not 0..n-1.
    """
    # Target of the optional CSV export (currently disabled, see below).
    join_df_path_destination = os.path.join(
        PROCESSED_DATA_PATH,
        "ancilliary_data",
        "data_stations_NUTS3_df.csv")
    join_EU_df = get_territorial_unit_dataframe()
    data_stations_LAU = merge_stations_data_with_territorial_unit_dataframe(territorial_unit_1, stations_tags_dict)
    join_EU_df = join_EU_df.drop(columns=['LAU_region_code'])
    # numeric_only=True keeps the string code columns out of the sum explicitly.
    # Without it, newer pandas concatenates the strings and the merge below
    # would produce suffixed duplicate code columns.
    nuts3_totals = (
        data_stations_LAU
        .groupby('NUTS3_region_code', as_index=False)
        .sum(numeric_only=True)
    )
    data_stations_NUTS3 = join_EU_df.merge(nuts3_totals, how='left', on='NUTS3_region_code')
    data_stations_NUTS3 = data_stations_NUTS3.drop_duplicates()
    # data_stations_NUTS3.to_csv(join_df_path_destination)
    return data_stations_NUTS3

In [None]:
data_stations_LAU = merge_stations_data_with_territorial_unit_dataframe(territorial_unit_1, stations_tags_dict)

The lenght of LAU is:  95743
The lenght of NUTS3_gdf is:  1169
The lenght of NUTS2_gdf is:  240
The lenght of NUTS1_gdf is:  92
The lenght of NUTS0_gdf is:  27


In [None]:
data_stations_LAU.head()

Unnamed: 0,EU_region_code,NUTS0_region_code,NUTS1_region_code,NUTS2_region_code,NUTS3_region_code,LAU_region_code,fuel_stations_value,charging_stations_value,bicycle_stations_value,bus_stations_value,airport_stations_value,railway_station_value,train_station_value,subway_station_value,lightrail_station_value,shipping_station_value,helicopter_station_value
0,EU,BE,BE3,BE35,BE353,93088,4.0,,,193.0,,17.0,,,,,
1,EU,BE,BE3,BE35,BE353,93010,5.0,,,173.0,1.0,24.0,2.0,,7.0,,1.0
2,EU,BE,BE3,BE35,BE353,93014,8.0,2.0,,204.0,,35.0,2.0,,12.0,,
3,EU,BE,BE3,BE35,BE353,93018,,,,71.0,1.0,,,,,,
4,EU,BE,BE3,BE35,BE353,93022,,,,121.0,1.0,,,,,,


In [None]:
len(data_stations_LAU)

95743

In [None]:
data_stations_NUTS3 = merge_stations_data_with_territorial_unit_dataframe_2(territorial_unit_1, stations_tags_dict)

The lenght of LAU is:  95743
The lenght of NUTS3_gdf is:  1169
The lenght of NUTS2_gdf is:  240
The lenght of NUTS1_gdf is:  92
The lenght of NUTS0_gdf is:  27
The lenght of LAU is:  95743
The lenght of NUTS3_gdf is:  1169
The lenght of NUTS2_gdf is:  240
The lenght of NUTS1_gdf is:  92
The lenght of NUTS0_gdf is:  27


  data_stations_NUTS3 = data_stations_LAU.groupby(['NUTS3_region_code']).sum()


In [None]:
data_stations_NUTS3.head()

Unnamed: 0,EU_region_code,NUTS0_region_code,NUTS1_region_code,NUTS2_region_code,NUTS3_region_code,fuel_stations_value,charging_stations_value,bicycle_stations_value,bus_stations_value,airport_stations_value,railway_station_value,train_station_value,subway_station_value,lightrail_station_value,shipping_station_value,helicopter_station_value
0,EU,BE,BE3,BE35,BE353,22.0,3.0,0.0,982.0,3.0,85.0,6.0,0.0,19.0,0.0,1.0
7,EU,BE,BE1,BE10,BE100,185.0,168.0,364.0,2179.0,5.0,1290.0,22.0,60.0,0.0,15.0,11.0
26,EU,BE,BE3,BE33,BE334,31.0,9.0,0.0,972.0,3.0,32.0,2.0,0.0,0.0,0.0,2.0
40,EU,BE,BE3,BE33,BE335,72.0,25.0,3.0,2212.0,4.0,66.0,9.0,0.0,0.0,0.0,9.0
60,EU,BE,BE3,BE33,BE336,36.0,6.0,1.0,1095.0,2.0,16.0,3.0,0.0,0.0,0.0,3.0


In [None]:
len(data_stations_NUTS3)

1170

In [None]:
# show rows where at least one value is NaN
print(data_stations_NUTS3[data_stations_NUTS3.isna().any(axis=1)])

     EU_region_code NUTS0_region_code NUTS1_region_code NUTS2_region_code  \
6722            NaN               NaN               NaN               NaN   

     NUTS3_region_code  fuel_stations_value  charging_stations_value  \
6722             ELZZZ                  0.0                      0.0   

      bicycle_stations_value  bus_stations_value  airport_stations_value  \
6722                     0.0                 0.0                     0.0   

      railway_station_value  train_station_value  subway_station_value  \
6722                    0.0                  0.0                   0.0   

      lightrail_station_value  shipping_station_value  \
6722                      0.0                     0.0   

      helicopter_station_value  
6722                       0.0  


# Join Territorial Unit Data Frame with the Freight transport Data Sets

In [None]:
# Columns of the freight traffic-flow CSV that
# merge_freight_transport_data_with_territorial_unit_dataframe casts to float.
freight_transport_list = ['Distance_from_origin_region_to_E_road', 
                          'Distance_within_E_road',
                          'Distance_from_E_road_to_destination_region',
                          'Total_distance',
                          'Traffic_flow_trucks_2010',
                          'Traffic_flow_trucks_2019',
                          'Traffic_flow_trucks_2030',
                          'Traffic_flow_tons_2010',
                          'Traffic_flow_tons_2019',
                          'Traffic_flow_tons_2030']

In [None]:
def merge_freight_transport_data_with_territorial_unit_dataframe():
    """Attach the NUTS3 freight traffic-flow data to the territorial-unit table.

    Reads ``freight_traffic_flow_Overlap_df_NUTS3.csv``, casts every column
    named in ``freight_transport_list`` to float, and left-joins the result on
    ``NUTS3_region_code`` onto the territorial-unit table (without its LAU
    column). Duplicate rows are removed afterwards.

    Returns:
        The de-duplicated NUTS3-level table; regions absent from the CSV hold
        NaN in the freight columns.
    """
    join_EU_df = get_territorial_unit_dataframe()
    data_set_df_path_source = os.path.join(
        PROCESSED_DATA_PATH,
        "freight_traffic_flow_ETISPLUS_EUROSTAT",
        "countries",
        "freight_traffic_flow_Overlap_df_NUTS3.csv",
    )
    # Target of the optional CSV export (currently disabled, see below).
    join_df_path_destination = os.path.join(
        PROCESSED_DATA_PATH,
        "ancilliary_data",
        "data_freight_transport_NUTS3_df.csv",
    )
    freight_df = pd.read_csv(
        data_set_df_path_source, converters={'NUTS3_region_code': str}
    )
    freight_df = freight_df.astype(
        {column_name: float for column_name in freight_transport_list}
    )
    nuts_codes = join_EU_df.drop(columns=['LAU_region_code'])
    data_set_freight_transport_NUTS3 = nuts_codes.merge(
        freight_df, how='left', on='NUTS3_region_code'
    ).drop_duplicates()
    # data_set_freight_transport_NUTS3.to_csv(join_df_path_destination)
    return data_set_freight_transport_NUTS3

In [None]:
data_set_freight_transport_NUTS3 = merge_freight_transport_data_with_territorial_unit_dataframe()

The lenght of LAU is:  95743
The lenght of NUTS3_gdf is:  1169
The lenght of NUTS2_gdf is:  240
The lenght of NUTS1_gdf is:  92
The lenght of NUTS0_gdf is:  27


In [None]:
data_set_freight_transport_NUTS3.head(10)

Unnamed: 0,EU_region_code,NUTS0_region_code,NUTS1_region_code,NUTS2_region_code,NUTS3_region_code,Distance_from_origin_region_to_E_road,Distance_within_E_road,Distance_from_E_road_to_destination_region,Total_distance,Traffic_flow_trucks_2010,Traffic_flow_trucks_2019,Traffic_flow_trucks_2030,Traffic_flow_tons_2010,Traffic_flow_tons_2019,Traffic_flow_tons_2030
0,EU,BE,BE3,BE35,BE353,7.0,833.545238,15.561905,856.107143,175.566964,172.27381,168.715774,2387.710714,2342.92381,2294.534524
7,EU,BE,BE1,BE10,BE100,5.0,1130.588785,131.744548,1267.333333,321.854556,322.320872,324.445093,4377.221963,4383.563863,4412.453271
26,EU,BE,BE3,BE33,BE334,5.0,826.866157,15.296367,847.162524,88.724904,88.371176,88.318595,1206.6587,1201.847992,1201.132887
40,EU,BE,BE3,BE33,BE335,1.0,1105.896346,127.798658,1234.695004,166.535235,173.247576,183.052759,2264.879195,2356.16704,2489.517524
60,EU,BE,BE3,BE33,BE336,10.0,1094.505255,126.722973,1231.228228,144.57958,151.983859,162.546922,1966.282282,2066.98048,2210.638138
69,EU,BE,BE2,BE21,BE211,4.0,1165.943107,125.765135,1295.708242,2024.779358,2124.772976,2268.658826,27536.999271,28896.912473,30853.760029
99,EU,BE,BE2,BE21,BE212,6.0,1156.476831,126.439462,1288.916293,243.672459,252.123505,264.596413,3313.945441,3428.879671,3598.511211
112,EU,BE,BE2,BE21,BE213,5.0,1151.583578,125.774927,1282.358504,805.605755,842.273644,895.313416,10956.23827,11454.921554,12176.262463
139,EU,BE,BE2,BE22,BE221,1.0,1140.939015,125.788391,1267.727406,730.038575,751.126929,782.855437,9928.524614,10215.326231,10646.833946
157,EU,BE,BE2,BE22,BE222,18.0,1125.899254,126.302985,1270.202239,276.111007,289.239739,308.226679,3755.109701,3933.660448,4191.882836


In [None]:
len(data_set_freight_transport_NUTS3)

1170

In [None]:
# show rows where at least one value is NaN
print(data_set_freight_transport_NUTS3[data_set_freight_transport_NUTS3.isna().any(axis=1)])

      EU_region_code NUTS0_region_code NUTS1_region_code NUTS2_region_code  \
589               EU                EL               EL6              EL62   
668               EU                EL               EL6              EL62   
713               EU                EL               EL5              EL52   
781               EU                EL               EL5              EL51   
860               EU                EL               EL5              EL52   
...              ...               ...               ...               ...   
94704             EU                PL               PL9              PL92   
95113             EU                PL               PL6              PL61   
95174             EU                PL               PL6              PL61   
95213             EU                PL               PL9              PL92   
95434             EU                PL               PL6              PL63   

      NUTS3_region_code  Distance_from_origin_region_to_E_road 

# Join Territorial Unit Data Frame with the Vehicle Stock Data Set for Germany

In [None]:
# Vehicle-stock column names. Only referenced by the commented-out float cast in
# merge_vehicle_stock_data_with_territorial_unit_dataframe. The spellings are
# data keys (they match the column headers of the merged table), so they must
# not be "corrected" here.
Vehicle_stock_list = ['Motorcylces',
                      'Agricultural Tractors',
                      'Buses',
                      'Passenger Vehicles',
                      'Motor Vehicles Other',
                      'Motor Vehicles Total',
                      'Load Force Wagons',
                      'Trailers Other',
                      'Trailers Total'
]

In [None]:
def merge_vehicle_stock_data_with_territorial_unit_dataframe():
    """Attach the German NUTS3 vehicle-stock data to the territorial-unit table.

    Reads ``Vehicle_Stock/countries/DE/vehicle_stock_by_nuts3_df.csv``, removes
    any column whose name contains ``"Unnamed: 0"``, and left-joins the rest on
    ``NUTS3_region_code`` onto the territorial-unit table (without its LAU
    column). Duplicate rows are removed afterwards.

    Returns:
        The de-duplicated NUTS3-level table; regions absent from the CSV hold
        NaN in the vehicle-stock columns.
    """
    join_EU_df = get_territorial_unit_dataframe()
    data_set_df_path_source = os.path.join(
        PROCESSED_DATA_PATH,
        "Vehicle_Stock",
        "countries",
        "DE",
        "vehicle_stock_by_nuts3_df.csv",
    )
    # Target of the optional CSV export (currently disabled, see below).
    join_df_path_destination = os.path.join(
        PROCESSED_DATA_PATH,
        "ancilliary_data",
        "data_vehicle_stock_NUTS3_df.csv",
    )
    vehicle_stock_df = pd.read_csv(
        data_set_df_path_source, converters={'NUTS3_region_code': str}
    )
    # Discard the index column a previous to_csv left in the file.
    leftover_index_columns = [
        name for name in vehicle_stock_df.columns if "Unnamed: 0" in name
    ]
    vehicle_stock_df = vehicle_stock_df.drop(columns=leftover_index_columns)
    nuts_codes = join_EU_df.drop(columns=['LAU_region_code'])
    data_set_vehicle_stock_NUTS3 = nuts_codes.merge(
        vehicle_stock_df, how='left', on='NUTS3_region_code'
    ).drop_duplicates()
    # data_set_vehicle_stock_NUTS3.to_csv(join_df_path_destination)
    return data_set_vehicle_stock_NUTS3

In [None]:
data_set_vehicle_stock_NUTS3 = merge_vehicle_stock_data_with_territorial_unit_dataframe()

The lenght of LAU is:  95743
The lenght of NUTS3_gdf is:  1169
The lenght of NUTS2_gdf is:  240
The lenght of NUTS1_gdf is:  92
The lenght of NUTS0_gdf is:  27


In [None]:
data_set_vehicle_stock_NUTS3.sample(15)

Unnamed: 0,EU_region_code,NUTS0_region_code,NUTS1_region_code,NUTS2_region_code,NUTS3_region_code,Motorcylces,Agricultural Tractors,Buses,Passenger Vehicles,Motor Vehicles Other,Motor Vehicles Total,Load Force Wagons,Trailers Other,Trailers Total
73103,EU,DE,DE2,DE22,DE229,7660.0,7805.0,413.0,50961.0,21281.0,72242.0,2939.0,9885.0,12824.0
79067,EU,DE,DEA,DEA1,DEA12,,,,,,,,,
89853,EU,FI,FI1,FI1D,FI1D9,,,,,,,,,
188,EU,BE,BE2,BE23,BE231,,,,,,,,,
487,EU,BE,BE3,BE32,BE322,,,,,,,,,
33031,EU,FR,FRY,FRY2,FRY20,,,,,,,,,
19163,EU,RO,RO2,RO21,RO212,,,,,,,,,
139,EU,BE,BE2,BE22,BE221,,,,,,,,,
71559,EU,DE,DE2,DE26,DE266,6946.0,5990.0,436.0,55300.0,18683.0,73983.0,2559.0,12333.0,14892.0
74974,EU,DE,DEE,DEE0,DEE05,8179.0,2651.0,1290.0,97993.0,22456.0,120449.0,9405.0,13581.0,22986.0


In [None]:
len(data_set_vehicle_stock_NUTS3)

1170

In [None]:
# show rows where at least one value is NaN
print(data_set_vehicle_stock_NUTS3[data_set_vehicle_stock_NUTS3.isna().any(axis=1)])

      EU_region_code NUTS0_region_code NUTS1_region_code NUTS2_region_code  \
0                 EU                BE               BE3              BE35   
7                 EU                BE               BE1              BE10   
26                EU                BE               BE3              BE33   
40                EU                BE               BE3              BE33   
60                EU                BE               BE3              BE33   
...              ...               ...               ...               ...   
95609             EU                PL               PL6              PL63   
95612             EU                PL               PL7              PL71   
95639             EU                PL               PL2              PL21   
95640             EU                PL               PL8              PL81   
95692             EU                PL               PL9              PL92   

      NUTS3_region_code  Motorcylces  Agricultural Tractors  Bu

# Query data from OpenStack

        The NACE (Nomenclature of Economic Activities in the European Community) sector related to transport is "H". The sector "H" covers the following activities:

        Water transport
        Air transport
        Supporting and auxiliary transport activities; activities of travel agencies
        Postal and courier activities
        So, any business or organization that is involved in these activities would fall under the NACE sector "H".

### NUTS3

In [14]:
def fill_territorial_unit_dataframe():
    """Join each OpenStack NUTS3 data set onto the NUTS3-level region table.

    For every entry of ``open_stack_list_NUTS3`` the file
    ``data_search_NUTS3/processed/<entry>/data.csv`` is read and reduced to a
    ``NUTS3_region_code`` column and a ``<entry>_value`` column. The result is
    left-joined onto a table holding one row per NUTS3 code, derived from
    ``get_territorial_unit_dataframe()``.

    Returns:
        Tuple ``(data, data_merged, join_EU_df, join_EU_NUTS3_df, data_length)``:
        ``data`` and ``data_merged`` belong to the last entry processed (``None``
        when the list is empty); ``join_EU_NUTS3_df`` is indexed by NUTS3 code;
        ``data_length`` maps each entry to the row count of its CSV.

    Raises:
        KeyError: if a CSV has no column whose name contains ``"reg"`` or no
            column whose name contains ``"value"``.
    """
    join_EU_df = get_territorial_unit_dataframe()
    # Loop-invariant: first row of the region hierarchy per NUTS3 code.
    join_EU_NUTS3_df = (
        join_EU_df.drop(columns=['LAU_region_code'])
        .groupby('NUTS3_region_code')
        .first()
    )
    # Same table with the code as a regular column, so the merge key and the
    # final column selection both refer to an actual column.
    nuts3_lookup = join_EU_NUTS3_df.reset_index()
    data_length = {}
    data = None
    data_merged = None
    for files in open_stack_list_NUTS3:
        value_column = f"{files}_value"
        file_path_source = os.path.join(cwd, '..', '..', '..', 'data_search_NUTS3', 'processed',
                            f"{files}",
                            "data.csv")
        # Target of the optional CSV export (currently disabled, see below).
        file_path_destination = os.path.join(cwd, '..', '..', '..', 'data', 'input',
                            'processed',
                            'ancilliary_data',
                            f"data_{files}_NUTS3_df.csv")
        data = pd.read_csv(file_path_source)
        # Reset per file so a name found in an earlier CSV is never reused.
        region_code = None
        data_value_name = None
        for column_name in data.columns:
            if "reg" in column_name:
                region_code = column_name
            if "value" in column_name:
                data_value_name = column_name
        if region_code is None or data_value_name is None:
            raise KeyError(
                f"No region-code and/or value column found in {file_path_source}; "
                f"columns are {list(data.columns)}"
            )
        # .copy() so the assignments below write to an independent frame.
        data = data.rename(
            columns={region_code: 'NUTS3_region_code', data_value_name: value_column}
        )[['NUTS3_region_code', value_column]].copy()
        data_length[files] = len(data)
        if data[value_column].isna().any():
            missing_values = data[value_column].isna().sum()
            print(f"Missing values found in the data_df in the column name {files}. Number of missing values: {missing_values}")
        else:
            # NOTE(review): astype(int) truncates fractional values - confirm intended.
            data[value_column] = data[value_column].astype(int)
        data_merged = nuts3_lookup.merge(data, how='left', on='NUTS3_region_code')
        if data_merged[value_column].isna().any():
            missing_values_2 = data_merged[value_column].isna().sum()
            print(f"Missing values found in the data_merged in the column name {files}. Number of missing values: {missing_values_2}")
        else:
            data_merged[value_column] = data_merged[value_column].astype(int)
        cols = ['EU_region_code', 'NUTS0_region_code', 'NUTS1_region_code', 'NUTS2_region_code', 'NUTS3_region_code', value_column]
        data_merged = data_merged[cols]
        # data_merged.to_csv(file_path_destination)
    return data, data_merged, join_EU_df, join_EU_NUTS3_df, data_length

### LAU

In [13]:
def fill_territorial_unit_dataframe_LAU(territorial_unit):
    """Collect the OpenStack LAU data sets side by side.

    For every entry of ``open_stack_list_LAU`` the file
    ``data_search_NUTS3/processed/<entry>/data.csv`` is read and reduced to a
    ``<territorial_unit>_region_code`` column and a ``<entry>_value`` column.
    The reduced frames are concatenated column-wise.

    Args:
        territorial_unit: Label used as prefix of the region-code column name.

    Returns:
        Tuple ``(data, data_merged, join_EU_df, data_length)``: ``data`` is the
        reduced frame of the last entry, ``data_merged`` the column-wise
        concatenation of all reduced frames, ``join_EU_df`` the table from
        ``get_territorial_unit_dataframe()`` and ``data_length`` maps each
        entry to the row count of its CSV.

    Raises:
        KeyError: if a CSV has no column whose name contains ``"reg"``, or none
            whose name contains ``"value"`` or ``"area"``.
    """
    join_EU_df = get_territorial_unit_dataframe()
    data_length = {}
    dfs = []
    region_column = f"{territorial_unit}_region_code"
    for files in open_stack_list_LAU:
        value_column = f"{files}_value"
        file_path_source = os.path.join(cwd, '..', '..', '..', 'data_search_NUTS3', 'processed',
                            f"{files}",
                            "data.csv")
        data = pd.read_csv(file_path_source)
        # Reset per file so a name found in an earlier CSV is never reused.
        region_code = None
        data_value_name = None
        for column_name in data.columns:
            if "reg" in column_name:
                region_code = column_name
            # Each substring needs its own membership test: `"value" or ...` is
            # always true and made the last column of the CSV the value column.
            if "value" in column_name or "area" in column_name:
                data_value_name = column_name
        if region_code is None or data_value_name is None:
            raise KeyError(
                f"No region-code and/or value column found in {file_path_source}; "
                f"columns are {list(data.columns)}"
            )
        # .copy() so the assignment below writes to an independent frame.
        data = data.rename(
            columns={region_code: region_column, data_value_name: value_column}
        )[[region_column, value_column]].copy()
        data_length[files] = len(data)
        if data[value_column].isna().any():
            missing_values = data[value_column].isna().sum()
            print(f"Missing values found in the data_df in the column name {files}. Number of missing values: {missing_values}")
        else:
            # NOTE(review): the cast to 'Int64' presumably expects whole-number
            # values - confirm against the data.
            data[value_column] = data[value_column].astype('Int64')
        dfs.append(data)
    # Frames are aligned by row position, so the region-code column appears
    # once per data set in the result.
    data_merged = pd.concat(dfs, axis=1)
    return data, data_merged, join_EU_df, data_length


In [14]:
# Ad-hoc look at one source file (residential energy demand) to inspect its columns.
file_path_source = os.path.join(cwd, '..', '..', '..', 'data_search_NUTS3', 'processed',
                            'residential energy demand',
                            "data.csv")
data = pd.read_csv(file_path_source)

In [15]:
data.head()

Unnamed: 0,reg_code,prnt_code,year,value,quality_rating
0,2000000,DE600,2022,1950172.0,good
1,6411000,DE711,2022,1078707.0,good
2,6412000,DE712,2022,3190672.0,good
3,6413000,DE713,2022,723624.4,good
4,6414000,DE714,2022,1737275.0,good


In [18]:
def _find_region_and_value_columns(column_names):
    """Return the ``(region_code, data_value_name)`` column names of a source table.

    A column whose name contains "reg" is taken as the region-code column and a
    column whose name contains "value" as the data column. If several columns
    match, the last one wins.

    Raises:
        ValueError: if no region-code column or no value column is found.
    """
    region_code = None
    data_value_name = None
    for column_name in column_names:
        if "reg" in column_name:
            region_code = column_name
        if "value" in column_name:
            data_value_name = column_name
    if region_code is None or data_value_name is None:
        raise ValueError(
            "Could not find a region-code column (name containing 'reg') and a value "
            f"column (name containing 'value') among: {list(column_names)}"
        )
    return region_code, data_value_name


def fill_territorial_unit_dataframe_LAU(territorial_unit):
    """Aggregate every dataset in ``open_stack_list_LAU`` to NUTS3 and save it as CSV.

    For each dataset name, ``data.csv`` is read from the dataset's source folder,
    reduced to its region-code and value columns, left-joined onto the territorial
    unit table on ``f"{territorial_unit}_region_code"``, summed per
    ``NUTS3_region_code`` and written to
    ``ancilliary_data/data_<dataset>_<territorial_unit>_df.csv``.

    Args:
        territorial_unit: prefix of the region-code column to join on (the
            notebook calls this with ``'LAU'``).

    Returns:
        Tuple ``(data, join_EU_df, data_merged, data_length, data_merged_LAU,
        join_EU_NUTS3_df)``. ``data``, ``data_merged`` and ``data_merged_LAU``
        belong to the LAST dataset processed (``None`` if the list is empty);
        ``data_length`` maps each dataset name to its number of source rows.

    Raises:
        ValueError: if a source file has no recognisable region-code or value
            column.
    """
    join_EU_df = get_territorial_unit_dataframe()
    region_code_column = f"{territorial_unit}_region_code"
    parent_code_columns = ['EU_region_code', 'NUTS0_region_code', 'NUTS1_region_code',
                           'NUTS2_region_code', 'NUTS3_region_code']

    # One row of parent codes per NUTS3 region. This does not depend on the dataset,
    # so it is built once instead of on every loop iteration.
    join_EU_NUTS3_df = join_EU_df.drop(columns=['LAU_region_code'])
    join_EU_NUTS3_df = join_EU_NUTS3_df.groupby('NUTS3_region_code').first()

    data_length = {}
    # Defined up front so the return statement is valid even for an empty dataset list.
    data = None
    data_merged = None
    data_merged_LAU = None

    for files in open_stack_list_LAU:
        value_column = f"{files}_value"
        file_path_source = os.path.join(cwd, '..', '..', '..', 'data_search_NUTS3', 'processed',
                            f"{files}",
                            "data.csv")
        file_path_destination = os.path.join(cwd, '..', '..', '..', 'data', 'input',
                            'processed',
                            'ancilliary_data',
                            f"data_{files}_{territorial_unit}_df.csv")
        source = pd.read_csv(file_path_source)
        # Resolved per file, so a name can never leak over from the previous dataset.
        region_code, data_value_name = _find_region_and_value_columns(source.columns)

        # Keep only the two relevant columns. The explicit copy makes the column
        # assignments below operate on an independent frame (no SettingWithCopyWarning).
        data = source.rename(
            columns={region_code: region_code_column, data_value_name: value_column}
        )[[region_code_column, value_column]].copy()
        data_length[files] = len(data)

        if data[value_column].isna().any():
            missing_values = data[value_column].isna().sum()
            print(f"Missing values found in the data_df in the column name {files}. Number of missing values: {missing_values}")
        else:
            data[value_column] = data[value_column].astype(int)
        print('The file name is: ', files)

        # Region codes are compared as Python objects in the merge below.
        data[region_code_column] = data[region_code_column].astype('object')
        data_merged = join_EU_df.merge(data, how='left', on=region_code_column)

        # NOTE(review): GroupBy.sum() skips NaN and returns 0 for groups without any
        # value, so NUTS3 regions with no matching source rows end up as 0 rather than
        # missing - confirm this is intended.
        numeric_cols = data_merged.select_dtypes(include=['int', 'float']).columns
        data_merged_LAU = data_merged.groupby("NUTS3_region_code")[numeric_cols].sum()

        data_merged = join_EU_NUTS3_df.merge(data_merged_LAU, how='left', on='NUTS3_region_code')
        data_merged = data_merged.reset_index()
        data_merged = data_merged[parent_code_columns + [value_column]]

        if data_merged[value_column].isna().any():
            missing_values_2 = data_merged[value_column].isna().sum()
            print(f"Missing values found in the data_merged in the column name {files}. Number of missing values: {missing_values_2}")
        else:
            data_merged[value_column] = data_merged[value_column].astype('Int64')
        # The index is written on purpose: it becomes the 'Unnamed: 0' column that
        # get_file_names() drops when the per-dataset files are joined.
        data_merged.to_csv(file_path_destination)
    return data, join_EU_df, data_merged, data_length, data_merged_LAU, join_EU_NUTS3_df

In [19]:
# Run the LAU -> NUTS3 aggregation for every dataset in open_stack_list_LAU.
# The returned data / data_merged / data_merged_LAU frames are those of the last dataset in the list.
territorial_unit = 'LAU'
data, join_EU_df, data_merged, data_length, data_merged_LAU, join_EU_NUTS3_df = fill_territorial_unit_dataframe_LAU(territorial_unit)

The lenght of LAU is:  95743
The lenght of NUTS3_gdf is:  1169
The lenght of NUTS2_gdf is:  240
The lenght of NUTS1_gdf is:  92
The lenght of NUTS0_gdf is:  27
The file name is:  electricity demand of chemical industries
The file name is:  electricity demand of iron and steel industries
The file name is:  electricity demand of non-ferrous metal industries
The file name is:  electricity demand of non-metallic mineral industries
The file name is:  electricity demand of paper and printing industries
The file name is:  electricity demand of refineries
The file name is:  fuel demand of chemical industries
The file name is:  fuel demand of iron and steel industries
The file name is:  fuel demand of non-ferrous metal industries
The file name is:  fuel demand of non-metallic mineral industries
The file name is:  fuel demand of paper and printing industries
The file name is:  fuel demand of refineries
The file name is:  generation capacity of biomass plants
The file name is:  generation capacit

In [15]:
data_merged.shape

(1170, 6)

In [16]:
data_merged.head()

Unnamed: 0,EU_region_code,NUTS0_region_code,NUTS1_region_code,NUTS2_region_code,NUTS3_region_code,residential heat demand_value
0,EU,AT,AT1,AT11,AT111,0
1,EU,AT,AT1,AT11,AT112,0
2,EU,AT,AT1,AT11,AT113,0
3,EU,AT,AT1,AT12,AT121,0
4,EU,AT,AT1,AT12,AT122,0


In [71]:
join_EU_df.shape

(95743, 6)

In [86]:
data_merged.shape

(95743, 7)

In [83]:
data_merged_LAU.head()

Unnamed: 0_level_0,electricity demand of iron and steel industries_value
NUTS3_region_code,Unnamed: 1_level_1
AT111,0.0
AT112,0.0
AT113,0.0
AT121,0.0
AT122,0.0


In [74]:
data_merged_LAU.shape

(1170, 1)

In [50]:
data.sample(50)

Unnamed: 0,LAU_region_code,electricity demand of iron and steel industries_value
9871,SML10,0
11180,03014,0
50321,80157,0
79619,16071065,0
93892,1002301570802,0
84374,019081,0
78972,16066017,0
22772,544248,0
18715,32704,0
86413,030052,0


In [15]:
data_merged.shape

(95314, 4)

In [14]:
join_EU_df.head()

Unnamed: 0,EU_region_code,NUTS0_region_code,NUTS1_region_code,NUTS2_region_code,NUTS3_region_code,LAU_region_code
0,EU,BE,BE3,BE35,BE353,93088
1,EU,BE,BE3,BE35,BE353,93010
2,EU,BE,BE3,BE35,BE353,93014
3,EU,BE,BE3,BE35,BE353,93018
4,EU,BE,BE3,BE35,BE353,93022


In [14]:
data, data_merged, join_EU_df, join_EU_NUTS3_df, data_length = fill_territorial_unit_dataframe()

The lenght of LAU is:  95743
The lenght of NUTS3_gdf is:  1169
The lenght of NUTS2_gdf is:  240
The lenght of NUTS1_gdf is:  92
The lenght of NUTS0_gdf is:  27
Missing values found in the data_merged in the column name deaths. Number of missing values: 15
Missing values found in the data_merged in the column name employment in nace sector G-I. Number of missing values: 15
Missing values found in the data_merged in the column name gross domestic product. Number of missing values: 15
Missing values found in the data_merged in the column name gross value added. Number of missing values: 15
Missing values found in the data_merged in the column name gross value added in nace sector G-I. Number of missing values: 15
Missing values found in the data_merged in the column name live births. Number of missing values: 15
Missing values found in the data_merged in the column name number of busses_PL. Number of missing values: 1097
Missing values found in the data_merged in the column name number of

In [15]:
data_merged.head()

Unnamed: 0,EU_region_code,NUTS0_region_code,NUTS1_region_code,NUTS2_region_code,NUTS3_region_code,total number of businesses_value
0,EU,AT,AT1,AT11,AT111,2179.0
1,EU,AT,AT1,AT11,AT112,10491.0
2,EU,AT,AT1,AT11,AT113,5869.0
3,EU,AT,AT1,AT12,AT121,13175.0
4,EU,AT,AT1,AT12,AT122,15323.0


In [None]:
len(data_merged)

1170

In [None]:
data_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1170 entries, 0 to 1169
Data columns (total 6 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   EU_region_code                    1169 non-null   object 
 1   NUTS0_region_code                 1169 non-null   object 
 2   NUTS1_region_code                 1169 non-null   object 
 3   NUTS2_region_code                 1169 non-null   object 
 4   NUTS3_region_code                 1170 non-null   object 
 5   total number of businesses_value  1155 non-null   float64
dtypes: float64(1), object(5)
memory usage: 64.0+ KB


# Join all dataframes together at NUTS3 level

In [47]:
def get_file_names():
    """Join all per-dataset CSVs in ``ancilliary_data`` into one wide dataframe.

    Every file in the directory whose name starts with ``'data'`` is read, sorted
    by ``NUTS3_region_code`` and re-indexed from 0. The frames are then placed
    side by side; a column name that already appeared in an earlier file is kept
    only once (first file wins). Files are processed in sorted name order so the
    column order is reproducible.

    NOTE(review): rows are aligned by position after sorting, not by region code -
    this assumes every file contains the same set of NUTS3 regions; confirm.

    Returns:
        Tuple ``(concatenated_df, file_count)``: the combined dataframe (empty if
        no matching file exists) and the number of files that were read.
    """
    extension_1 = 'data'
    directory = os.path.join(cwd, '..', '..', '..',
                            'data',
                            'input',
                            'processed',
                            'ancilliary_data'
                            )
    # Target of the (currently disabled) export at the end of the function.
    path_destination = os.path.join(cwd, '..', '..', '..',
                        'data',
                        'input',
                        'processed',
                        'ancilliary_data',
                        'processed',
                        "data.csv"
                        )

    dfs = []
    # os.listdir returns entries in arbitrary order; sort for a deterministic result.
    for filename in sorted(os.listdir(directory)):
        if filename.startswith(extension_1):
            file_path = os.path.join(directory, filename)
            df = pd.read_csv(file_path)
            dfs.append(df.sort_values("NUTS3_region_code", ignore_index=True))
    file_count = len(dfs)

    if not dfs:
        # pd.concat refuses an empty list; report "nothing found" explicitly instead.
        return pd.DataFrame(), file_count

    # Keep each column name only the first time it is seen across all files.
    keep_columns = set()
    unique_frames = []
    for df in dfs:
        new_columns = [column for column in df.columns if column not in keep_columns]
        keep_columns.update(new_columns)
        unique_frames.append(df[new_columns])

    concatenated_df = pd.concat(unique_frames, axis=1, sort=True)
    # 'Unnamed: 0' is the index column written by DataFrame.to_csv; tolerate files
    # that were saved without it.
    concatenated_df = concatenated_df.drop(columns=['Unnamed: 0'], errors='ignore')
    # concatenated_df.to_csv(path_destination, index=False)
    return concatenated_df, file_count

In [48]:
# Build the combined NUTS3 table from the per-dataset CSVs and report how many files went into it.
concatenated_df, file_count = get_file_names()
print(f"Concatenated {file_count} files.")

Concatenated 146 files.


In [49]:
number_of_columns = concatenated_df.shape[1]
print(f"Number of columns: {number_of_columns}")

Number of columns: 182


In [50]:
concatenated_df.shape

(1170, 182)

In [27]:
concatenated_df.dtypes

EU_region_code                                                  object
NUTS0_region_code                                               object
NUTS1_region_code                                               object
NUTS2_region_code                                               object
NUTS3_region_code                                               object
                                                                 ...  
residential energy demand from liquified petroleum gas_value     int64
residential energy demand from solids_value                      int64
residential energy demand_value                                  int64
residential footprint area_value                                 int64
residential heat demand_value                                    int64
Length: 125, dtype: object

In [57]:
# Allow all 182 columns of concatenated_df to be displayed and format floats
# with thousands separators and two decimals.
pd.options.display.max_columns = 182
pd.options.display.float_format = '{:,.2f}'.format
# Show rows 70-89: positional slice of the first 90 rows.
concatenated_df.head(90)[70:90]
# concatenated_df.head()

Unnamed: 0,EU_region_code,NUTS0_region_code,NUTS1_region_code,NUTS2_region_code,NUTS3_region_code,deaths_value,electricity demand of chemical industries_value,electricity demand of iron and steel industries_value,electricity demand of non-ferrous metal industries_value,electricity demand of non-metallic mineral industries_value,electricity demand of paper and printing industries_value,electricity demand of refineries_value,employment in nace sector G-I_value,Distance_from_origin_region_to_E_road,Distance_within_E_road,Distance_from_E_road_to_destination_region,Total_distance,Traffic_flow_trucks_2010,Traffic_flow_trucks_2019,Traffic_flow_trucks_2030,Traffic_flow_tons_2010,Traffic_flow_tons_2019,Traffic_flow_tons_2030,fuel demand of chemical industries_value,fuel demand of iron and steel industries_value,fuel demand of non-ferrous metal industries_value,fuel demand of non-metallic mineral industries_value,fuel demand of paper and printing industries_value,fuel demand of refineries_value,generation capacity of biomass plants_value,generation capacity of fossil brown coal or lignite plants_value,generation capacity of fossil coal-derived gas plants_value,generation capacity of fossil gas plants_value,generation capacity of fossil hard coal plants_value,generation capacity of fossil oil plants_value,generation capacity of fossil oil shale plants_value,generation capacity of fossil peat plants_value,generation capacity of geothermal plants_value,generation capacity of hydro pumped storage plants_value,generation capacity of hydro run-of-river and poundage plants_value,generation capacity of hydro water reservoir plants_value,generation capacity of marine plants_value,generation capacity of nuclear plants_value,generation capacity of other plants_value,generation capacity of solar plants_value,generation capacity of waste plants_value,generation capacity of wind offshore plants_value,generation capacity of wind onshore plants_value,gross domestic product_value,gross value 
added in nace sector G-I_value,gross value added_value,live births_value,bicycle_network_value,bus_network_value,railways_network_value,road_major_network_value,shipping_network_value,non-residential footprint area_value,number of biomass plants_value,number of buildings_value,number of busses_PL_value,number of chemical industries_value,number of fossil brown coal or lignite plants_value,number of fossil coal-derived gas plants_value,number of fossil gas plants_value,number of fossil hard coal plants_value,number of fossil oil plants_value,number of fossil oil shale plants_value,number of fossil peat plants_value,number of geothermal plants_value,number of hydro pumped storage plants_value,number of hydro run-of-river and poundage plants_value,number of hydro water reservoir plants_value,number of iron and steel industries_value,number of large businesses_value,number of lorries_PL_value,number of marine plants_value,number of mopeds_PL_value,number of motorcycles_PL_value,number of non-ferrous metal industries_value,number of non-metallic mineral industries_value,number of non-residential buildings_value,number of nuclear plants_value,number of other plants_value,number of paper and printing industries_value,number of passenger cars_PL_value,number of pixels with agriculture with natural vegetation_value,number of pixels with agro-forestry areas_value,number of pixels with airports_value,number of pixels with bare rocks_value,number of pixels with beaches - dunes - sands_value,number of pixels with broad-leaved forest_value,number of pixels with burnt areas_value,number of pixels with coastal lagoons_value,number of pixels with complex cultivation patterns_value,number of pixels with coniferous forest_value,number of pixels with construction sites_value,number of pixels with continuous urban fabric_value,number of pixels with discontinuous urban fabric_value,number of pixels with dump sites_value,number of pixels with estuaries_value,number of pixels with fruit 
trees and berry plantations_value,number of pixels with glaciers and perpetual snow_value,number of pixels with green urban areas_value,number of pixels with industrial or commercial units_value,number of pixels with inland marshes_value,number of pixels with intertidal flats_value,number of pixels with land use - nodata_value,number of pixels with mineral extraction sites_value,number of pixels with mixed forest_value,number of pixels with moors and heathland_value,number of pixels with natural grasslands_value,number of pixels with non-irrigated arable land_value,number of pixels with olive groves_value,number of pixels with pastures_value,number of pixels with peat bogs_value,number of pixels with permanent crops_value,number of pixels with permanently irrigated land_value,number of pixels with port areas_value,number of pixels with rice fields_value,number of pixels with road and rail networks_value,number of pixels with salines_value,number of pixels with salt marshes_value,number of pixels with sclerophyllous vegetation_value,number of pixels with sea and ocean_value,number of pixels with sparsely vegetated areas_value,number of pixels with sport and leisure facilities_value,number of pixels with transitional woodland-shrub_value,number of pixels with vineyards_value,number of pixels with water bodies_value,number of pixels with water courses_value,number of refineries_value,number of residential buildings_PL_value,number of road tractors_PL_value,number of small businesses_value,number of solar plants_value,number of waste plants_value,number of wind offshore plants_value,number of wind onshore plants_value,onshore wind potential capacity_value,population_value,quality of life index_value,railway length (with power source - electrified track)_value,railway length (with power source - non-electrified)_value,railway length (with power source - overhead electrified)_value,railway length (with power source - unknown)_value,relative gross value added in nace 
sector G-I_value,relative gross value added_value,residential energy demand from biomass and wastes_value,residential energy demand from derived heat_value,residential energy demand from electricity_value,residential energy demand from gas or diesel oil incl. biofuels_value,residential energy demand from gases incl. biogas_value,residential energy demand from geothermal energy_value,residential energy demand from liquified petroleum gas_value,residential energy demand from solids_value,residential energy demand_value,residential footprint area_value,residential heat demand_value,fuel_stations_value,charging_stations_value,bicycle_stations_value,bus_stations_value,airport_stations_value,railway_station_value,train_station_value,subway_station_value,lightrail_station_value,shipping_station_value,helicopter_station_value,total area_value,total employment_value,total number of businesses_value,Motorcylces,Agricultural Tractors,Buses,Passenger Vehicles,Motor Vehicles Other,Motor Vehicles Total,Load Force Wagons,Trailers Other,Trailers Total
70,EU,BE,BE3,BE33,BE336,737.0,0,0,0,0,0,0,5600.0,10.0,1094.51,126.72,1231.23,144.58,151.98,162.55,1966.28,2066.98,2210.64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2205.0,364.0,1968.0,806.0,0.0,0.0,38434.09,286112.84,0.0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,0,,,0,0,0,0,0,0,,3612,0,0,0,0,2244,0,0,6517,19004,0,79,7379,0,0,0,0,0,615,393,0,0,31,19701,2559,338,5537,0,31329,796,0,0,0,0,109,0,0,0,0,0,37,1518,0,231,0,0,,,0.0,0,0,0,0,0,78408.0,0.0,17,10,38,0,16.0,89.0,0,0,0,0,0,0,0,0,0,0,0,36.0,6.0,1.0,1095.0,2.0,16.0,3.0,0.0,0.0,0.0,3.0,847.0,28400.0,0.0,,,,,,,,,
71,EU,BE,BE3,BE34,BE341,515.0,0,0,0,18,0,0,5100.0,3.0,1102.28,129.25,1234.54,80.57,87.13,96.35,1095.78,1185.0,1310.33,0,0,0,511,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1600.0,303.0,1429.0,700.0,0.0,0.0,60016.75,353286.91,0.0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,0,,,0,1,0,0,0,0,,2481,0,0,0,0,6260,0,0,5262,1336,0,94,4761,0,0,5,0,62,881,55,0,0,98,4072,301,0,8316,0,6745,0,0,0,0,0,100,0,0,0,0,0,70,338,3,48,176,0,,,0.0,0,0,0,0,0,63642.0,0.0,30,0,13,0,18.0,89.0,0,0,0,0,0,0,0,0,0,0,0,22.0,25.0,1.0,929.0,1.0,50.0,4.0,0.0,0.0,0.0,4.0,319.0,20900.0,0.0,,,,,,,,,
72,EU,BE,BE3,BE34,BE342,448.0,0,0,0,0,0,0,3300.0,4.0,1124.04,130.39,1258.43,85.0,89.12,95.07,1155.99,1212.06,1292.89,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1166.0,195.0,1041.0,580.0,0.0,0.0,24788.29,430777.64,0.0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,0,,,0,0,0,0,0,0,,7358,0,68,0,0,5247,0,0,29155,15002,26,0,5478,0,0,64,0,31,326,0,0,0,26,25229,0,390,8443,0,26859,547,0,0,0,0,74,0,0,0,0,0,26,2618,0,19,6,0,,,0.0,0,0,0,0,0,50228.0,0.0,22,0,17,0,16.0,89.0,0,0,0,0,0,0,0,0,0,0,0,19.0,4.0,0.0,1261.0,2.0,17.0,3.0,0.0,0.0,0.0,1.0,1047.0,15500.0,0.0,,,,,,,,,
73,EU,BE,BE3,BE34,BE343,582.0,0,0,0,0,0,0,4900.0,14.0,1152.49,130.2,1296.69,97.5,101.42,107.19,1325.96,1379.36,1457.83,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1735.0,287.0,1549.0,612.0,0.0,0.0,54808.15,409719.35,209.08,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,0,,,0,0,0,0,0,0,,5864,0,169,0,0,15508,0,0,14456,14917,0,0,7807,53,0,84,0,0,518,0,0,0,281,34483,548,1412,3512,452,17833,406,0,0,0,0,111,0,0,4954,8,201,337,2384,3182,18,0,0,,,0.0,0,0,0,0,0,57734.0,0.0,56,0,25,0,16.0,89.0,0,0,0,0,0,0,0,0,0,0,0,35.0,13.0,0.0,1314.0,2.0,42.0,4.0,0.0,0.0,0.0,70.0,958.0,23400.0,0.0,,,,,,,,,
74,EU,BE,BE3,BE34,BE344,628.0,0,0,0,0,0,0,4900.0,2.0,1120.21,127.75,1249.95,174.79,180.84,189.78,2377.2,2459.46,2581.04,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1838.0,302.0,1641.0,723.0,0.0,0.0,118620.5,702410.54,0.0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,0,,,0,0,0,0,0,1,,7292,0,196,0,214,27231,66,0,28523,18947,115,57,7725,0,0,2797,0,0,678,0,0,0,164,37935,146,33,4056,0,18601,70,0,0,0,0,149,0,0,1042,0,0,246,2482,3479,48,0,0,,,0.0,0,0,0,0,0,63821.0,0.0,131,5,50,0,16.0,89.0,0,0,0,0,0,0,0,0,0,0,0,35.0,14.0,3.0,1586.0,3.0,50.0,6.0,0.0,0.0,0.0,2.0,1359.0,25900.0,0.0,,,,,,,,,
75,EU,BE,BE3,BE34,BE345,491.0,0,0,0,0,735,0,2300.0,18.0,1126.28,127.26,1271.54,75.03,80.35,87.88,1020.38,1092.74,1195.14,0,0,0,0,2660,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,988.0,131.0,882.0,586.0,0.0,0.0,97373.96,616209.62,0.0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,0,,,0,0,0,0,0,1,,4681,0,0,0,0,24571,0,0,12049,5185,0,0,5638,0,0,0,0,0,543,374,50,0,147,12185,185,134,19593,0,23702,0,0,0,0,0,75,0,21,0,0,0,146,563,0,118,0,0,,,0.0,0,0,0,0,0,54181.0,0.0,67,2,30,0,13.0,89.0,0,0,0,0,0,0,0,0,0,0,0,21.0,8.0,1.0,902.0,0.0,29.0,4.0,0.0,0.0,0.0,2.0,778.0,13700.0,0.0,,,,,,,,,
76,EU,BE,BE3,BE35,BE351,1240.0,0,0,0,0,0,0,7000.0,1.0,1081.44,138.58,1221.02,113.39,114.66,116.95,1542.16,1559.31,1590.54,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2555.0,391.0,2281.0,1068.0,0.0,0.0,257204.34,1155178.38,151.26,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,0,,,0,0,0,0,0,0,,9549,0,164,0,0,27780,0,0,28666,5033,37,39,11627,0,0,224,0,187,724,0,0,0,324,39193,0,913,24532,0,17979,0,0,0,0,0,171,0,0,0,0,0,1576,947,0,0,413,0,,,0.0,0,0,0,0,0,112757.0,0.0,96,0,13,0,15.0,89.0,0,0,0,0,0,0,0,0,0,0,0,44.0,13.0,2.0,1812.0,2.0,144.0,10.0,0.0,0.0,0.0,26.0,1597.0,36100.0,0.0,,,,,,,,,
77,EU,BE,BE3,BE35,BE352,3143.0,1353,0,0,216,0,0,23600.0,2.0,1102.72,129.19,1233.91,332.36,334.64,339.38,4520.16,4551.1,4615.62,225,0,0,2216,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9978.0,1568.0,8909.0,3130.0,0.0,0.0,371245.16,1289832.13,0.0,0,0,0,,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,0,,,0,1,0,0,0,0,,7872,0,0,0,0,16421,0,0,18980,532,35,98,23164,0,0,80,0,44,1465,0,0,0,755,7654,0,1933,42784,0,4687,0,0,0,0,0,259,0,0,0,0,0,793,126,0,29,461,0,,,0.0,0,0,0,0,0,319132.0,0.0,106,10,6,0,15.0,89.0,0,0,0,0,0,0,0,0,0,0,0,90.0,32.0,39.0,2779.0,3.0,209.0,15.0,0.0,10.0,0.0,5.0,1168.0,123500.0,0.0,,,,,,,,,
78,EU,BE,BE3,BE35,BE353,718.0,0,0,0,0,0,0,3500.0,7.0,833.55,15.56,856.11,175.57,172.27,168.72,2387.71,2342.92,2294.53,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1283.0,182.0,1145.0,628.0,2766.78,0.0,66025.6,504233.55,0.0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,0,,,0,0,0,0,0,0,,3719,0,229,0,0,22268,0,0,15792,2487,111,0,6563,0,0,0,0,85,682,0,0,0,288,14205,27,0,15296,0,9824,0,0,0,0,0,19,0,0,0,0,0,0,219,0,260,0,0,,,0.0,0,0,0,0,0,67544.0,0.0,0,32,5,0,14.0,89.0,0,0,0,0,0,0,0,0,0,0,0,22.0,3.0,0.0,982.0,3.0,85.0,6.0,0.0,19.0,0.0,1.0,911.0,17800.0,0.0,,,,,,,,,
79,EU,BG,BG3,BG31,BG311,1922.0,0,0,0,0,0,0,6110.0,7.0,1438.33,15.75,1461.09,29.14,32.64,37.41,396.33,443.88,508.83,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,376.0,52.0,324.0,548.0,0.0,0.0,134763.46,649308.47,0.0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,185.0,,0,,,0,0,0,0,0,0,,30397,0,98,401,158,71600,0,0,6023,1566,0,0,11130,0,0,369,0,25,1297,0,0,0,37,5228,0,2859,137403,0,11325,0,0,0,47,0,86,0,0,0,0,1081,0,12981,4641,666,4121,0,,,1346.0,0,0,0,0,0,82835.0,0.0,0,0,0,0,13.0,86.0,0,0,0,0,0,0,0,0,0,0,0,29.0,0.0,0.0,44.0,1.0,36.0,1.0,0.0,0.0,1.0,0.0,3035.0,30580.0,2930.0,,,,,,,,,


In [58]:
pd.set_option('display.max_columns', None) # set the maximum number of columns to None to show all columns
# The column index is printed and also left as the cell's last expression,
# so it appears twice in the output.
print(concatenated_df.columns)
print(f"Number of columns: {len(concatenated_df.columns)}")
concatenated_df.columns

Index(['EU_region_code', 'NUTS0_region_code', 'NUTS1_region_code',
       'NUTS2_region_code', 'NUTS3_region_code', 'deaths_value',
       'electricity demand of chemical industries_value',
       'electricity demand of iron and steel industries_value',
       'electricity demand of non-ferrous metal industries_value',
       'electricity demand of non-metallic mineral industries_value',
       ...
       'total number of businesses_value', 'Motorcylces',
       'Agricultural Tractors', 'Buses', 'Passenger Vehicles',
       ' Motor Vehicles Other', ' Motor Vehicles Total', 'Load Force Wagons',
       ' Trailers Other', 'Trailers Total'],
      dtype='object', length=182)
Number of columns: 182


Index(['EU_region_code', 'NUTS0_region_code', 'NUTS1_region_code',
       'NUTS2_region_code', 'NUTS3_region_code', 'deaths_value',
       'electricity demand of chemical industries_value',
       'electricity demand of iron and steel industries_value',
       'electricity demand of non-ferrous metal industries_value',
       'electricity demand of non-metallic mineral industries_value',
       ...
       'total number of businesses_value', 'Motorcylces',
       'Agricultural Tractors', 'Buses', 'Passenger Vehicles',
       ' Motor Vehicles Other', ' Motor Vehicles Total', 'Load Force Wagons',
       ' Trailers Other', 'Trailers Total'],
      dtype='object', length=182)