# Fleet data per country

All data sourced from the European Alternative Fuels (AF) Observatory:

[country_identifier]_psg_vehicle_fleetdata (files ending in `EU.csv`):
AF Fleet percentage of total fleet (M1)
Fleet of alternative fuelled (BEV, PHEV, H2, LPG, CNG, LNG) passenger cars as a percentage of the total fleet.

[country_identifier]_psg_vehicle_fleetdata (files ending in `newreg.csv`):
AF New registrations (M1)
Number of newly registered alternative fuelled (BEV, PHEV, H2, LPG, CNG, LNG) passenger cars (M1).

[country_identifier]_psg_vehicle_fleetdata (files ending in `registrations.csv`):
AF Market share of total registrations (M1)
Newly registered alternative fuelled (BEV, PHEV, H2, LPG, CNG, LNG) passenger cars as a percentage of the total number of registrations.

In [1]:
import pandas as pd
import os

# Load every per-country fleet CSV from the raw data folder, combine the files
# of each type into one DataFrame keyed by country, and save the combined
# DataFrames to the processed data folder.

# Debug: Check the current working directory (the relative paths below are
# resolved against it)
print("Current Working Directory:", os.getcwd())

# Set the path to the raw data folder
data_path = "../data/raw/fleet_data"  # Relative path to the raw data folder

# Verify that the folder exists. Instead of falling back to a machine-specific
# absolute path, allow an explicit override through the FLEET_DATA_DIR
# environment variable and fail with a clear message if neither is usable.
if not os.path.isdir(data_path):
    print(f"Relative path '{data_path}' not found. Trying the FLEET_DATA_DIR environment variable instead.")
    data_path = os.environ.get("FLEET_DATA_DIR", "")
    if not os.path.isdir(data_path):
        raise FileNotFoundError(
            "Fleet data folder not found: expected '../data/raw/fleet_data' relative to "
            f"'{os.getcwd()}', or a valid folder in the FLEET_DATA_DIR environment variable."
        )

# List all CSV files in the raw data folder. os.listdir returns entries in
# arbitrary order, so sort them to make the combined row order reproducible.
csv_files = sorted(file for file in os.listdir(data_path) if file.endswith(".csv"))

# Initialize dictionaries to store DataFrames for each file type
newreg_dataframes = {}
eu_dataframes = {}
registrations_dataframes = {}

# File-name suffix -> dictionary collecting that file type. The suffixes are
# tested in this order, and the first match wins.
frames_by_suffix = {
    "newreg.csv": newreg_dataframes,
    "EU.csv": eu_dataframes,
    "registrations.csv": registrations_dataframes,
}

# Process files based on their type
for file in csv_files:
    country_name = file.split("_")[0]  # Text before the first underscore is used as the country key
    file_path = os.path.join(data_path, file)

    for suffix, frames in frames_by_suffix.items():
        if file.endswith(suffix):
            frames[country_name] = pd.read_csv(file_path)
            break


def _combine_by_country(frames, suffix):
    """Stack per-country DataFrames into a single DataFrame.

    Parameters
    ----------
    frames : dict
        Maps a country key to the DataFrame read from that country's file.
    suffix : str
        File-name suffix of this file type; only used in the error message.

    Returns
    -------
    pandas.DataFrame
        All frames concatenated, with the country key in a "Country" column
        and each frame's original row index in an "Index" column.

    Raises
    ------
    ValueError
        If ``frames`` is empty, i.e. no file of this type was found.
    """
    if not frames:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError(f"No files ending in '{suffix}' were found in '{data_path}'.")
    return pd.concat(frames.values(), keys=frames.keys(), names=["Country", "Index"]).reset_index()


# Combine datasets for each file type
combined_newreg_df = _combine_by_country(newreg_dataframes, "newreg.csv")
combined_fleet_df = _combine_by_country(eu_dataframes, "EU.csv")
combined_registrations_mktshare_df = _combine_by_country(registrations_dataframes, "registrations.csv")

# Display basic information about the combined datasets.
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line.
print("Combined New Registrations Dataset Info:")
combined_newreg_df.info()

print("\nCombined EU Dataset Info:")
combined_fleet_df.info()

print("\nCombined Registrations Dataset Info:")
combined_registrations_mktshare_df.info()

# Save the combined datasets to the processed folder
processed_path = "../data/processed"
os.makedirs(processed_path, exist_ok=True)

combined_newreg_df.to_csv(os.path.join(processed_path, "combined_newreg_data.csv"), index=False)
combined_fleet_df.to_csv(os.path.join(processed_path, "combined_fleet_data.csv"), index=False)
combined_registrations_mktshare_df.to_csv(os.path.join(processed_path, "combined_registrations_mktshare_data.csv"), index=False)

print(f"Combined datasets saved to {processed_path}/")

Current Working Directory: c:\Users\jskif\OneDrive - epfl.ch\Documents\SMT\DSML\DSML\src
Combined New Registrations Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108 entries, 0 to 107
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Country   108 non-null    object 
 1   Index     108 non-null    int64  
 2   Category  108 non-null    int64  
 3   BEV       108 non-null    int64  
 4   PHEV      108 non-null    int64  
 5   H2        73 non-null     float64
 6   LPG       39 non-null     float64
 7   CNG       85 non-null     float64
 8   LNG       0 non-null      float64
dtypes: float64(4), int64(4), object(1)
memory usage: 7.7+ KB
None

Combined EU Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96 entries, 0 to 95
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Country   96 non-null     object 
 1   Index     96 non-nu

# Air quality (AQ) data per country