In [None]:
import os
import pandas as pd
import geopandas as gpd
from energyemissionsregio.config import SHP_PATH
import matplotlib.pyplot as plt

In [None]:
# Load the NUTS3 shapefile from the project's configured shapefile folder.
nuts3_shapefile = os.path.join(SHP_PATH, "NUTS3.shp")
nuts3 = gpd.read_file(nuts3_shapefile)

In [None]:
# Show the rows whose "code" starts with "ES" as a quick visual check.
is_es_code = nuts3["code"].str.startswith("ES")
nuts3.loc[is_es_code]

In [None]:
# Names of the passenger-car variables, one per emission group; each name is
# later used as the stem of a CSV file in the imputed-data folder.
# NOTE: `vars` shadows the Python builtin of the same name. The name is kept
# because the loading cell iterates over it.
vars = [
    f"de_number_of_passenger_cars_emission_group_euro_{emission_group}"
    for emission_group in ("1", "2", "3", "4", "5", "6r", "6dt", "6d", "other")
]


In [None]:
# Build one total per ES region by summing "value" over every variable file.
#
# Each CSV is expected to provide "region_code" and "value" columns. Rows are
# restricted to region codes starting with "ES" before they are combined, so
# the result has the same shape no matter how many variables are listed
# (previously the filter and column clean-up only ran from the second file on).
es_frames = []
for var in vars:
    temp_df = pd.read_csv(os.path.join("..", "..", "data",
                                       "imputed_data",
                                       f"{var}.csv"))

    # na=False treats a missing region_code as "not ES" instead of failing.
    is_es_row = temp_df["region_code"].str.startswith("ES", na=False)
    es_frames.append(temp_df.loc[is_es_row, ["region_code", "value"]])

if es_frames:
    # A single concat + groupby replaces the repeated pairwise merges and
    # avoids assigning columns onto a filtered slice.
    # min_count=len(es_frames) keeps the strict behaviour of adding the
    # columns pairwise: a region that is absent (or NaN) in any file gets a
    # NaN total rather than a silently incomplete sum.
    region_totals = (
        pd.concat(es_frames, ignore_index=True)
        .groupby("region_code", as_index=False)["value"]
        .sum(min_count=len(es_frames))
    )
    # Same column order as before: value first, then region_code.
    imputed_data_es = region_totals[["value", "region_code"]].copy()
else:
    # No variables configured: keep the original "nothing loaded" marker.
    imputed_data_es = None

In [None]:
# Reference values per region code, used to check the imputed totals.
# TODO: record where these numbers come from.
validation_data = dict(
    [
        ("ES211", 3226),
        ("ES213", 10699),
        ("ES212", 7262),
    ]
)

In [None]:
# Turn the {region_code: value} mapping into a two-column DataFrame.
# The name `validation_data` is rebound from a dict to a DataFrame here, so the
# guard keeps the cell idempotent: re-running it on the already-converted frame
# would otherwise rebuild the table from the DataFrame's (column, Series)
# pairs and corrupt the data without raising.
if not isinstance(validation_data, pd.DataFrame):
    validation_data = pd.DataFrame(list(validation_data.items()), columns=["region_code", "value"])

In [None]:
# Scatter the reference values against the imputed values for the regions
# whose code starts with "ES21", then save the comparison as a PNG.

# Sort both frames by region_code so the points are added to the categorical
# x-axis in a consistent order.
final_valdiation_data = validation_data.sort_values('region_code')
# NOTE: despite the "de_nuts2" in its name, this frame holds the imputed rows
# whose region_code starts with "ES21"; the name is left unchanged here.
imputed_data_de_nuts2 = imputed_data_es[imputed_data_es["region_code"].str.startswith("ES21")].sort_values('region_code')

# Plot
fig, ax = plt.subplots(figsize=(10, 3))
ax.scatter(final_valdiation_data['region_code'], final_valdiation_data['value'], color='blue', label='Validation data', alpha=0.7)
ax.scatter(imputed_data_de_nuts2['region_code'], imputed_data_de_nuts2['value'], color='red', label='Predicted data', alpha=0.7)

ax.set_xlabel('Region Code')
ax.set_ylabel('Number of passenger cars')

ax.legend()
ax.grid(True, linestyle='--', alpha=0.6)

# savefig does not create missing folders, so make sure the target directory
# exists before writing (a fresh checkout may not contain it yet).
figure_dir = os.path.join("..", "..", "figures", "missing_value_imputation")
os.makedirs(figure_dir, exist_ok=True)

# Save the figure as a PNG file
fig.savefig(os.path.join(figure_dir, "cars_es_validation.png"),
            bbox_inches='tight')