In [None]:
import os
import pandas as pd
import geopandas as gpd
from energyemissionsregio.config import SHP_PATH
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error


### validation - Germany (Utilized agricultural area)

In [None]:
# Load the NUTS1 boundaries and keep the regions whose code starts with "DE".
nuts1_shapefile = os.path.join(SHP_PATH, "NUTS1.shp")
nuts1 = gpd.read_file(nuts1_shapefile)
nuts1_de = nuts1.loc[nuts1["code"].str.startswith("DE")].copy()

# Lower-cased region name; used as the join key against the validation table.
nuts1_de["name_small"] = nuts1_de["name"].str.lower()

In [None]:
# Destatis reference data for the year 2020; areas are reported in hectares.
validation_csv = os.path.join("..", "..", "data", "validation_data", "41141-0101_de_flat.csv")
validation_data = pd.read_csv(validation_csv, sep=";")

# Keep only the rows of the "Fläche" (area) variable, reduced to label and value.
is_area_row = validation_data["value_variable_label"] == "Fläche"
utilized_agri_area_de_states = validation_data.loc[
    is_area_row, ["1_variable_attribute_label", "value"]
].copy()

# Hectares -> square kilometres (1 ha = 0.01 km²).
utilized_agri_area_de_states["value"] = utilized_agri_area_de_states["value"].mul(0.01)

In [None]:
# Lower-cased label, used as the join key against the NUTS1 region names.
utilized_agri_area_de_states = utilized_agri_area_de_states.assign(
    name_small=utilized_agri_area_de_states["1_variable_attribute_label"].str.lower()
)

In [None]:
# Left join: every German NUTS1 region is kept, even when no validation row
# shares its lower-cased name (such rows get NaN validation values).
final_valdiation_data = nuts1_de.merge(
    utilized_agri_area_de_states,
    how="left",
    on="name_small",
)

In [None]:
# Display the merged table to check that each region received a validation value.
final_valdiation_data

In [None]:
# Imputed values for the variable "es_utilized_agricultural_area".
imputed_csv = os.path.join("..", "..", "data", "imputed_data", "es_utilized_agricultural_area.csv")
imputed_data = pd.read_csv(imputed_csv)

In [None]:
# Restrict the imputed values to regions whose code starts with "DE".
imputed_data_de = imputed_data.loc[
    imputed_data["region_code"].str.startswith("DE")
].copy()

In [None]:
# 3-character prefix of each region code, used as the aggregation key.
# NOTE(review): this prefix is later joined against the `code` column of the
# NUTS1 shapefile, so the column holds NUTS1-level codes despite its "NUTS2" name.
imputed_data_de["NUTS2"] = imputed_data_de["region_code"].str.slice(stop=3)

In [None]:
# Sum the imputed values per 3-character region prefix.
imputed_data_de_nuts2 = (
    imputed_data_de
    .groupby("NUTS2", as_index=False)["value"]
    .sum()
)

In [None]:
# Display the aggregated imputed values (one row per 3-character region prefix).
imputed_data_de_nuts2

In [None]:
# Attach the aggregated imputed values to the validation table. Both inputs
# carry a "value" column, so the suffixes tell them apart:
# value_validation (reference data) vs. value_imputed (aggregated imputation).
plot_data_1 = final_valdiation_data.merge(
    imputed_data_de_nuts2,
    how="left",
    left_on="code",
    right_on="NUTS2",
    suffixes=("_validation", "_imputed"),
)

### validation - Spain (Number of cars)

In [None]:
# NUTS3 boundaries; used further down to look up region names by code.
nuts3_shapefile = os.path.join(SHP_PATH, "NUTS3.shp")
nuts3 = gpd.read_file(nuts3_shapefile)

In [None]:
# Imputed passenger-car variables, one per emission group; their values are
# summed into a single total when the files are loaded.
# NOTE(review): `vars` shadows the Python builtin of the same name; the name is
# kept because the loading loop refers to it.
CAR_VAR_PREFIX = "de_number_of_passenger_cars_emission_group_euro_"
EMISSION_GROUPS = ("1", "2", "3", "4", "5", "6r", "6dt", "6d", "other")

vars = [f"{CAR_VAR_PREFIX}{group}" for group in EMISSION_GROUPS]


In [None]:
# Build the total value per Spanish region by summing the imputed values of
# every variable listed in `vars`.
#
# Each file is reduced to its "ES" rows *before* merging, so that
#   * the filter also applies when `vars` holds a single variable (it used to
#     live in the merge branch only),
#   * no column is assigned on a filtered slice (the pattern that triggers
#     pandas' SettingWithCopyWarning), and
#   * the outer merges only ever handle the Spanish rows.
# The result has the columns ["value", "region_code"]; it stays None when
# `vars` is empty.
imputed_data_es = None
for var in vars:
    var_df = pd.read_csv(os.path.join("..", "..", "data",
                                      "imputed_data",
                                      f"{var}.csv"))

    is_spanish = var_df["region_code"].str.startswith("ES")
    var_df = var_df.loc[is_spanish, ["value", "region_code"]].reset_index(drop=True)

    if imputed_data_es is None:
        imputed_data_es = var_df
        continue

    # Outer join keeps regions that are missing from either side; the plain
    # `+` then yields NaN for them instead of a silently incomplete total.
    merged = imputed_data_es.merge(var_df, on="region_code", how="outer")
    merged["value"] = merged["value_x"] + merged["value_y"]
    imputed_data_es = merged[["value", "region_code"]].copy()

In [None]:
# Display the summed imputed values for the Spanish regions.
imputed_data_es

In [None]:
# Reference values keyed by region code; compared against the imputed
# passenger-car totals further down.
# NOTE(review): the source and unit of these figures are not stated in the
# notebook — confirm and record them here.
validation_data = dict(
    ES211=3226,
    ES213=10699,
    ES212=7262,
)

In [None]:
# Turn the {region_code: value} mapping into a two-column table.
# NOTE: this rebinds `validation_data` from a dict to a DataFrame, so the cell
# must only be run once after the dict has been (re)defined.
validation_data = pd.DataFrame(
    {
        "region_code": list(validation_data.keys()),
        "value": list(validation_data.values()),
    }
)

In [None]:
# Lookup table: NUTS3 region code -> region name.
region_names = dict(zip(nuts3["code"], nuts3["name"]))

In [None]:
# Imputed totals for the regions whose code starts with "ES21" — the prefix
# shared by the reference regions in `validation_data`.
# A dedicated name is used here: the subset was previously stored in
# `imputed_data_de_nuts2`, which overwrote the German aggregate and made the
# German merge cell produce wrong results when re-run afterwards.
imputed_data_es21 = (
    imputed_data_es[imputed_data_es["region_code"].str.startswith("ES21")]
    .sort_values("region_code")
)

# Inner join (pandas default): only regions present in both tables are kept.
# Both inputs carry a "value" column, hence the suffixes.
plot_data_2 = pd.merge(
    validation_data,
    imputed_data_es21,
    on="region_code",
    suffixes=["_validation", "_predicted"],
)


In [None]:
# Add the region name for each code; used as the x-axis label in the figure.
plot_data_2 = plot_data_2.assign(
    region_name=plot_data_2["region_code"].map(region_names)
)

In [None]:
# Display the validation vs. predicted table for the Spanish regions.
plot_data_2


In [None]:
# Two stacked panels comparing imputed ("predicted") values with reference data:
#   top    - utilized agricultural area for the German regions (plot_data_1)
#   bottom - number of passenger cars for the Spanish regions (plot_data_2)
# All styling goes through the explicit Axes objects rather than the pyplot
# state machine, so each call visibly targets the intended panel.
fig = plt.figure(figsize=(15, 7))
gs = fig.add_gridspec(2, 1, wspace=0.1, hspace=0.8)

# --- Top panel: Germany, utilized agricultural area -------------------------
ax1 = fig.add_subplot(gs[0, 0])

ax1.scatter(plot_data_1['1_variable_attribute_label'],
            plot_data_1['value_imputed'],
            color='red',
            label='Predicted data',
            alpha=0.7)

ax1.scatter(plot_data_1['1_variable_attribute_label'],
            plot_data_1['value_validation'],
            color='blue',
            label='Validation data',
            alpha=0.7)

ax1.set_xlabel('Region name')
ax1.set_ylabel('Utilized agricultural area\n(square kilometer)')
plt.setp(ax1.get_xticklabels(), rotation=45, ha="right")
ax1.legend(loc="upper left")
ax1.grid(True, linestyle='--', alpha=0.6)

# --- Bottom panel: Spain, number of passenger cars --------------------------
ax2 = fig.add_subplot(gs[1, 0])

ax2.scatter(plot_data_2['region_name'],
            plot_data_2['value_validation'],
            color='blue',
            label='Validation data',
            alpha=0.7)

ax2.scatter(plot_data_2['region_name'],
            plot_data_2['value_predicted'],
            color='red',
            label='Predicted data',
            alpha=0.7)

ax2.set_xlabel('Region name')
ax2.set_ylabel('Number of\npassenger cars')
plt.setp(ax2.get_xticklabels(), rotation=45, ha="right")
ax2.legend(loc="upper left")
ax2.grid(True, linestyle='--', alpha=0.6)

# Save the figure as a PNG file; create the output folder first so a fresh
# checkout does not fail with FileNotFoundError.
figure_dir = os.path.join("..", "..", "figures", "missing_value_imputation")
os.makedirs(figure_dir, exist_ok=True)
fig.savefig(os.path.join(figure_dir, "uaa_de_cars_es_validation.png"),
            bbox_inches='tight')