In [None]:
import os
import pandas as pd
import geopandas as gpd
from energyemissionsregio.config import DATA_PATH, SHP_PATH
import matplotlib.pyplot as plt

In [None]:
# Load the LAU shapefile and keep only the rows whose "code" starts with
# "ES21". A copy is taken so later edits do not touch the full `lau` table.
lau = gpd.read_file(
    os.path.join(SHP_PATH, "LAU.shp")
)
lau_es = lau.loc[lau["code"].str.startswith("ES21")].copy()

In [None]:
# total residential area - year 2024
# The file is semicolon-separated and read with "," as the decimal mark.
validation_data = pd.read_csv(
    os.path.join("..", "..", "data", "validation_data", "xls0013414_i.csv"),
    delimiter=";",
    decimal=",",
)

# Force the area column to float via a string round-trip, swapping any
# remaining "," for ".".
# NOTE(review): probably redundant with decimal="," above - confirm against the raw file.
validation_data["Surface area (Ha)"] = (
    validation_data["Surface area (Ha)"]
    .astype(str)
    .str.replace(",", ".")
    .astype(float)
)

# 1 hectare = 0.01 sq. km.
validation_data["value"] = validation_data["Surface area (Ha)"].mul(0.01)

In [None]:
# Municipality spellings found in the validation table (keys) and the names
# they are replaced with (values).
# NOTE(review): targets presumably match the LAU "name" column used for the
# merge - verify against the shapefile.
name_changes = {
    "Abanto y Ciervana-Abanto Zierbena": "Abanto y Ciérvana-Abanto Zierbena",
    "Agurain/Salvatierra": "Agurain",
    "Armiñon": "Armiñón",
    "Arrasate/Mondragon": "Arrasate",
    "Ayala/Aiara": "Ayala",
    "Baños de Ebro/Mañueta": "Baños de Ebro",
    "Campezo/Kanpezu": "Campezo",
    "Donostia / San Sebastian": "Donostia",
    "Elburgo/Burgelu": "Elburgo",
    "Elvillar/Bilar": "Elvillar",
    "Erriberagoitia/Ribera Alta": "Erriberagoitia",
    "Harana/Valle de Arana": "Harana",
    "Iruña Oka/Iruña de Oca": "Iruña Oka",
    "Karrantza Harana/Valle de Carranza": "Karrantza Harana",
    "Labastida/Bastida": "Labastida",
    "Lagran": "Lagrán",
    "Lanciego/Lantziego": "Lanciego",
    "Lantaron": "Lantarón",
    "Laudio/Llodio": "Laudio",
    "Moreda de Alava/Moreda Araba": "Moreda de Álava",
    "MunitibarArbatzegi Gerrikaitz-": "Munitibar-Arbatzegi Gerrikaitz",
    "Ribera Baja/Erribera Beitia": "Erriberabeitia",
    "San Millan/Donemiliaga": "San Millán",
    "Urduña/Orduña": "Urduña",
    "Valdegovia/Gaubea": "Valdegovía",
    "Valle de Trapaga-Trapagaran": "Valle de Trápaga-Trapagaran",
    "Villabuena de Alava/Eskuernaga": "Villabuena de Álava",
    "Yécora/Iekora": "Yécora",
}

In [None]:
# Swap each municipality name that appears as a key in `name_changes` for its
# mapped value; names without an entry are left unchanged.
validation_data["Municipalities"] = validation_data["Municipalities"].replace(
    to_replace=name_changes
)

In [None]:
# Display the validation table after the municipality names were replaced.
validation_data

In [None]:
# Outer-join the filtered LAU rows with the validation table on municipality
# name. Rows without a partner on either side are kept, with NaN in the
# columns of the missing side.
# NOTE(review): unmatched names pass through silently - consider checking them.
merge_df = pd.merge(
    lau_es,
    validation_data,
    how="outer",
    left_on="name",
    right_on="Municipalities",
)

In [None]:
# Keep only the LAU code and the surface area, as an independent copy.
final_validation_data = merge_df.loc[:, ["code", "Surface area (Ha)"]].copy()

In [None]:
# 1 hectare = 0.01 sq. km.
final_validation_data["value"] = final_validation_data["Surface area (Ha)"].mul(0.01)

In [None]:
# Display the per-code validation table (code, area in ha, area in sq. km.).
final_validation_data

In [None]:
# Total of the "value" column (sq. km.); NaN entries are skipped by sum().
final_validation_data["value"].sum()

In [None]:
# Load the imputed residential building living area table.
# NOTE(review): units of its "value" column are not visible here - confirm.
imputed_data = pd.read_csv(
    os.path.join(
        "..", "..", "data", "imputed_data", "de_residential_building_living_area.csv"
    )
)

In [None]:
# Keep only rows whose region code starts with "ES21", and only the two
# columns used afterwards.
imputed_data = imputed_data.loc[
    imputed_data["region_code"].str.startswith("ES21"),
    ["region_code", "value"],
].copy()

In [None]:
# Hard-coded validation totals keyed by region code.
# NOTE(review): presumably square metres, since a later cell divides by
# 1,000,000 to get sq. km. - confirm the source and unit.
# NOTE(review): this rebinds `validation_data`, which earlier held the
# municipality-level table.
validation_data = {
    "ES211": 54_582_000,
    "ES213": 79_987_900,
    "ES212": 51_070_700,
}

In [None]:
# Turn the {region_code: value} mapping into a two-column table, one row per
# region, in the mapping's insertion order.
validation_data = pd.DataFrame(
    {
        "region_code": list(validation_data.keys()),
        "value": list(validation_data.values()),
    }
)

In [None]:
# Scale "value" down by 1,000,000 into a sq. km. column.
validation_data["value_sq. km."] = validation_data["value"].div(1_000_000)

In [None]:
# Display the regional validation table, including the sq. km. column.
validation_data

In [None]:
# Ensure region_code is sorted consistently
validation_data = validation_data.sort_values(by="region_code")
imputed_data = imputed_data.sort_values(by="region_code")

# Scatter both data sets against region code on a single axes.
# NOTE(review): the validation series uses "value", not "value_sq. km." -
# confirm both frames share the same unit and region level before comparing.
fig, ax = plt.subplots(figsize=(8, 5))

ax.scatter(
    validation_data["region_code"],
    validation_data["value"],
    color="blue",
    label="Validation data",
    alpha=0.7,
)
ax.scatter(
    imputed_data["region_code"],
    imputed_data["value"],
    color="red",
    label="Imputed data",
    alpha=0.7,
)

ax.set_xlabel("Region Code")
ax.set_ylabel("Value")
ax.set_title("Comparison of Values Across Regions")
ax.legend()
ax.grid(True, linestyle="--", alpha=0.6)
plt.show()


