In [None]:
import os
import pandas as pd
from energyemissionsregio.config import DATA_PATH
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Variable catalogue: one row per variable; the "var_name" and
# "var_aggregation_method" columns are read from it further down.
var_details_path = os.path.join(
    DATA_PATH, "..", "..", "01_raw", "variables_with_details_and_tags.xlsx"
)
var_df = pd.read_excel(var_details_path, sheet_name="collected_variables_EU")

### Input variables

In [None]:
# Names of the LAU-level variables analysed below. Each name is also the stem of
# the CSV file the variable is read from, so the strings must match the files exactly.
vars = [
    # general
    "population",
    "area",
    # land cover (names ending in "_cover")
    "continuous_urban_fabric_cover",
    "discontinuous_urban_fabric_cover",
    "industrial_or_commercial_units_cover",
    "port_areas_cover",
    "airports_cover",
    "mineral_extraction_sites_cover",
    "dump_sites_cover",
    "construction_sites_cover",
    "green_urban_areas_cover",
    "sport_and_leisure_facilities_cover",
    "non_irrigated_arable_land_cover",
    "permanently_irrigated_land_cover",
    "rice_fields_cover",
    "vineyards_cover",
    "fruit_trees_and_berry_plantations_cover",
    "olive_groves_cover",
    "pastures_cover",
    "permanent_crops_cover",
    "complex_cultivation_patterns_cover",
    "agriculture_with_natural_vegetation_cover",
    "agro_forestry_areas_cover",
    "broad_leaved_forest_cover",
    "coniferous_forest_cover",
    "mixed_forest_cover",
    "natural_grasslands_cover",
    "moors_and_heathland_cover",
    "sclerophyllous_vegetation_cover",
    "transitional_woodland_shrub_cover",
    "beaches_dunes_and_sand_cover",
    "bare_rocks_cover",
    "sparsely_vegetated_areas_cover",
    "burnt_areas_cover",
    "glaciers_and_perpetual_snow_cover",
    "inland_marshes_cover",
    "peat_bogs_cover",
    "salt_marshes_cover",
    "salines_cover",
    "intertidal_flats_cover",
    "water_courses_cover",
    "water_bodies_cover",
    "coastal_lagoons_cover",
    "estuaries_cover",
    "sea_and_ocean_cover",
    # industrial fuel demand
    "fuel_demand_of_chemical_industries",
    "fuel_demand_of_iron_and_steel_industries",
    "fuel_demand_of_non_ferrous_metal_industries",
    "fuel_demand_of_non_metallic_mineral_industries",
    "fuel_demand_of_paper_and_printing_industries",
    "fuel_demand_of_refineries",
    # infrastructure and buildings
    "railway_network",
    "road_network",
    "number_of_buildings",
    # air pollution
    "average_air_pollution_due_to_pm2.5",
    "average_air_pollution_due_to_no2",
    "average_air_pollution_due_to_o3",
    "average_air_pollution_due_to_pm10",
    # livestock
    "number_of_buffaloes",
    "number_of_cattle",
    "number_of_pigs",
    "number_of_sheeps",
    "number_of_chickens",
    "number_of_goats",
]


In [None]:
# Build one wide table: a "region_code" column plus one column per variable.
# Only rows whose region code starts with "ES" or "DE" are kept.
lau_df = None

for var_name in vars:
    csv_path = os.path.join(DATA_PATH, f"{var_name}.csv")
    raw = pd.read_csv(csv_path)

    in_scope = raw["region_code"].str.startswith(("ES", "DE"))
    _df = raw.loc[in_scope, ["region_code", "value"]].rename(
        columns={"value": var_name}
    )

    # The first variable seeds the table; each later one is outer-joined so that
    # regions missing from a file are kept (with NaN for that variable).
    if lau_df is None:
        lau_df = _df
    else:
        lau_df = pd.merge(lau_df, _df, on="region_code", how="outer")

In [None]:
# The data sources (among them Corine Land Cover, OpenStreetMap and SEEnergies) deliver
# spatial data either as raster files or as points at specific X-Y coordinates. That
# information is overlaid on the LAU regions and aggregated per LAU region to obtain
# the regional datasets. A LAU region that no data point overlaps has no value, so
# the missing entries are set to zero here.
lau_df = lau_df.fillna(value=0)

### Correlation between LAU variables in both countries (Germany and Spain)

In [None]:
# Numeric-only view for the correlation matrix: the region identifier is dropped.
# drop() returns a new frame, so lau_df itself is left untouched.
corr_df_lau = lau_df.drop(columns="region_code")

In [None]:
# Pairwise correlation between all variables at LAU level (pandas default method).
correlations_lau = corr_df_lau.corr()

# Make sure the output folder exists so that savefig does not fail on a fresh checkout.
figures_dir = os.path.join("..", "..", "figures")
os.makedirs(figures_dir, exist_ok=True)

# Heatmap of the correlation matrix on the full [-1, 1] range.
# NOTE(review): "Blues" is a sequential palette, so zero correlation is drawn mid-blue;
# a diverging palette centred on 0 may read better - confirm the intended look.
fig, ax = plt.subplots(figsize=(15, 15))  # Adjust the figure size as needed
sns.heatmap(correlations_lau, annot=False, cmap="Blues", cbar=True, vmin=-1, vmax=1, ax=ax)
fig.savefig(os.path.join(figures_dir, "correlation_spatially_lau.png"), bbox_inches="tight")
plt.show()

### Correlation between LAU variables, after aggregating them to NUTS3 spatial level

In [None]:
# Aggregate every LAU-level variable to NUTS3 level in a single groupby, using the
# aggregation method listed for that variable in var_df.

# Catalogue method name -> pandas aggregation; "bool" variables are averaged.
pandas_agg_by_method = {"sum": "sum", "mean": "mean", "bool": "mean", "max": "max"}

# One aggregation method per variable; the first catalogue row wins on duplicates.
agg_method_by_var = var_df.drop_duplicates("var_name").set_index("var_name")[
    "var_aggregation_method"
]

agg_spec = {}
for var_name in vars:
    if var_name not in agg_method_by_var.index:
        raise KeyError(f"No aggregation method listed in var_df for variable {var_name!r}")

    agg_method = agg_method_by_var[var_name]
    if agg_method not in pandas_agg_by_method:
        raise ValueError(
            f"Unknown var aggregation method {agg_method!r} for variable {var_name!r}"
        )
    agg_spec[var_name] = pandas_agg_by_method[agg_method]

# Convert LAU to NUTS3 regions: keep the part of the code before the first underscore.
nuts3_code = lau_df["region_code"].str.split("_").str[0].rename("region_code")

# Aggregate per NUTS3 region; columns come out in the order of `vars`,
# preceded by "region_code".
corr_df_nuts3 = lau_df[list(vars)].groupby(nuts3_code).agg(agg_spec).reset_index()

In [None]:
# Drop the region identifier so that only the variables enter the correlation matrix.
# errors="ignore" keeps the cell re-runnable: a second run finds the column already
# gone and would otherwise raise KeyError.
corr_df_nuts3 = corr_df_nuts3.drop(columns="region_code", errors="ignore")

In [None]:
# Show the aggregated table as the cell output for a visual check.
corr_df_nuts3

In [None]:
# Pairwise correlation between all variables after aggregation to NUTS3 level
# (pandas default method).
correlations_nuts3 = corr_df_nuts3.corr()

# Make sure the output folder exists so that savefig does not fail on a fresh checkout.
figures_dir = os.path.join("..", "..", "figures")
os.makedirs(figures_dir, exist_ok=True)

# Heatmap of the correlation matrix on the full [-1, 1] range.
# NOTE(review): "Blues" is a sequential palette, so zero correlation is drawn mid-blue;
# a diverging palette centred on 0 may read better - confirm the intended look.
fig, ax = plt.subplots(figsize=(15, 15))  # Adjust the figure size as needed
sns.heatmap(correlations_nuts3, annot=False, cmap="Blues", cbar=True, vmin=-1, vmax=1, ax=ax)
fig.savefig(os.path.join(figures_dir, "correlation_spatially_nuts3.png"), bbox_inches="tight")
plt.show()

### Correlation difference between the variables at NUTS3 and LAU spatial levels

In [None]:
# Element-wise change in correlation when moving from LAU to NUTS3 level
# (NUTS3 minus LAU, aligned on row and column labels).
correlations_diff = correlations_nuts3.sub(correlations_lau)

In [None]:
# Make sure the output folder exists so that savefig does not fail on a fresh checkout.
figures_dir = os.path.join("..", "..", "figures")
os.makedirs(figures_dir, exist_ok=True)

# Heatmap of the absolute correlation difference. Two correlations in [-1, 1] can
# differ by at most 2, hence the fixed [0, 2] colour range.
fig, ax = plt.subplots(figsize=(15, 15))  # Adjust the figure size as needed
sns.heatmap(correlations_diff.abs(), annot=False, cmap="Reds", cbar=True, vmin=0, vmax=2, ax=ax)
fig.savefig(os.path.join(figures_dir, "correlation_spatially_diff.png"), bbox_inches="tight")
plt.show()