In [None]:
import os 

import pandas as pd
import geopandas as gpd
from energyemissionsregio.config import DATA_PATH, SHP_PATH, units
from energyemissionsregio.utils import solve_proxy_equation, get_proxy_var_list
from energyemissionsregio.disaggregation import distribute_data_equally, perform_proxy_based_disaggregation
from energyemissionsregio.plotting_functions import plot_proxy_data, plot_solved_proxy_data, plot_target_data

In [None]:
# Development convenience: with autoreload mode 2, imported modules are re-imported
# before each cell runs, so edits to the energyemissionsregio package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Captured once so that the "../../data/..." paths built further down are anchored
# to the directory the kernel was in when this cell ran.
cwd = os.getcwd()

In [None]:
# Load the LAU and NUTS3 boundary layers from the configured shapefile folder.
lau_shp, nuts3_shp = (
    gpd.read_file(os.path.join(SHP_PATH, shp_name))
    for shp_name in ("LAU.shp", "NUTS3.shp")
)

In [None]:
# Keep only regions whose code starts with "DE" or "ES" in both layers.
lau_shp, nuts3_shp = (
    shp[shp["code"].str.startswith(("DE", "ES"))]
    for shp in (lau_shp, nuts3_shp)
)

In [None]:
# Disaggregation configuration: one dict per target variable.
#   target_var             - name of the variable to disaggregate (also its CSV file stem)
#   proxy                  - proxy equation; several proxy variables can be combined with "+"
#   proxy_confidence_level - confidence level handed to the disaggregation routine
# Only uncommented entries are processed by the loop below; the commented-out
# entries are kept as ready-to-enable alternatives.
var_proxy_details = [
    {
        "target_var": "de_employment_in_agriculture",
        "proxy": "es_utilized_agricultural_area",
        "proxy_confidence_level": 4,
    },

    # {
    #     "target_var": "de_employment_in_textile_and_leather_manufacturing",
    #     "proxy": "industrial_or_commercial_units_cover",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_employment_in_food_and_beverage_manufacturing",
    #     "proxy": "industrial_or_commercial_units_cover",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_employment_in_mechanical_and_automotive_engineering",
    #     "proxy": "industrial_or_commercial_units_cover",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_employment_in_mechatronics_energy_and_electrical",
    #     "proxy": "industrial_or_commercial_units_cover",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_employment_in_wood_processing",
    #     "proxy": "industrial_or_commercial_units_cover",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "employment_in_construction",
    #     "proxy": "construction_sites_cover+road_network",
    #     "proxy_confidence_level": 3,
    # },

    # {
    #     "target_var": "employment_in_agriculture_forestry_and_fishing",
    #     "proxy": """es_utilized_agricultural_area+
    #                 number_of_cattle+
    #                 number_of_pigs+
    #                 number_of_buffaloes+
    #                 agro_forestry_areas_cover+
    #                 water_bodies_cover+
    #                 water_courses_cover""",
    #     "proxy_confidence_level": 3,
    # },

    # {
    #     "target_var": "road_transport_of_freight",
    #     "proxy": "road_network",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_number_of_passenger_cars_emission_group_euro_1",
    #     "proxy": "population",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_number_of_passenger_cars_emission_group_euro_2",
    #     "proxy": "population",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_number_of_passenger_cars_emission_group_euro_3",
    #     "proxy": "population",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_number_of_passenger_cars_emission_group_euro_4",
    #     "proxy": "population",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_number_of_passenger_cars_emission_group_euro_5",
    #     "proxy": "population",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_number_of_passenger_cars_emission_group_euro_6r",
    #     "proxy": "population",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_number_of_passenger_cars_emission_group_euro_6dt",
    #     "proxy": "population",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_number_of_passenger_cars_emission_group_euro_6d",
    #     "proxy": "population",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_number_of_passenger_cars_emission_group_euro_other",
    #     "proxy": "population",
    #     "proxy_confidence_level": 2,
    # },

    # {
    #     "target_var": "de_residential_building_living_area",
    #     "proxy": "population",
    #     "proxy_confidence_level": 4,
    # },

    # {
    #     "target_var": "de_non_residential_building_living_area",
    #     "proxy": "industrial_or_commercial_units_cover",
    #     "proxy_confidence_level": 4,
    # },
]

In [None]:
# Disaggregate every configured target variable from NUTS3 to LAU level with the help
# of its proxy equation. For each target the loop:
#   1. reads the target data (imputed version if available, raw otherwise),
#   2. reads and plots every proxy variable named in the proxy equation,
#   3. solves the proxy equation and plots the result,
#   4. runs the proxy-based disaggregation, saves it as CSV and plots it.
imputed_data_dir = os.path.join(cwd, "..", "..", "data", "imputed_data")
disagg_data_dir = os.path.join(cwd, "..", "..", "data", "disaggregated_data")

# DataFrame.to_csv does not create missing folders, so make sure the output folder
# exists before the (potentially long) loop starts.
os.makedirs(disagg_data_dir, exist_ok=True)

for proxy_detail_dict in var_proxy_details:
    target_var = proxy_detail_dict["target_var"]
    proxy_equation = proxy_detail_dict["proxy"]
    proxy_confidence_level = proxy_detail_dict["proxy_confidence_level"]

    print(target_var)  # progress marker

    # Fig paths (anchored to cwd, like the data paths)
    fig_path = os.path.join(cwd, "..", "..", "figures", "disaggregation", "NUTS3", target_var)
    os.makedirs(fig_path, exist_ok=True)

    # read in target data: prefer the imputed file, fall back to the raw one
    target_csv = os.path.join(imputed_data_dir, f"{target_var}.csv")
    if not os.path.exists(target_csv):
        target_csv = os.path.join(DATA_PATH, f"{target_var}.csv")
    target_data = pd.read_csv(target_csv)

    target_data = target_data[target_data["region_code"].str.startswith(("DE", "ES"))][["region_code", "value"]].copy()
    target_data["value_confidence_level"] = 5 # VERY HIGH

    # read in proxy data
    proxy_var_list = get_proxy_var_list(proxy_equation)

    proxy_data_dict = {}
    for proxy_var in proxy_var_list:

        if "cproj_" in proxy_var:
            # climate projections are stored per country; keep the RCP4.5 rows of the
            # 2020 file and drop the experiment column, which is no longer needed
            country_frames = []
            for country_code in ("DE", "ES"):
                _df = pd.read_csv(os.path.join(DATA_PATH, "..", "climate_projections", country_code, proxy_var, "2020.csv"))
                _df = _df[_df["climate_experiment"] == "RCP4.5"].drop(columns="climate_experiment")
                country_frames.append(_df)

            proxy_data = pd.concat(country_frames)

            proxy_data["value_confidence_level"] = 5 # VERY HIGH because no missing values

        else:
            imputed_proxy_csv = os.path.join(imputed_data_dir, f"{proxy_var}.csv")
            if os.path.exists(imputed_proxy_csv):
                # has already assigned value_confidence_level (from data imputation stage)
                proxy_data = pd.read_csv(imputed_proxy_csv)
            else:
                proxy_data = pd.read_csv(os.path.join(DATA_PATH, f"{proxy_var}.csv"))
                proxy_data["value_confidence_level"] = 5 # VERY HIGH because no missing values

        proxy_data = proxy_data[proxy_data["region_code"].str.startswith(("DE", "ES"))][["region_code", "value", "value_confidence_level"]].copy()
        proxy_data["value"] = proxy_data["value"].fillna(0) # LAU gridded data

        ## plot
        proxy_var_unit = units[proxy_var]
        save_path = os.path.join(fig_path, f"{proxy_var}.png")
        plot_proxy_data(proxy_data, lau_shp, proxy_var_unit, save_path)

        proxy_data_dict[proxy_var] = proxy_data

    solved_proxy_data = solve_proxy_equation(proxy_equation, proxy_data_dict)

    ## plot
    save_path = os.path.join(fig_path, "solved_proxy_data.png")
    plot_solved_proxy_data(solved_proxy_data, lau_shp, save_path)

    target_var_unit = units[target_var]
    # targets whose unit is "number" are rounded to integers by the disaggregation
    round_to_int = target_var_unit == "number"

    disagg_data = perform_proxy_based_disaggregation(target_data, solved_proxy_data, "NUTS3", proxy_confidence_level, round_to_int)

    disagg_data.to_csv(os.path.join(disagg_data_dir, f"{target_var}.csv"), index=False)

    ## plot
    save_path = os.path.join(fig_path, f"{target_var}.png")
    plot_target_data(target_data, disagg_data, nuts3_shp, lau_shp, target_var_unit, save_path)

### Equal distribution (NUTS3 → LAU): cproj_annual_mean_temperature_heating_degree_days

In [None]:
# Build the target table for the heating-degree-days variable from the per-country
# climate-projection files: keep the RCP4.5 rows of the 2020 file and drop the
# experiment column afterwards.
target_var = "cproj_annual_mean_temperature_heating_degree_days"

_df_de = (
    pd.read_csv(os.path.join(DATA_PATH, "..", "climate_projections", "DE", target_var, "2020.csv"))
    .loc[lambda df: df["climate_experiment"] == "RCP4.5"]
    .drop(columns="climate_experiment")
)

_df_es = (
    pd.read_csv(os.path.join(DATA_PATH, "..", "climate_projections", "ES", target_var, "2020.csv"))
    .loc[lambda df: df["climate_experiment"] == "RCP4.5"]
    .drop(columns="climate_experiment")
)

# Stack both countries and tag every row with confidence level 5
# (labelled VERY HIGH where the same value is used earlier in this notebook).
target_data = pd.concat([_df_de, _df_es]).assign(value_confidence_level=5)

In [None]:
# Collect the codes of all German and Spanish LAU regions.
# NOTE(review): this reads LAU.shp from the processed-shapefiles folder, whereas
# lau_shp above comes from SHP_PATH; the two may be the same layer, confirm.
lau_shp_file = os.path.join(DATA_PATH, "..", "..", "02_processed", "shapefiles", "LAU.shp")
regions_gdf = gpd.read_file(lau_shp_file)

in_de_or_es = regions_gdf["code"].str.startswith(("DE", "ES"))
lau_regions = regions_gdf[in_de_or_es][["code"]].copy()

In [None]:
# Confidence level handed to distribute_data_equally for this variable, which has no
# proxy equation (for reference, 5 is labelled VERY HIGH earlier in this notebook).
proxy_confidence_level = 3

In [None]:
# No proxy is used here: the project helper distribute_data_equally receives the
# target values, the source level "NUTS3", the LAU region codes and the confidence level.
# NOTE(review): presumably each NUTS3 value is spread over its LAU regions;
# confirm against distribute_data_equally.
disagg_data = distribute_data_equally(target_data, "NUTS3", lau_regions, proxy_confidence_level)

In [None]:
# Save the disaggregated values as <target_var>.csv in the disaggregated_data folder.
# DataFrame.to_csv does not create missing folders, so create the folder first.
disagg_output_dir = os.path.join(cwd, "..", "..", "data", "disaggregated_data")
os.makedirs(disagg_output_dir, exist_ok=True)

disagg_data.to_csv(os.path.join(disagg_output_dir, f"{target_var}.csv"), index=False)