In [None]:
import os 

import pandas as pd
import geopandas as gpd
from energyemissionsregio.config import DATA_PATH, SHP_PATH, units
from energyemissionsregio.utils import solve_proxy_equation, get_proxy_var_list
from energyemissionsregio.disaggregation import distribute_data_equally, perform_proxy_based_disaggregation
from energyemissionsregio.plotting_functions import (plot_proxy_data_single_country, 
                                                     plot_proxy_data_both_countries, 
                                                     plot_solved_proxy_data_single_country, 
                                                     plot_solved_proxy_data_both_countries, 
                                                     plot_target_data_single_country, 
                                                     plot_target_data_both_countries)

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# (e.g. the energyemissionsregio helpers imported above) are reloaded before
# each cell executes and edits to them are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Working directory at the time this cell runs; the figure output paths
# further down are built relative to it.
cwd = os.getcwd()

In [None]:
# Read the LAU and NUTS3 shapefiles from SHP_PATH (LAU first, then NUTS3).
lau_shp, nuts3_shp = (
    gpd.read_file(os.path.join(SHP_PATH, f"{_level}.shp"))
    for _level in ("LAU", "NUTS3")
)

In [None]:
def get_target_data(target_var):
    """Load the target data of `target_var` for Germany and/or Spain.

    The imputed version of the variable
    (``../../data/imputed_data/<target_var>.csv``, relative to the current
    working directory) is preferred. If that file does not exist, the file of
    the same name under ``DATA_PATH`` is read instead.

    Parameters
    ----------
    target_var : str
        Name of the target variable, i.e. the CSV file name without extension.

    Returns
    -------
    pandas.DataFrame
        Columns ``region_code``, ``value`` and ``value_confidence_level`` for
        the rows whose region code starts with "DE" or "ES" (both countries if
        both are present in the file, otherwise whichever one is available).
        ``value_confidence_level`` is set to 5 on every row.
    """
    imputed_path = os.path.join("..", "..", "data", "imputed_data", f"{target_var}.csv")
    if os.path.exists(imputed_path):
        source_path = imputed_path
    else:
        source_path = os.path.join(DATA_PATH, f"{target_var}.csv")

    target_data = pd.read_csv(source_path)

    # na=False: a row without a region code belongs to neither country. Without
    # it the mask would contain NaN and the boolean selection would raise.
    in_scope = target_data["region_code"].str.startswith(("DE", "ES"), na=False)
    target_data = target_data.loc[in_scope, ["region_code", "value"]].copy()
    target_data["value_confidence_level"] = 5  # VERY HIGH

    return target_data

In [None]:
def _load_climate_projection(proxy_var, country_code):
    """Read the 2020 climate projection of `proxy_var` for one country, keeping only the RCP4.5 rows."""
    projection = pd.read_csv(
        os.path.join(DATA_PATH, "..", "climate_projections", country_code, proxy_var, "2020.csv")
    )
    rcp45 = projection[projection["climate_experiment"] == "RCP4.5"]
    return rcp45.drop(columns="climate_experiment")


def get_proxy_data(proxy_equation, fig_path, country="both"):
    """Load, plot and combine the proxy variables referenced by `proxy_equation`.

    For every variable returned by ``get_proxy_var_list(proxy_equation)`` the
    data is read, restricted to the requested country/countries, plotted to
    ``<fig_path>/<proxy_var>.png`` and collected in a dict. The dict is then
    passed to ``solve_proxy_equation`` and the result is plotted to
    ``<fig_path>/solved_proxy_data.png``.

    Data sources per variable:

    * names containing ``"cproj_"``: the per-country 2020 climate projection
      files (RCP4.5 rows only), with ``value_confidence_level`` set to 5;
    * otherwise ``../../data/imputed_data/<proxy_var>.csv`` if it exists (it
      already carries ``value_confidence_level``), else
      ``<DATA_PATH>/<proxy_var>.csv`` with ``value_confidence_level`` set to 5.

    Parameters
    ----------
    proxy_equation : str
        Proxy variable name or equation of proxy variable names.
    fig_path : str
        Directory the figures are written to.
    country : str, default "both"
        "DE", "ES" or "both".

    Returns
    -------
    The value returned by ``solve_proxy_equation`` for the loaded proxy data.
    """
    both_countries = country == "both"

    # Prefix(es) used to select rows by region code.
    region_prefixes = ("DE", "ES") if both_countries else country
    # Climate projections are stored per country; as before, anything other
    # than "DE"/"ES" loads both countries.
    projection_countries = (country,) if country in ("DE", "ES") else ("DE", "ES")

    if both_countries:
        plot_proxy = plot_proxy_data_both_countries
        plot_solved = plot_solved_proxy_data_both_countries
    else:
        plot_proxy = plot_proxy_data_single_country
        plot_solved = plot_solved_proxy_data_single_country

    # The shapefile subset does not depend on the proxy variable, so it is
    # computed once. Each plotting call gets its own copy, as it did before.
    sub_lau_shp = lau_shp[lau_shp["code"].str.startswith(region_prefixes)]

    proxy_data_dict = {}
    for proxy_var in get_proxy_var_list(proxy_equation):

        if "cproj_" in proxy_var:
            proxy_data = pd.concat(
                [_load_climate_projection(proxy_var, code) for code in projection_countries]
            )
            proxy_data["value_confidence_level"] = 5  # VERY HIGH because no missing values
        else:
            imputed_path = os.path.join("..", "..", "data", "imputed_data", f"{proxy_var}.csv")
            if os.path.exists(imputed_path):
                # has already assigned value_confidence_level (from data imputation stage)
                proxy_data = pd.read_csv(imputed_path)
            else:
                proxy_data = pd.read_csv(os.path.join(DATA_PATH, f"{proxy_var}.csv"))
                proxy_data["value_confidence_level"] = 5  # VERY HIGH because no missing values

        in_scope = proxy_data["region_code"].str.startswith(region_prefixes)
        proxy_data = proxy_data[in_scope][["region_code", "value", "value_confidence_level"]].copy()

        proxy_data["value"] = proxy_data["value"].fillna(0)  # LAU gridded data

        proxy_var_unit = units[proxy_var]
        save_path = os.path.join(fig_path, f"{proxy_var}.png")
        plot_proxy(proxy_data, sub_lau_shp.copy(), proxy_var_unit, save_path)

        proxy_data_dict[proxy_var] = proxy_data

    solved_proxy_data = solve_proxy_equation(proxy_equation, proxy_data_dict)

    # Plot the solved proxy on the same country subset as the individual
    # proxies, instead of the full module-level shapefile.
    save_path = os.path.join(fig_path, "solved_proxy_data.png")
    plot_solved(solved_proxy_data, sub_lau_shp.copy(), save_path)

    return solved_proxy_data

In [None]:
# Keep only the German and Spanish regions in both shapefiles.
_country_prefixes = ("DE", "ES")
lau_shp = lau_shp.loc[lau_shp["code"].str.startswith(_country_prefixes)]
nuts3_shp = nuts3_shp.loc[nuts3_shp["code"].str.startswith(_country_prefixes)]

In [None]:
# Disaggregation configuration: one dict per target variable with
#   - "target_var": name of the variable to disaggregate,
#   - "proxy": proxy variable name, or an equation of proxy variable names,
#   - "proxy_confidence_level": confidence level attached to that proxy,
#   - "country": "DE", "ES" or "both".
# Only the uncommented entries are processed by the loop below; the
# commented-out entries are kept so they can be re-enabled for other runs.
var_proxy_details = [
    # {"target_var": "de_employment_in_textile_and_leather_manufacturing",
    #  "proxy": "industrial_or_commercial_units_cover",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_employment_in_food_and_beverage_manufacturing",
    #  "proxy": "industrial_or_commercial_units_cover",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_employment_in_mechanical_and_automotive_engineering",
    #  "proxy": "industrial_or_commercial_units_cover",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_employment_in_mechatronics_energy_and_electrical",
    #  "proxy": "industrial_or_commercial_units_cover",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_employment_in_wood_processing",
    #  "proxy": "industrial_or_commercial_units_cover",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_number_of_passenger_cars_emission_group_euro_1",
    #  "proxy": "population",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_number_of_passenger_cars_emission_group_euro_2",
    #  "proxy": "population",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_number_of_passenger_cars_emission_group_euro_3",
    #  "proxy": "population",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_number_of_passenger_cars_emission_group_euro_4",
    #  "proxy": "population",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_number_of_passenger_cars_emission_group_euro_5",
    #  "proxy": "population",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_number_of_passenger_cars_emission_group_euro_6r",
    #  "proxy": "population",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_number_of_passenger_cars_emission_group_euro_6dt",
    #  "proxy": "population",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_number_of_passenger_cars_emission_group_euro_6d",
    #  "proxy": "population",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_number_of_passenger_cars_emission_group_euro_other",
    #  "proxy": "population",
    #  "proxy_confidence_level": 2,
    #  "country": "DE"},

    # {"target_var": "de_residential_building_living_area",
    #  "proxy": "population",
    #  "proxy_confidence_level": 4,
    #  "country": "DE"},

    # {"target_var": "de_non_residential_building_living_area",
    #  "proxy": "industrial_or_commercial_units_cover",
    #  "proxy_confidence_level": 4,
    #  "country": "DE"},

    # {"target_var": "es_number_of_commerical_and_service_companies",
    #  "proxy": "industrial_or_commercial_units_cover",
    #  "proxy_confidence_level": 4,
    #  "country": "ES"},

    # {"target_var": "es_average_daily_traffic_light_duty_vehicles",
    #  "proxy": "population",
    #  "proxy_confidence_level": 2,
    #  "country": "ES"},

    # {"target_var": "employment_in_construction",
    #  "proxy": "construction_sites_cover+road_network",
    #  "proxy_confidence_level": 3,
    #  "country": "both"},

    {
        "target_var": "employment_in_manufacturing",
        "proxy": "industrial_or_commercial_units_cover",
        "proxy_confidence_level": 4,
        "country": "both",
    },

    # {"target_var": "employment_in_agriculture_forestry_and_fishing",
    #  "proxy": """es_utilized_agricultural_area +
    #              agro_forestry_areas_cover +
    #              water_bodies_cover +
    #              water_courses_cover""",
    #  "proxy_confidence_level": 3,
    #  "country": "both"},

    # {"target_var": "road_transport_of_freight",
    #  "proxy": "road_network",
    #  "proxy_confidence_level": 2,
    #  "country": "both"},

    # {"target_var": 'number_of_buffaloes',
    #  "proxy": "es_utilized_agricultural_area",
    #  "proxy_confidence_level": 2,
    #  "country": "both"},

    # {"target_var": 'number_of_cattle',
    #  "proxy": "es_utilized_agricultural_area",
    #  "proxy_confidence_level": 2,
    #  "country": "both"},

    # {"target_var": 'number_of_pigs',
    #  "proxy": "es_utilized_agricultural_area",
    #  "proxy_confidence_level": 2,
    #  "country": "both"},
]

In [None]:
# Proxy-based disaggregation of every configured target variable: load the
# target and proxy data, disaggregate, save the result as CSV and plot it.

# Make sure the CSV output directory exists (the figure directories are
# created below in the same way); to_csv does not create missing directories.
disagg_output_dir = os.path.join("..", "..", "data", "disaggregated_data")
os.makedirs(disagg_output_dir, exist_ok=True)

for var_proxy_detail in var_proxy_details:

    target_var = var_proxy_detail["target_var"]
    proxy = var_proxy_detail["proxy"]
    proxy_confidence_level = var_proxy_detail["proxy_confidence_level"]
    country = var_proxy_detail["country"]

    # Fig paths
    fig_path = os.path.join(cwd, "..", "..", "figures", "disaggregation", "NUTS3", target_var)
    os.makedirs(fig_path, exist_ok=True)

    target_data = get_target_data(target_var)

    proxy_data = get_proxy_data(proxy, fig_path, country)

    target_var_unit = units[target_var]
    # Variables whose unit is "number" are rounded to integers.
    round_to_int = target_var_unit == "number"

    disagg_data = perform_proxy_based_disaggregation(target_data, proxy_data, "NUTS3", proxy_confidence_level, round_to_int)

    disagg_data.to_csv(os.path.join(disagg_output_dir, f"{target_var}.csv"), index=False)

    ## plot
    # Single-country runs use that country's regions only; any other value
    # (i.e. "both") uses the German and Spanish regions together.
    shp_prefixes = country if country in ("DE", "ES") else ("DE", "ES")
    sub_lau_shp = lau_shp[lau_shp["code"].str.startswith(shp_prefixes)].copy()
    sub_nuts3_shp = nuts3_shp[nuts3_shp["code"].str.startswith(shp_prefixes)].copy()

    save_path = os.path.join(fig_path, "disagg_data.png")

    if country == "both":
        plot_target_data = plot_target_data_both_countries
    else:
        plot_target_data = plot_target_data_single_country

    plot_target_data(target_data, disagg_data,
                     sub_nuts3_shp, sub_lau_shp,
                     target_var_unit, save_path)

### cproj_annual_mean_temperature_heating_degree_days

In [None]:
target_var = "cproj_annual_mean_temperature_heating_degree_days"

# Read the 2020 projection file of each country (DE first, then ES), keep only
# the RCP4.5 rows and drop the then-constant climate_experiment column.
_df_de, _df_es = (
    pd.read_csv(os.path.join(DATA_PATH, "..", "climate_projections", _country_code, target_var, "2020.csv"))
    .loc[lambda _frame: _frame["climate_experiment"] == "RCP4.5"]
    .drop(columns="climate_experiment")
    for _country_code in ("DE", "ES")
)

# Stack both countries and attach the confidence level of the values.
target_data = pd.concat([_df_de, _df_es])
target_data["value_confidence_level"] = 5

In [None]:
# Region codes of all German and Spanish LAU regions.
# NOTE(review): this reads LAU.shp from a path built off DATA_PATH, while the
# shapefile loaded at the top comes from SHP_PATH; confirm whether both point
# to the same file.
_lau_shp_path = os.path.join(DATA_PATH, "..", "..", "02_processed", "shapefiles", "LAU.shp")
regions_gdf = gpd.read_file(_lau_shp_path)
_is_de_or_es = regions_gdf["code"].str.startswith(("DE", "ES"))
lau_regions = regions_gdf.loc[_is_de_or_es, ["code"]].copy()

In [None]:
# Confidence level handed to distribute_data_equally below.
# NOTE(review): presumably on the same scale as value_confidence_level (where
# 5 is labelled VERY HIGH); the reason for choosing 3 is not recorded, so confirm.
proxy_confidence_level = 3

In [None]:
# No proxy is used for this variable: the target data, the string "NUTS3", the
# LAU region codes and the confidence level go to distribute_data_equally.
# NOTE(review): judging by the helper's name, each NUTS3 value is spread
# equally over its LAU regions; confirm against its implementation.
disagg_data = distribute_data_equally(target_data, "NUTS3", lau_regions, proxy_confidence_level)

In [None]:
# Write the disaggregated values next to the other disaggregation results.
# The directory is created first because to_csv does not create missing
# directories and would fail on a fresh checkout.
_disagg_output_dir = os.path.join("..", "..", "data", "disaggregated_data")
os.makedirs(_disagg_output_dir, exist_ok=True)
disagg_data.to_csv(os.path.join(_disagg_output_dir, f"{target_var}.csv"), index=False)