In [None]:
import os 
import pandas as pd
import geopandas as gpd
from energyemissionsregio.config import DATA_PATH, SHP_PATH, units, confidence_level_mapping
from energyemissionsregio.utils import solve_proxy_equation, get_proxy_var_list
from energyemissionsregio.disaggregation import perform_proxy_based_disaggregation
from energyemissionsregio.plotting_functions import (plot_proxy_data_both_countries, 
                                                     plot_proxy_data_single_country, 
                                                     plot_solved_proxy_data_single_country,
                                                    plot_solved_proxy_data_both_countries, 
                                                    plot_nuts0_data)

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Directory the kernel was started in; the processed-data folders
# ("data/disaggregated_data", "data/imputed_data") are resolved relative to it
# (two levels up) further below.
cwd = os.getcwd()

In [None]:
# Read the LAU shapefile and keep only the units whose code starts with
# "DE" or "ES".
lau_shp = gpd.read_file(os.path.join(SHP_PATH, "LAU.shp"))
lau_shp = lau_shp.loc[lau_shp["code"].str.startswith(("DE", "ES"))].copy()

# Independent per-country copies, used by the single-country plots.
de_lau_shp, es_lau_shp = (
    lau_shp.loc[lau_shp["code"].str.startswith(country_prefix)].copy()
    for country_prefix in ("DE", "ES")
)

In [None]:
def _load_proxy_data(proxy_var):
    """Read the CSV for one proxy variable from the most processed source available.

    Lookup order:
      1. ``<cwd>/../../data/disaggregated_data/<proxy_var>.csv``
      2. ``<cwd>/../../data/imputed_data/<proxy_var>.csv``
      3. ``<DATA_PATH>/<proxy_var>.csv``

    Files from the first two locations are returned as read (they are expected
    to already carry a ``value_confidence_level`` column from the earlier
    stage). Data read from ``DATA_PATH`` gets ``value_confidence_level = 5``.
    """
    processed_root = os.path.join(cwd, "..", "..", "data")
    for stage_dir in ("disaggregated_data", "imputed_data"):
        candidate = os.path.join(processed_root, stage_dir, f"{proxy_var}.csv")
        if os.path.exists(candidate):
            return pd.read_csv(candidate)

    proxy_data = pd.read_csv(os.path.join(DATA_PATH, f"{proxy_var}.csv"))
    proxy_data["value_confidence_level"] = 5  # VERY HIGH
    return proxy_data


def _collect_proxy_data(proxy_var_list, region_prefix, shp, plot_func, fig_path, fig_prefix):
    """Load, filter and plot every proxy variable in ``proxy_var_list``.

    ``region_prefix`` is a string or tuple of strings; only rows whose
    ``region_code`` starts with it are kept. Missing values are replaced by 0.
    One figure per variable is written to
    ``<fig_path>/<fig_prefix>proxy_<position in list>.png`` via ``plot_func``.
    Returns a dict mapping each proxy variable name to its filtered DataFrame
    (columns ``region_code``, ``value``, ``value_confidence_level``).
    """
    proxy_data_dict = {}
    for i, proxy_var in enumerate(proxy_var_list):
        proxy_data = _load_proxy_data(proxy_var)

        in_scope = proxy_data["region_code"].str.startswith(region_prefix)
        proxy_data = proxy_data.loc[in_scope, ["region_code", "value", "value_confidence_level"]].copy()

        proxy_data["value"] = proxy_data["value"].fillna(0)
        proxy_data_dict[proxy_var] = proxy_data

        ## plot
        save_path = os.path.join(fig_path, f"{fig_prefix}proxy_{i}.png")
        plot_func(proxy_data, shp, units[proxy_var], save_path)

    return proxy_data_dict


def get_proxy_data_dict(fig_path, proxy_var_list = None, proxy_var_list_de=None, proxy_var_list_es=None):
    """Load and plot the proxy variables needed to solve a proxy equation.

    Parameters
    ----------
    fig_path : str
        Directory in which the per-variable proxy figures are saved.
    proxy_var_list : list of str, optional
        Proxy variables shared by both countries. When given, the per-country
        lists are ignored.
    proxy_var_list_de, proxy_var_list_es : list of str, optional
        Country-specific proxy variables; both are required when
        ``proxy_var_list`` is None.

    Returns
    -------
    dict
        ``{proxy_var: DataFrame}`` restricted to DE and ES regions when
        ``proxy_var_list`` is given.
    tuple of dict
        ``(proxy_data_dict_de, proxy_data_dict_es)`` otherwise.

    Notes
    -----
    Figures are named ``proxy_<i>.png``, ``de_proxy_<i>.png`` and
    ``es_proxy_<i>.png`` where ``<i>`` is the position of the variable in its
    own list. The raw-data fallback always reads ``<proxy_var>.csv``.
    """
    if proxy_var_list is not None: # both countries
        return _collect_proxy_data(proxy_var_list, ("DE", "ES"), lau_shp,
                                   plot_proxy_data_both_countries, fig_path, "")

    # DE
    proxy_data_dict_de = _collect_proxy_data(proxy_var_list_de, "DE", de_lau_shp,
                                             plot_proxy_data_single_country, fig_path, "de_")

    # ES
    proxy_data_dict_es = _collect_proxy_data(proxy_var_list_es, "ES", es_lau_shp,
                                             plot_proxy_data_single_country, fig_path, "es_")

    return proxy_data_dict_de, proxy_data_dict_es

In [None]:
# Configuration of the proxy-based disaggregation: one dict per target variable.
#   "target_var"                    name of the target CSV (without extension) in DATA_PATH
#   "proxy"                         proxy equation applied to both countries together
#   "proxy_de" / "proxy_es"         country-specific proxy equations (used when "proxy" is absent)
#   "proxy_confidence_level"        confidence level passed to the disaggregation for a shared proxy
#   "proxy_confidence_level_de/_es" country-specific confidence levels
# Equation strings reference proxy variable names; they are parsed with
# get_proxy_var_list and evaluated with solve_proxy_equation.
var_proxy_details = [
    {"target_var": "final_energy_consumption_in_iron_and_steel_industries",
      "proxy": "number_of_iron_and_steel_industries",
      "proxy_confidence_level": 3}, 

    {"target_var": "final_energy_consumption_in_non_ferrous_metals_industries",
     "proxy": "number_of_non_ferrous_metals_industries",
    "proxy_confidence_level": 3},

 {"target_var": "final_energy_consumption_in_chemical_and_petrochemical_industries",
  "proxy": "number_of_chemical_industries+number_of_refineries",
  "proxy_confidence_level": 3},

  {"target_var": "final_energy_consumption_in_non_metallic_minerals_industries",
  "proxy": "number_of_non_metallic_minerals_industries",
  "proxy_confidence_level": 3},

  {"target_var": "final_energy_consumption_in_mining_and_quarrying_industries",
  "proxy": "mineral_extraction_sites_cover",
  "proxy_confidence_level": 4},

  {"target_var": "final_energy_consumption_in_paper_pulp_and_printing_industries",
  "proxy": "number_of_paper_and_printing_industries",
  "proxy_confidence_level": 3},

  {"target_var": "final_energy_consumption_in_wood_and_wood_products_industries",
  "proxy_de": "de_employment_in_wood_processing",
  "proxy_es": "employment_in_manufacturing",
  "proxy_confidence_level_de": 4,
  "proxy_confidence_level_es": 3},

   {"target_var": "final_energy_consumption_in_transport_equipment_industries",
  "proxy_de": "de_employment_in_mechanical_and_automotive_engineering",
  "proxy_es": "employment_in_manufacturing",
  "proxy_confidence_level_de": 4,
  "proxy_confidence_level_es": 3},

  {"target_var": "final_energy_consumption_in_machinery_industries",
  "proxy_de": "de_employment_in_mechatronics_energy_and_electrical",
  "proxy_es": "employment_in_manufacturing",
  "proxy_confidence_level_de": 4,
  "proxy_confidence_level_es": 3},

  {"target_var": "final_energy_consumption_in_food_beverages_and_tobacco_industries",
  "proxy_de": "de_employment_in_food_and_beverage_manufacturing",
  "proxy_es": "employment_in_manufacturing",
  "proxy_confidence_level_de": 4,
  "proxy_confidence_level_es": 3},

   # NOTE(review): this entry combines a shared "proxy" with country-specific
   # confidence levels and has no "proxy_confidence_level" key; whatever
   # consumes this list has to handle that combination explicitly.
   {"target_var": "final_energy_consumption_in_construction",
  "proxy": "employment_in_construction",
  "proxy_confidence_level_de": 4,
  "proxy_confidence_level_es": 3},

  {"target_var": "final_energy_consumption_in_textile_and_leather_industries",
  "proxy_de": "de_employment_in_textile_and_leather_manufacturing",
  "proxy_es": "employment_in_manufacturing",
  "proxy_confidence_level_de": 4,
  "proxy_confidence_level_es": 3},

  {"target_var": "final_energy_consumption_in_rail_transport",
  "proxy": "railway_network",
  "proxy_confidence_level": 4},

  # The DE equation is a single string literal continued with backslashes, so
  # the leading whitespace of each continuation line is part of the string.
  # NOTE(review): the per-emission-group weights (3.83, 1.78, ...) are not
  # explained here - presumably relative consumption/emission factors; confirm
  # and cite their source.
  {"target_var": "final_energy_consumption_in_road_transport",
  "proxy_de": "road_transport_of_freight + \
    (3.83 * de_number_of_passenger_cars_emission_group_euro_1) + \
    (1.78 * de_number_of_passenger_cars_emission_group_euro_2) +\
     (1.25 * de_number_of_passenger_cars_emission_group_euro_3) + \
    (0.825 * de_number_of_passenger_cars_emission_group_euro_4) +\
    (0.735 * de_number_of_passenger_cars_emission_group_euro_5) +\
    (0.6745 * de_number_of_passenger_cars_emission_group_euro_6r) + \
    (0.6745 * de_number_of_passenger_cars_emission_group_euro_6dt) + \
    (0.6745 * de_number_of_passenger_cars_emission_group_euro_6d) +\
     (3.83 * de_number_of_passenger_cars_emission_group_euro_other)",
    "proxy_es": "road_transport_of_freight + es_average_daily_traffic_light_duty_vehicles",
  "proxy_confidence_level_de": 4,
  "proxy_confidence_level_es": 4},

   {"target_var": "final_energy_consumption_in_domestic_aviation",
  "proxy": "air_transport_of_freight+air_transport_of_passengers",
  "proxy_confidence_level": 4},

  {"target_var": "final_energy_consumption_in_domestic_navigation",
  "proxy": "port_areas_cover",
  "proxy_confidence_level": 4},

   {"target_var": "final_energy_consumption_in_households",
   "proxy_de": "de_residential_building_living_area*cproj_annual_mean_temperature_heating_degree_days",
  "proxy_es": "population*cproj_annual_mean_temperature_heating_degree_days",
  "proxy_confidence_level_de": 4,
  "proxy_confidence_level_es": 3,},

    # NOTE(review): "commerical" in the ES proxy name looks like a typo of
    # "commercial"; presumably it matches the data file name - verify before
    # renaming anything.
    {"target_var": "final_energy_consumption_in_commerce",
   "proxy_de": "de_non_residential_building_living_area*cproj_annual_mean_temperature_heating_degree_days",
  "proxy_es": "es_number_of_commerical_and_service_companies*cproj_annual_mean_temperature_heating_degree_days",
  "proxy_confidence_level_de": 4,
  "proxy_confidence_level_es": 3,},

    {"target_var": "final_energy_consumption_in_agriculture_and_forestry",
  "proxy": "employment_in_agriculture_forestry_and_fishing",
  "proxy_confidence_level": 4}
  ]

In [None]:
# Disaggregate every target variable in `var_proxy_details` from NUTS0 using
# its proxy equation, save the result as CSV and write diagnostic figures.

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before the first result is written.
disagg_output_dir = os.path.join(cwd, "..", "..", "data", "disaggregated_data", "FEC")
os.makedirs(disagg_output_dir, exist_ok=True)

for proxy_detail_dict in var_proxy_details: 
    target_var = proxy_detail_dict["target_var"]
    proxy_equation = proxy_detail_dict.get("proxy", None)
    proxy_confidence_level = proxy_detail_dict.get("proxy_confidence_level", None)

    # Country-specific confidence levels; fall back to the shared level when an
    # entry does not define them.
    proxy_confidence_level_de = proxy_detail_dict.get("proxy_confidence_level_de", proxy_confidence_level)
    proxy_confidence_level_es = proxy_detail_dict.get("proxy_confidence_level_es", proxy_confidence_level)
    if proxy_confidence_level_de is None or proxy_confidence_level_es is None:
        raise KeyError(f"No proxy confidence level configured for both countries for '{target_var}'")

    if proxy_equation is None:
        proxy_equation_de = proxy_detail_dict["proxy_de"]
        proxy_equation_es = proxy_detail_dict["proxy_es"]

    print(target_var)

    # Fig paths 
    fig_path = os.path.join("..", "..", "figures", "disaggregation", "FEC", target_var)
    os.makedirs(fig_path, exist_ok=True)

    # target data 
    target_data = pd.read_csv(os.path.join(DATA_PATH, f"{target_var}.csv"))
    target_data = target_data[target_data["region_code"].str.startswith(("DE", "ES"))][["region_code", "value"]].copy()

    target_data["value_confidence_level"] = 5 # VERY HIGH

    # proxy data 
    if proxy_equation is not None: # same proxy equation for both countries
        proxy_var_list = get_proxy_var_list(proxy_equation)

        proxy_data_dict = get_proxy_data_dict(fig_path, 
                                              proxy_var_list = proxy_var_list)

        solved_proxy_data = solve_proxy_equation(proxy_equation, proxy_data_dict)

        ## plot
        save_path = os.path.join(fig_path, "solved_proxy_data.png")
        plot_solved_proxy_data_both_countries(solved_proxy_data, lau_shp, save_path)

        # disaggregate
        if proxy_confidence_level is not None:
            # one confidence level for both countries -> treat them together
            disagg_data = perform_proxy_based_disaggregation(target_data, solved_proxy_data, "NUTS0", proxy_confidence_level)
        else:
            # Shared equation but country-specific confidence levels (e.g. the
            # construction entry): disaggregate each country on its own so that
            # its configured level is applied instead of passing None.
            disagg_data_per_country = []
            for country_code, country_confidence_level in (("DE", proxy_confidence_level_de),
                                                           ("ES", proxy_confidence_level_es)):
                country_proxy_data_dict = {
                    proxy_var: proxy_data[proxy_data["region_code"].str.startswith(country_code)].copy()
                    for proxy_var, proxy_data in proxy_data_dict.items()
                }
                country_solved_proxy_data = solve_proxy_equation(proxy_equation, country_proxy_data_dict)
                country_target_data = target_data[target_data["region_code"].str.startswith(country_code)].copy()

                disagg_data_per_country.append(
                    perform_proxy_based_disaggregation(country_target_data,
                                                       country_solved_proxy_data,
                                                       "NUTS0",
                                                       country_confidence_level)
                )

            disagg_data = pd.concat(disagg_data_per_country) # merge both countries

    else: # separate proxy equation per country
        proxy_var_list_de = get_proxy_var_list(proxy_equation_de)
        proxy_var_list_es = get_proxy_var_list(proxy_equation_es)

        proxy_data_dict_de, proxy_data_dict_es = get_proxy_data_dict(fig_path, 
                                                                     proxy_var_list=None,
                                                                     proxy_var_list_de = proxy_var_list_de,
                                                                     proxy_var_list_es = proxy_var_list_es)

        solved_proxy_data_de = solve_proxy_equation(proxy_equation_de, proxy_data_dict_de)
        solved_proxy_data_es = solve_proxy_equation(proxy_equation_es, proxy_data_dict_es)

        ## plot
        save_path = os.path.join(fig_path, "de_solved_proxy_data.png")
        plot_solved_proxy_data_single_country(solved_proxy_data_de, de_lau_shp, save_path)

        save_path = os.path.join(fig_path, "es_solved_proxy_data.png")
        plot_solved_proxy_data_single_country(solved_proxy_data_es, es_lau_shp, save_path)

        # disaggregate 
        target_data_de = target_data[target_data["region_code"].str.startswith("DE")].copy()
        disagg_data_de = perform_proxy_based_disaggregation(target_data_de, 
                                                            solved_proxy_data_de, 
                                                            "NUTS0", 
                                                            proxy_confidence_level_de)

        target_data_es = target_data[target_data["region_code"].str.startswith("ES")].copy()
        disagg_data_es = perform_proxy_based_disaggregation(target_data_es, 
                                                            solved_proxy_data_es,
                                                            "NUTS0", 
                                                            proxy_confidence_level_es)

        disagg_data = pd.concat([disagg_data_de, disagg_data_es]) # merge both countries 

    # translate the confidence levels via the project's mapping before saving
    disagg_data["value_confidence_level"] = disagg_data["value_confidence_level"].map(confidence_level_mapping)

    disagg_data.to_csv(os.path.join(disagg_output_dir, f"{target_var}.csv"), index=False)

    ## plot
    save_path = os.path.join(fig_path, "disagg_data.png")
    # country totals taken from the rows whose region_code is exactly "DE" / "ES"
    de_true_value = target_data[target_data["region_code"] == "DE"]["value"].values[0]
    es_true_value = target_data[target_data["region_code"] == "ES"]["value"].values[0]

    target_var_unit = "MWh"
    plot_nuts0_data(de_true_value, es_true_value, disagg_data, lau_shp, target_var_unit, save_path)