In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import math 
import pandas as pd
import geopandas as gpd
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import rc

from zoomin.main_modules.disaggregation import disaggregate_eucalc_output
from zoomin.data.population import get_population
from zoomin.main_modules.shares import get_population_share

In [None]:
# Current working directory of the kernel; the relative data/report paths in later cells are joined onto it.
cwd = os.getcwd()

In [None]:
# Two-letter country code -> full country name.
# The insertion order is relied upon later: it is the order in which countries are stacked in the bar charts.
countries_dict = {
    'AT': 'Austria',
    'BE': 'Belgium',
    'BG': 'Bulgaria',
    'CH': 'Switzerland',
    'CY': 'Cyprus',
    'CZ': 'Czechia',
    'DE': 'Germany',
    'DK': 'Denmark',
    'EE': 'Estonia',
    'EL': 'Greece',
    'ES': 'Spain',
    'FI': 'Finland',
    'FR': 'France',
    'HR': 'Croatia',
    'HU': 'Hungary',
    'IE': 'Ireland',
    'IT': 'Italy',
    'LT': 'Lithuania',
    'LU': 'Luxembourg',
    'LV': 'Latvia',
    'MT': 'Malta',
    'NL': 'Netherlands',
    'PL': 'Poland',
    'PT': 'Portugal',
    'RO': 'Romania',
    'SE': 'Sweden',
    'SI': 'Slovenia',
    'SK': 'Slovakia',
    'UK': 'United Kingdom',
}

# Disaggregation

In [None]:
# Input file handed to the disaggregation call, resolved relative to the working directory.
DATA_PATH = os.path.join(cwd, "../../data/input/EUCalc_dummy_EU.json")

# Output folder and base file name handed to the disaggregation call.
# NOTE(review): this folder is "../data/output/", while the later cell that reads
# 'disaggregated_data_EU.csv' looks in "../../data/output/" -- confirm which one is intended.
SAVE_PATH = os.path.join(cwd, "../data/output/")
save_name = 'disaggregated_data_EU'

In [None]:
# Run the disaggregation on the input file; save_path/save_name presumably control where the result is written.
# NOTE(review): `region_to_disaggregate` is not assigned in any cell visible in this notebook, so on a
# fresh kernel this call raises NameError -- define it before this cell or confirm where it comes from.
disagg_data = disaggregate_eucalc_output(DATA_PATH, 
                                         region_to_disaggregate,
                                         save_path=SAVE_PATH, 
                                         save_name=save_name)

In [None]:
# Display the result returned by the disaggregation call.
disagg_data

# EU Country level

### Population and population share 

In [None]:
# INPUT_PATH: folder the shapefile and the variable-metadata CSV are read from in later cells.
# OUTPUT_PATH: folder every figure in this notebook is saved to.
# Both are resolved relative to the working directory.
INPUT_PATH = os.path.join(cwd, "../../data/input/processed")
OUTPUT_PATH = os.path.join(cwd, "../reports/figures/")

In [None]:
# Country geometries, read from <INPUT_PATH>/Shapefiles/countries.shp.
countries_shapefile = os.path.join(INPUT_PATH, 'Shapefiles', 'countries.shp')
gdf = gpd.read_file(countries_shapefile)

In [None]:
# The country codes found in the shapefile drive both look-ups.
countries = gdf.CNTR_CODE

reg_pop = get_population(countries, countries=True)
reg_pop_share = get_population_share(countries, countries=True)


def _lookup_by_country(table):
    """Build a row-wise function that fetches ``table``'s entry for the row's CNTR_CODE."""
    def _fetch(record):
        return table.get(record['CNTR_CODE'])
    return _fetch


# Attach population and population share to each country row (missing codes yield the look-up's default).
gdf['POP'] = gdf.apply(_lookup_by_country(reg_pop), axis=1)
gdf['POP_SHARE'] = gdf.apply(_lookup_by_country(reg_pop_share), axis=1)


In [None]:
# Display the country GeoDataFrame, which now carries the POP and POP_SHARE columns.
gdf

In [None]:
# Full country name -> hex colour used for that country in the stacked bar charts.
# Every colour and every country appears exactly once.
country_color_dict = {
    'Belgium': "#0076FF",
    'Greece': "#008F9C",
    'Lithuania': "#E85EBE",
    'Portugal': "#00B917",
    'Bulgaria': "#EEE502",
    'Spain': "#FF029D",
    'Luxembourg': "#98FF52",
    'Romania': "#691970",
    'Czechia': "#1A1A1A",
    'France': "#007DB5",
    'Hungary': "#00FF00",
    'Slovenia': "#774D00",
    'Denmark': "#0E4CA1",
    'Croatia': "#0000FF",
    'Malta': "#7544B1",
    'Slovakia': "#B500FF",
    'Germany': "#6A826C",
    'Italy': "#FF6E41",
    'Netherlands': "#005F39",
    'Finland': "#5FAD4E",
    'Estonia': "#A75740",
    'Cyprus': "#A5FFD2",
    'Austria': "#009BFF",
    'Sweden': "#CEEDAD",
    'Ireland': "#7A4782",
    'Latvia': "#7E2DD2",
    'Poland': "#85A900",
    'Switzerland': "#FF0056",
    'United Kingdom': "#00AE7E",
}

# Spare colours that are not assigned to any country:
#   #D5FF00 #BDD393 #9E008E #C28C9F #FF74A3 #01D0FF #004754 #E56FFE #788231 #FFB167 #3EE3AA
#   #671A31 #5E5E5E #200F3A #FFDB66 #006401 #01FFFE #FF937E #00FF78 #6B6882 #95003A #FE8900
#   #91D0CB #BE9970 #FF0000 #001544 #968AE8 #BB8800 #43002C #DEFF74 #00FFC6 #FFE502 #620E00

### Population and population share

In [None]:
matplotlib.rcParams.update({'font.size': 25})


def _plot_country_choropleth(fig, ax, column, title):
    """Draw ``gdf[column]`` as a 'Blues' choropleth on ``ax`` with its own horizontal colorbar.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure that owns ``ax``; the colorbar is added to it.
    ax : matplotlib.axes.Axes
        Axes the map is drawn on and the colorbar takes its space from.
    column : str
        Name of the ``gdf`` column that sets the colour of each country.
    title : str
        Panel title.
    """
    # Series.min()/max() skip missing entries; the built-in min()/max() give
    # unreliable results (or fail) when a country had no look-up value.
    values = pd.to_numeric(gdf[column])
    vmin, vmax = values.min(), values.max()

    # Pass the limits explicitly so the map and the colorbar share one scale.
    gdf.plot(column=column, cmap='Blues', vmin=vmin, vmax=vmax,
             linewidth=0.8, ax=ax, edgecolor='0.8')
    ax.axis('off')
    ax.set_title(title, fontweight="bold", fontsize=30)

    sm = plt.cm.ScalarMappable(cmap='Blues', norm=plt.Normalize(vmin=vmin, vmax=vmax))
    sm.set_array([])
    # `ax=` is required: the mappable is not attached to any Axes, so matplotlib
    # cannot work out on its own where the colorbar belongs.
    cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', shrink=0.8, pad=0)
    cbar.ax.tick_params(labelsize=30)


fig = plt.figure(figsize=(13, 13))
gs = fig.add_gridspec(1, 2, wspace=0, hspace=0)

# Left panel: absolute population. Right panel: population share.
ax1 = fig.add_subplot(gs[0, 0])
_plot_country_choropleth(fig, ax1, 'POP', 'Population')

ax2 = fig.add_subplot(gs[0, 1])
_plot_country_choropleth(fig, ax2, 'POP_SHARE', 'Population share')

plt.tight_layout()

# save figure
fig_name = 'EU_population'

plt.savefig(fname=f'{os.path.join(OUTPUT_PATH, fig_name)}.png', format='png', bbox_inches="tight", dpi=200)

### Disaggregated values, 2020 and 2050

In [None]:
# Load the disaggregated table from disk; this rebinds `disagg_data`, replacing whatever it held before.
EUCALC_PATH = os.path.join(cwd, "../../data/output/")
disagg_csv = os.path.join(EUCALC_PATH, 'disaggregated_data_EU.csv')

disagg_data = pd.read_csv(disagg_csv)

In [None]:
# Display the table loaded from the CSV.
disagg_data

In [None]:
# Every distinct variable name in the disaggregated table.
# (The name `vars` hides Python's built-in of the same name; it is kept because later cells read it.)
vars = disagg_data['variable'].unique()

In [None]:
# Collect the variables that carry no information: every value is 0, or every value is missing.
var_0_list = []
for var in vars:
    # Keep only the value columns of this variable's rows.
    df = disagg_data[disagg_data['variable'] == var].drop(columns=['variable', 'title', 'region'])
    only_zeros = (df.values == 0).all()
    only_nulls = df.isnull().values.all()
    if only_zeros or only_nulls:
        var_0_list.append(var)

In [None]:
# Variables worth plotting: everything except the all-zero / all-missing ones collected above.
valid_vars = set(vars).difference(var_0_list)

In [None]:
# Peek at a single entry of valid_vars; a set has no fixed order, so which one is shown may differ between runs.
list(valid_vars)[1:2]

In [None]:
matplotlib.rcParams.update({'font.size': 15})

# Legend handles, built once. Listed in reverse stacking order so the first legend
# entry corresponds to the top-most bar segment.
stack_legend_handles = [
    mpatches.Patch(edgecolor=country_color_dict[name],
                   facecolor=country_color_dict[name],
                   label=name)
    for name in reversed(list(countries_dict.values()))
]

# One stacked bar chart per variable: one bar per value column, one segment per country.
for var in list(valid_vars):
    data = disagg_data.loc[disagg_data['variable'] == var].drop(columns=['variable', 'title'])

    # Everything except the region key is a value column; no fixed column count is assumed.
    year_cols = [col for col in data.columns if col != 'region']

    fig, ax = plt.subplots(figsize=(10, 5))

    bottom_val = np.zeros(len(year_cols))
    for country, country_full_name in countries_dict.items():
        rows = data.loc[data['region'] == country, year_cols]
        if rows.empty:
            # No data for this country and variable: nothing to stack.
            continue

        # Missing values count as 0 so that one gap does not turn the running
        # total (and with it every segment stacked above) into NaN.
        heights = np.nan_to_num(rows.iloc[0].to_numpy(dtype=float))

        ax.bar(year_cols,
               heights,
               width=0.3,
               bottom=bottom_val,
               color=country_color_dict[country_full_name])

        bottom_val = bottom_val + heights

    # y label: variable names look like "<name>[<unit>]"
    var_name, _, unit = var.partition('[')
    ax.set_ylabel(unit.rstrip(']'))

    # legend
    ax.legend(handles=stack_legend_handles, bbox_to_anchor=(1.1, 0.5), loc='center left', fontsize=14, ncol=2)

    # save figure
    # NOTE: var_name is used verbatim; characters that are special in LaTeX (e.g. %) are not removed here.
    fig_name = f'EU_{var_name}'

    plt.savefig(fname=f'{os.path.join(OUTPUT_PATH, fig_name)}.png', format='png', bbox_inches="tight", dpi=200)

    # Show the figure, then release it so the loop does not accumulate open figures.
    plt.show()
    plt.close(fig)

In [None]:
matplotlib.rcParams.update({'font.size': 14})

# One figure per variable: 2020 and 2050 maps side by side on a shared colour scale.
for var in valid_vars:
    # Variable names look like "<name>[<unit>]".
    var_name, _, unit = var.partition('[')
    unit = unit.rstrip(']')

    fig, ax = plt.subplots(nrows=1,
                           ncols=2,
                           gridspec_kw={'hspace': 0, 'wspace': 0},  # spacing between the grid figs
                           figsize=(10, 10))

    data = disagg_data.loc[disagg_data['variable'] == var].drop(columns=['variable', 'title'])
    # NOTE(review): the join key is NUTS_ID, while the population cell keys on CNTR_CODE --
    # confirm both columns hold the codes used in `region`.
    data = gdf.merge(data, how='left', left_on='NUTS_ID', right_on='region')

    # Shared limits so both panels and the colorbar use the same scale.
    vmin = data[['2020', '2050']].min().min()
    vmax = data[['2020', '2050']].max().max()

    for panel, year in zip(ax, ('2020', '2050')):
        data.plot(column=year, vmin=vmin, vmax=vmax, cmap='Blues', linewidth=0.8, ax=panel, edgecolor='0.8')
        panel.axis('off')
        panel.set_title(f'Year {year}', fontsize=20)

    sm = plt.cm.ScalarMappable(cmap='Blues', norm=plt.Normalize(vmin=vmin, vmax=vmax))
    sm.set_array([])
    cbar = fig.colorbar(sm, ax=[ax[0], ax[1]], orientation='horizontal', location='bottom',
                        shrink=1, fraction=0.046, pad=0.04)
    # set_label puts the unit along the bar's long axis; on a horizontal colorbar
    # the y-label would sit sideways at its left end.
    cbar.set_label(unit)

    # save figure
    fig_name = f'EU_disagg_{var_name}'

    plt.savefig(fname=f'{os.path.join(OUTPUT_PATH, fig_name)}.png', format='png', bbox_inches="tight", dpi=200)

    # Show the figure, then release it so the loop does not accumulate open figures.
    plt.show()
    plt.close(fig)

### Top 10% and bottom 10% regions

In [None]:
# Switch matplotlib text rendering to LaTeX from this cell onwards; the \textbf{...} legend
# labels used below rely on it. This is a global setting and needs a working LaTeX installation.
rc('text', usetex=True)

In [None]:
# Size of the "top" and "bottom" groups: 10% of all countries, rounded up.
EXTREME_SHARE_PERCENT = 10
n_countries = math.ceil(len(countries_dict) * EXTREME_SHARE_PERCENT / 100)

In [None]:
# Show how many countries make up each 10% group.
n_countries

In [None]:
TOP_COLOR = "#00FF78"
BOTTOM_COLOR = "#E56FFE"


def _legend_section(color, heading, regions):
    """Legend handles for one group: a coloured heading followed by one blank-swatch entry per region."""
    handles = [mpatches.Patch(edgecolor=color, facecolor=color, label=heading)]
    for region in regions:
        # Fall back to the raw code when a region has no entry in countries_dict.
        handles.append(mpatches.Patch(edgecolor="white",
                                      facecolor="white",
                                      label=countries_dict.get(region, region)))
    return handles


def _plot_extreme_regions(ax, data, year, n_regions):
    """Highlight the ``n_regions`` highest and lowest regions for ``year`` on ``ax``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Panel to draw on.
    data : geopandas.GeoDataFrame
        Frame with at least the columns 'region', ``year`` and 'geometry'.
    year : str
        Name of the value column to rank by.
    n_regions : int
        Number of regions in each highlighted group.
    """
    # all regions as a white base layer
    data.plot(color='white', ax=ax, edgecolor='0.8')

    # Rank from highest to lowest. Regions without a value are dropped first:
    # sort_values puts NaN last, so they would otherwise land in the "bottom" group.
    ranked = (data[['region', year, 'geometry']]
              .dropna(subset=[year])
              .sort_values(by=year, ascending=False))

    # top 10 percent = highest values
    top_data = ranked.head(n_regions)
    if not top_data.empty:
        top_data.plot(color=TOP_COLOR, ax=ax, edgecolor='0.8')

    # bottom 10 percent = lowest values
    bottom_data = ranked.tail(n_regions)
    if not bottom_data.empty:
        bottom_data.plot(color=BOTTOM_COLOR, ax=ax, edgecolor='0.8')

    ax.axis('off')
    ax.set_title(f'Year {year}', fontsize=20)

    # legend: with two columns the top group fills the first column, the bottom group the second
    color_patch_list = (_legend_section(TOP_COLOR, r"\textbf{Top 10\% regions}", top_data.region)
                        + _legend_section(BOTTOM_COLOR, r"\textbf{Bottom 10\% regions}", bottom_data.region))
    ax.legend(handles=color_patch_list, bbox_to_anchor=(0.5, 0.0), loc='upper center', ncol=2, fontsize=14)


# One figure per variable: top/bottom regions in 2020 (left) and 2050 (right).
for var in list(valid_vars):
    fig, ax = plt.subplots(nrows=1,
                           ncols=2,
                           gridspec_kw={'hspace': 0, 'wspace': 0},  # spacing between the grid figs
                           figsize=(13, 13))

    data = disagg_data.loc[disagg_data['variable'] == var].drop(columns=['variable', 'title'])
    data = gdf.merge(data, how='left', left_on='NUTS_ID', right_on='region')

    for panel, year in zip(ax, ('2020', '2050')):
        _plot_extreme_regions(panel, data, year, n_countries)

    # save figure (file name uses the variable name without its "[unit]" suffix)
    var_name = var.split('[')[0]
    fig_name = f'EU_TD_{var_name}'

    plt.savefig(fname=f'{os.path.join(OUTPUT_PATH, fig_name)}.png', format='png', bbox_inches="tight", dpi=200)

    # Show the figure, then release it so the loop does not accumulate open figures.
    plt.show()
    plt.close(fig)

### DataFrames for the LaTeX report

In [None]:
# Metadata table describing the output variables; later cells read its 'column', 'sector',
# 'metric' and 'display_name' columns.
var_info_csv = os.path.join(INPUT_PATH, 'eucalc_output_vars.csv')
var_info = pd.read_csv(var_info_csv)

In [None]:
# Sector code -> name used for the per-sector report tables (and their file names).
# Codes without a known long name simply map to themselves.
# A missing (NaN) sector code has no entry here.
# NOTE(review): 'Air polution' is spelled with one "l"; it is kept as is because the
# value ends up in an output file name.
sector_full_names = {
    'agr': 'Agriculture',
    'bld': 'Buildings',
    'tra': 'Transport',
    'elc': 'Electricity',
    'ccu': 'Carbon capture and use',
    'fos': 'Fossil',
    'ind': 'Industry',
    'lus': 'Land use',
    'min': 'Minerals',
    'str': 'Storage',
    'wat': 'Water',
    'air': 'Air polution',
    'amm': 'Ammonium',
    'bdy': 'bdy',
    'clm': 'clm',
    'clt': 'clt',
    'dhg': 'dhg',
    'gas': 'gas',
    'oal': 'oal',
    'ear': 'ear',
    'ass': 'ass',
    'oil': 'oil',
    'ude': 'ude',
    'ter': 'Terrestrial',
    'bio': 'bio',
    'row': 'row',
    'ref': 'ref',
    'emp': 'emp',
    'gen': 'gen',
    'rod': 'rod',
    'lfs': 'lfs',
    'O2e': 'O2e',
}

In [None]:
# Number of sector codes that have a mapping.
len(sector_full_names)

In [None]:
def _sector_label(record):
    """Sector name for one ``var_info`` row; None when its code has no entry in sector_full_names."""
    return sector_full_names.get(record['sector'])


# Adds the column to `var_info` in place.
var_info['sector_full_name'] = var_info.apply(_sector_label, axis=1)

In [None]:
# List the distinct values of the `metric` column.
var_info.metric.unique()

In [None]:
# Folder for the per-sector CSV tables read by the LaTeX report. It sits next to the
# figures folder (OUTPUT_PATH) and is resolved relative to the working directory
# instead of a user-specific absolute path.
# NOTE: this rebinds DATA_PATH, which an earlier cell uses for the disaggregation input file.
DATA_PATH = os.path.join(cwd, "../reports/Data/")
os.makedirs(DATA_PATH, exist_ok=True)


def _csv_safe_display_name(display_name, column):
    """Return a comma-free display name for one variable.

    Parameters
    ----------
    display_name : str or missing
        Display name from the metadata table; may be null.
    column : str
        Full variable name ("<name>[<unit>]"), used as fallback when ``display_name`` is null.
    """
    # replace null display names with the last part of the variable name
    if pd.isnull(display_name):
        display_name = column.split('_')[-1].split('[')[0]
    display_name = str(display_name)

    # the values must not contain commas: "a, b" becomes "a (b)",
    # anything with more (or fewer) parts is joined with spaces
    if ',' in display_name:
        parts = [part.strip() for part in display_name.split(',') if part.strip()]
        if len(parts) == 2:
            display_name = f'{parts[0]} ({parts[1]})'
        else:
            display_name = ' '.join(parts)

    return display_name


# save a separate csv for each sector so it is easy to read in LaTeX
var_table = var_info[['column', 'sector_full_name', 'metric', 'display_name']]

for sector in sector_full_names.values():
    data = var_table.loc[var_table['sector_full_name'] == sector].drop(columns='sector_full_name')

    # should be a valid var i.e., must be present in the data
    # (.copy() so the assignments below modify an independent frame, not a view of var_info)
    data = data[data['column'].isin(valid_vars)].copy()

    if data.empty:
        continue

    data['display_name'] = [_csv_safe_display_name(name, column)
                            for name, column in zip(data['display_name'], data['column'])]

    # remove units from columns
    data['column'] = [column.split('[')[0] for column in data['column']]

    data_name = sector.replace(" ", "-")
    print(data_name)
    data.to_csv(os.path.join(DATA_PATH, f'{data_name}.csv'))

In [None]:
# Build the LaTeX table from the Agriculture CSV written by the per-sector export,
# instead of relying on whatever `df` happens to hold from an earlier, unrelated loop.
agriculture_table = pd.read_csv(os.path.join(DATA_PATH, 'Agriculture.csv'), index_col=0)

# The .tex file goes into the parent of the CSV folder (the reports folder).
tex_path = os.path.normpath(os.path.join(DATA_PATH, os.pardir, 'Agriculture.tex'))

# NOTE(review): cell values are written verbatim; characters that are special in LaTeX
# (e.g. "_" or "%") are not escaped here -- confirm the report handles them.
with open(tex_path, "w", encoding="utf-8") as f:
    f.write("\\begin{tabular}{" + " | ".join(["c"] * len(agriculture_table.columns)) + "}\n")
    for _, row in agriculture_table.iterrows():
        f.write(" & ".join(str(value) for value in row.values) + " \\\\\n")
    f.write("\\end{tabular}")
