In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import math 
import pandas as pd
import geopandas as gpd
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.patches import Rectangle
from matplotlib import rc

from zoomin.main_modules.disaggregation import disaggregate_eucalc_output
from zoomin.data.population import get_population
from zoomin.main_modules.shares import get_population_share

In [None]:
# Working directory of the kernel; the relative data and figure paths in the cells below are joined onto it.
cwd = os.getcwd()

# Disaggregation

In [None]:
# Region code that selects the input file and names the output file.
region_to_disaggregate = 'DE'

# Input: the raw EUCalc dummy JSON for that region.
DATA_PATH = os.path.join(
    cwd,
    f"../../data/input/raw/EUCalc_dummy_{region_to_disaggregate}.json",
)

# Output: file stem and target directory handed to the disaggregation call.
# NOTE(review): this directory is "../data/output/" while the disaggregated CSV is later
# read back from "../../data/output/" -- confirm which of the two locations is intended.
save_name = f'disaggregated_data_{region_to_disaggregate}'
SAVE_PATH = os.path.join(cwd, "../data/output/")

In [None]:
# Run the disaggregation for the configured region.
# presumably the result is also written to SAVE_PATH under save_name -- confirm in zoomin.
disagg_data = disaggregate_eucalc_output(
    DATA_PATH,
    region_to_disaggregate,
    save_path=SAVE_PATH,
    save_name=save_name,
)

In [None]:
# Display the value returned by disaggregate_eucalc_output for a quick visual check.
disagg_data

### Population and population share 

In [None]:
# Processed inputs (shapefiles, variable metadata) are read from here ...
INPUT_PATH = os.path.join(
    cwd,
    "../../data/input/processed/",
)
# ... and every figure produced by this notebook is written here.
OUTPUT_PATH = os.path.join(
    cwd,
    "../reports/figures/",
)

In [None]:
# Geometries of the German NUTS3 regions; the cells below rely on its NUTS_ID column.
nuts3_shapefile = os.path.join(INPUT_PATH, 'Shapefiles', 'nuts3_DE.shp')
gdf = gpd.read_file(nuts3_shapefile)

In [None]:
# Preview the first rows only; rendering every region with its geometry adds nothing for a sanity check.
gdf.head()

In [None]:
# Look up population and population share for every NUTS_ID present in the shapefile.
reg_pop = get_population(gdf.NUTS_ID, countries=False)
reg_pop_share = get_population_share(gdf.NUTS_ID, countries=False)

# Attach both look-ups to the geo frame, one value per region row.
# Regions missing from a look-up get whatever `.get` returns for an unknown key.
gdf['POP'] = gdf.apply(
    lambda row: reg_pop.get(row['NUTS_ID']),
    axis=1,
)
gdf['POP_SHARE'] = gdf.apply(
    lambda row: reg_pop_share.get(row['NUTS_ID']),
    axis=1,
)

In [None]:
# Preview the first rows to check the freshly added POP and POP_SHARE columns.
gdf.head()

### Population and population share maps

In [None]:
matplotlib.rcParams.update({'font.size': 30})


def _plot_population_panel(fig, ax, geo_df, column, title):
    """Draw one choropleth of ``geo_df[column]`` on ``ax`` with its own colour bar.

    Parameters
    ----------
    fig : matplotlib Figure that owns ``ax``.
    ax : Axes to draw the map on; the colour bar takes its space from this Axes.
    geo_df : GeoDataFrame holding the geometries and the column to plot.
    column : name of the column that is colour-coded.
    title : panel title.
    """
    # Series.min()/max() skip missing values; the built-in min()/max() do not and
    # return order-dependent results as soon as a NaN is present.
    vmin, vmax = geo_df[column].min(), geo_df[column].max()

    geo_df.plot(column=column, cmap='Blues', linewidth=0.8, ax=ax, edgecolor='0.8')
    ax.axis('off')
    ax.set_title(title, fontweight="bold", fontsize=30)

    # Stand-alone mappable that only carries the colour scale for the colour bar.
    sm = plt.cm.ScalarMappable(cmap='Blues', norm=plt.Normalize(vmin=vmin, vmax=vmax))
    sm.set_array([])  # public equivalent of the former `sm._A = []`
    # The mappable is not attached to any Axes, so the parent Axes is passed explicitly.
    cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', shrink=0.8, pad=0)
    cbar.ax.tick_params(labelsize=30)


fig = plt.figure(figsize=(20, 20))
gs = fig.add_gridspec(1, 2, wspace=0, hspace=0)

#=======================================================================================
# Population
ax1 = fig.add_subplot(gs[0, 0])
_plot_population_panel(fig, ax1, gdf, 'POP', 'Population')

#=======================================================================================
# Population share
ax2 = fig.add_subplot(gs[0, 1])
_plot_population_panel(fig, ax2, gdf, 'POP_SHARE', 'Population share')

plt.tight_layout()

# save figure
fig_name = 'DE_population'

fig.savefig(fname=f'{os.path.join(OUTPUT_PATH, fig_name)}.png', format='png', bbox_inches="tight", dpi=200)

### Disaggregated 2020, 2050

In [None]:
# Reload the disaggregated values from disk so the plotting cells work from the saved CSV.
EUCALC_PATH = os.path.join(cwd, "../../data/output/")

disagg_csv = os.path.join(EUCALC_PATH, 'disaggregated_data_DE.csv')
disagg_data = pd.read_csv(disagg_csv)

In [None]:
# Preview the first rows of the reloaded table instead of rendering all of it.
disagg_data.head()

In [None]:
# Distinct variable names found in the `variable` column.
# NOTE: the name `vars` shadows Python's built-in vars() for the rest of the notebook;
# it is kept because the following cells refer to it.
vars = disagg_data.variable.unique()

In [None]:
# Collect the variables that carry no information: once the descriptive columns are
# dropped, their remaining values are either all zero or all null.
var_0_list = []
for var in vars:
    is_current_var = disagg_data['variable'] == var
    df = disagg_data[is_current_var].drop(columns=['variable', 'title', 'region'])

    only_zeros = (df.values == 0).all()
    if only_zeros or df.isnull().values.all():
        var_0_list.append(var)

In [None]:
# Every variable that was not flagged as all-zero / all-null above.
valid_vars = set(vars).difference(var_0_list)

In [None]:
# Display the variables kept for plotting (all variables minus those in var_0_list).
valid_vars

In [None]:
# Global matplotlib font size for the figures drawn after this cell (the population figure above set 30).
matplotlib.rcParams.update({'font.size': 15})

In [None]:

# One bar chart per valid variable: the values of every remaining column, summed over all regions.
for var in valid_vars:

    fig = plt.figure()
    ax = fig.add_axes([0, 0, 1, 1])

    # Drop the descriptive columns, then sum each remaining column across the region rows.
    data = disagg_data.loc[disagg_data['variable'] == var].drop(columns=['variable', 'title', 'region']).sum()

    ax.bar(data.index, data.values, color='b', width=0.6)

    # Variable names look like "<name>[<unit>]". partition() never raises, so a name
    # without a unit (or with an extra '[') no longer aborts the whole loop.
    var_name, _sep, unit = var.partition('[')
    ax.set_ylabel(unit.strip().rstrip(']'))

    # save figure
    # The bracketed unit is left out of the file name (units may contain characters
    # such as %, which is special in LaTeX).
    fig_name = f'DE_{var_name}'

    fig.savefig(fname=f'{os.path.join(OUTPUT_PATH, fig_name)}.png', format='png', bbox_inches="tight", dpi=200)

In [None]:
matplotlib.rcParams.update({'font.size': 15})

# One figure per valid variable: side-by-side choropleths for 2020 and 2050 on a shared colour scale.
for var in list(valid_vars):
    # Variable names look like "<name>[<unit>]"; partition() tolerates names without a unit.
    var_name, _sep, unit = var.partition('[')
    unit = unit.strip().rstrip(']')

    fig, ax = plt.subplots(nrows=1,
                           ncols=2,
                           gridspec_kw={'hspace': 0, 'wspace': 0},  # spacing between the grid figs
                           figsize=(15, 15))

    # Attach this variable's values to the region geometries (regions without data keep NaN).
    data = disagg_data.loc[disagg_data['variable'] == var].drop(columns=['variable', 'title'])
    data = gdf.merge(data, how='left', left_on='NUTS_ID', right_on='region')

    # Common colour limits so both years are directly comparable.
    years = ['2020', '2050']
    vmin = data[years].min().min()
    vmax = data[years].max().max()

    #=======================================================================================
    # Disaggregated values, one panel per year
    for axis, year in zip(ax, years):
        data.plot(column=year, vmin=vmin, vmax=vmax, cmap='Blues', ax=axis, edgecolor="face", linewidth=0.8)
        axis.axis('off')
        axis.set_title(f'Year {year}', fontsize=20)

    # Stand-alone mappable that only carries the shared colour scale for the colour bar.
    sm = plt.cm.ScalarMappable(cmap='Blues', norm=plt.Normalize(vmin=vmin, vmax=vmax))
    sm.set_array([])  # public equivalent of the former `sm._A = []`
    cbar = fig.colorbar(sm, ax=[ax[0], ax[1]], orientation='horizontal', location='bottom', shrink=1, fraction=0.046, pad=0.04)
    # set_label() writes along the long axis of the bar; the previous ax.set_ylabel()
    # put the unit on the short (vertical) side of the horizontal bar.
    cbar.set_label(unit)

    # save figure
    fig_name = f'DE_disagg_{var_name}'

    fig.savefig(fname=f'{os.path.join(OUTPUT_PATH, fig_name)}.png', format='png', bbox_inches="tight", dpi=200)

### Top 10% and bottom 10% regions

In [None]:
# Render all text with LaTeX from here on: the legend labels in the next plotting cell use \textbf{...}.
# This is a global matplotlib setting, so it also applies to every figure drawn after this cell.
rc('text', usetex=True)

In [None]:
# Number of regions that make up 10 % of all regions in the shapefile, rounded up.
share_percent = 10
n_regions = math.ceil(len(gdf) * share_percent / 100)

In [None]:
# Display how many regions are highlighted in each of the top / bottom groups.
n_regions

In [None]:
# One figure per valid variable: for 2020 and 2050, highlight the regions with the
# highest values (top 10 %) and the lowest values (bottom 10 %).
for var in list(valid_vars):
    fig, ax = plt.subplots(nrows=1,
                           ncols=2,
                           gridspec_kw={'hspace': 0, 'wspace': 0},  # spacing between the grid figs
                           figsize=(15, 15))

    # Attach this variable's values to the region geometries (regions without data keep NaN).
    data = disagg_data.loc[disagg_data['variable'] == var].drop(columns=['variable', 'title'])
    data = gdf.merge(data, how='left', left_on='NUTS_ID', right_on='region')

    #=======================================================================================
    # One panel per year
    for axis, year in zip(ax, ('2020', '2050')):
        # all regions as a white base layer
        data.plot(color='white', ax=axis, edgecolor='0.8')

        # Rank regions from highest to lowest value. Regions without a value are
        # dropped first; otherwise the sort pushes them to the end and they would
        # be drawn as "bottom" regions.
        ranked = (data[['region', year, 'geometry']]
                  .dropna(subset=[year])
                  .sort_values(by=year, ascending=False))

        # top 10 percent = highest values. The previous ascending sort followed by
        # [:n_regions] picked the lowest values and drew them as "Top".
        top_data = ranked[:n_regions]
        top_data.plot(color="#00FF78", ax=axis, edgecolor='0.8')

        # bottom 10 percent = lowest values
        bottom_data = ranked[-n_regions:]
        bottom_data.plot(color="#E56FFE", ax=axis, edgecolor='0.8')

        axis.axis('off')
        axis.set_title(f'Year {year}', fontsize=20)

    # legend shared by both panels; the labels are LaTeX and rely on usetex being enabled
    color_patch_list = [
        mpatches.Patch(edgecolor="#00FF78",
                       facecolor="#00FF78",
                       label=r"\textbf{Top 10\% regions}"),
        mpatches.Patch(edgecolor="#E56FFE",
                       facecolor="#E56FFE",
                       label=r"\textbf{Bottom 10\% regions}"),
    ]

    fig.legend(handles=color_patch_list, bbox_to_anchor=(0.5, 0.25), loc='upper center', ncol=2, fontsize=14)

    # save figure (the bracketed unit is left out of the file name)
    var_name = var.partition('[')[0]
    fig_name = f'DE_TD_{var_name}'

    fig.savefig(fname=f'{os.path.join(OUTPUT_PATH, fig_name)}.png', format='png', bbox_inches="tight", dpi=200)

### DataFrames for the LaTeX report

In [None]:
# Metadata describing the EUCalc output variables; the cells below use its
# `sector`, `metric`, `column` and `display_name` columns.
var_info_csv = os.path.join(INPUT_PATH, 'eucalc_output_vars.csv')
var_info = pd.read_csv(var_info_csv)

In [None]:
# Maps the code found in var_info's `sector` column to the name used for the report
# tables and their file names. Insertion order matters: the export loop walks the
# values in this order.
# NOTE(review): several codes ('oal', 'ear', 'ass', 'ude', 'O2e', ...) map to themselves
# and look like fragments of longer words -- presumably placeholders; confirm how the
# `sector` column is derived.
sector_full_names = {
    'agr': 'Agriculture',
    'bld': 'Buildings',
    'tra': 'Transport',
    'elc': 'Electricity',
    'ccu': 'Carbon capture and use',
    'fos': 'Fossil',
    'ind': 'Industry',
    'lus': 'Land use',
    'min': 'Minerals',
    'str': 'Storage',
    'wat': 'Water',
    'air': 'Air polution',
    'amm': 'Ammonium',
    'bdy': 'bdy',
    'clm': 'clm',
    'clt': 'clt',
    'dhg': 'dhg',
    'gas': 'gas',
    'oal': 'oal',
    'ear': 'ear',
    'ass': 'ass',
    'oil': 'oil',
    'ude': 'ude',
    # nan,
    'ter': 'Terrestrial',
    'bio': 'bio',
    'row': 'row',
    'ref': 'ref',
    'emp': 'emp',
    'gen': 'gen',
    'rod': 'rod',
    'lfs': 'lfs',
    'O2e': 'O2e',
}

In [None]:
# Number of sector codes mapped above.
len(sector_full_names)

In [None]:
def _sector_label(row):
    """Return the report name for the row's sector code, or None when the code is not mapped."""
    return sector_full_names.get(row['sector'])


var_info['sector_full_name'] = var_info.apply(_sector_label, axis=1)

In [None]:
# Distinct values of the `metric` column (renamed to "Variable Sub-category" in the next cell).
var_info.metric.unique()

In [None]:
# Switch to the column headings used in the report tables.
report_column_names = {
    "column": "var_name",
    "metric": "Variable Sub-category",
    "display_name": 'Variable',
}
var_info = var_info.rename(columns=report_column_names)

In [None]:
# Preview the first rows to check the renamed columns and the added sector_full_name.
var_info.head()

In [None]:
# Filled by the export loop in the next cell with the file stem of every sector that is written.
valid_sectors = []

In [None]:
# Directory the per-sector CSV files are written to.
# Kept under its own name so the input-file DATA_PATH defined near the top of the
# notebook is no longer overwritten by this cell.
# NOTE(review): machine-specific absolute path -- replace with a path relative to the project.
REPORT_DATA_PATH = 'C:/Users/s.patil/Documents/code/spatial_disaggregation/reports/data/'

# Start from an empty list so re-running this cell does not append duplicates.
valid_sectors = []


def _csv_safe_label(label):
    """Rewrite a comma-separated display name.

    'a, b' becomes 'a (b)'. Any other label containing a comma has its ', '
    separators replaced by single spaces. Labels without a comma are returned unchanged.
    """
    if ',' not in label:
        return label
    parts = label.split(', ')
    if len(parts) == 2:
        return f'{parts[0]} ({parts[1]})'
    return ' '.join(parts)


# save different csv for each sector so it is easy to read in LaTeX
for sector in sector_full_names.values():
    data = var_info.loc[var_info['sector_full_name'] == sector,
                        ['var_name', 'Variable Sub-category', 'Variable']]

    # should be a valid var i.e., must be present in the data
    # (.copy() so the column assignments below act on an independent frame)
    data = data[data['var_name'].isin(valid_vars)].copy()

    if data.empty:
        continue

    # replace null display names with the last part of the var name (unit stripped)
    fallback_names = data['var_name'].map(lambda name: name.split('_')[-1].split('[')[0])
    data['Variable'] = data['Variable'].fillna(fallback_names)

    # if the variable category is missing, fill it with the variable value.
    # Assigned back explicitly: a chained `fillna(..., inplace=True)` on a column is
    # not guaranteed to update the parent frame.
    data['Variable Sub-category'] = data['Variable Sub-category'].fillna(data['Variable'])

    # no commas allowed in csv files. so put the string after comma in brackets.
    # This runs on the filled column; testing the stale iterrows() row raised a
    # TypeError whenever the original display name was null.
    data['Variable'] = data['Variable'].map(_csv_safe_label)

    # remove units from the variable names
    data['var_name'] = data['var_name'].map(lambda name: name.split('[')[0])

    # sort categories
    data = data.sort_values('Variable Sub-category')

    data_name = sector.replace(" ", "-")
    valid_sectors.append(data_name)
    data.to_csv(os.path.join(REPORT_DATA_PATH, f'{data_name}.csv'), index=False)

In [None]:
# Display the file stems of the sectors for which a CSV was written.
valid_sectors

In [None]:
# Directory the per-sector CSV files are read back from in the next cell.
# NOTE(review): machine-specific absolute path; it also spells the last folder "Data" while the
# export cell writes to ".../reports/data/" -- the same folder only on a case-insensitive file system.
dfs_path = r'C:\Users\s.patil\Documents\code\spatial_disaggregation\reports\Data'

In [None]:
nl = '\n'
latex_nl = '\\\\'

# Directory the generated .tex tables are written to.
# NOTE(review): machine-specific absolute path -- replace with a path relative to the project.
tables_path = r"C:\Users\s.patil\Documents\code\spatial_disaggregation\reports\Tables"

# One longtable per exported sector: rows are grouped by sub-category, the sub-category
# name spans its group via \multirow and groups are separated by \midrule.
for valid_sector in valid_sectors:
    df = pd.read_csv(os.path.join(dfs_path, f'{valid_sector}.csv'))

    # Group by a scalar key. With a one-element list as key, recent pandas versions
    # yield 1-tuples such as ('name',) as group labels, which were then written
    # verbatim into the table.
    groups = list(df.groupby('Variable Sub-category'))

    with open(os.path.join(tables_path, f'{valid_sector}.tex'), "w") as f:

        f.write(f"\\begin{{longtable}}{{|ll|}}{nl}")

        # table header
        f.write(f"\\toprule{nl}")
        f.write(f"\\textbf{{Variable Sub-category}} & \\textbf{{Variable}}{latex_nl}{nl}")
        f.write(f"\\toprule{nl}")

        for position, (var_cat, group_df) in enumerate(groups):

            variables = group_df['Variable'].values
            n_rows = len(variables)

            # first row of the group carries the sub-category cell ...
            f.write(f"\\multirow{{{n_rows}}}{{*}}{{{var_cat}}} & {variables[0]} {latex_nl}{nl}")

            # ... the remaining rows leave that cell empty
            for variable in variables[1:]:
                f.write(f" & {variable}{latex_nl}{nl}")

            # rule between groups, but not after the last one
            if position < len(groups) - 1:
                f.write(f"\\midrule{nl}")

        f.write(f"\\bottomrule{nl}")
        f.write(f"\\end{{longtable}}")

## data quality figures 

In [None]:
# Traffic-light style status figure: three large dots, red fully opaque, yellow and green faded.
fig, ax = plt.subplots()

# (x position, colour, opacity); None keeps matplotlib's default (opaque) alpha
dots = (
    (0.8, 'r', None),
    (1, 'yellow', 0.1),
    (1.2, 'g', 0.1),
)
for x_pos, colour, opacity in dots:
    ax.scatter(x_pos, 2, s=5000, c=colour, edgecolors='black', alpha=opacity)

ax.set_xlim(0.6, 1.4)
ax.set_ylim(1.95, 2.05)

#ax.add_patch(Rectangle((2, 2.2), 0.2, 0.2, fill=None, alpha=1))
ax.axis('off')
fig.show()

fig_name = 'data_status_red'

plt.savefig(fname=f'{os.path.join(OUTPUT_PATH, fig_name)}.png', format='png', bbox_inches="tight", dpi=200)