# Foreacasting - Metric Per Entity

#### Version 1.0

### TODO

- Read aggreated data
- Forecast 30 years for metric per entity

### Observations

## Setup

### Import Packages

In [1]:
from datetime import datetime, date
from pprint import pprint
from collections import OrderedDict
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
# import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from prophet import Prophet

# Set ipython's max row display
# pd.set_option('display.max_row', 1000)
# Set iPython's max column width
pd.set_option('display.max_columns', 50)

# sns.set_style("darkgrid")
plt.rcParams['figure.figsize'] = [20, 10]

import warnings
warnings.filterwarnings('ignore')
import logging
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)

### Functions

In [2]:
def masked_heatmap():
    mask = np.zeros_like(corr)
    mask[np.triu_indices_from(mask)] = True
    fig,ax = plt.subplots()
    sns.heatmap(corr,center = 0, fmt = ".3f", square = True, annot= True, mask=mask)

## Global Variables

#### Dataset Parameters

In [3]:
file_name = '../data/silver_tables/yearly_values_per_entity.csv'

year_range = [1960, 2022]
entity_category = 'Country'

forecast_horizon = 30

In [4]:
index_columns = [
    'Entity',
    'Code',
    'Year',
    'Entity_Category'
]

forecast_columns = [
    'Annual CO2 emissions',
    #'Annual change in primary energy consumption (%)',
    # 'Per capita electricity (kWh)',
    # 'Fossil fuels per capita (kWh)',
    # 'Fossil fuels (% equivalent primary energy)',
    # 'Renewables per capita (kWh - equivalent)', 'Renewables (% electricity)',
    # 'Solar Generation - TWh', 'prod of Electricity from solar (TWh)',
]

enrichment_columns = [
    'Land area (sq. km)',
    'Population density (people per sq. km of land area)'
]

## Data Prep

In [5]:
df_original = pd.read_csv(file_name)

In [6]:
# df_original = df_original.loc[
#     (df_original['Year'] > year_range[0])
#     & (df_original['Year'] < year_range[1])
#     & (df_original['Entity_Category'] == entity_category)
# ][[*index_columns,*forecase_columns]]

df_original = df_original.loc[
    (df_original['Year'] > year_range[0])
    & (df_original['Year'] < year_range[1])
][[*index_columns,*forecast_columns, *enrichment_columns]]

In [7]:
df_original.head(2)

Unnamed: 0,Entity,Code,Year,Entity_Category,Annual CO2 emissions,Land area (sq. km),Population density (people per sq. km of land area)
34,Aruba,ABW,1961,Country,645553.0,180.0,307.966667
35,Aruba,ABW,1962,Country,708942.0,180.0,312.411111


In [8]:
df_original['Year'] = pd.to_datetime(df_original['Year'], format='%Y')

### Create Per Country Dataframes

In [9]:
x = 'Year'
y = 'Annual CO2 emissions'

In [10]:
entity_dfs_dict = {}

for entity in df_original['Entity'].unique():
    entity_dfs_dict[entity] = df_original.loc[df_original['Entity'] == entity][[x, y]]
    entity_dfs_dict[entity].columns = ['ds', 'y']

In [11]:
entity_dfs_dict[entity].head()

Unnamed: 0,ds,y
21818,1961-01-01,5061263.0
21819,1962-01-01,4888959.0
21820,1963-01-01,4700377.0
21821,1964-01-01,4469069.0
21822,1965-01-01,5209198.0


## Forecast Generation 

In [12]:
def create_predict_df_for_entity(entity_name):
    df = entity_dfs_dict[entity_name]
    
    m = Prophet(
        yearly_seasonality='auto',
        weekly_seasonality=False,
        daily_seasonality=False,
    )
    m.fit(df)
    
    future = m.make_future_dataframe(periods=forecast_horizon, freq='Y')
    forecast = m.predict(future)
    
    df = df.set_index('ds').join(forecast.set_index('ds'), how='outer').reset_index()
    df['Entity'] = entity_name
    
    return df

In [13]:
def create_pred_df(entity_dfs_dict):
    all_pred_df = pd.DataFrame()

    try:
        for entity_name in entity_dfs_dict:
            print(f"Forecasting for {entity_name} done")
            
            try:
                df = create_predict_df_for_entity(entity_name)    
            except ValueError:
                print(f'Not enough observations in {entity_name} for forecasting')
                df = pd.DataFrame()

            all_pred_df = pd.concat([all_pred_df, df])
        return all_pred_df
    
    except KeyboardInterrupt:
        return all_pred_df

In [14]:
all_entities_pred_df = create_pred_df(entity_dfs_dict)

Forecasting for Aruba done
Forecasting for Afghanistan done
Forecasting for Angola done
Forecasting for Anguilla done
Forecasting for Albania done
Forecasting for Andorra done
Forecasting for Netherlands Antilles done
Not enough observations in Netherlands Antilles for forecasting
Forecasting for United Arab Emirates done
Forecasting for Argentina done
Forecasting for Armenia done
Forecasting for American Samoa done
Not enough observations in American Samoa for forecasting
Forecasting for Antarctica done
Forecasting for Antigua And Barbuda done
Forecasting for Australia done
Forecasting for Austria done
Forecasting for Azerbaijan done
Forecasting for Burundi done
Forecasting for Belgium done
Forecasting for Benin done
Forecasting for Bonaire Sint Eustatius And Saba done
Forecasting for Burkina Faso done
Forecasting for Bangladesh done
Forecasting for Bulgaria done
Forecasting for Bahrain done
Forecasting for Bahamas done
Forecasting for Bosnia And Herzegovina done
Forecasting for Belar

### Calculate Next 10 year sum

In [15]:
all_entities_pred_df['ds'].max()

Timestamp('2050-12-31 00:00:00')

In [16]:
df_final = all_entities_pred_df.rename(columns={'ds':'Year', 'y': f'{y}', 'yhat': f'{y}_predicted'})

In [17]:
df_final['Year'] = df_final['Year'].dt.year

In [18]:
df_final['Year'].max()

2050

In [19]:
entity_lookup = df_original[['Entity_Category', 'Entity', 'Land area (sq. km)']].drop_duplicates()

df_final = df_final.merge(entity_lookup, how='left')

In [20]:
df_final.head()

Unnamed: 0,Year,Annual CO2 emissions,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,Annual CO2 emissions_predicted,Entity,Entity_Category,Land area (sq. km)
0,1961,645553.0,348571.96528,-377250.662105,1399045.0,348571.96528,348571.96528,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,0.0,0.0,0.0,464572.960884,Aruba,Country,180.0
1,1962,708942.0,370306.27032,-365914.724469,1298121.0,370306.27032,370306.27032,99154.731984,99154.731984,99154.731984,99154.731984,99154.731984,99154.731984,0.0,0.0,0.0,469461.002304,Aruba,Country,180.0
2,1963,679088.0,392040.575359,-371633.757417,1289299.0,392040.575359,392040.575359,89222.225921,89222.225921,89222.225921,89222.225921,89222.225921,89222.225921,0.0,0.0,0.0,481262.801281,Aruba,Country,180.0
3,1964,660275.0,413774.880414,-346927.437147,1342158.0,413774.880414,413774.880414,86215.622238,86215.622238,86215.622238,86215.622238,86215.622238,86215.622238,0.0,0.0,0.0,499990.502652,Aruba,Country,180.0
4,1965,592387.0,435568.73151,-349855.818559,1385812.0,435568.73151,435568.73151,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,0.0,0.0,0.0,551569.727113,Aruba,Country,180.0


In [21]:
df_final.loc[df_final['Year'].isin([2010, 2011])]

Unnamed: 0,Year,Annual CO2 emissions,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,Annual CO2 emissions_predicted,Entity,Entity_Category,Land area (sq. km)
49,2010,2506176.0,1.414267e+06,6.829244e+05,2.366091e+06,1.414267e+06,1.414267e+06,9.915473e+04,9.915473e+04,9.915473e+04,9.915473e+04,9.915473e+04,9.915473e+04,0.0,0.0,0.0,1.513422e+06,Aruba,Country,180.0
50,2011,2498848.0,1.436002e+06,6.285647e+05,2.311090e+06,1.436002e+06,1.436002e+06,8.922223e+04,8.922223e+04,8.922223e+04,8.922223e+04,8.922223e+04,8.922223e+04,0.0,0.0,0.0,1.525224e+06,Aruba,Country,180.0
139,2010,8397779.0,4.482459e+06,2.471320e+06,8.211813e+06,4.482459e+06,4.482459e+06,1.025229e+06,1.025229e+06,1.025229e+06,1.025229e+06,1.025229e+06,1.025229e+06,0.0,0.0,0.0,5.507688e+06,Afghanistan,Country,652230.0
140,2011,12105788.0,4.604349e+06,2.819084e+06,8.717096e+06,4.604349e+06,4.604349e+06,1.149122e+06,1.149122e+06,1.149122e+06,1.149122e+06,1.149122e+06,1.149122e+06,0.0,0.0,0.0,5.753471e+06,Afghanistan,Country,652230.0
278,2010,28864503.0,2.669853e+07,1.641819e+07,2.774561e+07,2.669853e+07,2.669853e+07,-4.535889e+06,-4.535889e+06,-4.535889e+06,-4.535889e+06,-4.535889e+06,-4.535889e+06,0.0,0.0,0.0,2.216264e+07,Angola,Country,1246700.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76525,2011,3372367.0,7.194967e+05,2.049425e+06,5.195306e+06,7.194967e+05,7.194967e+05,2.928405e+06,2.928405e+06,2.928405e+06,2.928405e+06,2.928405e+06,2.928405e+06,0.0,0.0,0.0,3.647902e+06,Zambia,Country,743390.0
76663,2010,7878316.0,1.174665e+06,8.273880e+06,1.602984e+07,1.174665e+06,1.174665e+06,1.098235e+07,1.098235e+07,1.098235e+07,1.098235e+07,1.098235e+07,1.098235e+07,0.0,0.0,0.0,1.215701e+07,Zimbabwe,Country,386850.0
76664,2010,7878316.0,1.174665e+06,8.273880e+06,1.602984e+07,1.174665e+06,1.174665e+06,1.098235e+07,1.098235e+07,1.098235e+07,1.098235e+07,1.098235e+07,1.098235e+07,0.0,0.0,0.0,1.215701e+07,Zimbabwe,Country,
76665,2011,9743983.0,1.258022e+06,8.457420e+06,1.604376e+07,1.258022e+06,1.258022e+06,1.102432e+07,1.102432e+07,1.102432e+07,1.102432e+07,1.102432e+07,1.102432e+07,0.0,0.0,0.0,1.228234e+07,Zimbabwe,Country,386850.0


In [22]:
def calculate_x_years_forecast_sum(frame, n_years, start_year=2021):
    year_list = [start_year+x for x in range(n_years)]
    
    frame = frame.loc[frame['Year'].isin(year_list)]
    
    return frame.groupby('Entity')[f'{y}_predicted'].sum()

In [23]:
forecast_sum = calculate_x_years_forecast_sum(
    df_final.set_index('Entity')[[f'{y}_predicted', 'Year']],
    10,
    2021
)

forecast_sum = forecast_sum.rename(f'{y}_predicted_next_10_year_sum')

df_final = df_final.join(forecast_sum, on='Entity')

In [24]:
df_final.head()

Unnamed: 0,Year,Annual CO2 emissions,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,Annual CO2 emissions_predicted,Entity,Entity_Category,Land area (sq. km),Annual CO2 emissions_predicted_next_10_year_sum
0,1961,645553.0,348571.96528,-377250.662105,1399045.0,348571.96528,348571.96528,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,0.0,0.0,0.0,464572.960884,Aruba,Country,180.0,18856380.0
1,1962,708942.0,370306.27032,-365914.724469,1298121.0,370306.27032,370306.27032,99154.731984,99154.731984,99154.731984,99154.731984,99154.731984,99154.731984,0.0,0.0,0.0,469461.002304,Aruba,Country,180.0,18856380.0
2,1963,679088.0,392040.575359,-371633.757417,1289299.0,392040.575359,392040.575359,89222.225921,89222.225921,89222.225921,89222.225921,89222.225921,89222.225921,0.0,0.0,0.0,481262.801281,Aruba,Country,180.0,18856380.0
3,1964,660275.0,413774.880414,-346927.437147,1342158.0,413774.880414,413774.880414,86215.622238,86215.622238,86215.622238,86215.622238,86215.622238,86215.622238,0.0,0.0,0.0,499990.502652,Aruba,Country,180.0,18856380.0
4,1965,592387.0,435568.73151,-349855.818559,1385812.0,435568.73151,435568.73151,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,0.0,0.0,0.0,551569.727113,Aruba,Country,180.0,18856380.0


In [25]:
df_final['Entity'].value_counts()

Spain                2730
Germany              2457
Belarus              2366
Romania              2093
Hungary              2093
                     ... 
Marshall Islands       59
Kosovo                 51
Wallis And Futuna      50
Timor                  49
Christmas Island       44
Name: Entity, Length: 222, dtype: int64

## Save Tables

In [26]:
df_final.to_csv('../data/forecasts_tables/all_entities_forecasts.csv', index=False)