# Foreacasting - Metric Per Entity

#### Version 1.0

### TODO

- Read aggreated data
- Forecast 30 years for metric per entity

### Observations

## Setup

### Import Packages

In [1]:
from datetime import datetime, date
from pprint import pprint
from collections import OrderedDict
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
# import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from prophet import Prophet

# Set ipython's max row display
# pd.set_option('display.max_row', 1000)
# Set iPython's max column width
pd.set_option('display.max_columns', 50)

# sns.set_style("darkgrid")
plt.rcParams['figure.figsize'] = [20, 10]

import warnings
warnings.filterwarnings('ignore')
import logging
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)

  from .autonotebook import tqdm as notebook_tqdm


### Functions

In [2]:
def masked_heatmap():
    mask = np.zeros_like(corr)
    mask[np.triu_indices_from(mask)] = True
    fig,ax = plt.subplots()
    sns.heatmap(corr,center = 0, fmt = ".3f", square = True, annot= True, mask=mask)

## Global Variables

#### Dataset Parameters

In [3]:
file_name = '../data/silver_tables/yearly_values_per_entity.csv'

year_range = [1960, 2022]
entity_category = 'Country'

forecast_horizon = 30

In [4]:
index_columns = [
    'Entity',
    'Code',
    'Year',
    'Entity_Category'
]
forecase_columns = [
    'Annual CO2 emissions',
    #'Annual change in primary energy consumption (%)',
    # 'Per capita electricity (kWh)',
    'Fossil fuels per capita (kWh)',
    # 'Fossil fuels (% equivalent primary energy)',
    # 'Renewables per capita (kWh - equivalent)', 'Renewables (% electricity)',
    # 'Solar Generation - TWh', 'prod of Electricity from solar (TWh)',
]

## Data Prep

In [5]:
df_original = pd.read_csv(file_name)

In [6]:
# df_original = df_original.loc[
#     (df_original['Year'] > year_range[0])
#     & (df_original['Year'] < year_range[1])
#     & (df_original['Entity_Category'] == entity_category)
# ][[*index_columns,*forecase_columns]]

df_original = df_original.loc[
    (df_original['Year'] > year_range[0])
    & (df_original['Year'] < year_range[1])
][[*index_columns,*forecase_columns]]

In [7]:
df_original.head(2)

Unnamed: 0,Entity,Code,Year,Entity_Category,Annual CO2 emissions,Fossil fuels per capita (kWh)
34,Aruba,ABW,1961,Country,645553.0,
35,Aruba,ABW,1962,Country,708942.0,


In [8]:
df_original['Year'] = pd.to_datetime(df_original['Year'], format='%Y')

### Create Per Country Dataframes

In [9]:
x = 'Year'
y = 'Annual CO2 emissions'

In [10]:
entity_dfs_dict = {}

for entity in df_original['Entity'].unique():
    entity_dfs_dict[entity] = df_original.loc[df_original['Entity'] == entity][[x, y]]
    entity_dfs_dict[entity].columns = ['ds', 'y']

In [11]:
entity_dfs_dict[entity].head()

Unnamed: 0,ds,y
21818,1961-01-01,5061263.0
21819,1962-01-01,4888959.0
21820,1963-01-01,4700377.0
21821,1964-01-01,4469069.0
21822,1965-01-01,5209198.0


## Forecast Generation 

In [12]:
def create_predict_df_for_entity(entity_name):
    df = entity_dfs_dict[entity_name]
    
    m = Prophet(
        yearly_seasonality='auto',
        weekly_seasonality=False,
        daily_seasonality=False,
    )
    m.fit(df)
    
    future = m.make_future_dataframe(periods=forecast_horizon, freq='Y')
    forecast = m.predict(future)
    
    df = df.set_index('ds').join(forecast.set_index('ds'), how='outer').reset_index()
    df['Entity'] = entity_name
    
    return df

In [13]:
def create_pred_df(entity_dfs_dict):
    all_pred_df = pd.DataFrame()

    try:
        for entity_name in entity_dfs_dict:
            print(f"Forecasting for {entity_name} done")
            
            try:
                df = create_predict_df_for_entity(entity_name)    
            except ValueError:
                print(f'Not enough observations in {entity_name} for forecasting')
                df = pd.DataFrame()

            all_pred_df = pd.concat([all_pred_df, df])
        return all_pred_df
    
    except KeyboardInterrupt:
        return all_pred_df

In [14]:
all_entities_pred_df = create_pred_df(entity_dfs_dict)

Forecasting for Aruba done
Forecasting for Afghanistan done
Forecasting for Angola done


INFO:prophet:n_changepoints greater than number of observations. Using 23.


Forecasting for Anguilla done
Forecasting for Albania done


INFO:prophet:n_changepoints greater than number of observations. Using 23.


Forecasting for Andorra done
Forecasting for Netherlands Antilles done
Not enough observations in Netherlands Antilles for forecasting
Forecasting for United Arab Emirates done
Forecasting for Argentina done
Forecasting for Armenia done


INFO:prophet:n_changepoints greater than number of observations. Using 15.


Forecasting for American Samoa done
Not enough observations in American Samoa for forecasting
Forecasting for Antarctica done
Forecasting for Antigua And Barbuda done
Forecasting for Australia done
Forecasting for Austria done
Forecasting for Azerbaijan done
Forecasting for Burundi done
Forecasting for Belgium done
Forecasting for Benin done
Forecasting for Bonaire Sint Eustatius And Saba done
Forecasting for Burkina Faso done
Forecasting for Bangladesh done
Forecasting for Bulgaria done
Forecasting for Bahrain done
Forecasting for Bahamas done
Forecasting for Bosnia And Herzegovina done
Forecasting for Belarus done
Forecasting for Belize done
Forecasting for Bermuda done
Forecasting for Bolivia done
Forecasting for Brazil done
Forecasting for Barbados done
Forecasting for Brunei done
Forecasting for Bhutan done
Forecasting for Botswana done
Forecasting for Central African Republic done
Forecasting for Canada done
Forecasting for Switzerland done
Forecasting for Chile done
Forecasting 

INFO:prophet:n_changepoints greater than number of observations. Using 10.


Forecasting for Christmas Island done
Forecasting for Cayman Islands done
Not enough observations in Cayman Islands for forecasting
Forecasting for Cyprus done
Forecasting for Czechia done
Forecasting for Germany done
Forecasting for Djibouti done
Forecasting for Dominica done
Forecasting for Denmark done
Forecasting for Dominican Republic done
Forecasting for Algeria done
Forecasting for Ecuador done
Forecasting for Egypt done


INFO:prophet:n_changepoints greater than number of observations. Using 20.


Forecasting for Eritrea done
Forecasting for Western Sahara done
Not enough observations in Western Sahara for forecasting
Forecasting for Spain done
Forecasting for Estonia done
Forecasting for Ethiopia done
Forecasting for Finland done
Forecasting for Fiji done
Forecasting for Falkland Islands done
Not enough observations in Falkland Islands for forecasting
Forecasting for France done
Forecasting for Faeroe Islands done


INFO:prophet:n_changepoints greater than number of observations. Using 22.


Forecasting for Micronesia (Country) done
Forecasting for Gabon done
Forecasting for United Kingdom done
Forecasting for Georgia done
Forecasting for Ghana done
Forecasting for Gibraltar done
Not enough observations in Gibraltar for forecasting
Forecasting for Guinea done
Forecasting for Guadeloupe done
Forecasting for Gambia done
Forecasting for Guinea-Bissau done
Forecasting for Equatorial Guinea done
Forecasting for Greece done
Forecasting for Grenada done
Forecasting for Greenland done
Forecasting for Guatemala done
Forecasting for French Guiana done
Forecasting for Guam done
Not enough observations in Guam for forecasting
Forecasting for Guyana done
Forecasting for Hong Kong done
Forecasting for Honduras done
Forecasting for Croatia done
Forecasting for Haiti done
Forecasting for Hungary done
Forecasting for Indonesia done
Forecasting for India done
Forecasting for Ireland done
Forecasting for Iran done
Forecasting for Iraq done
Forecasting for Iceland done
Forecasting for Israel 

INFO:prophet:n_changepoints greater than number of observations. Using 23.


Forecasting for Liechtenstein done
Forecasting for Sri Lanka done


INFO:prophet:n_changepoints greater than number of observations. Using 23.


Forecasting for Lesotho done
Forecasting for Lithuania done
Forecasting for Luxembourg done
Forecasting for Latvia done
Forecasting for Macao done
Forecasting for Morocco done
Forecasting for Moldova done
Forecasting for Madagascar done
Forecasting for Maldives done
Forecasting for Mexico done


INFO:prophet:n_changepoints greater than number of observations. Using 22.


Forecasting for Marshall Islands done
Forecasting for North Macedonia done
Forecasting for Mali done
Forecasting for Malta done
Forecasting for Myanmar done
Forecasting for Montenegro done
Forecasting for Mongolia done
Forecasting for Mozambique done
Forecasting for Mauritania done
Forecasting for Montserrat done
Forecasting for Martinique done
Forecasting for Mauritius done
Forecasting for Malawi done
Forecasting for Malaysia done


INFO:prophet:n_changepoints greater than number of observations. Using 23.


Forecasting for Mayotte done


INFO:prophet:n_changepoints greater than number of observations. Using 23.


Forecasting for Namibia done
Forecasting for New Caledonia done
Forecasting for Niger done
Forecasting for Nigeria done
Forecasting for Nicaragua done
Forecasting for Niue done
Forecasting for Netherlands done
Forecasting for Norway done
Forecasting for Nepal done
Forecasting for Nauru done
Forecasting for New Zealand done
Forecasting for Oman done


INFO:prophet:n_changepoints greater than number of observations. Using 9.


Forecasting for Czechoslovakia done
Not enough observations in Czechoslovakia for forecasting
Forecasting for Kosovo done
Forecasting for Serbia And Montenegro done
Not enough observations in Serbia And Montenegro for forecasting
Forecasting for Ussr done
Not enough observations in Ussr for forecasting
Forecasting for World done
Forecasting for Yugoslavia done
Not enough observations in Yugoslavia for forecasting
Forecasting for Pakistan done
Forecasting for Panama done
Forecasting for Peru done
Forecasting for Philippines done
Forecasting for Palau done
Forecasting for Papua New Guinea done
Forecasting for Poland done
Forecasting for Puerto Rico done
Not enough observations in Puerto Rico for forecasting
Forecasting for North Korea done
Forecasting for Portugal done
Forecasting for Paraguay done


INFO:prophet:n_changepoints greater than number of observations. Using 23.


Forecasting for Palestine done
Forecasting for French Polynesia done
Forecasting for Qatar done
Forecasting for Reunion done
Forecasting for Romania done
Forecasting for Russia done
Forecasting for Rwanda done
Forecasting for Saudi Arabia done
Forecasting for Sudan done
Forecasting for Senegal done
Forecasting for Singapore done
Forecasting for Saint Helena done
Forecasting for Solomon Islands done
Forecasting for Sierra Leone done
Forecasting for El Salvador done
Forecasting for Somalia done
Forecasting for Saint Pierre And Miquelon done
Forecasting for Serbia done
Forecasting for South Sudan done
Forecasting for Sao Tome And Principe done
Forecasting for Suriname done
Forecasting for Slovakia done
Forecasting for Slovenia done
Forecasting for Sweden done
Forecasting for Eswatini done
Forecasting for Sint Maarten (Dutch Part) done
Forecasting for Seychelles done
Forecasting for Syria done


INFO:prophet:n_changepoints greater than number of observations. Using 23.


Forecasting for Turks And Caicos Islands done
Forecasting for Chad done
Forecasting for Togo done
Forecasting for Thailand done
Forecasting for Tajikistan done
Forecasting for Turkmenistan done


INFO:prophet:n_changepoints greater than number of observations. Using 14.


Forecasting for Timor done
Forecasting for Tonga done
Forecasting for Trinidad And Tobago done
Forecasting for Tunisia done
Forecasting for Turkey done


INFO:prophet:n_changepoints greater than number of observations. Using 23.


Forecasting for Tuvalu done
Forecasting for Taiwan done
Forecasting for Tanzania done
Forecasting for Uganda done
Forecasting for Ukraine done
Forecasting for Uruguay done
Forecasting for United States done
Forecasting for Uzbekistan done
Forecasting for Saint Vincent And The Grenadines done
Forecasting for Venezuela done
Forecasting for British Virgin Islands done
Forecasting for United States Virgin Islands done
Not enough observations in United States Virgin Islands for forecasting
Forecasting for Vietnam done
Forecasting for Vanuatu done


INFO:prophet:n_changepoints greater than number of observations. Using 15.


Forecasting for Wallis And Futuna done
Forecasting for Samoa done
Forecasting for Yemen done
Forecasting for South Africa done
Forecasting for Zambia done
Forecasting for Zimbabwe done


### Calculate Next 10 year sum

In [26]:
all_entities_pred_df['ds'].max()

Timestamp('2050-12-31 00:00:00')

In [27]:
df_final = all_entities_pred_df.rename(columns={'ds':'Year', 'y': f'{y}', 'yhat': f'{y}_predicted'})

In [28]:
df_final['Year'] = df_final['Year'].dt.year

In [29]:
df_final['Year'].max()

2050

In [30]:
entity_lookup = df_original[['Entity_Category', 'Entity']].drop_duplicates()

df_final = df_final.merge(entity_lookup, how='left')

In [31]:
df_final.head()

Unnamed: 0,Year,Annual CO2 emissions,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,Annual CO2 emissions_predicted,Entity,Entity_Category
0,1961,645553.0,348572.950385,-392061.276763,1326828.0,348572.950385,348572.950385,115998.488548,115998.488548,115998.488548,115998.488548,115998.488548,115998.488548,0.0,0.0,0.0,464571.438933,Aruba,Country
1,1962,708942.0,370307.263601,-374778.972297,1302761.0,370307.263601,370307.263601,99152.21588,99152.21588,99152.21588,99152.21588,99152.21588,99152.21588,0.0,0.0,0.0,469459.479482,Aruba,Country
2,1963,679088.0,392041.576818,-392316.39939,1359247.0,392041.576818,392041.576818,89219.703262,89219.703262,89219.703262,89219.703262,89219.703262,89219.703262,0.0,0.0,0.0,481261.280081,Aruba,Country
3,1964,660275.0,413775.890031,-382731.039051,1356004.0,413775.890031,413775.890031,86213.095529,86213.095529,86213.095529,86213.095529,86213.095529,86213.095529,0.0,0.0,0.0,499988.98556,Aruba,Country
4,1965,592387.0,435569.749307,-292298.458799,1385045.0,435569.749307,435569.749307,115998.488548,115998.488548,115998.488548,115998.488548,115998.488548,115998.488548,0.0,0.0,0.0,551568.237855,Aruba,Country


In [38]:
df_final.loc[df_final['Year'].isin([2010, 2011])]

Unnamed: 0,Year,Annual CO2 emissions,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,Annual CO2 emissions_predicted,Entity,Entity_Category
49,2010,2506176.0,1.414269e+06,5.772685e+05,2.346292e+06,1.414269e+06,1.414269e+06,9.915222e+04,9.915222e+04,9.915222e+04,9.915222e+04,9.915222e+04,9.915222e+04,0.0,0.0,0.0,1.513421e+06,Aruba,Country
50,2011,2498848.0,1.436003e+06,6.222454e+05,2.363576e+06,1.436003e+06,1.436003e+06,8.921970e+04,8.921970e+04,8.921970e+04,8.921970e+04,8.921970e+04,8.921970e+04,0.0,0.0,0.0,1.525223e+06,Aruba,Country
139,2010,8397779.0,4.482451e+06,2.664981e+06,8.573177e+06,4.482451e+06,4.482451e+06,1.025230e+06,1.025230e+06,1.025230e+06,1.025230e+06,1.025230e+06,1.025230e+06,0.0,0.0,0.0,5.507682e+06,Afghanistan,Country
140,2011,12105788.0,4.604341e+06,2.802753e+06,8.674368e+06,4.604341e+06,4.604341e+06,1.149124e+06,1.149124e+06,1.149124e+06,1.149124e+06,1.149124e+06,1.149124e+06,0.0,0.0,0.0,5.753465e+06,Afghanistan,Country
229,2010,28864503.0,2.669853e+07,1.584073e+07,2.788072e+07,2.669853e+07,2.669853e+07,-4.535899e+06,-4.535899e+06,-4.535899e+06,-4.535899e+06,-4.535899e+06,-4.535899e+06,0.0,0.0,0.0,2.216263e+07,Angola,Country
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19236,2011,473948952.0,4.565131e+08,4.232050e+08,4.793291e+08,4.565131e+08,4.565131e+08,-3.157381e+06,-3.157381e+06,-3.157381e+06,-3.157381e+06,-3.157381e+06,-3.157381e+06,0.0,0.0,0.0,4.533557e+08,South Africa,Country
19326,2010,3120300.0,7.100872e+05,1.976791e+06,5.323809e+06,7.100872e+05,7.100872e+05,2.920004e+06,2.920004e+06,2.920004e+06,2.920004e+06,2.920004e+06,2.920004e+06,0.0,0.0,0.0,3.630091e+06,Zambia,Country
19327,2011,3372367.0,7.194968e+05,1.956958e+06,5.176817e+06,7.194968e+05,7.194968e+05,2.928404e+06,2.928404e+06,2.928404e+06,2.928404e+06,2.928404e+06,2.928404e+06,0.0,0.0,0.0,3.647901e+06,Zambia,Country
19416,2010,7878316.0,1.174671e+06,8.412342e+06,1.601006e+07,1.174671e+06,1.174671e+06,1.098231e+07,1.098231e+07,1.098231e+07,1.098231e+07,1.098231e+07,1.098231e+07,0.0,0.0,0.0,1.215698e+07,Zimbabwe,Country


In [40]:
[2021+x for x in range(10)]

[2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030]

In [47]:
def calculate_x_years_forecast_sum(frame, n_years, start_year=2021):
    year_list = [start_year+x for x in range(n_years)]
    
    frame = frame.loc[frame['Year'].isin(year_list)]
    
    return frame.groupby('Entity')[f'{y}_predicted'].sum()

In [54]:
forecast_sum

Entity
Afghanistan          7.812264e+07
Albania              4.483901e+07
Algeria              1.826159e+09
Andorra              4.257528e+06
Angola               3.666501e+08
                         ...     
Wallis And Futuna    2.519330e+05
World                4.504659e+11
Yemen                2.212880e+08
Zambia               3.845097e+07
Zimbabwe             1.440840e+08
Name: Annual CO2 emissions_predicted, Length: 221, dtype: float64

In [53]:
forecast_sum = calculate_x_years_forecast_sum(
    df_final.set_index('Entity')[[f'{y}_predicted', 'Year']],
    10,
    2021
)

forecast_sum = forecast_sum.rename(f'{y}_predicted_10')

df_final.join(forecast_sum, on='Entity')

ValueError: columns overlap but no suffix specified: Index(['Annual CO2 emissions_predicted'], dtype='object')

In [51]:
df_final.head()

Unnamed: 0,Year,Annual CO2 emissions,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,Annual CO2 emissions_predicted,Entity,Entity_Category,Annual CO2 emissions_predicted_next_10_sum
0,1961,645553.0,348572.950385,-392061.276763,1326828.0,348572.950385,348572.950385,115998.488548,115998.488548,115998.488548,115998.488548,115998.488548,115998.488548,0.0,0.0,0.0,464571.438933,Aruba,Country,
1,1962,708942.0,370307.263601,-374778.972297,1302761.0,370307.263601,370307.263601,99152.21588,99152.21588,99152.21588,99152.21588,99152.21588,99152.21588,0.0,0.0,0.0,469459.479482,Aruba,Country,
2,1963,679088.0,392041.576818,-392316.39939,1359247.0,392041.576818,392041.576818,89219.703262,89219.703262,89219.703262,89219.703262,89219.703262,89219.703262,0.0,0.0,0.0,481261.280081,Aruba,Country,
3,1964,660275.0,413775.890031,-382731.039051,1356004.0,413775.890031,413775.890031,86213.095529,86213.095529,86213.095529,86213.095529,86213.095529,86213.095529,0.0,0.0,0.0,499988.98556,Aruba,Country,
4,1965,592387.0,435569.749307,-292298.458799,1385045.0,435569.749307,435569.749307,115998.488548,115998.488548,115998.488548,115998.488548,115998.488548,115998.488548,0.0,0.0,0.0,551568.237855,Aruba,Country,


In [32]:
df_final['Entity'].value_counts()

Libya                91
Equatorial Guinea    91
Luxembourg           91
Latvia               91
Morocco              91
                     ..
Eritrea              57
Kosovo               51
Wallis And Futuna    50
Timor                49
Christmas Island     44
Name: Entity, Length: 222, dtype: int64

In [32]:
df_final['Entity'].value_counts()

Libya                91
Equatorial Guinea    91
Luxembourg           91
Latvia               91
Morocco              91
                     ..
Eritrea              57
Kosovo               51
Wallis And Futuna    50
Timor                49
Christmas Island     44
Name: Entity, Length: 222, dtype: int64

## Save Tables

In [25]:
df_final.to_csv('../data/forecasts_tables/all_entities_forecasts.csv', index=False)