# Foreacasting - Metric Per Entity

#### Version 1.0

### TODO

- Read aggreated data
- Forecast 30 years for metric per entity

### Observations

## Setup

### Import Packages

In [9]:
import scipy
from datetime import datetime, date
from pprint import pprint
from collections import OrderedDict
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
# import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from prophet import Prophet

# Set ipython's max row display
# pd.set_option('display.max_row', 1000)
# Set iPython's max column width
pd.set_option('display.max_columns', 50)

# sns.set_style("darkgrid")
plt.rcParams['figure.figsize'] = [20, 10]

import warnings
warnings.filterwarnings('ignore')
import logging
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)

### Functions

In [10]:
def masked_heatmap():
    mask = np.zeros_like(corr)
    mask[np.triu_indices_from(mask)] = True
    fig,ax = plt.subplots()
    sns.heatmap(corr,center = 0, fmt = ".3f", square = True, annot= True, mask=mask)

## Global Variables

#### Dataset Parameters

In [11]:
file_name = '../data/silver_tables/yearly_values_per_entity.csv'

year_range = [1960, 2022]
entity_category = 'Country'

forecast_horizon = 30

In [12]:
index_columns = [
    'Entity',
    'Code',
    'Year',
]
forecase_columns = [
    'Annual CO2 emissions',
    #'Annual change in primary energy consumption (%)',
    # 'Per capita electricity (kWh)',
    'Fossil fuels per capita (kWh)',
    # 'Fossil fuels (% equivalent primary energy)',
    # 'Renewables per capita (kWh - equivalent)', 'Renewables (% electricity)',
    # 'Solar Generation - TWh', 'prod of Electricity from solar (TWh)',
]

## Data Prep

In [13]:
df_original = pd.read_csv(file_name)

In [14]:
df_original = df_original.loc[
    (df_original['Year'] > year_range[0])
    & (df_original['Year'] < year_range[1])
    & (df_original['Entity_Category'] == entity_category)
][[*index_columns,*forecase_columns]]

In [15]:
df_original.head(2)

Unnamed: 0,Entity,Code,Year,Annual CO2 emissions,Fossil fuels per capita (kWh)
34,Aruba,ABW,1961,645553.0,
35,Aruba,ABW,1962,708942.0,


In [16]:
df_original['Year'] = pd.to_datetime(df_original['Year'], format='%Y')

### Create Per Country Dataframes

In [17]:
x = 'Year'
y = 'Annual CO2 emissions'

In [18]:
entity_dfs_dict = {}

for entity in df_original['Entity'].unique():
    entity_dfs_dict[entity] = df_original.loc[df_original['Entity'] == entity][[x, y]]
    entity_dfs_dict[entity].columns = ['ds', 'y']

In [19]:
entity_dfs_dict[entity].head()

Unnamed: 0,ds,y
21818,1961-01-01,5061263.0
21819,1962-01-01,4888959.0
21820,1963-01-01,4700377.0
21821,1964-01-01,4469069.0
21822,1965-01-01,5209198.0


## Forecast Generation 

In [20]:
def create_predict_df_for_entity(entity_name):
    df = entity_dfs_dict[entity_name]
    
    m = Prophet(
        yearly_seasonality='auto',
        weekly_seasonality=False,
        daily_seasonality=False,
    )
    m.fit(df)
    
    future = m.make_future_dataframe(periods=forecast_horizon, freq='Y')
    forecast = m.predict(future)
    
    df = df.set_index('ds').join(forecast.set_index('ds')).reset_index()
    df['Entity'] = entity_name
    
    return df

In [21]:
def create_pred_df(entity_dfs_dict):
    all_pred_df = pd.DataFrame()

    try:
        for entity_name in entity_dfs_dict:
            print(f"Forecasting for {entity_name} done")
            
            try:
                df = create_predict_df_for_entity(entity_name)    
            except ValueError:
                print(f'Not enough observations for entity {entity_name} for forecasting')
                df = pd.DataFrame()

            all_pred_df = pd.concat([all_pred_df, df])
        return all_pred_df
    
    except KeyboardInterrupt:
        return all_pred_df

In [22]:
all_entities_pred_df = create_pred_df(entity_dfs_dict)

Forecasting for Aruba done
Forecasting for Afghanistan done
Forecasting for Angola done
Forecasting for Anguilla done
Forecasting for Albania done
Forecasting for Andorra done
Forecasting for Netherlands Antilles done
Not enough observations for forecasting
Forecasting for United Arab Emirates done
Forecasting for Argentina done
Forecasting for Armenia done
Forecasting for American Samoa done
Not enough observations for forecasting
Forecasting for Antigua And Barbuda done
Forecasting for Australia done
Forecasting for Austria done
Forecasting for Azerbaijan done
Forecasting for Burundi done
Forecasting for Belgium done
Forecasting for Benin done
Forecasting for Bonaire Sint Eustatius And Saba done
Forecasting for Burkina Faso done
Forecasting for Bangladesh done
Forecasting for Bulgaria done
Forecasting for Bahrain done
Forecasting for Bahamas done
Forecasting for Bosnia And Herzegovina done
Forecasting for Belarus done
Forecasting for Belize done
Forecasting for Bermuda done
Forecasti

## Save Tables

In [23]:
all_entities_pred_df.head()

Unnamed: 0,ds,y,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat,Entity
0,1961-01-01,645553.0,348571.96528,-410566.903827,1336181.0,348571.96528,348571.96528,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,0.0,0.0,0.0,464572.960884,Aruba
1,1962-01-01,708942.0,370306.27032,-383487.773904,1341374.0,370306.27032,370306.27032,99154.731984,99154.731984,99154.731984,99154.731984,99154.731984,99154.731984,0.0,0.0,0.0,469461.002304,Aruba
2,1963-01-01,679088.0,392040.575359,-300708.331478,1316585.0,392040.575359,392040.575359,89222.225921,89222.225921,89222.225921,89222.225921,89222.225921,89222.225921,0.0,0.0,0.0,481262.801281,Aruba
3,1964-01-01,660275.0,413774.880414,-381631.424981,1405229.0,413774.880414,413774.880414,86215.622238,86215.622238,86215.622238,86215.622238,86215.622238,86215.622238,0.0,0.0,0.0,499990.502652,Aruba
4,1965-01-01,592387.0,435568.73151,-256463.054285,1410202.0,435568.73151,435568.73151,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,116000.995604,0.0,0.0,0.0,551569.727113,Aruba


In [24]:
all_entities_pred_df['Entity'].value_counts()

Chad                 61
Romania              61
Israel               61
Singapore            61
Norway               61
                     ..
Eritrea              27
Kosovo               21
Wallis And Futuna    20
Timor                19
Christmas Island     14
Name: Entity, Length: 220, dtype: int64

In [28]:
all_entities_pred_df.to_csv('../data/forecasts_tables/all_entities_forecasts.csv', index=False)