# Germany - Final Model

## Fetch Prerequisites

In [1]:
import logging
from typing import Optional, Tuple

import pandas

from prophet import Prophet
from prophet.serialize import model_to_json, model_from_json

from sklearn.metrics import mean_squared_error

In [2]:
# Silence the 'cmdstanpy', 'prophet' and 'fbprophet' loggers: give each a
# no-op handler, stop records from propagating to parent loggers, and let
# only CRITICAL records through. After the loop `logger` refers to the
# 'fbprophet' logger (the last one configured).
for _logger_name in ('cmdstanpy', 'prophet', 'fbprophet'):
    logger = logging.getLogger(_logger_name)
    logger.addHandler(logging.NullHandler())
    logger.propagate = False
    logger.setLevel(logging.CRITICAL)

In [3]:
# Country key; it selects the processed workbook to load and is reused
# later in the notebook when naming the saved model file.
country = 'germany'

# Load the processed data for that country from the data directory.
df = pandas.read_excel(f'../../../data/processed/{country}.xlsx')

## Data Preparation

In [4]:
def make_dataset(df_processed: pandas.DataFrame, df_covid_measures: Optional[pandas.DataFrame] = None) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
    """Build the two-column frame used for modelling.

    Selects the ``Time`` and ``Unemployment_Rate_TOT`` columns of
    ``df_processed``, renames them to ``ds`` and ``y``, and removes every
    row whose ``y`` is null. ``df_processed`` itself is not modified.

    Args:
        df_processed: Frame containing at least the ``Time`` and
            ``Unemployment_Rate_TOT`` columns.
        df_covid_measures: Optional frame that is returned unchanged.
            When omitted, a new empty DataFrame is created for each call.

    Returns:
        A tuple ``(df_mrd, df_covid_measures)``.

    Raises:
        KeyError: If either required column is missing from ``df_processed``.
    """
    # A DataFrame default would be created once and shared by every call;
    # build a fresh one per call so callers cannot affect each other.
    if df_covid_measures is None:
        df_covid_measures = pandas.DataFrame()

    df_mrd = df_processed[['Time', 'Unemployment_Rate_TOT']].rename(
        columns={'Time': 'ds', 'Unemployment_Rate_TOT': 'y'}
    )
    # dropna filters row by row; dropping by index label would also remove
    # non-null rows that happen to share a label with a null row.
    df_mrd = df_mrd.dropna(subset=['y'])
    return df_mrd, df_covid_measures

In [5]:
# Build the ds/y modelling frame from the loaded data; the second return
# value is not used here.
df_mrd, _ = make_dataset(df)

In [6]:
def train_test_split(df_mrd: pandas.DataFrame, test_size: int = 12) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
    """Split a frame into leading training rows and trailing test rows.

    The split is positional: the last ``test_size`` rows form the test
    set and all rows before them form the training set. Row order is
    taken as-is; the frame is not sorted.

    Args:
        df_mrd: Frame to split.
        test_size: Number of trailing rows to hold out. If it is at least
            the number of rows, the training set is empty and the test
            set is the whole frame.

    Returns:
        A tuple ``(df_train, df_test)``.

    Raises:
        ValueError: If ``test_size`` is negative.
    """
    if test_size < 0:
        raise ValueError(f'test_size must be non-negative, got {test_size}')

    # Slice by position instead of dropping by index label, so repeated
    # index labels cannot remove extra rows from the training set.
    split_at = max(len(df_mrd) - test_size, 0)
    df_train = df_mrd.iloc[:split_at].copy()
    df_test = df_mrd.iloc[split_at:]
    return df_train, df_test

In [7]:
# Hold out the last 12 rows of df_mrd as the test set; the rest is training data.
df_train, df_test = train_test_split(df_mrd, 12)

## Model Training and Testing

In [8]:
# Fit a Prophet model with default settings on the training rows only.
model = Prophet().fit(df_train)

In [9]:
def test_model(df_test: pandas.DataFrame, model: Prophet) -> Tuple[pandas.DataFrame, float]:
    """Predict on the held-out rows and score the forecast with RMSE.

    Args:
        df_test: Frame passed to ``model.predict``; its ``y`` column holds
            the observed values the predictions are compared against.
        model: Model whose ``predict`` output provides the ``yhat`` column.

    Returns:
        A tuple ``(df_predicted, rmse)``: the frame returned by
        ``model.predict`` and the root mean squared error between
        ``df_test['y']`` and ``df_predicted['yhat']``.
    """
    df_predicted: pandas.DataFrame = model.predict(df_test)
    # Take the square root explicitly rather than passing `squared=False`:
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
    # whereas the plain MSE call works on every version.
    mse = mean_squared_error(y_true=df_test['y'], y_pred=df_predicted['yhat'])
    rmse: float = float(mse) ** 0.5
    return df_predicted, rmse

In [10]:
# Predict on the held-out rows and compute the RMSE against their observed values.
df_predicted, rmse = test_model(df_test, model)

In [11]:
# Show the held-out forecast together with its yhat_upper / yhat_lower columns.
df_predicted[['ds', 'yhat', 'yhat_upper', 'yhat_lower']]

Unnamed: 0,ds,yhat,yhat_upper,yhat_lower
0,2022-03-01,3.140839,3.518895,2.744342
1,2022-04-01,3.116249,3.536859,2.76262
2,2022-05-01,3.073837,3.443537,2.70933
3,2022-06-01,3.063025,3.437324,2.664636
4,2022-07-01,3.051551,3.430613,2.636729
5,2022-08-01,3.046084,3.413936,2.626206
6,2022-09-01,3.044429,3.422596,2.655875
7,2022-10-01,3.03713,3.450356,2.625059
8,2022-11-01,3.015104,3.389279,2.610453
9,2022-12-01,3.000395,3.409444,2.597934


In [12]:
# Display the RMSE obtained on the held-out rows.
rmse

0.07215052847565874

## Final Model

In [13]:
# Refit a default Prophet model on all of df_mrd (training and test rows
# together); this rebinds `model`.
model = Prophet().fit(df_mrd)

### Save Model

In [14]:
# Serialize before opening the target file, so that a failure inside
# model_to_json cannot leave a truncated, empty file behind.
serialized_model = model_to_json(model)

# Write as UTF-8 explicitly instead of relying on the platform's default
# text encoding.
with open(f'{country}_final_model.json', 'w', encoding='utf-8') as f:
    f.write(serialized_model)

## Final Results

In [15]:
# Dates to forecast, held as strings in a single 'ds' column.
df_future = pandas.DataFrame(
    ['2023-03-01', '2023-04-01', '2023-05-01'],
    columns=['ds'],
)

In [16]:
# Predict for the dates in df_future using the model currently bound to `model`.
df_future_prediction: pandas.DataFrame = model.predict(df_future)

In [17]:
# Show the future forecast together with its yhat_upper / yhat_lower columns.
df_future_prediction[['ds', 'yhat', 'yhat_upper', 'yhat_lower']]

Unnamed: 0,ds,yhat,yhat_upper,yhat_lower
0,2023-03-01,3.123132,3.45362,2.779315
1,2023-04-01,3.071937,3.437925,2.723891
2,2023-05-01,2.996328,3.34055,2.604131
