# Germany - Final Model

## Fetch Pre-requisites

In [18]:
import logging
from typing import Optional, Tuple

import pandas
from prophet import Prophet
from prophet.serialize import model_to_json, model_from_json
from sklearn.metrics import mean_squared_error

In [19]:
# Silence the fitting backends: each named logger gets a no-op handler,
# stops propagating to the root logger, and only lets CRITICAL through.
# After the loop ``logger`` refers to the last logger configured.
for logger in map(logging.getLogger, ('cmdstanpy', 'prophet', 'fbprophet')):
    logger.addHandler(logging.NullHandler())
    logger.propagate = False
    logger.setLevel(logging.CRITICAL)

In [20]:
# Country key: selects the processed workbook to load and is reused later
# when naming the saved model file.
country = 'germany'

# Processed data for the selected country, read from the shared data folder.
data_path = f'../../../data/processed/{country}.xlsx'
df = pandas.read_excel(data_path)

## Data Preparation

In [21]:
def make_dataset(
    df_processed: pandas.DataFrame,
    df_covid_measures: Optional[pandas.DataFrame] = None,
) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
    """Build the modelling frame (``ds``/``y``) from the processed data.

    Args:
        df_processed: Frame containing at least the ``Time`` and
            ``Unemployment_Rate_TOT`` columns. It is not modified.
        df_covid_measures: Optional frame that is passed through unchanged.
            When omitted, a fresh empty ``DataFrame`` is returned instead.

    Returns:
        A tuple ``(df_mrd, df_covid_measures)``. ``df_mrd`` holds the columns
        ``ds`` (renamed from ``Time``) and ``y`` (renamed from
        ``Unemployment_Rate_TOT``) with every row whose ``y`` is null removed;
        the surviving rows keep their original index labels.

    Raises:
        KeyError: If either required column is missing from ``df_processed``.
    """
    # A DataFrame created in the signature is built once and shared by every
    # call, so one caller's mutation would leak into the next. Build it here.
    if df_covid_measures is None:
        df_covid_measures = pandas.DataFrame()

    df_mrd = df_processed[['Time', 'Unemployment_Rate_TOT']].rename(
        columns={'Time': 'ds', 'Unemployment_Rate_TOT': 'y'}
    )
    # dropna filters row by row; dropping by index label would also remove
    # valid rows that merely share a label with a null row.
    df_mrd = df_mrd.dropna(subset=['y'])
    return df_mrd, df_covid_measures

In [22]:
# Only the modelling frame is used below; the second value returned by
# make_dataset is discarded.
df_mrd, _ = make_dataset(df)

In [23]:
def train_test_split(df_mrd: pandas.DataFrame, test_size: int = 12) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
    """Split a frame into leading training rows and trailing test rows.

    Args:
        df_mrd: Frame to split. Row order is preserved and the frame itself
            is not modified.
        test_size: Number of trailing rows to hold out; it is forwarded to
            ``DataFrame.tail``.

    Returns:
        A tuple ``(df_train, df_test)`` where ``df_test`` is
        ``df_mrd.tail(test_size)`` and ``df_train`` is a copy of every row
        that precedes it.
    """
    df_test = df_mrd.tail(test_size)
    # Split by position rather than by index label: dropping the test labels
    # would also remove training rows that happen to share one of them.
    df_train = df_mrd.head(len(df_mrd) - len(df_test)).copy()
    return df_train, df_test

In [24]:
# Keep the last 12 rows aside for evaluation; train on everything before.
df_train, df_test = train_test_split(df_mrd, test_size=12)

## Model Training and Testing

In [25]:
# Evaluation model: configured with interval_width=0.95 and fitted on the
# training split only.
forecaster = Prophet(interval_width=0.95)
model = forecaster.fit(df_train)

In [26]:
def test_model(df_test: pandas.DataFrame, model: Prophet) -> Tuple[pandas.DataFrame, float]:
    """Predict on the hold-out frame and score the predictions.

    Args:
        df_test: Frame passed to ``model.predict``; its ``y`` column holds
            the observed values the predictions are compared against.
        model: Model whose ``predict`` method is applied to ``df_test``.

    Returns:
        A tuple ``(df_predicted, rmse)``: the frame returned by
        ``model.predict`` and the root mean squared error between
        ``df_test['y']`` and ``df_predicted['yhat']``.
    """
    df_predicted: pandas.DataFrame = model.predict(df_test)
    # mean_squared_error's ``squared`` flag was deprecated in scikit-learn 1.4
    # and removed in 1.6; taking the root here works on every version.
    mse = mean_squared_error(y_true=df_test['y'], y_pred=df_predicted['yhat'])
    rmse: float = float(mse) ** 0.5
    return df_predicted, rmse

In [27]:
# Predictions and RMSE of the current model on the hold-out rows.
df_predicted, rmse = test_model(df_test=df_test, model=model)

In [28]:
# Display the hold-out predictions: the date (ds), the point forecast (yhat)
# and the upper/lower columns that accompany it.
df_predicted[['ds', 'yhat', 'yhat_upper', 'yhat_lower']]

Unnamed: 0,ds,yhat,yhat_upper,yhat_lower
0,2022-03-01,3.140839,3.729709,2.575361
1,2022-04-01,3.116249,3.675472,2.533573
2,2022-05-01,3.073837,3.677421,2.474881
3,2022-06-01,3.063025,3.663279,2.484457
4,2022-07-01,3.051551,3.640152,2.501345
5,2022-08-01,3.046084,3.632216,2.388679
6,2022-09-01,3.044429,3.640863,2.417898
7,2022-10-01,3.03713,3.618238,2.430586
8,2022-11-01,3.015104,3.603431,2.432631
9,2022-12-01,3.000395,3.634844,2.327215


In [29]:
# Display the RMSE returned by test_model for the hold-out rows.
rmse

0.07215052847565874

## Final Model

In [30]:
# Final model: same configuration (interval_width=0.95), fitted on the whole
# modelling frame rather than on the training split.
forecaster = Prophet(interval_width=0.95)
model = forecaster.fit(df_mrd)

### Save Model

In [31]:
# Persist the fitted model as JSON in the working directory, in a file named
# after the country.
with open(f'{country}_final_model.json', 'w') as model_file:
    serialized_model = model_to_json(model)
    model_file.write(serialized_model)

## Final Results

In [32]:
# Dates to forecast, supplied as strings in the ``ds`` column.
forecast_dates = ['2023-03-01', '2023-04-01', '2023-05-01']
df_future = pandas.DataFrame({'ds': forecast_dates})

In [33]:
# Run the current (final) model on the future dates.
df_future_prediction: pandas.DataFrame = model.predict(df_future)

In [34]:
# Display the future predictions: the date (ds), the point forecast (yhat)
# and the upper/lower columns that accompany it.
df_future_prediction[['ds', 'yhat', 'yhat_upper', 'yhat_lower']]

Unnamed: 0,ds,yhat,yhat_upper,yhat_lower
0,2023-03-01,3.123132,3.627197,2.561803
1,2023-04-01,3.071937,3.643932,2.58361
2,2023-05-01,2.996328,3.5309,2.481786
