In [19]:
from typing import Tuple

import itertools

import pandas

from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from prophet.serialize import model_to_json, model_from_json

from sklearn.metrics import mean_squared_error

In [4]:
country = 'france'

df = pandas.read_excel(io = f'../../../data/processed/{country}.xlsx')

In [5]:
def make_dataset(df_processed: pandas.DataFrame, df_covid_measures: pandas.DataFrame = pandas.DataFrame()) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
    df_mrd = df_processed[['Time', 'Unemployment_Rate_TOT']].rename(
        columns = {'Time': 'ds', 'Unemployment_Rate_TOT': 'y'}
    )
    df_mrd = df_mrd.drop(index = df_mrd[pandas.isnull(df_mrd['y'])].index, inplace = False)
    df_covid_holidays = pandas.DataFrame()
    return df_mrd, df_covid_holidays

In [6]:
df_mrd, _ = make_dataset(df)

In [7]:
def train_test_split(df_mrd: pandas.DataFrame, test_size: int = 12) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
    df_test = df_mrd.tail(test_size)
    df_train = df_mrd.drop(index = df_mrd.tail(test_size).index, inplace = False)
    return df_train, df_test

In [8]:
df_train, df_test = train_test_split(df_mrd, 12)

In [13]:
def train_model(df: pandas.DataFrame, param_grid: dict, eval_metric: str = 'rmse', eval_metric_optimize: str = 'min'):
    # Generate all combinations of parameters
    all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
    eval_metric_values = []  # Store the eval_metric_values for each params here

    # Use cross validation to evaluate all parameters
    for params in all_params:
        m = Prophet(**params).fit(df)  # Fit model with given params
        df_cv = cross_validation(m, horizon = '60 days', parallel="processes")
        df_p = performance_metrics(df_cv, rolling_window=1, monthly=True)
        eval_metric_values.append(df_p[eval_metric].values[0])

    # Find the best parameters
    tuning_results = pandas.DataFrame(all_params)
    tuning_results[eval_metric] = eval_metric_values

    tuning_results.sort_values(by = eval_metric, axis = 'index', ascending = True, inplace = True)
    best_params = {}
    if eval_metric_optimize == 'min':
        best_params = tuning_results.iloc[0].to_dict()
    elif eval_metric_optimize == 'max':
        best_params = tuning_results.iloc[-1].to_dict()
    else:
        raise ValueError("Invalid Parameter Value: param 'eval_metric_optimize' may only have values 'min' or 'max'.")


    optimal_model = Prophet(**params).fit(df)
    return optimal_model

In [14]:
param_grid = {  
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
}

model = train_model(df_train, param_grid)

00:53:29 - cmdstanpy - INFO - Chain [1] start processing
00:53:29 - cmdstanpy - INFO - Chain [1] done processing
00:53:29 - cmdstanpy - ERROR - Chain [1] error: error during processing Communication error on send
Optimization terminated abnormally. Falling back to Newton.
00:53:29 - cmdstanpy - INFO - Chain [1] start processing
00:53:31 - cmdstanpy - INFO - Chain [1] done processing
00:53:31 - cmdstanpy - INFO - Chain [1] start processing
00:53:31 - cmdstanpy - INFO - Chain [1] start processing
00:53:31 - cmdstanpy - INFO - Chain [1] start processing
00:53:31 - cmdstanpy - INFO - Chain [1] start processing
00:53:31 - cmdstanpy - INFO - Chain [1] start processing
00:53:31 - cmdstanpy - INFO - Chain [1] start processing
00:53:31 - cmdstanpy - INFO - Chain [1] start processing
00:53:31 - cmdstanpy - INFO - Chain [1] start processing
00:53:31 - cmdstanpy - INFO - Chain [1] done processing
00:53:31 - cmdstanpy - INFO - Chain [1] done processing
00:53:31 - cmdstanpy - INFO - Chain [1] done p

In [15]:
def test_model(df_test: pandas.DataFrame, model):
    "return predicted values and rmse"
    df_predicted = model.predict(df_test)
    rmse = mean_squared_error(y_true = df_test['y'], y_pred = df_predicted['yhat'], squared = False)
    return df_predicted, rmse

In [16]:
df_predicted, rmse = test_model(df_test, model)

In [17]:
df_predicted

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2022-03-01,7.477145,7.192501,7.705617,7.477145,7.477145,-0.029348,-0.029348,-0.029348,-0.029348,-0.029348,-0.029348,0.0,0.0,0.0,7.447797
1,2022-04-01,7.443042,7.150717,7.677763,7.443042,7.443042,-0.036748,-0.036748,-0.036748,-0.036748,-0.036748,-0.036748,0.0,0.0,0.0,7.406294
2,2022-05-01,7.410039,7.127217,7.653162,7.410039,7.410039,-0.030116,-0.030116,-0.030116,-0.030116,-0.030116,-0.030116,0.0,0.0,0.0,7.379924
3,2022-06-01,7.375936,7.079268,7.642682,7.366326,7.375936,-0.015331,-0.015331,-0.015331,-0.015331,-0.015331,-0.015331,0.0,0.0,0.0,7.360605
4,2022-07-01,7.342933,7.078375,7.642513,7.30069,7.360095,0.017364,0.017364,0.017364,0.017364,0.017364,0.017364,0.0,0.0,0.0,7.360297
5,2022-08-01,7.30883,6.999217,7.616217,7.231457,7.351569,0.008436,0.008436,0.008436,0.008436,0.008436,0.008436,0.0,0.0,0.0,7.317266
6,2022-09-01,7.274727,6.954901,7.590659,7.161785,7.35179,0.01606,0.01606,0.01606,0.01606,0.01606,0.01606,0.0,0.0,0.0,7.290787
7,2022-10-01,7.241725,6.905893,7.579507,7.084051,7.35351,0.011578,0.011578,0.011578,0.011578,0.011578,0.011578,0.0,0.0,0.0,7.253303
8,2022-11-01,7.207622,6.842215,7.581786,6.987115,7.361235,0.029989,0.029989,0.029989,0.029989,0.029989,0.029989,0.0,0.0,0.0,7.237611
9,2022-12-01,7.174619,6.818007,7.570819,6.905816,7.366174,0.028139,0.028139,0.028139,0.028139,0.028139,0.028139,0.0,0.0,0.0,7.202758


In [18]:
rmse

0.09831839132623509

In [22]:
with open(f'{country}_prophet_cv_model.json', 'w') as f:
    f.write(model_to_json(model))

In [21]:
df_predicted

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2022-03-01,7.477145,7.192501,7.705617,7.477145,7.477145,-0.029348,-0.029348,-0.029348,-0.029348,-0.029348,-0.029348,0.0,0.0,0.0,7.447797
1,2022-04-01,7.443042,7.150717,7.677763,7.443042,7.443042,-0.036748,-0.036748,-0.036748,-0.036748,-0.036748,-0.036748,0.0,0.0,0.0,7.406294
2,2022-05-01,7.410039,7.127217,7.653162,7.410039,7.410039,-0.030116,-0.030116,-0.030116,-0.030116,-0.030116,-0.030116,0.0,0.0,0.0,7.379924
3,2022-06-01,7.375936,7.079268,7.642682,7.366326,7.375936,-0.015331,-0.015331,-0.015331,-0.015331,-0.015331,-0.015331,0.0,0.0,0.0,7.360605
4,2022-07-01,7.342933,7.078375,7.642513,7.30069,7.360095,0.017364,0.017364,0.017364,0.017364,0.017364,0.017364,0.0,0.0,0.0,7.360297
5,2022-08-01,7.30883,6.999217,7.616217,7.231457,7.351569,0.008436,0.008436,0.008436,0.008436,0.008436,0.008436,0.0,0.0,0.0,7.317266
6,2022-09-01,7.274727,6.954901,7.590659,7.161785,7.35179,0.01606,0.01606,0.01606,0.01606,0.01606,0.01606,0.0,0.0,0.0,7.290787
7,2022-10-01,7.241725,6.905893,7.579507,7.084051,7.35351,0.011578,0.011578,0.011578,0.011578,0.011578,0.011578,0.0,0.0,0.0,7.253303
8,2022-11-01,7.207622,6.842215,7.581786,6.987115,7.361235,0.029989,0.029989,0.029989,0.029989,0.029989,0.029989,0.0,0.0,0.0,7.237611
9,2022-12-01,7.174619,6.818007,7.570819,6.905816,7.366174,0.028139,0.028139,0.028139,0.028139,0.028139,0.028139,0.0,0.0,0.0,7.202758


In [23]:
df_future = pandas.DataFrame(data = {'ds': ['2023-03-01', '2023-04-01', '2023-05-01']})

In [24]:
df_future_prediction = model.predict(df_future)

In [25]:
df_future_prediction

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2023-03-01,7.07561,6.76032,7.276107,7.07561,7.07561,-0.047068,-0.047068,-0.047068,-0.047068,-0.047068,-0.047068,0.0,0.0,0.0,7.028542
1,2023-04-01,7.041507,6.729585,7.241449,7.041507,7.041507,-0.049739,-0.049739,-0.049739,-0.049739,-0.049739,-0.049739,0.0,0.0,0.0,6.991768
