# Baseline II: Facebook Prophet

[[Prophet]](https://facebook.github.io/prophet/)

In [2]:
from datetime import timedelta, datetime
import itertools
import json
import matplotlib.pyplot as plt
from matplotlib import rcParams
import numpy as np
import os
import pandas as pd
from pathlib import Path
import mlflow
from fbprophet import Prophet
import yaml

# Resolve the project root from the notebook's working directory: the
# notebook is opened two folders below the root, so step up twice.
current_dir = Path.cwd()
proj_path = current_dir.parent.parent

# Expose the helper modules under <project>/src to this notebook.
import sys
src_dir = str(proj_path / 'src')
sys.path.append(src_dir)

from metrics import mean_absolute_percentage_error, get_metrics
from utils import make_dates, create_folder

conf_dir = proj_path / 'conf'

# catalog.yml holds every dataset-related path; only the 'breakfast'
# section is used in this notebook.
with (conf_dir / 'catalog.yml').open("r") as f:
    catalog = yaml.safe_load(f)['breakfast']

# params.yml holds the dataset-creation and model parameters.
with (conf_dir / 'params.yml').open("r") as f:
    params = yaml.safe_load(f)

# Experiment

In [4]:
# Create the mlflow tracking folder up front.
mlruns_dir = os.path.join(proj_path, 'mlruns')
create_folder(mlruns_dir)

# Step 1: Read the merged dataset from the location given in the catalog.
merged_path = os.path.join(proj_path,
                           catalog['output_dir']['dir'],
                           catalog['output_dir']['merged'])
merged_data = pd.read_csv(merged_path)

# Parse the week-end dates and move each of them three days forward.
# NOTE(review): the reason for the 3-day shift is not visible in this
# notebook -- presumably it aligns the dates with the fold boundaries
# produced by make_dates; confirm and name the constant.
week_end_dates = pd.to_datetime(merged_data['WEEK_END_DATE'])
merged_data['WEEK_END_DATE'] = week_end_dates + timedelta(days=3)

# Step 2: Create the date folds. Each row of date_ranges is later unpacked
# into train/valid/test start and end dates.
date_ranges = make_dates(params['breakfast']['experiment_dates'])

# Step 3: Build every (store, upc) combination to process.
# For each pair the experiment iterates over the folds, fits the model on
# the training window and predicts the test period of that fold.
dataset_params = params['breakfast']['dataset']
stores = [*dataset_params['store_ids'].keys()]
upcs = [*dataset_params['upc_ids'].keys()]
store_upc_pairs = [(store, upc) for store in stores for upc in upcs]

# Point mlflow at the local tracking folder once: the location is the same
# for every store/upc pair, so it does not need to be reset per iteration.
mlflow.set_tracking_uri(os.path.join(proj_path, 'mlruns'))

for store_id, upc_id in store_upc_pairs:
    print(f'Processing store {store_id} upc {upc_id}')

    # One mlflow experiment per store/upc pair.
    mlflow.set_experiment(f'{store_id}_{upc_id}')

    # Run name of the form 'prophet_YYYY-MM-DD HH:MM:SS', shared by all folds
    # of this pair. It must be passed to mlflow.start_run (see below):
    # assigning it to an attribute of the mlflow module does not name a run.
    run_name = 'prophet_' + str(datetime.today())[:19]

    # Select the rows of this pair once instead of re-filtering for every fold.
    # Prophet expects two columns: 'ds' for the dates and 'y' for the values.
    pair_mask = (merged_data['STORE_NUM'] == store_id) & (merged_data['UPC'] == upc_id)
    pair_data = (merged_data.loc[pair_mask, ['WEEK_END_DATE', 'UNITS']]
                 .rename(columns={'WEEK_END_DATE': 'ds', 'UNITS': 'y'}))

    # Iterate over each period (fold); the leading element of every tuple is
    # the DataFrame index and is ignored.
    for _, train_start, train_end, valid_start, valid_end, test_start, test_end in date_ranges.itertuples():
        print(f'Processing range {str(train_start.date())} to {str(test_end.date())}')

        # No hyper-parameter search is done here, so the validation period is
        # folded into the training window (train_start .. valid_end).
        train_x = pair_data[(pair_data['ds'] >= train_start) &
                            (pair_data['ds'] <= valid_end)]
        # Copy so that adding the 'preds' column later does not touch pair_data.
        test_y = pair_data[(pair_data['ds'] >= test_start) &
                           (pair_data['ds'] <= test_end)].copy()

        # Prophet cannot be fitted on fewer than two observations and an empty
        # test period leaves nothing to score, so skip such folds instead of
        # aborting the whole sweep.
        if len(train_x) < 2 or test_y.empty:
            print(f'Skipping range {str(train_start.date())} to {str(test_end.date())}: '
                  f'not enough data for store {store_id} upc {upc_id}')
            continue

        # Walk forward through the test period making one-step-ahead (7-day)
        # forecasts. At step i the history is a sliding window of constant
        # length: the oldest i training rows are dropped and the first i
        # (already observed) test rows are appended.
        predictions = []
        for i in range(test_y.shape[0]):

            # Instantiate a new Prophet object that represents the model.
            # NOTE(review): the forecast step is '7D', so the series appears to
            # be weekly; weekly_seasonality may add nothing on such data -- confirm.
            model = Prophet(weekly_seasonality=True,
                            yearly_seasonality=True,
                            daily_seasonality=False)

            # Include the built-in US holiday collection in the model.
            model.add_country_holidays(country_name='US')

            # Fit the FB Prophet model on the current window.
            history = pd.concat([train_x.iloc[i:], test_y.iloc[:i]])
            model.fit(history)

            # Only the single future step is needed, so skip predicting the
            # whole history on every iteration.
            # NOTE(review): the forecast date is "last history date + 7 days";
            # this matches test_y's i-th date only if the series has no missing
            # weeks -- confirm.
            future = model.make_future_dataframe(periods=1, freq='7D',
                                                 include_history=False)
            fcst = model.predict(future)['yhat'].iloc[-1]
            predictions.append(fcst)

        run_metrics = get_metrics(test_y['y'].values, predictions)

        # Store the predictions next to the actuals, one file per fold and pair.
        fdir = os.path.join(proj_path, catalog['results']['dir'], f'{str(test_end.date())}')
        fname = os.path.join(fdir, f'prophet_{store_id}_{upc_id}.csv')
        create_folder(fdir)

        test_y['preds'] = predictions
        test_y.to_csv(fname)

        # Log the fold as its own mlflow run under the pair's experiment.
        with mlflow.start_run(run_name=run_name):
            mlflow.log_artifact(fname)
            mlflow.log_param('model', 'prophet')
            mlflow.log_metrics(run_metrics)


Processing store 2277 upc 1600027527
Processing range 2009-01-17 to 2011-01-29
