In [None]:
import pandas as pd
import numpy as np
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot as plt
from pandas import Series
from sklearn.metrics import mean_squared_error
from datetime import datetime
from ipywidgets import IntProgress
from multiprocessing import Pool, cpu_count
import pickle

In [None]:
from math import sqrt
from multiprocessing import cpu_count
from joblib import Parallel
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from fbprophet import Prophet

In [None]:
import os

In [None]:
from tqdm import tqdm_notebook

In [None]:
import warnings
# Silence every warning category for the rest of the session. Note that this
# also hides deprecation notices emitted by the libraries imported above.
warnings.filterwarnings('ignore')

### GLOBAL VARIABLES

In [None]:
# Directory holding the pickled input dataset and sample_submission.csv,
# and the dataset's base file name (the '.pkl' extension is added on load).
INPUT_PATH = '../../../data/processed'
INPUT_FILE_NAME = 'dataproc_v002_HNNMF'
# Directory and base file name under which the list of forecasts is pickled.
MODEL_PATH = '../../../models/prophet'
MODEL_NAME = 'prophet_004'
# Directory and base file name of the submission CSV written when not on Kaggle.
OUTPUT_PATH = '../../../data/submission'
OUTPUT_NAME = 'submission_008'
# NOTE(review): NRUN is not referenced anywhere in the visible notebook — confirm it is still needed.
NRUN = 2
# Forecast horizon: default number of periods predicted per series and the
# number of F-columns (F1..F28) in the submission file.
DAYS_PRED = 28
# NOTE(review): METRIC is not referenced in the visible notebook; presumably it names the rmse helper — confirm.
METRIC = 'rmse'

### FUNCTIONS

In [None]:
def on_kaggle():
    """Tell whether the ``KAGGLE_KERNEL_RUN_TYPE`` environment variable is set.

    The variable's value is irrelevant; only its presence is checked.

    Returns:
        bool: True when the variable exists in the environment, False otherwise.
    """
    return os.environ.get("KAGGLE_KERNEL_RUN_TYPE") is not None

In [None]:
def rmse(y_true, y_pred):
    """Return the root of ``mean_squared_error(y_true, y_pred)``.

    Args:
        y_true: Ground-truth values, passed through to ``mean_squared_error``.
        y_pred: Predicted values, passed through to ``mean_squared_error``.

    Returns:
        The square root (``np.sqrt``) of the mean squared error.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

In [None]:
def run_prophet(train, period=DAYS_PRED):
    """Fit a Prophet model on one series and forecast the periods that follow it.

    Args:
        train: Frame handed to ``Prophet.fit``; the callers in this notebook
            pass frames whose columns are named ``ds`` and ``y``.
        period: Number of future periods to forecast (defaults to ``DAYS_PRED``).

    Returns:
        The frame returned by ``Prophet.predict`` for the future periods only;
        the training history is excluded.

    Any exception raised while fitting or predicting propagates to the caller.
    """
    model = Prophet(
        yearly_seasonality=True,
        daily_seasonality=False,
        weekly_seasonality=True,
        seasonality_prior_scale=0.1,
    )

    # Warnings raised while fitting and predicting are suppressed inside this scope.
    with catch_warnings():
        filterwarnings("ignore")
        model.fit(train)
        horizon = model.make_future_dataframe(periods=period, include_history=False)
        forecast = model.predict(horizon)

    return forecast

In [None]:
def make_submission(test, submission):
    """Reshape long-format forecasts into the submission layout and write them to CSV.

    The file is written to ``submission.csv`` when ``on_kaggle()`` is true,
    otherwise to ``{OUTPUT_PATH}/{OUTPUT_NAME}.csv``.

    Args:
        test: Frame with ``id``, ``date`` and ``demand`` columns holding one
            forecast per id and date; it must contain exactly ``DAYS_PRED``
            distinct dates, because the pivoted columns are renamed to
            ``F1`` .. ``F<DAYS_PRED>``.
        submission: Sample-submission frame. Its ``id`` column drives the merge,
            and its rows whose id ends with "evaluation" are appended unchanged.

    Raises:
        AssertionError: If the result contains missing values or its ``id``
            column does not equal that of ``submission``.
    """
    horizon_cols = ["F" + str(day + 1) for day in range(DAYS_PRED)]

    # One row per id, one column per forecast date.
    wide = test[["id", "date", "demand"]].pivot(index="id", columns="date", values="demand")
    wide = wide.reset_index()
    wide.columns = ["id"] + horizon_cols

    # Forecast rows matched on id, followed by the untouched "evaluation" rows.
    forecast_rows = submission[["id"]].merge(wide, how="inner", on="id")
    is_evaluation = submission["id"].str.endswith("evaluation")
    final = pd.concat([forecast_rows, submission[is_evaluation]])

    assert final.drop("id", axis=1).isnull().sum().sum() == 0
    assert final["id"].equals(submission["id"])

    target = "submission.csv" if on_kaggle() else f"{OUTPUT_PATH}/{OUTPUT_NAME}.csv"
    final.to_csv(target, index=False)

### LOAD DATASET

In [None]:
# Load the input dataset from its pickle file.
# NOTE: unpickling can execute arbitrary code — only load files from a trusted source.
data = pd.read_pickle(f'{INPUT_PATH}/{INPUT_FILE_NAME}.pkl')

In [None]:
# Sample submission; make_submission uses its `id` column to select and check the output rows.
submission = pd.read_csv(f'{INPUT_PATH}/sample_submission.csv')

In [None]:
# Keep only the rows whose `part` column is 'train'.
train = data.loc[data['part'] == 'train']

In [None]:
# Columns kept for modelling. The order matters: the per-series split further
# down takes the last two columns positionally and renames them to ds / y.
features = ['id','date', 'demand_hankel']

In [None]:
# Restrict the training frame to the modelling columns, in the order given by `features`.
train = train.loc[:, features]

In [None]:
# Order rows by id, then by date (ascending), so each series forms one contiguous block.
train = train.sort_values(by=['id', 'date'])

In [None]:
# Distinct ids in order of first appearance in the training frame.
idx_vector = train['id'].unique()

In [None]:
# Split the training frame into one Prophet-ready frame per series.
# Series boundaries are read from the id column itself instead of assuming a
# fixed number of rows per id: a hard-coded step silently mixes rows of
# different ids as soon as one series has a different length.
# Requires the rows of each id to be contiguous (the frame is sorted by id, date).
# The resulting order is the order of first appearance, i.e. that of train.id.unique().
id_values = train['id'].to_numpy()
starts_series = np.ones(len(id_values), dtype=bool)
# A row opens a new series when its id differs from the id of the previous row.
starts_series[1:] = id_values[1:] != id_values[:-1]
series_starts = np.flatnonzero(starts_series)
series_ends = np.append(series_starts[1:], len(id_values))

train_vector = []
for init, end in tqdm_notebook(zip(series_starts, series_ends), total=len(series_starts)):
    # Last two columns (date, target), renamed to the ds / y names used for Prophet input.
    ts = train.iloc[init:end, -2:]
    ts.columns = ['ds', 'y']
    train_vector.append(ts)
    

### TRAIN MODEL

In [None]:
# Fit one Prophet model per series in parallel, one worker per CPU core.
tic = datetime.now()
# The context manager releases the worker processes even if a fit raises.
with Pool(cpu_count()) as p:
    # imap yields results in input order, so predictions[i] belongs to train_vector[i].
    predictions = list(tqdm_notebook(p.imap(run_prophet, train_vector), total=len(train_vector)))
    p.close()
    p.join()
toc = datetime.now()
# total_seconds() covers the whole interval; .seconds would drop full days on runs of 24h or more.
print("Total time " ,(toc - tic).total_seconds()/60, " min")

In [None]:
# Persist the list of forecasts; the context manager guarantees the file is flushed and closed.
with open(f'{MODEL_PATH}/{MODEL_NAME}.pkl', 'wb') as model_file:
    pickle.dump(predictions, model_file)

In [None]:
# Tag every forecast with its series id and stack them into one long frame.
# The frames are collected in a list and concatenated once: growing a frame
# with DataFrame.append inside the loop re-copies all rows on every iteration,
# and DataFrame.append no longer exists in pandas >= 2.0.
# zip() would silently truncate on a length mismatch and mislabel nothing but
# also drop series, so fail loudly instead.
assert len(idx_vector) == len(predictions), "expected exactly one forecast per series id"
forecast_frames = [
    # First and last column of the forecast, plus a new trailing `id` column;
    # assign() returns a new frame rather than writing into a slice.
    pred.iloc[:, [0, -1]].assign(id=idx)
    for idx, pred in zip(idx_vector, tqdm_notebook(predictions))
]
# pd.concat rejects an empty list, so fall back to an empty frame in that case.
test = pd.concat(forecast_frames) if forecast_frames else pd.DataFrame()
    

In [None]:
# Rename positionally (first forecast column, last forecast column, id) to the
# column names that make_submission reads.
test.columns = ["date", "demand", "id"]

In [None]:
# Pivot the forecasts into the submission layout, run the sanity checks and write the CSV.
make_submission(test,submission)