In [4]:
import sys
import numpy as np
import pandas as pd
from statsmodels.tsa.arima_model import ARIMA

sys.path.append('./sources')
from models import predict_knn, predict_LDMM, predict_SAME
%load_ext autoreload
%autoreload 2

### Loading data

In [5]:
data = pd.read_csv('data/data.csv')

In [6]:
data = data.to_numpy()[:,1:].astype(np.float)

In [7]:
#Excluding seasonability 
timestamps = np.arange(data.shape[0])
mask = np.tile(np.append(np.ones(1), np.append(np.zeros(10), np.ones(1))), 20)[:-2]
timestamps = timestamps[mask==0]
data = data[mask==0]

### Setting parameters

In [8]:
num_series = data.shape[1]
# Number of test months
n_test = 40
# LDMM parameters
h_sqr = 0.001
bandwidth_ldmm, lambd, mu, n_iter_ldmm = 11, h_sqr / 7.0, 1500.0,  7
nn_ldmm = [60, 10, 7, 7]
# SAME parameters
bandwidth_same, tau, n_iter_same = 3, 1.0, 21
nn_same = [9, 21, 21, 15]
# kNN parameters
bandwidth_knn = 12
nn_knn = [5, 30, 30, 30]

In [15]:
losses = []
for lookfront in [1,2,3,4]:
    predictions_knn   = np.empty((n_test, lookfront, num_series))
    predictions_ldmm  = np.empty((n_test, lookfront, num_series))
    predictions_same  = np.empty((n_test, lookfront, num_series))
    predictions_arima = np.empty((n_test, lookfront, num_series))

    outcomes = np.empty((n_test, lookfront, num_series))
    for i in range(n_test):

        Y_train_knn   = data[:-n_test+i+1-lookfront, :]
        Y_train_ldmm  = data[:-n_test+i+1-lookfront, :]
        Y_train_same  = data[:-n_test+i+1-lookfront, :]
        Y_train_arima = data[:-n_test+i+1-lookfront, :]

        for k in range(lookfront):
            timestamps_train = timestamps[:-n_test+i+1-lookfront+k]
            timestamp_test = timestamps[-n_test+i+1-lookfront+k]
            Y_test = data[-n_test+i+1-lookfront+k, :]

            predictions_knn[i, k, :] = predict_knn(timestamps_train, Y_train_ldmm, timestamp_test,\
                                                   bandwidth_knn, nn_knn)[:]
            predictions_ldmm[i, k, :] = predict_LDMM(timestamps_train, Y_train_ldmm, timestamp_test,\
                                                    bandwidth_ldmm, lambd, mu, h_sqr, n_iter_ldmm, nn_ldmm)[:]
            predictions_same[i, k, :] = predict_SAME(timestamps_train, Y_train_same, timestamp_test,\
                                                    bandwidth_same, tau, n_iter_same, nn_same)[:]
            for t in range(num_series):
                arima_model = ARIMA(Y_train_arima[:, t], order=(6,1,0))
                predictions_arima[i, k, t] = arima_model.fit(disp=0, trend='nc').forecast(steps=1)[0] 


            outcomes[i, k, :] = Y_test[:]
            Y_train_knn = np.append(Y_train_knn, predictions_knn[i, k, :].reshape(1,-1), axis=0)
            Y_train_ldmm = np.append(Y_train_ldmm, predictions_ldmm[i, k, :].reshape(1,-1), axis=0)
            Y_train_same = np.append(Y_train_same, predictions_same[i, k, :].reshape(1,-1), axis=0)
            Y_train_arima = np.append(Y_train_arima, predictions_arima[i, k, :].reshape(1,-1), axis=0)


    new_loss_knn = np.mean((predictions_knn[:,-1]-outcomes[:,-1])**2/ outcomes[:,-1]**2, axis=0)
    new_loss_ldmm = np.mean((predictions_ldmm[:,-1]-outcomes[:,-1])**2 / outcomes[:,-1]**2, axis=0)
    new_loss_same = np.mean((predictions_same[:,-1]-outcomes[:,-1])**2 / outcomes[:,-1]**2, axis=0)
    new_loss_arima = np.mean((predictions_arima[:,-1]-outcomes[:,-1])**2 / outcomes[:,-1]**2, axis=0)

    print(lookfront)
    print('knn', new_loss_knn)
    print('ldmm', new_loss_ldmm)
    print('same', new_loss_same)
    print('arima', new_loss_arima)

    print()


1
knn [0.00167857 0.00393715 0.00239861 0.00177453]
ldmm [0.00155654 0.00243162 0.00152581 0.00142134]
same [0.00116254 0.0018553  0.00151837 0.0016549 ]
arima [0.00834563 0.00213668 0.00145322 0.0015864]

2
knn [0.00176320 0.00435124 0.00303796 0.00427745]
ldmm [0.0014592 0.0028762 0.00198019 0.0042891]
same [0.001451 0.00245318 0.00209719 0.00394271]
arima [0.0017620 0.00249471 0.00194026 0.00375188]

3
knn [0.0030488  0.00709743 0.0042438  0.00584049]
ldmm [0.00281301 0.00402242 0.00293258 0.00486501]
same [0.00189626 0.00334985 0.00278709 0.00510723]
arima [0.00183594 0.0031657  0.00202664 0.00488623]

4
knn [0.00314676 0.00723583 0.00450714 0.0060287 ]
ldmm [0.00304857 0.00387254 0.00345843 0.00501546]
same [0.00203765  0.00360721 0.00364687 0.00558677]
arima [0.00199366 0.00331548 0.00212741 0.00512351]

