In [19]:
import sys
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from scipy.stats import boxcox
import matplotlib.pyplot as plt 
sys.path.append('./sources')
from models import predict_knn, predict_LDMM, predict_SAME, predict_AMD
from preprocessing import dao

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
%load_ext autoreload
%autoreload 2

# Selects which validation loss picks the neighbour counts further down:
# True  -> mean relative squared error (the losses_* lists),
# False -> mean relative absolute error (the losses_*_l1 lists).
l2 = False

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Loading data

In [20]:
# Read the raw table from disk into a DataFrame.
data = pd.read_csv(filepath_or_buffer='data/data_new.csv')

In [21]:
# Smooth every column with a trailing moving average over `w` rows, then drop
# the first w-1 rows, whose windows are incomplete.
w = 6
data = (
    data
    .rolling(window=w)
    .mean()
    .iloc[w - 1:]
)

In [22]:
# Convert to a NumPy array, drop the first column and cast the rest to float64.
# `np.float` (a deprecated alias of the builtin float) was removed in
# NumPy 1.24 and raises AttributeError there; np.float64 is the same dtype.
data = data.to_numpy()[:, 1:].astype(np.float64)

In [23]:
# Excluding seasonality
timestamps = np.arange(len(data))  # one integer timestamp per row of `data`
# Disabled alternative: drop rows selected by a periodic mask.
# mask = np.tile(np.append(np.ones(1), np.append(np.zeros(10), np.ones(1))), 20)[:-7]
# timestamps = timestamps[mask==0]
# data = data[mask==0]
for i in range(data.shape[1]):
    # Overwrite each column in place with dao's output.
    # NOTE(review): presumably the deseasonalised series - confirm in preprocessing.dao.
    data[:, i] = dao(data[:, i])
    # Disabled alternative: Box-Cox transform of the column.
    # data[:, i], _ = boxcox(data[:, i])

### Setting parameters

In [24]:
# Number of series = number of columns in `data`.
num_series = data.shape[1]

# Number of test months
n_test = 40

# The neighbour counts (nn_ldmm, nn_same, nn_knn, nn_adm) are not set here:
# the former fixed value of 15 is disabled and they are assigned by the
# search over `candidates` in the next cell.

# LDMM parameters
h_sqr = 0.001
bandwidth_ldmm = 11
lambd_ldmm = h_sqr / 7.0
mu = 1500.0
n_iter_ldmm = 7

# SAME parameters
bandwidth_same = 3
tau = 1.0
n_iter_same = 21

# kNN parameters
bandwidth_knn = 12

# ADM parameters
lambd_adm = 3.
n_iter_adm = 3

In [25]:
# Search over the number of nearest neighbours shared by kNN, LDMM, SAME and ADM.
# For every candidate, each of the last `n_test` rows of `data` is forecast from
# the rows preceding it, and the mean relative squared error and mean relative
# absolute error (the *_l1 lists) over the test rows and series are recorded.
losses_ldmm, losses_ldmm_l1 = [], []
losses_same, losses_same_l1 = [], []
losses_knn, losses_knn_l1 = [], []
losses_adm, losses_adm_l1 = [], []
candidates = [1, 5, 9, 20, 50]
for n_neighbors in candidates:
    nn_ldmm = n_neighbors
    nn_same = n_neighbors
    nn_knn = n_neighbors
    nn_adm = n_neighbors
    lookfront = 1  # number of steps forecast per test window
    predictions_knn = np.empty((n_test, lookfront, num_series))
    predictions_ldmm = np.empty((n_test, lookfront, num_series))
    predictions_same = np.empty((n_test, lookfront, num_series))
    predictions_arima = np.empty((n_test, lookfront, num_series))
    predictions_adm = np.empty((n_test, lookfront, num_series))

    outcomes = np.empty((n_test, lookfront, num_series))
    for i in range(n_test):
        # Negative row index of the first step forecast in window i; every
        # row before it is the observed history for this window.
        first_step = -n_test + i + 1 - lookfront

        # Each model keeps its own history so that, for lookfront > 1, it is
        # extended with that model's own forecasts only.
        Y_train_knn = data[:first_step, :]
        Y_train_ldmm = data[:first_step, :]
        Y_train_same = data[:first_step, :]
        Y_train_arima = data[:first_step, :]
        Y_train_adm = data[:first_step, :]

        for k in range(lookfront):
            timestamps_train = timestamps[:first_step + k]
            timestamp_test = timestamps[first_step + k]
            Y_test = data[first_step + k, :]

            # Fixed: kNN previously received Y_train_ldmm.
            predictions_knn[i, k, :] = predict_knn(
                timestamps_train, Y_train_knn, timestamp_test,
                bandwidth_knn, nn_knn)
            predictions_ldmm[i, k, :] = predict_LDMM(
                timestamps_train, Y_train_ldmm, timestamp_test,
                bandwidth_ldmm, lambd_ldmm, mu, h_sqr, n_iter_ldmm, nn_ldmm)
            predictions_same[i, k, :] = predict_SAME(
                timestamps_train, Y_train_same, timestamp_test,
                bandwidth_same, tau, n_iter_same, nn_same)
            # Fixed: ADM previously received Y_train_same.
            # NOTE(review): no ADM-specific bandwidth is defined, so
            # bandwidth_same is reused here - confirm this is intended.
            predictions_adm[i, k, :] = predict_AMD(
                timestamps_train, Y_train_adm, timestamp_test,
                bandwidth_same, lambd_adm, n_iter_adm, nn_adm)
            # ARIMA baseline, fitted per series. It does not depend on
            # n_neighbors; it is refit for every candidate only so that its
            # loss is printed next to the other models.
            for t in range(num_series):
                arima_model = ARIMA(Y_train_arima[:, t], order=(6,1,0))
                predictions_arima[i, k, t] = arima_model.fit().forecast(steps=1)[0]

            outcomes[i, k, :] = Y_test[:]
            # Append each model's own forecast (a 1 x num_series row) to its
            # own history for the next step.
            Y_train_knn = np.append(Y_train_knn, predictions_knn[i, k:k + 1, :], axis=0)
            Y_train_ldmm = np.append(Y_train_ldmm, predictions_ldmm[i, k:k + 1, :], axis=0)
            Y_train_same = np.append(Y_train_same, predictions_same[i, k:k + 1, :], axis=0)
            Y_train_arima = np.append(Y_train_arima, predictions_arima[i, k:k + 1, :], axis=0)
            # Fixed: the ADM history was previously rebuilt from the ARIMA
            # history and the ARIMA forecast.
            Y_train_adm = np.append(Y_train_adm, predictions_adm[i, k:k + 1, :], axis=0)

    # Only the last step of each window is scored. The means below are taken
    # over the test windows (axis 0), giving one loss per series.
    actual = outcomes[:, -1]

    new_loss_knn = np.mean((predictions_knn[:, -1] - actual)**2 / actual**2, axis=0)
    new_loss_ldmm = np.mean((predictions_ldmm[:, -1] - actual)**2 / actual**2, axis=0)
    new_loss_same = np.mean((predictions_same[:, -1] - actual)**2 / actual**2, axis=0)
    new_loss_arima = np.mean((predictions_arima[:, -1] - actual)**2 / actual**2, axis=0)
    new_loss_adm = np.mean((predictions_adm[:, -1] - actual)**2 / actual**2, axis=0)

    new_loss_knn_l1 = np.mean(np.abs(predictions_knn[:, -1] - actual) / np.abs(actual), axis=0)
    new_loss_ldmm_l1 = np.mean(np.abs(predictions_ldmm[:, -1] - actual) / np.abs(actual), axis=0)
    new_loss_same_l1 = np.mean(np.abs(predictions_same[:, -1] - actual) / np.abs(actual), axis=0)
    new_loss_arima_l1 = np.mean(np.abs(predictions_arima[:, -1] - actual) / np.abs(actual), axis=0)
    new_loss_adm_l1 = np.mean(np.abs(predictions_adm[:, -1] - actual) / np.abs(actual), axis=0)

    # NOTE(review): lookfront is constant in this cell; printing n_neighbors
    # instead would identify which candidate the numbers belong to.
    print(lookfront)
    print('knn {:.3f} {:.3f}'.format(np.mean(new_loss_knn), np.mean(new_loss_knn_l1)))
    print('ldmm {:.3f} {:.3f}'.format(np.mean(new_loss_ldmm), np.mean(new_loss_ldmm_l1)))
    print('same {:.3f} {:.3f}'.format(np.mean(new_loss_same), np.mean(new_loss_same_l1)))
    print('arima {:.3f} {:.3f}'.format(np.mean(new_loss_arima), np.mean(new_loss_arima_l1)))
    print('adm {:.3f} {:.3f}'.format(np.mean(new_loss_adm), np.mean(new_loss_adm_l1)))

    # Average over series: one scalar per model and candidate.
    losses_knn.append(np.mean(new_loss_knn))
    losses_ldmm.append(np.mean(new_loss_ldmm))
    losses_same.append(np.mean(new_loss_same))
    losses_adm.append(np.mean(new_loss_adm))

    losses_knn_l1.append(np.mean(new_loss_knn_l1))
    losses_ldmm_l1.append(np.mean(new_loss_ldmm_l1))
    losses_same_l1.append(np.mean(new_loss_same_l1))
    losses_adm_l1.append(np.mean(new_loss_adm_l1))

    print()

# Best candidate per model under the squared-error loss ...
print('knn', candidates[np.argmin(losses_knn)])
print('ldmm', candidates[np.argmin(losses_ldmm)])
print('same', candidates[np.argmin(losses_same)])
print('adm', candidates[np.argmin(losses_adm)])

# ... and under the absolute-error loss.
print('knn', candidates[np.argmin(losses_knn_l1)])
print('ldmm', candidates[np.argmin(losses_ldmm_l1)])
print('same', candidates[np.argmin(losses_same_l1)])
print('adm', candidates[np.argmin(losses_adm_l1)])

  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'


1
knn 0.001 0.012
ldmm 0.001 0.012
same 0.001 0.012
arima 0.000 0.004
adm 0.001 0.012



  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'


1
knn 0.000 0.013
ldmm 0.000 0.013
same 0.000 0.013
arima 0.000 0.004
adm 0.000 0.013



  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'


1
knn 0.000 0.013
ldmm 0.000 0.013
same 0.000 0.013
arima 0.000 0.004
adm 0.000 0.013



  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'


1
knn 0.000 0.012
ldmm 0.000 0.012
same 0.000 0.012
arima 0.000 0.004
adm 0.000 0.012



  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'


1
knn 0.000 0.012
ldmm 0.000 0.012
same 0.000 0.012
arima 0.000 0.004
adm 0.000 0.012

knn 50
ldmm 50
same 50
adm 50
knn 50
ldmm 50
same 50
adm 50


In [26]:
# Pick, for each model, the neighbour count that minimised the loss selected
# by `l2` (squared relative error if True, absolute relative error otherwise).
if l2:
    nn_knn = candidates[np.argmin(losses_knn)]
    nn_ldmm = candidates[np.argmin(losses_ldmm)]
    nn_same = candidates[np.argmin(losses_same)]
    nn_adm = candidates[np.argmin(losses_adm)]
else:
    nn_knn = candidates[np.argmin(losses_knn_l1)]
    nn_ldmm = candidates[np.argmin(losses_ldmm_l1)]
    nn_same = candidates[np.argmin(losses_same_l1)]
    nn_adm = candidates[np.argmin(losses_adm_l1)]

lookfronts = [1,2,3,4]
print(nn_knn, nn_ldmm, nn_same, nn_adm)

# Evaluate every model at several horizons. For horizon `lookfront`, each of
# the last n_test rows is forecast recursively: the history ends `lookfront`
# rows before the target and is extended step by step with the model's own
# forecasts until the target row is reached.
for lookfront in lookfronts:
    predictions_knn = np.empty((n_test, lookfront, num_series))
    predictions_ldmm = np.empty((n_test, lookfront, num_series))
    predictions_same = np.empty((n_test, lookfront, num_series))
    predictions_arima = np.empty((n_test, lookfront, num_series))
    predictions_adm = np.empty((n_test, lookfront, num_series))

    outcomes = np.empty((n_test, lookfront, num_series))
    for i in range(n_test):
        # Negative row index of the first step forecast in window i; every
        # row before it is the observed history for this window.
        first_step = -n_test + i + 1 - lookfront

        # Each model keeps its own history so that the recursion feeds every
        # model with its own previous forecasts only.
        Y_train_knn = data[:first_step, :]
        Y_train_ldmm = data[:first_step, :]
        Y_train_same = data[:first_step, :]
        Y_train_arima = data[:first_step, :]
        Y_train_adm = data[:first_step, :]

        for k in range(lookfront):
            timestamps_train = timestamps[:first_step + k]
            timestamp_test = timestamps[first_step + k]
            Y_test = data[first_step + k, :]

            # Fixed: kNN previously received Y_train_ldmm, so for k > 0 it
            # was fed LDMM's forecasts instead of its own.
            predictions_knn[i, k, :] = predict_knn(
                timestamps_train, Y_train_knn, timestamp_test,
                bandwidth_knn, nn_knn)
            predictions_ldmm[i, k, :] = predict_LDMM(
                timestamps_train, Y_train_ldmm, timestamp_test,
                bandwidth_ldmm, lambd_ldmm, mu, h_sqr, n_iter_ldmm, nn_ldmm)
            predictions_same[i, k, :] = predict_SAME(
                timestamps_train, Y_train_same, timestamp_test,
                bandwidth_same, tau, n_iter_same, nn_same)
            # Fixed: ADM previously received Y_train_same, so for k > 0 it
            # was fed SAME's forecasts instead of its own.
            # NOTE(review): no ADM-specific bandwidth is defined, so
            # bandwidth_same is reused here - confirm this is intended.
            predictions_adm[i, k, :] = predict_AMD(
                timestamps_train, Y_train_adm, timestamp_test,
                bandwidth_same, lambd_adm, n_iter_adm, nn_adm)
            # ARIMA baseline, fitted separately for every series.
            for t in range(num_series):
                arima_model = ARIMA(Y_train_arima[:, t], order=(1,0,2))
                predictions_arima[i, k, t] = arima_model.fit().forecast(steps=1)[0]

            outcomes[i, k, :] = Y_test[:]
            # Append each model's own forecast (a 1 x num_series row) to its
            # own history for the next step.
            Y_train_knn = np.append(Y_train_knn, predictions_knn[i, k:k + 1, :], axis=0)
            Y_train_ldmm = np.append(Y_train_ldmm, predictions_ldmm[i, k:k + 1, :], axis=0)
            Y_train_same = np.append(Y_train_same, predictions_same[i, k:k + 1, :], axis=0)
            Y_train_arima = np.append(Y_train_arima, predictions_arima[i, k:k + 1, :], axis=0)
            # Fixed: the ADM history was previously rebuilt from the ARIMA
            # history and the ARIMA forecast.
            Y_train_adm = np.append(Y_train_adm, predictions_adm[i, k:k + 1, :], axis=0)

    # Only the last step of each window (the full-horizon forecast) is scored.
    # The means below are taken over the test windows (axis 0), giving one
    # loss per series.
    actual = outcomes[:, -1]

    new_loss_knn = np.mean((predictions_knn[:, -1] - actual)**2 / actual**2, axis=0)
    new_loss_ldmm = np.mean((predictions_ldmm[:, -1] - actual)**2 / actual**2, axis=0)
    new_loss_same = np.mean((predictions_same[:, -1] - actual)**2 / actual**2, axis=0)
    new_loss_arima = np.mean((predictions_arima[:, -1] - actual)**2 / actual**2, axis=0)
    new_loss_adm = np.mean((predictions_adm[:, -1] - actual)**2 / actual**2, axis=0)

    new_loss_knn_l1 = np.mean(np.abs(predictions_knn[:, -1] - actual) / np.abs(actual), axis=0)
    new_loss_ldmm_l1 = np.mean(np.abs(predictions_ldmm[:, -1] - actual) / np.abs(actual), axis=0)
    new_loss_same_l1 = np.mean(np.abs(predictions_same[:, -1] - actual) / np.abs(actual), axis=0)
    new_loss_arima_l1 = np.mean(np.abs(predictions_arima[:, -1] - actual) / np.abs(actual), axis=0)
    new_loss_adm_l1 = np.mean(np.abs(predictions_adm[:, -1] - actual) / np.abs(actual), axis=0)

    # Report the horizon, then "<model> <squared loss> <absolute loss>",
    # each averaged over series.
    print(lookfront)
    print('knn {:.4f} {:.4f}'.format(np.mean(new_loss_knn), np.mean(new_loss_knn_l1)))
    print('ldmm {:.4f} {:.4f}'.format(np.mean(new_loss_ldmm), np.mean(new_loss_ldmm_l1)))
    print('same {:.4f} {:.4f}'.format(np.mean(new_loss_same), np.mean(new_loss_same_l1)))
    print('arima {:.4f} {:.4f}'.format(np.mean(new_loss_arima), np.mean(new_loss_arima_l1)))
    print('adm {:.4f} {:.4f}'.format(np.mean(new_loss_adm), np.mean(new_loss_adm_l1)))

    print()

50 50 50 50


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-i

1
knn 0.0003 0.0117
ldmm 0.0003 0.0117
same 0.0003 0.0117
arima 0.0002 0.0081
adm 0.0003 0.0117



  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-i

2
knn 0.0008 0.0200
ldmm 0.0008 0.0200
same 0.0008 0.0200
arima 0.0005 0.0147
adm 0.0008 0.0200



  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-i

3
knn 0.0014 0.0275
ldmm 0.0014 0.0275
same 0.0014 0.0275
arima 0.0010 0.0222
adm 0.0014 0.0275



  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-i

4
knn 0.0022 0.0344
ldmm 0.0022 0.0344
same 0.0022 0.0344
arima 0.0017 0.0287
adm 0.0022 0.0344

