In [16]:
from numpy.random import seed
seed(42)
from tensorflow import set_random_seed
set_random_seed(42)
from sklearn.neural_network import MLPRegressor
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
from keras.optimizers import rmsprop
from keras import backend as ker
from math import sqrt
import numpy as np
import tensorflow as tf
import pandas as pd
import gc
from sklearn.model_selection import train_test_split

def remov_nan (dataset):
    '''
    to remove all NaN Values in a 
    Time Serie Dataframe
    '''
    n = dataset.isnull().sum() 
    data = dataset[0:(len(dataset)-n)]
    return data


def detrend(insample_data):
    """
    Calculates a & b parameters of LRL

    :param insample_data:
    :return:
    """
    x = np.arange(len(insample_data))
    a, b = np.polyfit(x, insample_data, 1)
    return a, b


def deseasonalize(original_ts, ppy):
    """
    Calculates and returns seasonal indices

    :param original_ts: original data
    :param ppy: periods per year
    :return:
    """
    """
    # === get in-sample data
    original_ts = original_ts[:-out_of_sample]
    """
    if seasonality_test(original_ts, ppy):
        # print("seasonal")
        # ==== get moving averages
        ma_ts = moving_averages(original_ts, ppy)

        # ==== get seasonality indices
        le_ts = original_ts * 100 / ma_ts
        le_ts = np.hstack((le_ts, np.full((ppy - (len(le_ts) % ppy)), np.nan)))
        le_ts = np.reshape(le_ts, (-1, ppy))
        si = np.nanmean(le_ts, 0)
        norm = np.sum(si) / (ppy * 100)
        si = si / norm
    else:
        # print("NOT seasonal")
        si = np.full(ppy, 100)

    return si


def moving_averages(ts_init, window):
    """
    Calculates the moving averages for a given TS

    :param ts_init: the original time series
    :param window: window length
    :return: moving averages ts
    """
    if len(ts_init) % 2 == 0:
        ts_ma = pd.rolling_mean(ts_init, window, center=True)
        ts_ma = pd.rolling_mean(ts_ma, 2, center=True)
        ts_ma = np.roll(ts_ma, -1)
    else:
        ts_ma = pd.rolling_mean(ts_init, window, center=True)

    return ts_ma


def seasonality_test(original_ts, ppy):
    """
    Seasonality test

    :param original_ts: time series
    :param ppy: periods per year
    :return: boolean value: whether the TS is seasonal
    """
    s = acf(original_ts, 1)
    for i in range(2, ppy):
        s = s + (acf(original_ts, i) ** 2)

    limit = 1.645 * (sqrt((1 + 2 * s) / len(original_ts)))

    return (abs(acf(original_ts, ppy))) > limit


def acf(data, k):
    """
    Autocorrelation function

    :param data: time series
    :param k: lag
    :return:
    """
    m = np.mean(data)
    s1 = 0
    for i in range(k, len(data)):
        s1 = s1 + ((data[i] - m) * (data[i - k] - m))

    s2 = 0
    for i in range(0, len(data)):
        s2 = s2 + ((data[i] - m) ** 2)

    return float(s1 / s2)


def split_into_train_test(data, in_num, fh):
    """
    Splits the series into train and test sets. Each step takes multiple points as inputs

    :param data: an individual TS
    :param fh: number of out of sample points
    :param in_num: number of input points for the forecast
    :return:
    """
    train, test = data[:-fh], data[-(fh + in_num):]
    x_train, y_train = train[:-1], np.roll(train, -in_num)[:-in_num]
    x_test, y_test = train[-in_num:], np.roll(test, -in_num)[:-in_num]

    # reshape input to be [samples, time steps, features] (N-NF samples, 1 time step, 1 feature)
    x_train = np.reshape(x_train, (-1, 1))
    x_test = np.reshape(x_test, (-1, 1))
    temp_test = np.roll(x_test, -1)
    temp_train = np.roll(x_train, -1)
    for x in range(1, in_num):
        x_train = np.concatenate((x_train[:-1], temp_train[:-1]), 1)
        x_test = np.concatenate((x_test[:-1], temp_test[:-1]), 1)
        temp_test = np.roll(temp_test, -1)[:-1]
        temp_train = np.roll(temp_train, -1)[:-1]

    return x_train, y_train, x_test, y_test


def rnn_bench(x_train, y_train, x_test, fh, input_size):
    """
    Forecasts using 6 SimpleRNN nodes in the hidden layer and a Dense output layer

    :param x_train: train data
    :param y_train: target values for training
    :param x_test: test data
    :param fh: forecasting horizon
    :param input_size: number of points used as input
    :return:
    """
    # reshape to match expected input
    x_train = np.reshape(x_train, (-1, input_size, 1))
    x_test = np.reshape(x_test, (-1, input_size, 1))

    # create the model
    model = Sequential([
        SimpleRNN(6, input_shape=(input_size, 1), activation='linear',
                  use_bias=False, kernel_initializer='glorot_uniform',
                  recurrent_initializer='orthogonal', bias_initializer='zeros',
                  dropout=0.0, recurrent_dropout=0.0),
        Dense(1, use_bias=True, activation='linear')
    ])
    opt = rmsprop(lr=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)

    # fit the model to the training data
    model.fit(x_train, y_train, epochs=100, batch_size=1, verbose=0)

    # make predictions
    y_hat_test = []
    last_prediction = model.predict(x_test)[0]
    for i in range(0, fh):
        y_hat_test.append(last_prediction)
        x_test[0] = np.roll(x_test[0], -1)
        x_test[0, (len(x_test[0]) - 1)] = last_prediction
        last_prediction = model.predict(x_test)[0]

    return np.asarray(y_hat_test)


def mlp_bench(x_train, y_train, x_test, fh):
    """
    Forecasts using a simple MLP which 6 nodes in the hidden layer

    :param x_train: train input data
    :param y_train: target values for training
    :param x_test: test data
    :param fh: forecasting horizon
    :return:
    """
    y_hat_test = []

    model = MLPRegressor(hidden_layer_sizes=6, activation='identity', solver='adam',
                         max_iter=100, learning_rate='adaptive', learning_rate_init=0.001,
                         random_state=42)
    model.fit(x_train, y_train)

    last_prediction = model.predict(x_test)[0]
    for i in range(0, fh):
        y_hat_test.append(last_prediction)
        x_test[0] = np.roll(x_test[0], -1)
        x_test[0, (len(x_test[0]) - 1)] = last_prediction
        last_prediction = model.predict(x_test)[0]

    return np.asarray(y_hat_test)


def smape(a, b):
    """
    Calculates sMAPE

    :param a: actual values
    :param b: predicted values
    :return: sMAPE
    """
    a = np.reshape(a, (-1,))
    b = np.reshape(b, (-1,))
    return np.mean(2.0 * np.abs(a - b) / (np.abs(a) + np.abs(b))).item()


def mase(insample, y_test, y_hat_test, freq):
    """
    Calculates MAsE

    :param insample: insample data
    :param y_test: out of sample target values
    :param y_hat_test: predicted values
    :param freq: data frequency
    :return:
    """
    y_hat_naive = []
    for i in range(freq, len(insample)):
        y_hat_naive.append(insample[(i - freq)])

    masep = np.mean(abs(insample[freq:] - y_hat_naive))

    return np.mean(abs(y_test - y_hat_test)) / masep




In [20]:
def main(data_all,fh,freq,j):
    #fh = 6         # forecasting horizon
    #freq = 1       # data frequency
    in_size = 3    # number of points used as input for each forecast

    
    err_MLP_sMAPE = []
    err_MLP_MASE = []
    err_RNN_sMAPE = []
    err_RNN_MASE = []
    
    columnsname= ["sMape MLP","sMape RNN","Mase MLP","Mase RNN"]
    ds = pd.DataFrame(columns=columnsname )
    
    if j==0:
        ds.to_csv('out_yearly.csv')
    if j==1:
        ds.to_csv('out_quarterly.csv')
    if j==2:
        ds.to_csv('out_monthly.csv')
    if j==3:
        ds.to_csv('out_weekly.csv')
    if j==4:
        ds.to_csv('out_yearly.csv')
    if j==5:
        ds.to_csv('out_yearly.csv')
        

    # ===== In this example we produce forecasts for 100 randomly generated timeseries =====
    
    
    #df_yearly = pd.read_csv("../data/Yearly-train.csv", skiprows=0, index_col =0)
    #data_all = df_yearly.T
    #data_all = np.array(np.random.random_integers(0, 100, (100, 20)), dtype=np.float32)
    #for i in range(0, 100):
        #for j in range(0, 20):
            #data_all[i, j] = j * 10 + data_all[i, j]
    
    counter = 0
    # ===== Main loop which goes through all timeseries =====
    for j in range(len(data_all)):
        ts = data_all.iloc[j, :]
        ts = remov_nan(ts)

        # remove seasonality
        seasonality_in = deseasonalize(ts, freq)

        for i in range(0, len(ts)):
            ts[i] = ts[i] * 100 / seasonality_in[i % freq]

        # detrending
        a, b = detrend(ts)

        for i in range(0, len(ts)):
            ts[i] = ts[i] - ((a * i) + b)

        x_train, y_train, x_test, y_test = split_into_train_test(ts, in_size, fh)

        # RNN benchmark - Produce forecasts
        y_hat_test_RNN = np.reshape(rnn_bench(x_train, y_train, x_test, fh, in_size), (-1))

        # MLP benchmark - Produce forecasts
        y_hat_test_MLP = mlp_bench(x_train, y_train, x_test, fh)
        for i in range(0, 29):
            y_hat_test_MLP = np.vstack((y_hat_test_MLP, mlp_bench(x_train, y_train, x_test, fh)))
        y_hat_test_MLP = np.median(y_hat_test_MLP, axis=0)

        # add trend
        for i in range(0, len(ts)):
            ts[i] = ts[i] + ((a * i) + b)

        for i in range(0, fh):
            y_hat_test_MLP[i] = y_hat_test_MLP[i] + ((a * (len(ts) + i + 1)) + b)
            y_hat_test_RNN[i] = y_hat_test_RNN[i] + ((a * (len(ts) + i + 1)) + b)

        # add seasonality
        for i in range(0, len(ts)):
            ts[i] = ts[i] * seasonality_in[i % freq] / 100

        for i in range(len(ts), len(ts) + fh):
            y_hat_test_MLP[i - len(ts)] = y_hat_test_MLP[i - len(ts)] * seasonality_in[i % freq] / 100
            y_hat_test_RNN[i - len(ts)] = y_hat_test_RNN[i - len(ts)] * seasonality_in[i % freq] / 100

        # check if negative or extreme
        for i in range(len(y_hat_test_MLP)):
            if y_hat_test_MLP[i] < 0:
                y_hat_test_MLP[i] = 0
            if y_hat_test_RNN[i] < 0:
                y_hat_test_RNN[i] = 0
                
            if y_hat_test_MLP[i] > (1000 * max(ts)):
                y_hat_test_MLP[i] = max(ts)         
            if y_hat_test_RNN[i] > (1000 * max(ts)):
                y_hat_test_RNN[i] = max(ts)

        x_train, y_train, x_test, y_test = split_into_train_test(ts, in_size, fh)

        # Calculate errors
        err_MLP_sMAPE.append(smape(y_test, y_hat_test_MLP))
        err_RNN_sMAPE.append(smape(y_test, y_hat_test_RNN))
        err_MLP_MASE.append(mase(ts[:-fh], y_test, y_hat_test_MLP, freq))
        err_RNN_MASE.append(mase(ts[:-fh], y_test, y_hat_test_RNN, freq))

        # memory handling
        ker.clear_session()
        tf.reset_default_graph()
        gc.collect()

        counter = counter + 1
        #**********************************************************************
        print("-------------TS ID: ", counter, "-------------")
        print(" sMAPE_MLP:",err_MLP_sMAPE[-1], " sMAPE_RNN:",err_RNN_sMAPE[-1]," MASE_MLP:",err_MLP_MASE[-1]," MASE_RNN:",err_RNN_MASE[-1])
        
        p =[err_MLP_sMAPE[-1],err_RNN_sMAPE[-1],err_MLP_MASE[-1],err_RNN_MASE[-1]]
        ds.loc[i] = p        
        ds=ds.round(4)
        if j==0:
            ds.to_csv('out_yearly.csv', mode='a', header=False)
        if j==1:
            ds.to_csv('out_quarterly.csv', mode='a', header=False)
        if j==2:
            ds.to_csv('out_monthly.csv', mode='a', header=False)
        if j==3:
            ds.to_csv('out_weekly.csv', mode='a', header=False)
        if j==4:
            ds.to_csv('out_daily.csv', mode='a', header=False)
        if j==5:
            ds.to_csv('out_hourly.csv', mode='a', header=False)
        #********************************************************************    
    print("\n\n---------FINAL RESULTS---------")
    print("=============sMAPE=============\n")
    print("#### MLP ####\n", np.mean(err_MLP_sMAPE), "\n")
    print("#### RNN ####\n", np.mean(err_RNN_sMAPE), "\n")
    print("==============MASE=============")
    print("#### MLP ####\n", np.mean(err_MLP_MASE), "\n")
    print("#### RNN ####\n", np.mean(err_RNN_MASE), "\n")
    return np.mean(err_MLP_sMAPE),np.mean(err_RNN_sMAPE),np.mean(err_MLP_MASE),np.mean(err_RNN_MASE)



In [24]:
def main_all():
    
    print("### Load of Dataset  ###")
    df_yearly = pd.read_csv("../data/Yearly-train.csv", skiprows=0, index_col =0)
    df_quaterly = pd.read_csv("../data/Quarterly-train.csv", skiprows=0, index_col =0)
    df_monthly = pd.read_csv("../data/Monthly-train.csv", skiprows=0, index_col =0)
    df_weekly = pd.read_csv("../data/Weekly-train.csv", skiprows=0, index_col =0)
    df_daily = pd.read_csv("../data/Daily-train.csv", skiprows=0, index_col =0)
    df_hourly = pd.read_csv("../data/Hourly-train.csv", skiprows=0, index_col =0)
    
    X_train, X_test = train_test_split(data, test_size=0.02, random_state=RANDOM_SEED)
    
    
    D=[]
    D.append(df_yearly)
    D.append(df_quaterly)
    D.append(df_monthly)
    D.append(df_weekly)
    D.append(df_daily)
    D.append(df_hourly)
    
    columnsname= ["Data_Type","sMape MLP","sMape RNN","Mase MLP","Mase RNN"]
    ds = pd.DataFrame(columns=columnsname )
    ds.to_csv('outputM4.csv')
    
    
    for i in range (len(D)):
        if i==0:
            print( "*** Beginn of yearly dataset ***")
            a,b,c,d = main(D[i],6,1,i)
            p= ["Yearly_data",a,b,c,d]
            ds.iloc[i]= p
            ds=ds.round(4)
            ds.to_csv('outpoutM4.csv', mode='a', header=False)
        if i==1:
            print( "*** Beginn of Quarterly dataset ***")
            a,b,c,d = main(D[i],8,4,i)
            p= ["Quarterly_data",a,b,c,d]
            ds.iloc[i]= p
            ds=ds.round(4)
            ds.to_csv('outpoutM4.csv', mode='a', header=False)
        if i==2:
            print( "*** Beginn of Monthly dataset ***")
            a,b,c,d = main(D[i],18,12,i)
            p= ["Monthly_data",a,b,c,d]
            ds.iloc[i]= p
            ds=ds.round(4)
            ds.to_csv('outpoutM4.csv', mode='a', header=False)
        if i==3:
            print( "*** Beginn of Weekly dataset ***")
            a,b,c,d = main(D[i],13,1,i)
            p= ["Weekly_data",a,b,c,d]
            ds.iloc[i]= p
            ds=ds.round(4)
            ds.to_csv('outpoutM4.csv', mode='a', header=False)
        if i==4:
            print( "*** Beginn of Daily dataset ***")
            a,b,c,d = main(D[i],14,1,i)
            p= ["Daily_data",a,b,c,d]
            ds.iloc[i]= p
            ds=ds.round(4)
            ds.to_csv('outpoutM4.csv', mode='a', header=False)
        if i==5:
            print( "*** Beginn of Hourly dataset ***")
            a,b,c,d = main(D[i],48,24,i)
            p= ["Hourly_data",a,b,c,d]
            ds.iloc[i]= p
            ds=ds.round(4)
            ds.to_csv('outputM4.csv', mode='a', header=False)
    print("Done")

In [25]:
main_all()

### Load of Dataset  ###
*** Beginn of yearly dataset ***


	Series.rolling(window=1,center=True).mean()
  return getattr(obj, method)(*args, **kwds)


-------------TS ID:  1 -------------
 sMAPE_MLP: 0.10531365684804488  sMAPE_RNN: 0.09547596333707854  MASE_MLP: 7.382686578195777  MASE_RNN: 6.666194662058728
-------------TS ID:  2 -------------
 sMAPE_MLP: 0.19690236155452553  sMAPE_RNN: 0.20485200788894808  MASE_MLP: 1.1419663364148105  MASE_RNN: 1.1813237395877547
-------------TS ID:  3 -------------
 sMAPE_MLP: 0.15135493334352856  sMAPE_RNN: 0.15088102051276175  MASE_MLP: 6.886722993060876  MASE_RNN: 6.866131851230425
-------------TS ID:  4 -------------
 sMAPE_MLP: 0.1455145475285837  sMAPE_RNN: 0.14248781456562623  MASE_MLP: 6.841531656773988  MASE_RNN: 6.6922694052419365
-------------TS ID:  5 -------------
 sMAPE_MLP: 0.10921279996285528  sMAPE_RNN: 0.09937615506400947  MASE_MLP: 4.6721794634231175  MASE_RNN: 4.228635902189265
-------------TS ID:  6 -------------
 sMAPE_MLP: 0.17474525721815795  sMAPE_RNN: 0.17497816452062834  MASE_MLP: 7.897580580619153  MASE_RNN: 7.921933404474266
-------------TS ID:  7 -------------
 sMAPE

	Series.rolling(window=1,center=True).mean()
	Series.rolling(window=2,center=True).mean()


-------------TS ID:  39 -------------
 sMAPE_MLP: 0.11381990156679032  sMAPE_RNN: 0.03882936900132927  MASE_MLP: 4.392960399108378  MASE_RNN: 1.5797597670830998
-------------TS ID:  40 -------------
 sMAPE_MLP: 0.18919588833217493  sMAPE_RNN: 0.07157282113722481  MASE_MLP: 14.814590253071414  MASE_RNN: 6.4118215776030985
-------------TS ID:  41 -------------
 sMAPE_MLP: 0.07286612724123288  sMAPE_RNN: 0.0662560109176321  MASE_MLP: 5.421663155122629  MASE_RNN: 5.283060246723318
-------------TS ID:  42 -------------
 sMAPE_MLP: 0.06861521069988652  sMAPE_RNN: 0.08790611150736743  MASE_MLP: 3.876716727599316  MASE_RNN: 4.8676356792898865
-------------TS ID:  43 -------------
 sMAPE_MLP: 0.055800015126354276  sMAPE_RNN: 0.07105554947789892  MASE_MLP: 2.928943181345116  MASE_RNN: 3.7112312512520034
-------------TS ID:  44 -------------
 sMAPE_MLP: 0.04763973289517003  sMAPE_RNN: 0.08340008214905238  MASE_MLP: 2.734509536274948  MASE_RNN: 4.896923781960377
-------------TS ID:  45 -----------

-------------TS ID:  91 -------------
 sMAPE_MLP: 0.09272647304492548  sMAPE_RNN: 0.03393546802349806  MASE_MLP: 7.018524415853414  MASE_RNN: 2.498480513723273
-------------TS ID:  92 -------------
 sMAPE_MLP: 0.2738499503653244  sMAPE_RNN: 0.2659867693342563  MASE_MLP: 1.0540153290563523  MASE_RNN: 1.0191358060266882
-------------TS ID:  93 -------------
 sMAPE_MLP: 0.07908688540162766  sMAPE_RNN: 0.07995541026929247  MASE_MLP: 2.632301293918726  MASE_RNN: 2.6643584295697385
-------------TS ID:  94 -------------
 sMAPE_MLP: 0.0118323515179718  sMAPE_RNN: 0.0176003143298208  MASE_MLP: 0.7057880605542561  MASE_RNN: 1.0552295091840038
-------------TS ID:  95 -------------
 sMAPE_MLP: 0.01841070380564722  sMAPE_RNN: 0.010060439977374548  MASE_MLP: 1.6486315669852674  MASE_RNN: 0.8958414562657514
-------------TS ID:  96 -------------
 sMAPE_MLP: 0.08273693530900152  sMAPE_RNN: 0.0815279135297354  MASE_MLP: 2.6602792915909155  MASE_RNN: 2.622445946653194
-------------TS ID:  97 ------------

-------------TS ID:  142 -------------
 sMAPE_MLP: 0.2724843401502795  sMAPE_RNN: 0.2417338080074787  MASE_MLP: 5.58777624880154  MASE_RNN: 4.873348513249832
-------------TS ID:  143 -------------
 sMAPE_MLP: 0.06643685536672209  sMAPE_RNN: 0.07634930334912869  MASE_MLP: 4.014051741307568  MASE_RNN: 4.594228138895814
-------------TS ID:  144 -------------
 sMAPE_MLP: 0.091877634472061  sMAPE_RNN: 0.09165272400147684  MASE_MLP: 3.162012456268439  MASE_RNN: 3.153695936326765
-------------TS ID:  145 -------------
 sMAPE_MLP: 0.1105230580994172  sMAPE_RNN: 0.12128103436919734  MASE_MLP: 3.415606756907104  MASE_RNN: 3.7732543278098927
-------------TS ID:  146 -------------
 sMAPE_MLP: 0.29085358910628184  sMAPE_RNN: 0.13403003227986635  MASE_MLP: 5.198379182161353  MASE_RNN: 2.161034339072077
-------------TS ID:  147 -------------
 sMAPE_MLP: 0.26163055156610926  sMAPE_RNN: 0.25178619740983976  MASE_MLP: 6.616351158584847  MASE_RNN: 6.335216571195006
-------------TS ID:  148 -------------


-------------TS ID:  193 -------------
 sMAPE_MLP: 0.1797925558748137  sMAPE_RNN: 0.1652038632752884  MASE_MLP: 2.98891059521024  MASE_RNN: 2.717601054258173
-------------TS ID:  194 -------------
 sMAPE_MLP: 0.11265376985826625  sMAPE_RNN: 0.12798413998706729  MASE_MLP: 2.214412706049253  MASE_RNN: 2.536495605607988
-------------TS ID:  195 -------------
 sMAPE_MLP: 0.15223312090191213  sMAPE_RNN: 0.1390797169598144  MASE_MLP: 3.7203538901707156  MASE_RNN: 3.377123873781592
-------------TS ID:  196 -------------
 sMAPE_MLP: 0.1882426727709011  sMAPE_RNN: 0.17899039302345462  MASE_MLP: 4.203433433209474  MASE_RNN: 3.971659348367221
-------------TS ID:  197 -------------
 sMAPE_MLP: 0.14621190552119162  sMAPE_RNN: 0.15980786912338085  MASE_MLP: 1.2782465599644801  MASE_RNN: 1.3856880107904739
-------------TS ID:  198 -------------
 sMAPE_MLP: 0.1615371348746385  sMAPE_RNN: 0.15204550091992056  MASE_MLP: 1.6870627722035845  MASE_RNN: 1.576869658084608
-------------TS ID:  199 -----------

-------------TS ID:  244 -------------
 sMAPE_MLP: 0.02698408181267205  sMAPE_RNN: 0.09711695787681647  MASE_MLP: 1.539361135749997  MASE_RNN: 5.822116960583123


KeyboardInterrupt: 