In [71]:
import os
from numpy.random import seed
seed(42)
from tensorflow import set_random_seed
set_random_seed(42)
from sklearn.neural_network import MLPRegressor
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
from keras.optimizers import rmsprop
from keras import backend as ker
from math import sqrt
import numpy as np
import tensorflow as tf
import pandas as pd
import gc
from sklearn.model_selection import train_test_split
RANDOM_SEED= 40




def remov_nan (dataset):
    '''
    to remove all NaN Values in a 
    Time Serie Dataframe
    '''
    n = dataset.isnull().sum() 
    data = dataset[0:(len(dataset)-n)]
    return data


def detrend(insample_data):
    """
    Calculates a & b parameters of LRL

    :param insample_data:
    :return:
    """
    x = np.arange(len(insample_data))
    a, b = np.polyfit(x, insample_data, 1)
    return a, b


def deseasonalize(original_ts, ppy):
    """
    Calculates and returns seasonal indices

    :param original_ts: original data
    :param ppy: periods per year
    :return:
    """
    """
    # === get in-sample data
    original_ts = original_ts[:-out_of_sample]
    """
    if seasonality_test(original_ts, ppy):
        # print("seasonal")
        # ==== get moving averages
        ma_ts = moving_averages(original_ts, ppy)

        # ==== get seasonality indices
        le_ts = original_ts * 100 / ma_ts
        le_ts = np.hstack((le_ts, np.full((ppy - (len(le_ts) % ppy)), np.nan)))
        le_ts = np.reshape(le_ts, (-1, ppy))
        si = np.nanmean(le_ts, 0)
        norm = np.sum(si) / (ppy * 100)
        si = si / norm
    else:
        # print("NOT seasonal")
        si = np.full(ppy, 100)

    return si


def moving_averages(ts_init, window):
    """
    Calculates the moving averages for a given TS

    :param ts_init: the original time series
    :param window: window length
    :return: moving averages ts
    """
    if len(ts_init) % 2 == 0:
        ts_ma = pd.rolling_mean(ts_init, window, center=True)
        ts_ma = pd.rolling_mean(ts_ma, 2, center=True)
        ts_ma = np.roll(ts_ma, -1)
    else:
        ts_ma = pd.rolling_mean(ts_init, window, center=True)

    return ts_ma


def seasonality_test(original_ts, ppy):
    """
    Seasonality test

    :param original_ts: time series
    :param ppy: periods per year
    :return: boolean value: whether the TS is seasonal
    """
    s = acf(original_ts, 1)
    for i in range(2, ppy):
        s = s + (acf(original_ts, i) ** 2)

    limit = 1.645 * (sqrt((1 + 2 * s) / len(original_ts)))

    return (abs(acf(original_ts, ppy))) > limit


def acf(data, k):
    """
    Autocorrelation function

    :param data: time series
    :param k: lag
    :return:
    """
    m = np.mean(data)
    s1 = 0
    for i in range(k, len(data)):
        s1 = s1 + ((data[i] - m) * (data[i - k] - m))

    s2 = 0
    for i in range(0, len(data)):
        s2 = s2 + ((data[i] - m) ** 2)

    return float(s1 / s2)


def split_into_train_test(data, in_num, fh):
    """
    Splits the series into train and test sets. Each step takes multiple points as inputs

    :param data: an individual TS
    :param fh: number of out of sample points
    :param in_num: number of input points for the forecast
    :return:
    """
    train, test = data[:-fh], data[-(fh + in_num):]
    x_train, y_train = train[:-1], np.roll(train, -in_num)[:-in_num]
    x_test, y_test = train[-in_num:], np.roll(test, -in_num)[:-in_num]

    # reshape input to be [samples, time steps, features] (N-NF samples, 1 time step, 1 feature)
    x_train = np.reshape(x_train, (-1, 1))
    x_test = np.reshape(x_test, (-1, 1))
    temp_test = np.roll(x_test, -1)
    temp_train = np.roll(x_train, -1)
    for x in range(1, in_num):
        x_train = np.concatenate((x_train[:-1], temp_train[:-1]), 1)
        x_test = np.concatenate((x_test[:-1], temp_test[:-1]), 1)
        temp_test = np.roll(temp_test, -1)[:-1]
        temp_train = np.roll(temp_train, -1)[:-1]

    return x_train, y_train, x_test, y_test


def rnn_bench(x_train, y_train, x_test, fh, input_size):
    """
    Forecasts using 6 SimpleRNN nodes in the hidden layer and a Dense output layer

    :param x_train: train data
    :param y_train: target values for training
    :param x_test: test data
    :param fh: forecasting horizon
    :param input_size: number of points used as input
    :return:
    """
    # reshape to match expected input
    x_train = np.reshape(x_train, (-1, input_size, 1))
    x_test = np.reshape(x_test, (-1, input_size, 1))

    # create the model
    model = Sequential([
        SimpleRNN(6, input_shape=(input_size, 1), activation='linear',
                  use_bias=False, kernel_initializer='glorot_uniform',
                  recurrent_initializer='orthogonal', bias_initializer='zeros',
                  dropout=0.0, recurrent_dropout=0.0),
        Dense(1, use_bias=True, activation='linear')
    ])
    opt = rmsprop(lr=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)

    # fit the model to the training data
    model.fit(x_train, y_train, epochs=100, batch_size=1, verbose=0)

    # make predictions
    y_hat_test = []
    last_prediction = model.predict(x_test)[0]
    for i in range(0, fh):
        y_hat_test.append(last_prediction)
        x_test[0] = np.roll(x_test[0], -1)
        x_test[0, (len(x_test[0]) - 1)] = last_prediction
        last_prediction = model.predict(x_test)[0]

    return np.asarray(y_hat_test)


def mlp_bench(x_train, y_train, x_test, fh):
    """
    Forecasts using a simple MLP which 6 nodes in the hidden layer

    :param x_train: train input data
    :param y_train: target values for training
    :param x_test: test data
    :param fh: forecasting horizon
    :return:
    """
    y_hat_test = []

    model = MLPRegressor(hidden_layer_sizes=6, activation='identity', solver='adam',
                         max_iter=100, learning_rate='adaptive', learning_rate_init=0.001,
                         random_state=42)
    model.fit(x_train, y_train)

    last_prediction = model.predict(x_test)[0]
    for i in range(0, fh):
        y_hat_test.append(last_prediction)
        x_test[0] = np.roll(x_test[0], -1)
        x_test[0, (len(x_test[0]) - 1)] = last_prediction
        last_prediction = model.predict(x_test)[0]

    return np.asarray(y_hat_test)


def smape(a, b):
    """
    Calculates sMAPE

    :param a: actual values
    :param b: predicted values
    :return: sMAPE
    """
    a = np.reshape(a, (-1,))
    b = np.reshape(b, (-1,))
    return np.mean(2.0 * np.abs(a - b) / (np.abs(a) + np.abs(b))).item()


def mase(insample, y_test, y_hat_test, freq):
    """
    Calculates MAsE

    :param insample: insample data
    :param y_test: out of sample target values
    :param y_hat_test: predicted values
    :param freq: data frequency
    :return:
    """
    y_hat_naive = []
    for i in range(freq, len(insample)):
        y_hat_naive.append(insample[(i - freq)])

    masep = np.mean(abs(insample[freq:] - y_hat_naive))

    return np.mean(abs(y_test - y_hat_test)) / masep




In [62]:
def main(data_all,fh,freq,j):
    #fh = 6         # forecasting horizon
    #freq = 1       # data frequency
    in_size = 3    # number of points used as input for each forecast

    
    err_MLP_sMAPE = []
    err_MLP_MASE = []
    err_RNN_sMAPE = []
    err_RNN_MASE = []
    
    columnsname= ["sMape MLP","sMape RNN","Mase MLP","Mase RNN"]
    ds = pd.DataFrame(columns=columnsname )
    
    if j==0:
        print( "csv yearly")
        ds.to_csv('out_yearly.csv')
    if j==1:
        print( "csvquter")
        ds.to_csv('out_quarterly.csv')
    if j==2:
        ds.to_csv('out_monthly.csv')
    if j==3:
        ds.to_csv('out_weekly.csv')
    if j==4:
        ds.to_csv('out_yearly.csv')
    if j==5:
        ds.to_csv('out_yearly.csv')
        

    # ===== In this example we produce forecasts for 100 randomly generated timeseries =====
    
    
    #df_yearly = pd.read_csv("../data/Yearly-train.csv", skiprows=0, index_col =0)
    #data_all = df_yearly.T
    #data_all = np.array(np.random.random_integers(0, 100, (100, 20)), dtype=np.float32)
    #for i in range(0, 100):
        #for j in range(0, 20):
            #data_all[i, j] = j * 10 + data_all[i, j]
    
    counter = 0
    # ===== Main loop which goes through all timeseries =====
    for j in range(len(data_all)):
        ts = data_all.iloc[j, :]
        ts = remov_nan(ts)

        # remove seasonality
        seasonality_in = deseasonalize(ts, freq)

        for i in range(0, len(ts)):
            ts[i] = ts[i] * 100 / seasonality_in[i % freq]

        # detrending
        a, b = detrend(ts)

        for i in range(0, len(ts)):
            ts[i] = ts[i] - ((a * i) + b)

        x_train, y_train, x_test, y_test = split_into_train_test(ts, in_size, fh)

        # RNN benchmark - Produce forecasts
        y_hat_test_RNN = np.reshape(rnn_bench(x_train, y_train, x_test, fh, in_size), (-1))

        # MLP benchmark - Produce forecasts
        y_hat_test_MLP = mlp_bench(x_train, y_train, x_test, fh)
        for i in range(0, 29):
            y_hat_test_MLP = np.vstack((y_hat_test_MLP, mlp_bench(x_train, y_train, x_test, fh)))
        y_hat_test_MLP = np.median(y_hat_test_MLP, axis=0)

        # add trend
        for i in range(0, len(ts)):
            ts[i] = ts[i] + ((a * i) + b)

        for i in range(0, fh):
            y_hat_test_MLP[i] = y_hat_test_MLP[i] + ((a * (len(ts) + i + 1)) + b)
            y_hat_test_RNN[i] = y_hat_test_RNN[i] + ((a * (len(ts) + i + 1)) + b)

        # add seasonality
        for i in range(0, len(ts)):
            ts[i] = ts[i] * seasonality_in[i % freq] / 100

        for i in range(len(ts), len(ts) + fh):
            y_hat_test_MLP[i - len(ts)] = y_hat_test_MLP[i - len(ts)] * seasonality_in[i % freq] / 100
            y_hat_test_RNN[i - len(ts)] = y_hat_test_RNN[i - len(ts)] * seasonality_in[i % freq] / 100

        # check if negative or extreme
        for i in range(len(y_hat_test_MLP)):
            if y_hat_test_MLP[i] < 0:
                y_hat_test_MLP[i] = 0
            if y_hat_test_RNN[i] < 0:
                y_hat_test_RNN[i] = 0
                
            if y_hat_test_MLP[i] > (1000 * max(ts)):
                y_hat_test_MLP[i] = max(ts)         
            if y_hat_test_RNN[i] > (1000 * max(ts)):
                y_hat_test_RNN[i] = max(ts)

        x_train, y_train, x_test, y_test = split_into_train_test(ts, in_size, fh)

        # Calculate errors
        err_MLP_sMAPE.append(smape(y_test, y_hat_test_MLP))
        err_RNN_sMAPE.append(smape(y_test, y_hat_test_RNN))
        err_MLP_MASE.append(mase(ts[:-fh], y_test, y_hat_test_MLP, freq))
        err_RNN_MASE.append(mase(ts[:-fh], y_test, y_hat_test_RNN, freq))

        # memory handling
        ker.clear_session()
        tf.reset_default_graph()
        gc.collect()

        counter = counter + 1
        #**********************************************************************
        print("-------------TS ID: ", counter, "-------------")
        print(" sMAPE_MLP:",err_MLP_sMAPE[-1], " sMAPE_RNN:",err_RNN_sMAPE[-1]," MASE_MLP:",err_MLP_MASE[-1]," MASE_RNN:",err_RNN_MASE[-1])
        
        p =[err_MLP_sMAPE[-1],err_RNN_sMAPE[-1],err_MLP_MASE[-1],err_RNN_MASE[-1]]
        ds.loc[i] = p        
        ds=ds.round(4)
        if j==0:
            ds.to_csv('out_yearly.csv', mode='a', header=False)
        if j==1:
            ds.to_csv('out_quarterly.csv', mode='a', header=False)
        if j==2:
            ds.to_csv('out_monthly.csv', mode='a', header=False)
        if j==3:
            ds.to_csv('out_weekly.csv', mode='a', header=False)
        if j==4:
            ds.to_csv('out_daily.csv', mode='a', header=False)
        if j==5:
            ds.to_csv('out_hourly.csv', mode='a', header=False)
        #********************************************************************    
    print("\n\n---------FINAL RESULTS---------")
    print("=============sMAPE=============\n")
    print("#### MLP ####\n", np.mean(err_MLP_sMAPE), "\n")
    print("#### RNN ####\n", np.mean(err_RNN_sMAPE), "\n")
    print("==============MASE=============")
    print("#### MLP ####\n", np.mean(err_MLP_MASE), "\n")
    print("#### RNN ####\n", np.mean(err_RNN_MASE), "\n")
    return np.mean(err_MLP_sMAPE),np.mean(err_RNN_sMAPE),np.mean(err_MLP_MASE),np.mean(err_RNN_MASE)



In [91]:
def main_all():
    
    print("### Load of Dataset  ###")
    df_yearly = pd.read_csv("Yearly-train-20-sample.csv", skiprows=0, index_col =0)
    df_quaterly = pd.read_csv("Quaterly-train-20-sample.csv", skiprows=0, index_col =0)
    df_monthly = pd.read_csv("Monthly-train-20-sample.csv", skiprows=0, index_col =0)
    df_weekly = pd.read_csv("Weekly-train-20-sample.csv", skiprows=0, index_col =0)
    df_daily = pd.read_csv("Daily-train-20-sample.csv", skiprows=0, index_col =0)
    df_hourly = pd.read_csv("Hourly-train-20-sample.csv", skiprows=0, index_col =0)
 
    
    D=[]
    D.append(df_yearly)
    D.append(df_quaterly)
    D.append(df_monthly)
    D.append(df_weekly)
    D.append(df_daily)
    D.append(df_hourly)
    
    columnsname= ["Data_Type","sMape MLP","sMape RNN","Mase MLP","Mase RNN"]
    ds = pd.DataFrame(columns=columnsname )
    ds.to_csv('outpoutM4.csv')
    print(ds.shape)
    
    for i in range (len(D)):
        if i==0:
            print( "*** Beginn of yearly dataset ***")
            a,b,c,d = main(df_yearly,6,1,i)
            p= ["Yearly_data",a,b,c,d]
            ds.iloc[:,i]= p
            ds=ds.round(4)
            ds.to_csv('outpoutM4.csv', mode='a', header=False)
        if i==1:
            print( "*** Beginn of Quarterly dataset ***")
            a,b,c,d = main(df_quaterly,8,4,i)
            p= ["Quarterly_data",a,b,c,d]
            ds.iloc[:,i]= p
            ds=ds.round(4)
            ds.to_csv('outpoutM4.csv', mode='a', header=False)
        if i==2:
            print( "*** Beginn of Monthly dataset ***")
            a,b,c,d = main(df_monthly,18,12,i)
            p= ["Monthly_data",a,b,c,d]
            ds.iloc[:,i]= p
            ds=ds.round(4)
            ds.to_csv('outpoutM4.csv', mode='a', header=False)
        if i==3:
            print( "*** Beginn of Weekly dataset ***")
            a,b,c,d = main(df_weekly,13,1,i)
            p= ["Weekly_data",a,b,c,d]
            ds.iloc[:,i]= p
            ds=ds.round(4)
            ds.to_csv('outpoutM4.csv', mode='a', header=False)
        if i==4:
            print( "*** Beginn of Daily dataset ***")
            a,b,c,d = main(df_daily,14,1,i)
            p= ["Daily_data",a,b,c,d]
            ds.iloc[:,i]= p
            ds=ds.round(4)
            ds.to_csv('outpoutM4.csv', mode='a', header=False)
        if i==5:
            print( "*** Beginn of Hourly dataset ***")
            a,b,c,d = main(df_hourly,48,24,i)
            p= ["Hourly_data",a,b,c,d]
            ds.iloc[:,i]= p
            ds=ds.round(4)
            ds.to_csv('outputM4.csv', mode='a', header=False)
    print("Done")

In [None]:

main_all()

### Load of Dataset  ###
(0, 5)
*** Beginn of yearly dataset ***
csv yearly


	Series.rolling(window=1,center=True).mean()
  return getattr(obj, method)(*args, **kwds)


-------------TS ID:  1 -------------
 sMAPE_MLP: 0.3584734932245481  sMAPE_RNN: 0.34604233338508955  MASE_MLP: 2.680517724453939  MASE_RNN: 2.5889679764725253


	Series.rolling(window=1,center=True).mean()
	Series.rolling(window=2,center=True).mean()


-------------TS ID:  2 -------------
 sMAPE_MLP: 0.030585986477033176  sMAPE_RNN: 0.0692723850250385  MASE_MLP: 1.2482914646629053  MASE_RNN: 2.851249868660566
-------------TS ID:  3 -------------
 sMAPE_MLP: 0.12998418798721378  sMAPE_RNN: 0.13065152157423054  MASE_MLP: 3.1001503090768145  MASE_RNN: 3.117402232776988
-------------TS ID:  4 -------------
 sMAPE_MLP: 0.3783366435174133  sMAPE_RNN: 0.47314249531299035  MASE_MLP: 1.2971383824349823  MASE_RNN: 1.4315969078463675
-------------TS ID:  5 -------------
 sMAPE_MLP: 0.13433316870620335  sMAPE_RNN: 0.1391794917602344  MASE_MLP: 4.064270428560452  MASE_RNN: 4.220181049200242
-------------TS ID:  6 -------------
 sMAPE_MLP: 0.14085679310652263  sMAPE_RNN: 0.15177554142452535  MASE_MLP: 3.1093496676625803  MASE_RNN: 3.3664876502774352
-------------TS ID:  7 -------------
 sMAPE_MLP: 0.07103948454418395  sMAPE_RNN: 0.07569926830360359  MASE_MLP: 0.5425244448022446  MASE_RNN: 0.5780039552359858
-------------TS ID:  8 -------------
 sM

	Series.rolling(window=4,center=True).mean()


-------------TS ID:  1 -------------
 sMAPE_MLP: 0.43272042873078564  sMAPE_RNN: 0.3731750111193118  MASE_MLP: 1.8431797523115816  MASE_RNN: 1.5607421747590629
-------------TS ID:  2 -------------
 sMAPE_MLP: 0.04161283249651398  sMAPE_RNN: 0.04416788385290991  MASE_MLP: 1.0643176148149038  MASE_RNN: 1.1307119126562701
-------------TS ID:  3 -------------
 sMAPE_MLP: 0.029138697139699295  sMAPE_RNN: 0.028589863922241956  MASE_MLP: 0.6674491967647515  MASE_RNN: 0.6542995371550665
-------------TS ID:  4 -------------
 sMAPE_MLP: 0.07620626221998061  sMAPE_RNN: 0.07420895096751665  MASE_MLP: 2.604341216885588  MASE_RNN: 2.5383658003637177
-------------TS ID:  5 -------------
 sMAPE_MLP: 0.41567144792321903  sMAPE_RNN: 0.271072622861239  MASE_MLP: 1.0547171828965671  MASE_RNN: 0.6393189295263043


	Series.rolling(window=4,center=True).mean()


-------------TS ID:  6 -------------
 sMAPE_MLP: 0.09828995184944048  sMAPE_RNN: 0.11454581638029215  MASE_MLP: 1.918959719793446  MASE_RNN: 2.255342981349508
-------------TS ID:  7 -------------
 sMAPE_MLP: 0.04011883875549932  sMAPE_RNN: 0.05388042501942721  MASE_MLP: 1.4502404277347836  MASE_RNN: 2.0360953406950015
-------------TS ID:  8 -------------
 sMAPE_MLP: 0.21942108927918522  sMAPE_RNN: 0.20507100694955133  MASE_MLP: 1.1822624975999547  MASE_RNN: 1.0943500312581897
-------------TS ID:  9 -------------
 sMAPE_MLP: 0.07663798712893333  sMAPE_RNN: 0.055196854988789394  MASE_MLP: 3.4818066176736964  MASE_RNN: 2.5462502017161888
-------------TS ID:  10 -------------
 sMAPE_MLP: 0.08764794972896894  sMAPE_RNN: 0.11561908522931295  MASE_MLP: 0.05059369611883428  MASE_RNN: 0.0659406846673696
-------------TS ID:  11 -------------
 sMAPE_MLP: 0.0658972609979439  sMAPE_RNN: 0.07352690474300785  MASE_MLP: 0.7911585675387736  MASE_RNN: 0.88515314857998
-------------TS ID:  12 -----------

	Series.rolling(window=12,center=True).mean()


-------------TS ID:  1 -------------
 sMAPE_MLP: 0.1222023519496226  sMAPE_RNN: 0.022324081037367376  MASE_MLP: 2.2457725152535035  MASE_RNN: 0.43572722115552404
-------------TS ID:  2 -------------
 sMAPE_MLP: 0.20653082514327367  sMAPE_RNN: 0.20199249198549188  MASE_MLP: 0.9067427515370637  MASE_RNN: 0.8853950111497115
-------------TS ID:  3 -------------
 sMAPE_MLP: 0.12340138064088323  sMAPE_RNN: 0.053756827224342024  MASE_MLP: 3.8732216720335546  MASE_RNN: 1.6335572011066883
-------------TS ID:  4 -------------
 sMAPE_MLP: 0.1271520069616712  sMAPE_RNN: 0.1300946991244107  MASE_MLP: 1.5801414789024246  MASE_RNN: 1.6176461538051166
-------------TS ID:  5 -------------
 sMAPE_MLP: 0.06968242214192484  sMAPE_RNN: 0.06319577918212424  MASE_MLP: 0.9920981218513315  MASE_RNN: 0.9040777551140312
-------------TS ID:  6 -------------
 sMAPE_MLP: 0.08293724697249488  sMAPE_RNN: 0.09410571831245636  MASE_MLP: 1.0988610035468906  MASE_RNN: 1.2532204245852283
-------------TS ID:  7 -----------

	Series.rolling(window=12,center=True).mean()


-------------TS ID:  16 -------------
 sMAPE_MLP: 0.6553015732459506  sMAPE_RNN: 0.6897563298692648  MASE_MLP: 5.809239024939545  MASE_RNN: 2.5894318829798593
-------------TS ID:  17 -------------
 sMAPE_MLP: 0.04520069406687073  sMAPE_RNN: 0.043658267862219644  MASE_MLP: 1.0417416524349636  MASE_RNN: 1.006989520170323
-------------TS ID:  18 -------------
 sMAPE_MLP: 0.021682149685438263  sMAPE_RNN: 0.02128331346888869  MASE_MLP: 2.040224152041268  MASE_RNN: 2.0079256112450565
-------------TS ID:  19 -------------
 sMAPE_MLP: 0.04849577034148117  sMAPE_RNN: 0.013158083995194397  MASE_MLP: 2.7599101389660285  MASE_RNN: 0.727858895316883
-------------TS ID:  20 -------------
 sMAPE_MLP: 0.03268029997590299  sMAPE_RNN: 0.012614943696302337  MASE_MLP: 1.8938932553129093  MASE_RNN: 0.7240218065899355


---------FINAL RESULTS---------

#### MLP ####
 0.14699267077543907 

#### RNN ####
 0.13399612975238795 

#### MLP ####
 2.004132024837385 

#### RNN ####
 1.2319844833538438 

*** Beginn o