In [None]:
import random
import numpy as np
import tensorflow as tf

# Seed every random number generator used below with the same value.
# This runs before the Keras imports that follow in this cell.
seed_value= 1111
# Python's built-in `random` module.
random.seed(seed_value)
# NumPy's global random state.
np.random.seed(seed_value)
# TensorFlow seed, set through the TF1-style `tf.set_random_seed` call.
tf.set_random_seed(seed_value)

import warnings
import sys
import os
import pandas as pd
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
# from sklearn.metrics import mean_absolute_percentage_error
from keras import backend as K
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.models import Sequential,Model
from keras.layers import Input,LSTM, Dense, Flatten, Conv1D, Lambda, Reshape, RepeatVector
from keras.layers.merge import concatenate, multiply,add
from keras import regularizers
from keras.initializers import glorot_uniform
from tqdm import tqdm
from keras import regularizers
from keras.models import load_model
import datetime
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from copy import deepcopy
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
import random as python_random

In [None]:
# utility function to make input window for training and test sets

def make_input(data,window_size,horizon=1):
    """Cut a 1-D series into overlapping (input window, target) pairs.

    Sample ``k`` uses ``data[k:k+window_size]`` as input and the ``horizon``
    values that immediately follow it as target; consecutive samples are
    shifted by one step.

    Parameters
    ----------
    data : 1-D array
        The series to slice.
    window_size : int
        Number of past values in each input window.
    horizon : int, default 1
        Number of future values in each target.

    Returns
    -------
    (windows, targets) : tuple of float arrays
        Shapes ``(n, window_size)`` and ``(n, horizon)`` with
        ``n = len(data) - window_size - horizon + 1``.
    """
    n_samples = data.shape[0] - window_size - horizon + 1
    targets = np.zeros([n_samples, horizon])
    windows = np.zeros([n_samples, window_size])
    for start in range(n_samples):
        split = start + window_size          # first index of the target
        windows[start] = data[start:split]
        targets[start] = data[split:split + horizon]
    return windows, targets

def make_k_input(data,horizon):
    """Cut a 1-D series into overlapping windows of length ``horizon``.

    Row ``k`` of the result is ``data[k:k+horizon]``; consecutive rows are
    shifted by one step.

    The loop now runs over every allocated row. It previously stopped
    ``window_size`` rows early (a bound copied from ``make_input`` that read
    a global this function does not receive), leaving the tail all-zero.

    Parameters
    ----------
    data : 1-D array
        The series to slice.
    horizon : int
        Length of each window.

    Returns
    -------
    float array of shape ``(len(data) - horizon + 1, horizon)``.
    """
    n_windows = data.shape[0] - horizon + 1
    output = np.zeros([n_windows, horizon])
    for i in range(n_windows):
        output[i, :] = data[i:i + horizon]
    return output

def nonov_make_input(data,window_size,horizon=1):
    """Cut a 1-D series into (input window, target) pairs with non-overlapping targets.

    Sample ``k`` starts at index ``k*horizon``: its input is the next
    ``window_size`` values and its target the ``horizon`` values after that,
    so successive targets tile the series without overlap. The series is
    padded with zeros at the end so the last target is always full length.

    Parameters
    ----------
    data : 1-D array
        The series to slice.
    window_size : int
        Number of values in each input window.
    horizon : int, default 1
        Number of values in each target (and the stride between samples).

    Returns
    -------
    (windows, targets) : tuple of float arrays
        Shapes ``(n, window_size)`` and ``(n, horizon)`` with
        ``n = ceil((len(data) - window_size) / horizon)``.
    """
    usable = data.shape[0] - window_size
    full_blocks, remainder = divmod(usable, horizon)

    # zero padding; a full extra block is appended when nothing is left over
    padded = np.append(data, np.zeros([horizon - remainder]))

    n_rows = full_blocks if remainder == 0 else full_blocks + 1

    windows = np.zeros([n_rows, window_size])
    targets = np.zeros([n_rows, horizon])
    for row, start in enumerate(range(0, n_rows * horizon, horizon)):
        split = start + window_size          # first index of the target
        windows[row] = padded[start:split]
        targets[row] = padded[split:split + horizon]

    return windows, targets

def nonov_make_k_input(data,horizon):
    """Split a 1-D series into consecutive, non-overlapping rows of length ``horizon``.

    When the length is not a multiple of ``horizon`` the final row is
    completed by repeating the last value of the series.

    Parameters
    ----------
    data : 1-D array
        The series to split (must be non-empty).
    horizon : int
        Length of each row.

    Returns
    -------
    float array of shape ``(ceil(len(data) / horizon), horizon)``.
    """
    n_points = data.shape[0]
    full_blocks, remainder = divmod(n_points, horizon)

    # pad with copies of the last value (surplus padding is simply not used)
    padded = np.append(data, np.repeat(data[-1], horizon - remainder))

    n_rows = full_blocks + (1 if remainder else 0)
    blocks = np.zeros([n_rows, horizon])
    # rows are back-to-back slices of the padded series, i.e. a plain reshape
    blocks[:] = padded[:n_rows * horizon].reshape(n_rows, horizon)
    return blocks


def metrics(pred,gt):
    """Aggregate per-column errors between two 2-D arrays.

    For every column the mean squared error and mean absolute error are
    computed with the sklearn helpers imported as ``mse`` and ``mae``.

    Parameters
    ----------
    pred, gt : 2-D arrays with the same shape
        Compared column by column.

    Returns
    -------
    (rmse, mae) : tuple of floats
        Square root of the mean of the per-column MSEs, and the mean of the
        per-column MAEs.
    """
    columns = range(pred.shape[1])
    col_mse = np.array([mse(pred[:, c], gt[:, c]) for c in columns])
    col_mae = np.array([mae(pred[:, c], gt[:, c]) for c in columns])
    return np.sqrt(np.mean(col_mse)), np.mean(col_mae)

In [None]:
#------------------variables to change according to dataset-------------------------

# Dataset selector. The initialisation cell below has explicit branches for
# "traffic" and "nasdaq"; any other value falls into its final `else` branch.
dataset = "traffic" # can be chosen from ["traffic","nasdaq","energy"]

# Forecast horizon (number of future steps predicted per sample).
# For traffic, horizon can be chosen from [3,6,9]; for the other datasets it can be chosen from [3,6,12].
horizon = 9

# CSV holding the knowledge predictions for the chosen dataset and horizon.
knowledge_pred_path ='knowledge_preds/'+dataset+'/horizon_'+str(horizon)+'/t_preds.csv' # according to directory containing knowledge predictions
# Path of the raw data file WITHOUT extension; '.csv' or '.txt' is appended when the file is read.
data_path = '/data/'+dataset # according to path containing data 


In [None]:
#----------------------variables initialization according to dataset---------------------

# Per-dataset settings:
#   window_size : number of past values fed to the model per sample
#   data        : raw series, one series per column
#   n_val       : offset from the END of each series where the validation part starts
#   n_test      : offset from the END of each series where the test part starts
if dataset=="traffic":
    window_size = 12
    data = np.asarray(pd.read_csv(data_path+'.csv',header=None))
    n_val, n_test = 2880, 1440
elif dataset=="nasdaq":
    window_size = 180
    data = np.asarray(pd.read_csv(data_path+'.csv'))
    n_val, n_test = 4056, 2028
else:
    window_size = 144
    data = np.asarray(pd.read_csv(data_path+'.txt',header=None))
    n_val, n_test = 3947, 1973

# Everything below is identical for all datasets.
knowledge_preds = np.asarray(pd.read_csv(knowledge_pred_path,header=None))

data_length = data.shape[1]   # number of series (columns)
t_size = data.shape[0]        # number of time steps (rows)

# Test-set forecasts, one column per series; filled by the evaluation cell.
output = np.zeros((n_test,data_length))

# Single all-zero seed rows that the preprocessing cell appends to and later
# strips. As before, the names in each group refer to the same array object.
final_in_train = final_in_val = final_in_test = np.zeros([1,window_size,1])
final_lbl_train = final_lbl_val = final_lbl_test = np.zeros([1,horizon])
final_p_train = final_p_val = final_p_test = final_lbl_train



In [None]:
#--------------Data preprocessing and preparation--------------------------
#
# Builds, for every series (column of `data`), the model inputs, labels and
# knowledge-prediction windows of the train / validation / test splits and
# stacks them into the `final_*` arrays.
#
# Changes with respect to the previous version of this cell:
#   * validation and test windows now get the same zero-denominator guard as
#     training windows (an all-zero window used to produce 0/0 = NaN);
#   * the cell builds its results from scratch, so it can be re-run without
#     first re-running the initialisation cell;
#   * pieces are collected in lists and concatenated once instead of calling
#     np.append on ever-growing arrays inside the loop.


def _minmax_scale_rows(x, y=None, zero_fill=None):
    """Scale each row of ``x`` with that row's own minimum and maximum.

    Each row is mapped to ``(row - min) / denom`` where ``denom`` is
    ``max - min``; for a constant row ``denom`` falls back to ``max`` and,
    if that is zero as well, to 1.

    Parameters
    ----------
    x : 2-D array
        Rows to scale.
    y : 2-D array or None
        Optional companion rows (same number of rows as ``x``) scaled with
        the statistics of the matching row of ``x``.
    zero_fill : float or None
        If given, every row of the scaled ``x`` whose ``max - min`` and
        ``max`` were both zero is overwritten with this value.

    Returns
    -------
    (x_scaled, y_scaled) : ``y_scaled`` is None when ``y`` is None.
    """
    row_min = x.min(1).reshape(x.shape[0], 1)
    row_max = x.max(1).reshape(x.shape[0], 1)
    denom = row_max - row_min
    constant = denom == 0
    denom[constant] = row_max[constant]   # constant row: divide by its value
    all_zero = denom == 0                 # constant row whose value is 0
    denom[all_zero] = 1                   # avoid 0/0

    x_scaled = (x - row_min) / denom
    if zero_fill is not None:
        x_scaled[all_zero[:, 0]] = zero_fill
    y_scaled = None if y is None else (y - row_min) / denom
    return x_scaled, y_scaled


# The traffic series are used as they are; all other datasets are scaled
# window by window.
scale_windows = dataset != "traffic"

splits = ("train", "val", "test")
# Each list starts with an empty array of the right shape so the final
# concatenation also works when there are no series at all.
in_parts = {s: [np.zeros([0, window_size, 1])] for s in splits}
lbl_parts = {s: [np.zeros([0, horizon])] for s in splits}
p_parts = {s: [np.zeros([0, horizon])] for s in splits}

for i in tqdm(range(data_length)):
    current_row = data[:, i]

    # Validation and test slices start `window_size` steps early so that
    # their first target has a full input window in front of it.
    sequences = {
        "train": make_input(current_row[:-n_val], window_size, horizon),
        "val": make_input(current_row[-(n_val+window_size):-n_test], window_size, horizon),
        "test": nonov_make_input(current_row[-(n_test+window_size):], window_size, horizon),
    }

    if dataset == "traffic":
        series_p = knowledge_preds[:, i]
        series_pred = series_p[:-n_test]
        train_p = series_pred[:-n_val]
        # NOTE(review): this slice is n_val long, whereas the other datasets
        # use [-n_val:-n_test] (n_val - n_test long). Kept unchanged; confirm
        # against the layout of the traffic knowledge predictions.
        val_p = series_pred[-n_val:]
        test_p = series_p[-n_test:]
    else:
        current_pred = knowledge_preds[:(t_size-window_size), i]
        train_p = current_pred[:-n_val]
        val_p = current_pred[-n_val:-n_test]
        test_p = current_pred[-n_test:]

    k_windows = {
        "train": make_k_input(train_p, horizon),
        "val": make_k_input(val_p, horizon),
        "test": nonov_make_k_input(test_p, horizon),
    }

    for s in splits:
        x, y = sequences[s]
        p = k_windows[s]
        if scale_windows:
            # labels share the statistics of their own input window
            x, y = _minmax_scale_rows(x, y)
            # knowledge windows are scaled on their own; all-zero ones become 0.5
            p = _minmax_scale_rows(p, zero_fill=0.5)[0]
        in_parts[s].append(x.reshape(x.shape[0], x.shape[1], 1))
        lbl_parts[s].append(y)
        p_parts[s].append(p)

final_in_train = np.concatenate(in_parts["train"], axis=0)
final_in_val = np.concatenate(in_parts["val"], axis=0)
final_in_test = np.concatenate(in_parts["test"], axis=0)
final_lbl_train = np.concatenate(lbl_parts["train"], axis=0)
final_lbl_val = np.concatenate(lbl_parts["val"], axis=0)
final_lbl_test = np.concatenate(lbl_parts["test"], axis=0)
final_p_train = np.concatenate(p_parts["train"], axis=0)
final_p_val = np.concatenate(p_parts["val"], axis=0)
final_p_test = np.concatenate(p_parts["test"], axis=0)

In [None]:
# Reset the default TensorFlow graph and the Keras session before loading the saved models.
tf.reset_default_graph()
K.clear_session()  
# Encoder applied to the knowledge-prediction windows; loaded with compile=False
# (in the cells shown here it is only used through predict()).
encoder=load_model('/DeepLSF_models/'+dataset+'/horizon_'+str(horizon)+'/encoder_h'+str(horizon)+'.h5',compile=False)
# Forecasting model saved for the selected dataset and horizon.
model=load_model('/DeepLSF_models/'+dataset+'/horizon_'+str(horizon)+'/'+str(horizon)+'.h5')

In [None]:
# Print the summary of the loaded forecasting model.
model.summary()

In [None]:
#---------------------test set evaluation---------------------
#
# For every series: build the non-overlapping test windows, encode the
# matching knowledge predictions, run the model and write the forecasts, in
# the scale of the raw data, into column i of `output`.
#
# Changes with respect to the previous version of this cell:
#   * forecasts are mapped back with the SAME denominator that scaled the
#     inputs (it used max - min, which is 0 for a constant window although
#     such windows are scaled by their value);
#   * all-zero input windows no longer cause a division by zero;
#   * both branches truncate the flattened forecast to n_test values.

for i in range(data_length):

    test = data[-(n_test+window_size):, i]
    # temp_test_y holds the matching targets; it is not used further in this cell.
    temp_test_x, temp_test_y = nonov_make_input(test, window_size, horizon)

    test_p = knowledge_preds[-n_test:, i]
    test_k_pred = nonov_make_k_input(test_p, horizon)

    if dataset=="traffic":
        # traffic data is fed to the model unscaled
        temp_test_x = temp_test_x.reshape(temp_test_x.shape[0], temp_test_x.shape[1], 1)
        temp_auto_test_p = np.array(encoder.predict(test_k_pred))

        prediction = model.predict({'input_data':temp_test_x, 'input_pred':temp_auto_test_p})

    else:
        # --- scale every input window with its own min / max ---
        min_in_test = temp_test_x.min(1).reshape(temp_test_x.shape[0],1)
        max_in_test = temp_test_x.max(1).reshape(temp_test_x.shape[0],1)
        denom_test = (max_in_test-min_in_test)
        constant = denom_test == 0
        denom_test[constant] = max_in_test[constant]   # constant window: divide by its value
        denom_test[denom_test == 0] = 1                # all-zero window: avoid 0/0
        temp_test_x = (temp_test_x-min_in_test)/denom_test
        temp_test_x = temp_test_x.reshape(temp_test_x.shape[0],temp_test_x.shape[1],1)
        temp_test_y = (temp_test_y-min_in_test)/denom_test

        # --- scale every knowledge-prediction window the same way ---
        temp_test_p_x = test_k_pred
        min_in = temp_test_p_x.min(1).reshape(temp_test_p_x.shape[0],1)
        max_in = temp_test_p_x.max(1).reshape(temp_test_p_x.shape[0],1)
        denom = (max_in-min_in)
        constant = denom == 0
        denom[constant] = max_in[constant]
        all_zero = denom == 0
        denom[all_zero] = 1
        temp_test_p_x = (temp_test_p_x-min_in)/denom
        temp_test_p_x[all_zero[:, 0]] = 0.5            # all-zero windows are set to 0.5

        temp_auto_test_p = np.array(encoder.predict(temp_test_p_x))

        pred = model.predict({'input_data':temp_test_x, 'input_pred':temp_auto_test_p})
        # Undo the input scaling row by row.
        # assumes pred has one row per window, (n_windows, horizon) — TODO confirm
        prediction = pred*denom_test+min_in_test

    # The last window may extend past the series end; keep the first n_test values.
    output[:,i] = prediction.flatten()[:n_test]

In [None]:
# Compare the test forecasts with the last n_test rows of the raw data.
# `metrics` returns (sqrt of the mean per-column MSE, mean per-column MAE).
temp1, temp2= metrics(output,data[-n_test:,:])
# Display both scores as the cell output.
[temp1,temp2]