In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM, GRU
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import time

In [None]:
# IDs of the users whose prepared per-user files are processed by the
# experiment cells. The order is significant: it is the processing order.
lst_users = [
    25461, 24124, 24030, 24198, 15482, 15171, 23881, 24235, 23743,
    25169, 23875, 15441, 24007, 25569, 23754, 24192, 24041, 15530,
    24109, 23768, 23883, 23921, 23769, 23889, 15110, 15373, 18001,
    15242, 24224, 23917, 25208, 24243, 25557, 15273, 23770, 23884,
    24350, 23926, 23893, 15044, 23961, 24227, 23935, 23737, 23906,
    23982, 24121, 23909, 17984, 25498, 23738, 23985, 15170, 14995,
    15115, 15510, 24127, 25175, 25170, 15419, 24324, 24197, 25163,
    23955, 23740, 24348, 15205, 24343, 23837, 24206, 24079, 23872,
    23871, 15208, 23756, 15729, 23829, 25502, 15280, 24196, 23914,
    24470, 25235, 15270, 25172, 23745, 23899, 15304, 24203, 23984,
    15335, 15448, 15052, 15159, 24467, 24225, 15405, 25174, 25171,
    24213, 23822, 23876, 24232, 25197, 23988, 14981, 23753, 25180,
    24039, 23800, 24204, 24248, 23913, 25250, 25178, 24212, 23833,
    15024, 25167, 15382, 10599, 24125, 23995, 15187, 25201, 25227,
    23900, 25203, 24223, 23953, 15209, 25275, 23981, 25176, 15462,
    24120, 23924, 15173, 23936, 24349, 23915, 15363, 15277, 24226,
    23878, 15258, 24081, 24207, 23919, 15445, 25189, 15484, 24612,
    24249, 15444, 25207, 23767, 24033, 23898, 15588, 24071, 24122,
    24087, 25278, 25547, 24346, 24199, 25504, 25164, 24040, 23785,
    24185, 23787, 25522, 24461, 15051, 24193, 24167, 15151, 23867,
    24246, 15031, 15241, 24083, 23897, 23983, 24195, 23778,
]

# Hyper-parameter grid: the experiment cells train one model for every
# combination of the three lists below.
lookback_ranges = [5, 10, 20]  # window length handed to create_dataset
neurons = [64, 128, 256]       # units of the recurrent layer
batch_sizes = [64, 128, 256]   # batch size passed to model.fit


In [None]:
def splits(dataset, block_size=256, train_frac=0.8, val_frac=0.9):
    """Return the row indices at which the training and validation parts end.

    The dataset length is expressed in blocks of ``block_size`` rows. Each
    cumulative fraction is applied to that block count, rounded to a whole
    number of blocks and converted back to a row index, so both boundaries
    are multiples of ``block_size``.

    Parameters
    ----------
    dataset : sized
        Anything supporting ``len()``; only its length is used.
    block_size : int, default 256
        Granularity of the boundaries, in rows.
    train_frac : float, default 0.8
        Cumulative fraction of the data that ends the training part.
    val_frac : float, default 0.9
        Cumulative fraction of the data that ends the validation part.

    Returns
    -------
    tuple of int
        ``(train_split, val_split)``, meant to be used as
        ``dataset[:train_split]``, ``dataset[train_split:val_split]`` and
        ``dataset[val_split:]``.

    Raises
    ------
    ValueError
        If ``block_size`` is smaller than 1 or the fractions are not ordered
        as ``0 <= train_frac <= val_frac``.

    Notes
    -----
    Rounding uses the built-in ``round`` (ties go to the even integer).
    For datasets only a few blocks long the rounding can push ``val_split``
    past ``len(dataset)`` or make two boundaries coincide, which leaves the
    validation and/or test slice empty.
    """
    if block_size < 1:
        raise ValueError("block_size must be at least 1, got {}".format(block_size))
    if not 0 <= train_frac <= val_frac:
        raise ValueError(
            "expected 0 <= train_frac <= val_frac, got {} and {}".format(train_frac, val_frac)
        )

    n_blocks = len(dataset) / block_size
    train_split = round(n_blocks * train_frac) * block_size
    val_split = round(n_blocks * val_frac) * block_size
    return train_split, val_split

In [None]:
def create_dataset(dataset, look_back=5):
    """Turn a 2-D table into sliding-window samples for a sequence model.

    The last column of ``dataset`` is the target, all other columns are
    features. Sample ``i`` consists of the feature rows
    ``i .. i + look_back - 1`` and the target value of row ``i + look_back``
    (the features of the target row itself are not part of the window).

    Every row that has ``look_back`` predecessors yields a sample, so a table
    with ``N`` rows produces ``N - look_back`` samples, the last one using the
    final row as its target.

    Parameters
    ----------
    dataset : array-like of shape (n_rows, n_features + 1)
        Input table; converted with ``np.asarray``.
    look_back : int, default 5
        Number of consecutive feature rows per sample.

    Returns
    -------
    dataX : np.ndarray of shape (n_samples, look_back, n_features)
    dataY : np.ndarray of shape (n_samples,)
        When the table has ``look_back`` rows or fewer, both arrays are empty
        but keep the shapes above with ``n_samples == 0``.

    Raises
    ------
    ValueError
        If ``dataset`` is not 2-D or ``look_back`` is smaller than 1.
    """
    dataset = np.asarray(dataset)
    if dataset.ndim != 2:
        raise ValueError("dataset must be 2-D, got {} dimension(s)".format(dataset.ndim))
    if look_back < 1:
        raise ValueError("look_back must be at least 1, got {}".format(look_back))

    n_samples = len(dataset) - look_back
    if n_samples <= 0:
        # Keep the rank and trailing dimensions so that callers reading
        # X.shape[1:] still get (look_back, n_features).
        n_features = max(dataset.shape[1] - 1, 0)
        return (np.empty((0, look_back, n_features), dtype=dataset.dtype),
                np.empty((0,), dtype=dataset.dtype))

    dataX = [dataset[start:start + look_back, :-1] for start in range(n_samples)]
    # Targets are the last-column values of rows look_back .. N-1.
    dataY = dataset[look_back:, -1]
    return np.array(dataX), np.array(dataY)

In [None]:
# RQ1 - LSTM sweep: for every user, train one LSTM per
# (look-back, neurons, batch size) combination, log its scores to a per-user
# text file and append one summary row per user to the results pickle.
results_path = 'RQ1_LSTM_docu/df_results_RQ1_LSTM'
checkpoint_path = 'RQ1_LSTM_best_model.h5'

# One results row = 5 bookkeeping columns + 5 metrics per grid point. The
# names are generated in the same nesting order in which the training loop
# below appends the corresponding values to `out`.
cols = ['user', 'user_length', 'train_length', 'val_length', 'test_length']
for lookback_range in lookback_ranges:
    for n in neurons:
        for batch_size in batch_sizes:
            grid_tag = 'l{}_n{}_b{}'.format(lookback_range, n, batch_size)
            cols += ['{}_{}'.format(metric, grid_tag)
                     for metric in ('time', 'trainRMSE', 'valRMSE', 'testRMSE', 'testMAE')]

# Users that already have a row in the results file are skipped, so an
# interrupted sweep can be resumed by re-running this cell. A missing file
# simply means that nothing has been processed yet.
try:
    finished_users = set(pd.read_pickle(results_path)['user'].tolist())
except FileNotFoundError:
    finished_users = set()

for user in lst_users:
    if user in finished_users:
        print('{} already in {} - skipped'.format(user, results_path))
        continue
    print(user)
    out = [user]

    # Load the user's table and drop its first column; the last remaining
    # column is the prediction target, everything before it is a feature.
    dataframe = pd.read_pickle('Users_prepped/All/PerUser/{}_final'.format(user))
    dataframe = dataframe[list(dataframe.columns)[1:]]
    dataset = dataframe.values.astype('float32')

    # Scale the feature columns to [0, 1]; the target column is left unscaled.
    # NOTE(review): the scaler is fitted on all rows, including those that are
    # assigned to validation/test below, so their min/max influence the
    # training inputs. Fit on dataset[:train_split] only if leakage-free
    # scores are required (existing results would then need to be re-run).
    scaler = MinMaxScaler(feature_range=(0, 1))
    arrX = scaler.fit_transform(dataset[:, :-1])
    arrY = dataset[:, -1].reshape(-1, 1)
    dataset = np.concatenate((arrX, arrY), axis=1)

    # Contiguous train / validation / test partitions, in row order.
    train_split, val_split = splits(dataset)
    data_train = dataset[:train_split]
    data_val = dataset[train_split:val_split]
    data_test = dataset[val_split:]

    out += [dataset.shape[0], data_train.shape[0], data_val.shape[0], data_test.shape[0]]

    for lookback_range in lookback_ranges:
        # Windows of `lookback_range` feature rows -> target of the next row.
        trainX, trainY = create_dataset(data_train, lookback_range)
        valX, valY = create_dataset(data_val, lookback_range)
        testX, testY = create_dataset(data_test, lookback_range)
        for n in neurons:
            for batch_size in batch_sizes:
                print('LookBack: {}, Neurons: {}, Batch_size:{}'.format(lookback_range, n, batch_size))

                # Many models are created in this loop; dropping Keras' global
                # state first keeps memory from growing with every model.
                tf.keras.backend.clear_session()

                model = Sequential()
                model.add(LSTM(n, input_shape=trainX.shape[1:], return_sequences=False, dropout=0.4))
                model.add(Dense(1))
                model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

                # NOTE(review): min_delta=100 is expressed in squared target
                # units (the loss is MSE on the unscaled target) - confirm it
                # matches the scale of the target.
                es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5, min_delta=100)
                mc = ModelCheckpoint(checkpoint_path, monitor='val_mse', mode='min', verbose=1, save_best_only=True)

                start_time = time.time()
                model.fit(trainX, trainY, epochs=100, batch_size=batch_size, verbose=1,
                          validation_data=(valX, valY), callbacks=[es, mc])
                training_time = time.time() - start_time

                # Score the checkpointed model (lowest val_mse), not the
                # weights of the last epoch.
                saved_model = load_model(checkpoint_path)

                trainPredict = saved_model.predict(trainX)
                valPredict = saved_model.predict(valX)
                testPredict = saved_model.predict(testX)

                trainScore = np.sqrt(mean_squared_error(trainY, trainPredict[:, 0]))
                valScore = np.sqrt(mean_squared_error(valY, valPredict[:, 0]))
                testScore = np.sqrt(mean_squared_error(testY, testPredict[:, 0]))
                testMAE = mean_absolute_error(testY, testPredict[:, 0])

                saved_model.save('RQ1_LSTM_models/RQ1_LSTM_{}_L{}_N{}_B{}'.format(user, lookback_range, n, batch_size))

                out += [round(training_time, 4), trainScore, valScore, testScore, testMAE]

                # Per-configuration log line; written immediately so partial
                # progress survives an interrupted run.
                with open('RQ1_LSTM_docu/RQ1_LSTM_{}.txt'.format(user), 'a') as docu_file:
                    docu_file.write(
                        "L: {}, N: {}, B:{}, time:{}, train_score: {}, val_score: {}, test_score: {}, test_mae: {} \n".format(
                            lookback_range, n, batch_size, training_time,
                            trainScore, valScore, testScore, testMAE))

    # Append this user's row to the results on disk; the file is created on
    # the first user and re-read every time so that it stays the single
    # source of truth between runs.
    user_row = pd.DataFrame([out], columns=cols)
    try:
        df_out = pd.concat([pd.read_pickle(results_path), user_row], ignore_index=True)
    except FileNotFoundError:
        df_out = user_row
    df_out.to_pickle(results_path)

In [None]:
# RQ1 - GRU sweep: for every user, train one GRU per
# (look-back, neurons, batch size) combination, log its scores to a per-user
# text file and append one summary row per user to the results pickle.
results_path = 'RQ1_GRU_docu/df_results_RQ1_GRU'
checkpoint_path = 'RQ1_GRU_best_model.h5'

# One results row = 5 bookkeeping columns + 5 metrics per grid point. The
# names are generated in the same nesting order in which the training loop
# below appends the corresponding values to `out`.
cols = ['user', 'user_length', 'train_length', 'val_length', 'test_length']
for lookback_range in lookback_ranges:
    for n in neurons:
        for batch_size in batch_sizes:
            grid_tag = 'l{}_n{}_b{}'.format(lookback_range, n, batch_size)
            cols += ['{}_{}'.format(metric, grid_tag)
                     for metric in ('time', 'trainRMSE', 'valRMSE', 'testRMSE', 'testMAE')]

# Users that already have a row in the results file are skipped, so an
# interrupted sweep can be resumed by re-running this cell. A missing file
# simply means that nothing has been processed yet.
try:
    finished_users = set(pd.read_pickle(results_path)['user'].tolist())
except FileNotFoundError:
    finished_users = set()

for user in lst_users:
    if user in finished_users:
        print('{} already in {} - skipped'.format(user, results_path))
        continue
    print(user)
    out = [user]

    # Load the user's table and drop its first column; the last remaining
    # column is the prediction target, everything before it is a feature.
    dataframe = pd.read_pickle('Users_prepped/All/PerUser/{}_final'.format(user))
    dataframe = dataframe[list(dataframe.columns)[1:]]
    dataset = dataframe.values.astype('float32')

    # Scale the feature columns to [0, 1]; the target column is left unscaled.
    # NOTE(review): the scaler is fitted on all rows, including those that are
    # assigned to validation/test below, so their min/max influence the
    # training inputs. Fit on dataset[:train_split] only if leakage-free
    # scores are required (existing results would then need to be re-run).
    scaler = MinMaxScaler(feature_range=(0, 1))
    arrX = scaler.fit_transform(dataset[:, :-1])
    arrY = dataset[:, -1].reshape(-1, 1)
    dataset = np.concatenate((arrX, arrY), axis=1)

    # Contiguous train / validation / test partitions, in row order.
    train_split, val_split = splits(dataset)
    data_train = dataset[:train_split]
    data_val = dataset[train_split:val_split]
    data_test = dataset[val_split:]

    out += [dataset.shape[0], data_train.shape[0], data_val.shape[0], data_test.shape[0]]

    for lookback_range in lookback_ranges:
        # Windows of `lookback_range` feature rows -> target of the next row.
        trainX, trainY = create_dataset(data_train, lookback_range)
        valX, valY = create_dataset(data_val, lookback_range)
        testX, testY = create_dataset(data_test, lookback_range)
        for n in neurons:
            for batch_size in batch_sizes:
                print('LookBack: {}, Neurons: {}, Batch_size:{}'.format(lookback_range, n, batch_size))

                # Many models are created in this loop; dropping Keras' global
                # state first keeps memory from growing with every model.
                tf.keras.backend.clear_session()

                model = Sequential()
                model.add(GRU(n, input_shape=trainX.shape[1:], return_sequences=False, dropout=0.4))
                model.add(Dense(1))
                model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

                # NOTE(review): min_delta=100 is expressed in squared target
                # units (the loss is MSE on the unscaled target) - confirm it
                # matches the scale of the target.
                es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5, min_delta=100)
                mc = ModelCheckpoint(checkpoint_path, monitor='val_mse', mode='min', verbose=1, save_best_only=True)

                start_time = time.time()
                model.fit(trainX, trainY, epochs=100, batch_size=batch_size, verbose=1,
                          validation_data=(valX, valY), callbacks=[es, mc])
                training_time = time.time() - start_time

                # Score the checkpointed model (lowest val_mse), not the
                # weights of the last epoch.
                saved_model = load_model(checkpoint_path)

                trainPredict = saved_model.predict(trainX)
                valPredict = saved_model.predict(valX)
                testPredict = saved_model.predict(testX)

                trainScore = np.sqrt(mean_squared_error(trainY, trainPredict[:, 0]))
                valScore = np.sqrt(mean_squared_error(valY, valPredict[:, 0]))
                testScore = np.sqrt(mean_squared_error(testY, testPredict[:, 0]))
                testMAE = mean_absolute_error(testY, testPredict[:, 0])

                saved_model.save('RQ1_GRU_models/RQ1_GRU_{}_L{}_N{}_B{}'.format(user, lookback_range, n, batch_size))

                out += [round(training_time, 4), trainScore, valScore, testScore, testMAE]

                # Per-configuration log line; written immediately so partial
                # progress survives an interrupted run.
                with open('RQ1_GRU_docu/RQ1_GRU_{}.txt'.format(user), 'a') as docu_file:
                    docu_file.write(
                        "L: {}, N: {}, B:{}, time:{}, train_score: {}, val_score: {}, test_score: {}, test_mae: {} \n".format(
                            lookback_range, n, batch_size, training_time,
                            trainScore, valScore, testScore, testMAE))

    # Append this user's row to the results on disk; the file is created on
    # the first user and re-read every time so that it stays the single
    # source of truth between runs.
    user_row = pd.DataFrame([out], columns=cols)
    try:
        df_out = pd.concat([pd.read_pickle(results_path), user_row], ignore_index=True)
    except FileNotFoundError:
        df_out = user_row
    df_out.to_pickle(results_path)