In [None]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM, GRU
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model

In [None]:
# Numeric ids of the users whose prepared frames are processed by the
# training cells.
lst_users = [
    14995, 15044, 15051, 15052, 15110, 15115, 15151, 15159, 15170, 15171, 15173,
    15208, 15241, 15258, 15280, 15335, 15405, 15419, 15445, 15448, 15482, 15510,
    15588, 15729, 18001, 23737, 23743, 23753, 23756, 23768, 23769, 23770, 23785,
    23787, 23800, 23829, 23867, 23871, 23875, 23878, 23881, 23883, 23889, 23893,
    23897, 23906, 23913, 23914, 23915, 23919, 23921, 23924, 23926, 23935, 23936,
    23961, 23983, 24030, 24039, 24040, 24079, 24109, 24120, 24124, 24127, 24167,
    24185, 24193, 24195, 24196, 24197, 24198, 24204, 24207, 24212, 24225, 24226,
    24227, 24235, 24246, 24248, 24324, 24346, 24350, 24461, 24467, 25163, 25164,
    25169, 25170, 25171, 25174, 25175, 25178, 25180, 25189, 25250, 25461, 25522,
    24081, 24203, 24249, 24612, 25498,
]

# Hyper-parameter grid explored for every user (3 x 3 x 3 combinations).
lookback_ranges = [5, 10, 20]   # window length passed to create_dataset
neurons = [64, 128, 256]        # units of the recurrent layer
batch_sizes = [64, 128, 256]    # mini-batch size passed to model.fit


In [None]:
#Determine split
def splits(dataset):
    """Return the row indices that end the training and validation partitions.

    The length of ``dataset`` is expressed in units of 256 rows; 80 % and
    90 % of that figure are each rounded to the nearest whole unit (Python's
    built-in ``round``) and converted back to a row count, so both boundaries
    are multiples of 256.

    Parameters
    ----------
    dataset : sized
        Any object supporting ``len()``.

    Returns
    -------
    tuple of int
        ``(train_split, val_split)``; rows ``[:train_split]`` are meant for
        training, ``[train_split:val_split]`` for validation and
        ``[val_split:]`` for testing. Because of the rounding, ``val_split``
        can equal ``train_split`` or exceed ``len(dataset)`` for short inputs.
    """
    blocks = len(dataset) / 256
    train_split, val_split = (round(blocks * share) * 256 for share in (0.8, 0.9))
    return train_split, val_split

In [None]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=5):
    """Turn a 2-D array into sliding-window samples for sequence regression.

    Every column except the last is treated as a feature and the last column
    as the target. Sample ``i`` pairs the feature rows ``i .. i+look_back-1``
    with the target value of row ``i+look_back``.

    Parameters
    ----------
    dataset : numpy.ndarray
        2-D array of shape ``(rows, features + 1)``.
    look_back : int, default 5
        Number of consecutive rows in each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with shape ``(rows - look_back, look_back, features)`` and ``Y``
        with shape ``(rows - look_back,)``. When ``rows <= look_back`` no
        window fits and both arrays are empty with shape ``(0,)``.
    """
    dataX, dataY = [], []
    # The last usable start index is len(dataset) - look_back - 1, because its
    # target row (start + look_back) is the final row of the array. The bound
    # is therefore len(dataset) - look_back; subtracting a further 1 would
    # silently discard the final sample.
    for start in range(len(dataset) - look_back):
        stop = start + look_back
        dataX.append(dataset[start:stop, :-1])
        dataY.append(dataset[stop, -1])
    return np.array(dataX), np.array(dataY)

In [None]:
# Per-user LSTM grid search over look-back window, hidden units and batch size.
#
# For every user in `lst_users` the prepared frame is loaded, split by row
# order with `splits`, windowed with `create_dataset`, and one single-layer
# LSTM regressor is trained per hyper-parameter combination. Each user adds
# one row to the results pickle, written as soon as that user is finished so
# an interrupted run loses at most the user in progress.
results_path = 'RQ4_LSTM_docu/df_results_RQ4_LSTM'
checkpoint_path = 'RQ4_LSTM_best_model.h5'

# Log files and the results pickle are written below; create their
# directories up front rather than failing after the first training run.
os.makedirs('RQ4_LSTM_docu', exist_ok=True)
os.makedirs('RQ4_LSTM_models', exist_ok=True)

# Column layout of one results row. The order mirrors the order in which the
# nested loops below append values to `out`.
cols = ['user', 'user_length', 'train_length', 'val_length', 'test_length']
for lookback_range in lookback_ranges:
    for n in neurons:
        for batch_size in batch_sizes:
            suffix = 'l{}_n{}_b{}'.format(lookback_range, n, batch_size)
            cols.extend(prefix + suffix for prefix in
                        ('time_', 'trainRMSE_', 'valRMSE_', 'testRMSE_', 'testMAE_'))

# Users that already have a row in the results file are skipped, so the cell
# can be re-run after an interruption without retraining or duplicating rows.
try:
    finished_users = set(pd.read_pickle(results_path)['user'])
except FileNotFoundError:
    finished_users = set()

for user in lst_users:
    if user in finished_users:
        print('{} already in results, skipping'.format(user))
        continue
    print(user)
    out = [user]

    # NOTE: read_pickle can execute arbitrary code while loading; only use it
    # on trusted, locally produced files.
    dataframe = pd.read_pickle('Users_prepped/All/filtered/{}_final'.format(str(user)))
    # Drop the first column and the 'battery' column. Of what remains, the
    # last column is used as the regression target, the others as features.
    dataframe = dataframe[list(dataframe.columns)[1:]]
    cols2 = [col for col in dataframe.columns if col != 'battery']
    dataframe = dataframe[cols2]
    dataset = dataframe.values.astype('float32')

    train_split, val_split = splits(dataset)

    # Scale the features with min/max statistics taken from the training rows
    # only; fitting on all rows would leak validation/test information into
    # training. Validation/test features may therefore fall outside [0, 1].
    # The target column is left unscaled.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(dataset[:train_split, :-1])
    dataset = np.concatenate((scaler.transform(dataset[:, :-1]), dataset[:, -1:]), axis=1)

    data_train = dataset[:train_split]
    data_val = dataset[train_split:val_split]
    data_test = dataset[val_split:]

    out.extend([dataset.shape[0], data_train.shape[0], data_val.shape[0], data_test.shape[0]])

    for lookback_range in lookback_ranges:
        # Windows of `lookback_range` feature rows -> target of the next row.
        trainX, trainY = create_dataset(data_train, lookback_range)
        valX, valY = create_dataset(data_val, lookback_range)
        testX, testY = create_dataset(data_test, lookback_range)
        for n in neurons:
            for batch_size in batch_sizes:
                print('LookBack: {}, Neurons: {}, Batch_size:{}'.format(lookback_range, n, batch_size))

                # Release graph/state held by models from earlier iterations;
                # otherwise memory grows with every model built in this loop.
                tf.keras.backend.clear_session()
                # Remove the previous configuration's checkpoint so a run that
                # never writes one fails in load_model instead of silently
                # scoring a stale model.
                if os.path.exists(checkpoint_path):
                    os.remove(checkpoint_path)

                model = Sequential()
                model.add(LSTM(n, input_shape=trainX.shape[1:], return_sequences=False, dropout=0.4))
                model.add(Dense(1))
                model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

                # NOTE(review): min_delta is measured in squared units of the
                # unscaled target - confirm that 100 is the intended threshold.
                es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5, min_delta=100)
                mc = ModelCheckpoint(checkpoint_path, monitor='val_mse', mode='min', verbose=1, save_best_only=True)

                start_time = time.time()
                model.fit(trainX, trainY, epochs=100, batch_size=batch_size, verbose=1,
                          validation_data=(valX, valY), callbacks=[es, mc])
                training_time = time.time() - start_time

                # Score the best checkpoint (lowest val_mse), not the last epoch.
                saved_model = load_model(checkpoint_path)

                trainPredict = saved_model.predict(trainX)
                valPredict = saved_model.predict(valX)
                testPredict = saved_model.predict(testX)

                trainScore = np.sqrt(mean_squared_error(trainY, trainPredict[:, 0]))
                valScore = np.sqrt(mean_squared_error(valY, valPredict[:, 0]))
                testScore = np.sqrt(mean_squared_error(testY, testPredict[:, 0]))
                testMAE = mean_absolute_error(testY, testPredict[:, 0])

                saved_model.save('RQ4_LSTM_models/RQ4_LSTM_{}_L{}_N{}_B{}'.format(user, lookback_range, n, batch_size))

                out.extend([round(training_time, 4), trainScore, valScore, testScore, testMAE])

                # Human-readable per-user log, appended after every configuration.
                with open('RQ4_LSTM_docu/RQ4_LSTM_{}.txt'.format(user), 'a') as docu_file:
                    docu_file.write(
                        "L: {}, N: {}, B:{}, time:{}, train_score: {}, val_score: {}, test_score: {}, test_mae: {} \n".format(
                            lookback_range, n, batch_size, training_time,
                            trainScore, valScore, testScore, testMAE))

    # Append this user's row to the results file, creating it on first use.
    try:
        df_out = pd.read_pickle(results_path)
    except FileNotFoundError:
        df_out = pd.DataFrame([out], columns=cols)
    else:
        df_out.loc[len(df_out)] = out
    df_out.to_pickle(results_path)

In [None]:
# Per-user GRU grid search over look-back window, hidden units and batch size.
#
# For every user in `lst_users` the prepared frame is loaded, split by row
# order with `splits`, windowed with `create_dataset`, and one single-layer
# GRU regressor is trained per hyper-parameter combination. Each user adds
# one row to the results pickle, written as soon as that user is finished so
# an interrupted run loses at most the user in progress.
results_path = 'RQ4_GRU_docu/df_results_RQ4_GRU'
checkpoint_path = 'RQ4_GRU_best_model.h5'

# Log files and the results pickle are written below; create their
# directories up front rather than failing after the first training run.
os.makedirs('RQ4_GRU_docu', exist_ok=True)
os.makedirs('RQ4_GRU_models', exist_ok=True)

# Column layout of one results row. The order mirrors the order in which the
# nested loops below append values to `out`.
cols = ['user', 'user_length', 'train_length', 'val_length', 'test_length']
for lookback_range in lookback_ranges:
    for n in neurons:
        for batch_size in batch_sizes:
            suffix = 'l{}_n{}_b{}'.format(lookback_range, n, batch_size)
            cols.extend(prefix + suffix for prefix in
                        ('time_', 'trainRMSE_', 'valRMSE_', 'testRMSE_', 'testMAE_'))

# Users that already have a row in the results file are skipped, so the cell
# can be re-run after an interruption without retraining or duplicating rows.
try:
    finished_users = set(pd.read_pickle(results_path)['user'])
except FileNotFoundError:
    finished_users = set()

for user in lst_users:
    if user in finished_users:
        print('{} already in results, skipping'.format(user))
        continue
    print(user)
    out = [user]

    # NOTE: read_pickle can execute arbitrary code while loading; only use it
    # on trusted, locally produced files.
    dataframe = pd.read_pickle('Users_prepped/All/filtered/{}_final'.format(str(user)))
    # Drop the first column and the 'battery' column. Of what remains, the
    # last column is used as the regression target, the others as features.
    dataframe = dataframe[list(dataframe.columns)[1:]]
    cols2 = [col for col in dataframe.columns if col != 'battery']
    dataframe = dataframe[cols2]
    dataset = dataframe.values.astype('float32')

    train_split, val_split = splits(dataset)

    # Scale the features with min/max statistics taken from the training rows
    # only; fitting on all rows would leak validation/test information into
    # training. Validation/test features may therefore fall outside [0, 1].
    # The target column is left unscaled.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(dataset[:train_split, :-1])
    dataset = np.concatenate((scaler.transform(dataset[:, :-1]), dataset[:, -1:]), axis=1)

    data_train = dataset[:train_split]
    data_val = dataset[train_split:val_split]
    data_test = dataset[val_split:]

    out.extend([dataset.shape[0], data_train.shape[0], data_val.shape[0], data_test.shape[0]])

    for lookback_range in lookback_ranges:
        # Windows of `lookback_range` feature rows -> target of the next row.
        trainX, trainY = create_dataset(data_train, lookback_range)
        valX, valY = create_dataset(data_val, lookback_range)
        testX, testY = create_dataset(data_test, lookback_range)
        for n in neurons:
            for batch_size in batch_sizes:
                print('LookBack: {}, Neurons: {}, Batch_size:{}'.format(lookback_range, n, batch_size))

                # Release graph/state held by models from earlier iterations;
                # otherwise memory grows with every model built in this loop.
                tf.keras.backend.clear_session()
                # Remove the previous configuration's checkpoint so a run that
                # never writes one fails in load_model instead of silently
                # scoring a stale model.
                if os.path.exists(checkpoint_path):
                    os.remove(checkpoint_path)

                model = Sequential()
                model.add(GRU(n, input_shape=trainX.shape[1:], return_sequences=False, dropout=0.4))
                model.add(Dense(1))
                model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

                # NOTE(review): min_delta is measured in squared units of the
                # unscaled target - confirm that 100 is the intended threshold.
                es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5, min_delta=100)
                mc = ModelCheckpoint(checkpoint_path, monitor='val_mse', mode='min', verbose=1, save_best_only=True)

                start_time = time.time()
                model.fit(trainX, trainY, epochs=100, batch_size=batch_size, verbose=1,
                          validation_data=(valX, valY), callbacks=[es, mc])
                training_time = time.time() - start_time

                # Score the best checkpoint (lowest val_mse), not the last epoch.
                saved_model = load_model(checkpoint_path)

                trainPredict = saved_model.predict(trainX)
                valPredict = saved_model.predict(valX)
                testPredict = saved_model.predict(testX)

                trainScore = np.sqrt(mean_squared_error(trainY, trainPredict[:, 0]))
                valScore = np.sqrt(mean_squared_error(valY, valPredict[:, 0]))
                testScore = np.sqrt(mean_squared_error(testY, testPredict[:, 0]))
                testMAE = mean_absolute_error(testY, testPredict[:, 0])

                saved_model.save('RQ4_GRU_models/RQ4_GRU_{}_L{}_N{}_B{}'.format(user, lookback_range, n, batch_size))

                out.extend([round(training_time, 4), trainScore, valScore, testScore, testMAE])

                # Human-readable per-user log, appended after every configuration.
                with open('RQ4_GRU_docu/RQ4_GRU_{}.txt'.format(user), 'a') as docu_file:
                    docu_file.write(
                        "L: {}, N: {}, B:{}, time:{}, train_score: {}, val_score: {}, test_score: {}, test_mae: {} \n".format(
                            lookback_range, n, batch_size, training_time,
                            trainScore, valScore, testScore, testMAE))

    # Append this user's row to the results file, creating it on first use.
    try:
        df_out = pd.read_pickle(results_path)
    except FileNotFoundError:
        df_out = pd.DataFrame([out], columns=cols)
    else:
        df_out.loc[len(df_out)] = out
    df_out.to_pickle(results_path)