In [None]:
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM, GRU
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model

In [None]:
# IDs that are later matched against the 'UserID' column to build the
# train / validation / test splits.
# NOTE: the order of this literal matters. The list is shuffled in place with
# a fixed seed below, so adding, removing or reordering entries changes which
# IDs end up in which split.
lst_complete = [25461, 24124, 24030, 24198, 15482, 15171, 23881, 24235, 23743,  
              25169, 23875, 15441, 24007, 25569, 23754, 24192, 24041, 15530, 
              24109, 23768, 23883, 23921, 23769, 23889, 15110, 15373, 18001,  
              15242, 24224, 23917, 25208, 24243, 25557, 15273, 23770, 23884,
              24350, 23926, 23893, 15044, 23961, 24227, 23935, 23737, 23906,  
              23982, 24121, 23909, 17984, 25498, 23738, 23985, 15170, 14995, 
              15115, 15510, 24127, 25175, 25170, 15419, 24324, 24197, 25163, 
              23955, 23740, 24348, 15205, 24343, 23837, 24206, 24079, 23872,
              23871, 15208, 23756, 15729, 23829, 25502, 15280, 24196, 23914,
              24470, 25235, 15270, 25172, 23745, 23899, 15304, 24203, 23984,  
              15335, 15448, 15052, 15159, 24467, 24225, 15405, 25174, 25171,  
              24213, 23822, 23876, 24232, 25197, 23988, 14981, 23753, 25180,
              24039, 23800, 24204, 24248, 23913, 25250, 25178, 24212, 23833, 
              15024, 25167, 15382, 10599, 24125, 23995, 15187, 25201, 25227,
              23900, 25203, 24223, 23953, 15209, 25275, 23981, 25176, 15462,
              24120, 23924, 15173, 23936, 24349, 23915, 15363, 15277, 24226,  
              23878, 15258, 24081, 24207, 23919, 15445, 25189, 15484, 24612,   
              24249, 15444, 25207, 23767, 24033, 23898, 15588, 24071, 24122,
              24087, 25278, 25547, 24346, 24199, 25504, 25164, 24040, 23785, 
              24185, 23787, 25522, 24461, 15051, 24193, 24167, 15151, 23867,
              24246, 15031, 15241, 24083, 23897, 23983, 24195, 23778]
# Deterministic in-place shuffle: a dedicated Random instance seeded with 7
# yields the same permutation every time this cell runs, independent of the
# global `random` state.
random.Random(7).shuffle(lst_complete)

In [None]:
# Partition the shuffled IDs into three disjoint groups by position:
# the first 150 for training, the next 19 for validation, the rest for testing.
lst_train, lst_val, lst_test = (
    lst_complete[:150],
    lst_complete[150:169],
    lst_complete[169:],
)

# Columns selected from the prepared frame, in the order used to build the
# model matrices. 'remainingMin' must stay last: downstream cells split the
# last column off as the prediction target.
columns_sort = [
    # general numeric / context columns
    'Age',
    'battery',
    'duration',
    'Workday',
    'hour',
    'time_since_last_app',
    # one-hot: gender
    'Gender_Female',
    'Gender_Male',
    'Gender_Other',
    'Gender_unknown',
    # one-hot: notification flag
    'notification_False',
    'notification_True',
    # one-hot: app category
    'AppCategory_Art & Design',
    'AppCategory_Auto & Vehicles',
    'AppCategory_Beauty',
    'AppCategory_Books & Reference',
    'AppCategory_Business',
    'AppCategory_Comics',
    'AppCategory_Communication',
    'AppCategory_Dating',
    'AppCategory_Education',
    'AppCategory_Entertainment',
    'AppCategory_Events',
    'AppCategory_Finance',
    'AppCategory_Food & Drink',
    'AppCategory_Gaming',
    'AppCategory_Health & Fitness',
    'AppCategory_House & Home',
    'AppCategory_Lifestyle',
    'AppCategory_Maps & Navigation',
    'AppCategory_Medical',
    'AppCategory_Music & Audio',
    'AppCategory_News & Magazines',
    'AppCategory_Parenting',
    'AppCategory_Personalization',
    'AppCategory_Photography',
    'AppCategory_Productivity',
    'AppCategory_Shopping',
    'AppCategory_Social',
    'AppCategory_Sports',
    'AppCategory_Tools',
    'AppCategory_Travel & Local',
    'AppCategory_Video Players & Editors',
    'AppCategory_Weather',
    'AppCategory_x',
    # one-hot: weekday
    'Weekday_0',
    'Weekday_1',
    'Weekday_2',
    'Weekday_3',
    'Weekday_4',
    'Weekday_5',
    'Weekday_6',
    # one-hot: time of day
    'DayTime_Evening',
    'DayTime_Morning',
    'DayTime_Night',
    'DayTime_Noon',
    # target column (kept last)
    'remainingMin',
]

In [None]:
# Load the prepared one-hot encoded frame (a local pickle; only unpickle
# files you trust) and cut it into the three splits by 'UserID' membership,
# keeping only the model columns in their fixed order.
df_all = pd.read_pickle('Users_prepped/All/all_onehot')
df_train = df_all.loc[df_all['UserID'].isin(lst_train), columns_sort]
df_val = df_all.loc[df_all['UserID'].isin(lst_val), columns_sort]
df_test = df_all.loc[df_all['UserID'].isin(lst_test), columns_sort]

In [None]:
# Convert each split to a plain float32 matrix (rows = records, columns in
# `columns_sort` order, target last).
ds_train = df_train.values.astype('float32')
ds_val = df_val.values.astype('float32')
ds_test = df_test.values.astype('float32')

In [None]:
# Scale features to [0, 1]. The target (last column) is left in its original
# units and is only reshaped into a column vector so it can be re-attached.
scaler = MinMaxScaler(feature_range=(0, 1))

# Split every matrix into features (all but the last column) and target.
arrX_train, arrY_train = ds_train[:, :-1], ds_train[:, -1].reshape(-1, 1)
arrX_val, arrY_val = ds_val[:, :-1], ds_val[:, -1].reshape(-1, 1)
arrX_test, arrY_test = ds_test[:, :-1], ds_test[:, -1].reshape(-1, 1)

# Fit the scaler on the training features only, then apply the same
# transformation to validation and test so no information leaks from them.
arrX_train = scaler.fit_transform(arrX_train)
arrX_val = scaler.transform(arrX_val)
arrX_test = scaler.transform(arrX_test)

# Re-attach the unscaled target as the last column.
ds_train = np.hstack((arrX_train, arrY_train))
ds_val = np.hstack((arrX_val, arrY_val))
ds_test = np.hstack((arrX_test, arrY_test))

In [None]:
def create_dataset(dataset, look_back=5):
    """Turn a 2-D matrix into sliding-window samples for a sequence model.

    Each sample consists of the feature columns (all but the last column) of
    `look_back` consecutive rows; its label is the last-column value of the
    row immediately following that window.

    Parameters
    ----------
    dataset : array-like, shape (n_rows, n_columns)
        Feature columns followed by the target in the last column.
    look_back : int, default 5
        Number of consecutive rows per input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Inputs of shape (n_rows - look_back, look_back, n_columns - 1) and
        labels of shape (n_rows - look_back,). When `dataset` has no more
        than `look_back` rows, correctly shaped empty arrays are returned.

    Notes
    -----
    Windows are taken over consecutive rows of `dataset` as given; the
    function has no notion of group boundaries inside the matrix.
    """
    dataset = np.asarray(dataset)

    # The last valid window starts at len(dataset) - look_back - 1 (its label
    # is the final row), so there are len(dataset) - look_back samples.
    # The previous bound of len(dataset) - look_back - 1 dropped the last one.
    n_samples = len(dataset) - look_back
    if n_samples <= 0:
        n_features = max(dataset.shape[1] - 1, 0)
        return (np.empty((0, look_back, n_features), dtype=dataset.dtype),
                np.empty((0,), dtype=dataset.dtype))

    dataX = [dataset[i:i + look_back, :-1] for i in range(n_samples)]
    dataY = dataset[look_back:, -1]
    return np.array(dataX), np.array(dataY)

In [None]:
# Hyperparameter grid explored by the training loops: every combination of
# the three lists below is trained and evaluated once.
lookback_ranges = [5, 10, 20]     # rows per input window
neurons = [64, 128, 256]          # units of the recurrent layer
batch_sizes = [64, 128, 256]      # mini-batch size passed to fit()

In [None]:
# Grid search for the LSTM model.
# For every (look-back, units, batch size) combination: train with early
# stopping, reload the checkpoint with the lowest validation MSE, score it on
# train / validation / test, save the model and append one line to the log.

# Create the output folders up front so a missing directory cannot make the
# run fail only after the first (long) training has finished.
os.makedirs('RQ2_LSTM3_models', exist_ok=True)
os.makedirs('RQ2_LSTM_docu', exist_ok=True)

for lookback_range in lookback_ranges:
    # Build windows of `lookback_range` rows (X) and the target of the row
    # that follows each window (Y). Only depends on the look-back, so it is
    # done once per outer iteration.
    trainX, trainY = create_dataset(ds_train, lookback_range)
    valX, valY = create_dataset(ds_val, lookback_range)
    testX, testY = create_dataset(ds_test, lookback_range)
    for n in neurons:
        for batch_size in batch_sizes:

            print('LookBack: {}, Neurons: {}, Batch_size:{}'.format(lookback_range, n, batch_size))

            # Drop the Keras global state left over from the previous
            # configuration; otherwise every model built in this loop keeps
            # accumulating in memory for the lifetime of the kernel.
            tf.keras.backend.clear_session()

            # Single recurrent layer followed by a linear output unit.
            model = Sequential()
            model.add(LSTM(n, input_shape=trainX.shape[1:], return_sequences = False, dropout =0.4))
            model.add(Dense(1))
            model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

            # Stop when val_loss has not improved by more than 100 for 5
            # epochs; the target is unscaled, so the loss is in squared
            # target units.
            es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5, min_delta=100)
            # Keep only the weights of the epoch with the lowest validation
            # MSE. The file is shared by all configurations and overwritten.
            mc = ModelCheckpoint('RQ2_LSTM_best_model.h5', monitor='val_mse', mode='min', verbose=1, save_best_only=True)

            start_time = time.time()
            model.fit(trainX, trainY, epochs=100, batch_size=batch_size, verbose=1, validation_data=(valX, valY), callbacks=[es, mc])
            training_time = time.time()-start_time

            # Evaluate the best checkpoint, not the last-epoch weights.
            saved_model = load_model('RQ2_LSTM_best_model.h5')

            trainPredict = saved_model.predict(trainX)
            valPredict = saved_model.predict(valX)
            testPredict = saved_model.predict(testX)

            # Predictions have shape (samples, 1); take column 0 to compare
            # against the 1-D label vectors.
            trainScore_rmse = np.sqrt(mean_squared_error(trainY, trainPredict[:,0]))
            valScore_rmse = np.sqrt(mean_squared_error(valY, valPredict[:,0]))
            testScore_rmse= np.sqrt(mean_squared_error(testY, testPredict[:,0]))

            testScore_mae= mean_absolute_error(testY, testPredict[:,0])

            # Persist the best model of this configuration and append its
            # scores to the running log (append mode: earlier runs are kept).
            saved_model.save('RQ2_LSTM3_models/RQ2_LSTM_L{}_N{}_B{}'.format(lookback_range, n, batch_size))
            with open('RQ2_LSTM_docu/RQ2_LSTM_docu.txt', 'a') as file:
                file.write("L: {}, N: {}, B:{}, time:{}, train_RMSE: {}, val_RMSE: {}, test_RMSE: {},  test_MAE: {} \n".format(
                    lookback_range, n, batch_size, 
                    training_time, trainScore_rmse, valScore_rmse, 
                    testScore_rmse, testScore_mae))
            

In [None]:
# Grid search for the GRU model.
# For every (look-back, units, batch size) combination: train with early
# stopping, reload the checkpoint with the lowest validation MSE, score it on
# train / validation / test, save the model and append one line to the log.

# Create the output folders up front so a missing directory cannot make the
# run fail only after the first (long) training has finished.
os.makedirs('RQ2_GRU3_models', exist_ok=True)
os.makedirs('RQ2_GRU_docu', exist_ok=True)

for lookback_range in lookback_ranges:
    # Build windows of `lookback_range` rows (X) and the target of the row
    # that follows each window (Y). Only depends on the look-back, so it is
    # done once per outer iteration.
    trainX, trainY = create_dataset(ds_train, lookback_range)
    valX, valY = create_dataset(ds_val, lookback_range)
    testX, testY = create_dataset(ds_test, lookback_range)
    for n in neurons:
        for batch_size in batch_sizes:

            print('LookBack: {}, Neurons: {}, Batch_size:{}'.format(lookback_range, n, batch_size))

            # Drop the Keras global state left over from the previous
            # configuration; otherwise every model built in this loop keeps
            # accumulating in memory for the lifetime of the kernel.
            tf.keras.backend.clear_session()

            # Single recurrent layer followed by a linear output unit.
            model = Sequential()
            model.add(GRU(n, input_shape=trainX.shape[1:], return_sequences = False, dropout =0.4))
            model.add(Dense(1))
            model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

            # Stop when val_loss has not improved by more than 100 for 5
            # epochs; the target is unscaled, so the loss is in squared
            # target units.
            es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5, min_delta=100)
            # Keep only the weights of the epoch with the lowest validation
            # MSE. The file is shared by all configurations and overwritten.
            mc = ModelCheckpoint('RQ2_GRU_best_model.h5', monitor='val_mse', mode='min', verbose=1, save_best_only=True)

            start_time = time.time()
            model.fit(trainX, trainY, epochs=100, batch_size=batch_size, verbose=1, validation_data=(valX, valY), callbacks=[es, mc])
            training_time = time.time()-start_time

            # Evaluate the best checkpoint, not the last-epoch weights.
            saved_model = load_model('RQ2_GRU_best_model.h5')

            trainPredict = saved_model.predict(trainX)
            valPredict = saved_model.predict(valX)
            testPredict = saved_model.predict(testX)

            # Predictions have shape (samples, 1); take column 0 to compare
            # against the 1-D label vectors.
            trainScore_rmse = np.sqrt(mean_squared_error(trainY, trainPredict[:,0]))
            valScore_rmse = np.sqrt(mean_squared_error(valY, valPredict[:,0]))
            testScore_rmse= np.sqrt(mean_squared_error(testY, testPredict[:,0]))

            testScore_mae= mean_absolute_error(testY, testPredict[:,0])

            # Persist the best model of this configuration and append its
            # scores to the running log (append mode: earlier runs are kept).
            saved_model.save('RQ2_GRU3_models/RQ2_GRU_L{}_N{}_B{}'.format(lookback_range, n, batch_size))
            with open('RQ2_GRU_docu/RQ2_GRU_docu.txt', 'a') as file:
                file.write("L: {}, N: {}, B:{}, time:{}, train_RMSE: {}, val_RMSE: {}, test_RMSE: {},  test_MAE: {} \n".format(
                    lookback_range, n, batch_size, 
                    training_time, trainScore_rmse, valScore_rmse, 
                    testScore_rmse, testScore_mae))
            