In [None]:
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing
from keras.models import Sequential, model_from_json
from keras.layers import Dense, LSTM
from keras.layers.core import Dense, Dropout, Activation
import pandas as pd
import json, os
from matplotlib import pyplot as plt
import pywt
from statsmodels.robust import mad

In [None]:
def save_model (dir_path, model, history,cnt):
    """Persist one training run (history, architecture, weights) inside ``dir_path``.

    Three files are written, all suffixed with ``cnt``:

    * ``history_<cnt>.json`` -- the ``history.history`` dict,
    * ``model_<cnt>.json``   -- the string returned by ``model.to_json()``,
    * ``model_<cnt>.h5``     -- the weights, via ``model.save_weights``.

    Parameters
    ----------
    dir_path : str
        Existing directory that receives the files (trailing slash optional).
    model : object exposing ``to_json()`` and ``save_weights(path)``.
    history : object exposing a ``history`` dict.
    cnt : any
        Tag converted with ``str()`` and embedded in the file names.
    """
    tag = str(cnt)
    # os.path.join keeps the history file inside dir_path even when the
    # caller omits the trailing separator.
    fname = os.path.join(dir_path, 'history_' + tag + '.json')
    mfname = os.path.join(dir_path, 'model_' + tag + '.json')
    mname = os.path.join(dir_path, 'model_' + tag + '.h5')

    # default=float converts numeric scalars the json module rejects
    # (e.g. numpy float32) instead of raising TypeError.
    with open(fname, 'w') as histfile:
        json.dump(history.history, histfile, default=float)

    model_json = model.to_json()
    with open(mfname, "w") as json_file:
        json_file.write(model_json)
    model.save_weights(mname)
    

    
def LSTM_MODEL (epochs, neurons, dropout, batch_size, train_X,train_y, test_X, test_y, shuffle=False):
    """Build, compile and fit a single-layer LSTM regressor.

    The network is ``LSTM(neurons) -> Dropout(dropout) -> Dense(1)``, compiled
    with MAE loss, the Adam optimizer and MAE as the reported metric.

    Parameters
    ----------
    epochs, batch_size : forwarded to ``model.fit``.
    neurons : number of units of the LSTM layer.
    dropout : rate passed to the ``Dropout`` layer.
    train_X, train_y : training inputs / targets; ``train_X`` must be 3-D
        because its 2nd and 3rd dimensions define ``input_shape``.
    test_X, test_y : passed to ``fit`` as ``validation_data``.
    shuffle : bool, default False
        Forwarded to ``model.fit``. The default keeps the historical
        behaviour of this function (no shuffling).

    Returns
    -------
    list
        ``[model, history]`` where ``history`` is the object returned by ``fit``.
    """
    # default values: neurons=100;epochs=100;dropout=0.2;batch_size=200
    model = Sequential()
    model.add(LSTM(neurons, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dropout(dropout))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam', metrics=['mae'])
    history = model.fit(
        train_X, train_y,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(test_X, test_y),
        verbose=2,
        shuffle=shuffle,
    )
    return [model, history]

def norm_data_f(data, colv):
    """Min-max scale column ``colv`` of ``data`` and return ``data``.

    The column is cast to float, reshaped to a single-feature 2-D array and
    passed through scikit-learn's ``MinMaxScaler`` (default settings). The
    scaled values are written back into ``data[colv]``, i.e. the frame that
    was passed in is modified and the same object is returned.
    """
    column_as_2d = data[colv].astype(float).values.reshape(-1,1)
    scaler = preprocessing.MinMaxScaler()
    data[colv] = scaler.fit_transform(column_as_2d)
    return data

def split_train_test(values):
    """Split a 2-D array chronologically into train / test inputs and targets.

    The first two thirds of the rows form the training set, the rest the test
    set. In each part the last column is the target and the remaining columns
    are the inputs, reshaped to 3-D ``[samples, 1, features]``.

    The four resulting shapes are printed, and the function returns
    ``[train_X, train_y, test_X, test_y]``.
    """
    boundary = int(len(values)*(2/3))

    def _inputs_and_target(rows):
        # Last column -> target; everything before it -> inputs with a
        # singleton timestep axis inserted.
        features, target = rows[:, :-1], rows[:, -1]
        n_rows, n_features = features.shape
        return features.reshape((n_rows, 1, n_features)), target

    train_X, train_y = _inputs_and_target(values[:boundary, :])
    test_X, test_y = _inputs_and_target(values[boundary:, :])
    print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
    return [train_X,train_y,test_X,test_y]

In [None]:
# Read the training table used by the single-parameter sweeps below and
# preview its first rows.
training_data_path = 'datasets/training_datasets/Freeway12m_10months_1.csv'
merged_data = pd.read_csv(filepath_or_buffer=training_data_path)
merged_data.head(5)

In [None]:
# Sweep over the number of training epochs with every other hyper-parameter
# fixed; one model + history is saved per value under `dir_path`.
clx = [ 'month','hour','dow','device_id','device_speed','meadianv','speed_2','INRIX_speed']
cnt = -1
neurons=100;epochs=[30,60,100];dropout=0.2;batch_size=200
dir_path = 'results/epoch/'
if not(os.path.isdir(dir_path)):
    os.makedirs(dir_path)

# The normalised data and the train/test split do not depend on the swept
# parameter, so they are computed once. The explicit .copy() makes sure the
# in-place column writes of norm_data_f never target a slice of merged_data
# (avoids pandas' SettingWithCopyWarning).
new_col_data = merged_data[clx].copy()
for norm_col in clx:
    new_col_data = norm_data_f(new_col_data, norm_col)
values = new_col_data.values
[train_X,train_y,test_X,test_y] = split_train_test(values)

for epoch in epochs:
    cnt+=1
    [model, history] = LSTM_MODEL (epoch, neurons, dropout, batch_size,train_X,train_y, test_X, test_y )
    save_model (dir_path, model,history,epoch)
    print('model ' + str(cnt) + ' completed ....')

In [None]:
# Plot training vs. validation MAE for every history file found in `dir_path`
# (one figure per file).
histFiles = sorted(os.listdir(dir_path))
for histFile in histFiles:
    if histFile.startswith('history') and histFile.endswith('.json'):
        jname = os.path.join(dir_path, histFile)
        print (jname)
        with open(jname, 'r') as json_file:
            loaded_model_json = json.load(json_file)
        # Older Keras releases store the metric under its long name, newer
        # ones under the name passed to compile() ('mae'); accept both.
        val_key = 'val_mean_absolute_error' if 'val_mean_absolute_error' in loaded_model_json else 'val_mae'
        train_key = 'mean_absolute_error' if 'mean_absolute_error' in loaded_model_json else 'mae'
        output = loaded_model_json[val_key]
        output_main = loaded_model_json[train_key]
        # Run tag taken from 'history_<tag>.json'; the two curves get
        # distinct legend entries so they can be told apart.
        run_label = os.path.splitext(histFile)[0].split('_')[1]
        plt.plot(output, label=run_label + ' (validation)')
        plt.plot(output_main, label=run_label + ' (train)')
        plt.legend()
        plt.show()

In [None]:
# Sweep over the number of LSTM units with every other hyper-parameter
# fixed; one model + history is saved per value under `dir_path`.
clx = [ 'month','hour','dow','device_id','device_speed','meadianv','speed_2','INRIX_speed']
cnt = -1
neurons=[50,100,250,500];epoch=100;dropout=0.2;batch_size=200
dir_path = 'results/neuron/'
if not(os.path.isdir(dir_path)):
    os.makedirs(dir_path)

# The normalised data and the train/test split do not depend on the swept
# parameter, so they are computed once. The explicit .copy() makes sure the
# in-place column writes of norm_data_f never target a slice of merged_data
# (avoids pandas' SettingWithCopyWarning).
new_col_data = merged_data[clx].copy()
for norm_col in clx:
    new_col_data = norm_data_f(new_col_data, norm_col)
values = new_col_data.values
[train_X,train_y,test_X,test_y] = split_train_test(values)

for neuron in neurons:
    cnt+=1
    [model, history] = LSTM_MODEL (epoch, neuron, dropout, batch_size,train_X,train_y, test_X, test_y )
    save_model (dir_path, model,history,neuron)
    print('model ' + str(cnt) + ' completed ....')

In [None]:
# Plot training vs. validation MAE for every history file found in `dir_path`
# (one figure per file).
histFiles = sorted(os.listdir(dir_path))
for histFile in histFiles:
    if histFile.startswith('history') and histFile.endswith('.json'):
        jname = os.path.join(dir_path, histFile)
        print (jname)
        with open(jname, 'r') as json_file:
            loaded_model_json = json.load(json_file)
        # Older Keras releases store the metric under its long name, newer
        # ones under the name passed to compile() ('mae'); accept both.
        val_key = 'val_mean_absolute_error' if 'val_mean_absolute_error' in loaded_model_json else 'val_mae'
        train_key = 'mean_absolute_error' if 'mean_absolute_error' in loaded_model_json else 'mae'
        output = loaded_model_json[val_key]
        output_main = loaded_model_json[train_key]
        # Run tag taken from 'history_<tag>.json'; the two curves get
        # distinct legend entries so they can be told apart.
        run_label = os.path.splitext(histFile)[0].split('_')[1]
        plt.plot(output, label=run_label + ' (validation)')
        plt.plot(output_main, label=run_label + ' (train)')
        plt.legend()
        plt.show()

In [None]:
# Sweep over the batch size with every other hyper-parameter fixed; one
# model + history is saved per value under `dir_path`.
clx = [ 'month','hour','dow','device_id','device_speed','meadianv','speed_2','INRIX_speed']
cnt = -1
neuron=100;epoch=100;dropout=0.2;batch_sizes=[50,100,200,500]
dir_path = 'results/batchsize/'
if not(os.path.isdir(dir_path)):
    os.makedirs(dir_path)

# The normalised data and the train/test split do not depend on the swept
# parameter, so they are computed once. The explicit .copy() makes sure the
# in-place column writes of norm_data_f never target a slice of merged_data
# (avoids pandas' SettingWithCopyWarning).
new_col_data = merged_data[clx].copy()
for norm_col in clx:
    new_col_data = norm_data_f(new_col_data, norm_col)
values = new_col_data.values
[train_X,train_y,test_X,test_y] = split_train_test(values)

for batch_size in batch_sizes:
    cnt+=1
    [model, history] = LSTM_MODEL (epoch, neuron, dropout, batch_size,train_X,train_y, test_X, test_y )
    save_model (dir_path, model,history,batch_size)
    print('model ' + str(cnt) + ' completed ....')

In [None]:
# Plot training vs. validation MAE for every history file found in `dir_path`
# (one figure per file).
histFiles = sorted(os.listdir(dir_path))
for histFile in histFiles:
    if histFile.startswith('history') and histFile.endswith('.json'):
        jname = os.path.join(dir_path, histFile)
        print (jname)
        with open(jname, 'r') as json_file:
            loaded_model_json = json.load(json_file)
        # Older Keras releases store the metric under its long name, newer
        # ones under the name passed to compile() ('mae'); accept both.
        val_key = 'val_mean_absolute_error' if 'val_mean_absolute_error' in loaded_model_json else 'val_mae'
        train_key = 'mean_absolute_error' if 'mean_absolute_error' in loaded_model_json else 'mae'
        output = loaded_model_json[val_key]
        output_main = loaded_model_json[train_key]
        # Run tag taken from 'history_<tag>.json'; the two curves get
        # distinct legend entries so they can be told apart.
        run_label = os.path.splitext(histFile)[0].split('_')[1]
        plt.plot(output, label=run_label + ' (validation)')
        plt.plot(output_main, label=run_label + ' (train)')
        plt.legend()
        plt.show()

In [None]:
def save_all_model (dir_path, model, history,batch_size,neuron, dropout, epoch, filename):
    """Persist one grid-search run (history, architecture, weights).

    Every output path is plain string concatenation of
    ``dir_path + '_' + filename + '_'``, a kind marker (``history_`` or
    ``model_``), the hyper-parameters ``batch_size``, ``neuron``, ``dropout``
    and ``epoch`` converted with ``str()`` and joined without separators, and
    an extension:

    * ``...history_<params>.json`` -- the ``history.history`` dict,
    * ``...model_<params>.json``   -- the string returned by ``model.to_json()``,
    * ``...model_<params>.h5``     -- the weights, via ``model.save_weights``.

    Parameters
    ----------
    dir_path : str
        Path prefix; it is concatenated as-is, so it must already end with a
        path separator for the files to land inside that directory.
    model : object exposing ``to_json()`` and ``save_weights(path)``.
    history : object exposing a ``history`` dict.
    batch_size, neuron, dropout, epoch : values embedded in the file names.
    filename : str
        Dataset tag embedded in the file names.
    """
    prefix = dir_path + '_' + filename + '_'
    params = str(batch_size) + str(neuron) +str(dropout) +str(epoch)
    fname = prefix + 'history_' + params + '.json'
    mfname = prefix + 'model_' + params + '.json'
    mname = prefix + 'model_' + params + '.h5'

    # The context manager closes the file even if serialisation fails;
    # default=float converts numeric scalars the json module rejects
    # (e.g. numpy float32) instead of raising TypeError.
    with open(fname, 'w') as histfile:
        json.dump(history.history, histfile, default=float)

    model_json = model.to_json()
    with open(mfname, "w") as json_file:
        json_file.write(model_json)
    model.save_weights(mname)
    
def LSTM_MODEL_shuffle (epochs, neurons, dropout, batch_size, train_X,train_y, test_X, test_y ):
    """Build, compile and fit a single-layer LSTM regressor with shuffling on.

    Architecture: ``LSTM(neurons) -> Dropout(dropout) -> Dense(1)``, compiled
    with MAE loss, the Adam optimizer and MAE as metric. ``fit`` is called
    with ``shuffle=True`` and ``(test_X, test_y)`` as validation data.

    Returns ``[model, history]``.
    """
    # default values: neurons=100;epochs=100;dropout=0.2;batch_size=200
    timesteps, n_features = train_X.shape[1], train_X.shape[2]
    layer_stack = (
        LSTM(neurons, input_shape=(timesteps, n_features)),
        Dropout(dropout),
        Dense(1),
    )
    net = Sequential()
    for layer in layer_stack:
        net.add(layer)
    net.compile(loss='mae', optimizer='adam', metrics=['mae'])
    fit_log = net.fit(
        train_X, train_y,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(test_X, test_y),
        verbose=2,
        shuffle=True,
    )
    return [net, fit_log]

In [None]:
# Full grid search: for every CSV in the training folder, train one shuffled
# LSTM per (batch_size, neuron, dropout, epoch) combination and save it under
# `dir_path`.
clx = [ 'month','hour','dow','device_id','device_speed','meadianv','speed_2','INRIX_speed']
cnt = -1
# neurons=[10,50,100,200];epochs=[150];dropouts=[0.02,0.2,0.4];batch_sizes=[500]
neurons=[200, 100, 10,50];epochs=[100, 30, 50];dropouts=[0.2];batch_sizes=[1000, 500, 200]
dir_path = 'results/all/'
training_data_folder = 'datasets/training_datasets/'
if not(os.path.isdir(dir_path)):
    os.makedirs(dir_path)
all_csv_files = sorted(os.listdir(training_data_folder))
for cur_tdata_file in all_csv_files:
    # Ignore anything in the folder that is not a CSV dataset.
    if not cur_tdata_file.endswith('.csv'):
        continue
    training_data_path = training_data_folder + cur_tdata_file
    filename = cur_tdata_file.split('.csv')[0]
    merged_data = pd.read_csv(training_data_path)

    # Normalisation and the train/test split depend only on the dataset, so
    # they are done once per file. The .copy() keeps norm_data_f's in-place
    # column writes off a slice of merged_data.
    new_col_data = merged_data[clx].copy()
    for norm_col in clx:
        new_col_data = norm_data_f(new_col_data, norm_col)
    values = new_col_data.values
    [train_X,train_y,test_X,test_y] = split_train_test(values)

    for batch_size in batch_sizes:
        for neuron in neurons:
            for dropout in dropouts:
                for epoch in epochs:
                    # Training happens inside the epoch loop so that every
                    # value in `epochs` yields its own model, not only the
                    # last one.
                    cnt+=1
                    [model, history] = LSTM_MODEL_shuffle (epoch, neuron, dropout, batch_size,train_X,train_y, test_X, test_y )
                    save_all_model (dir_path, model,history,batch_size,neuron, dropout, epoch, filename)
                    print('model ' + str(cnt) + ' completed ....')