In [1]:
import contextlib
import glob
import io
import os
import shutil
from pathlib import Path

import joblib # for saving and loading variables
import keras_tuner as kt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import MultipleLocator
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from tensorflow import keras
from keras.layers import Dense, LSTM
from keras.models import Sequential
from keras.layers import Masking

In [2]:
# Root folder holding the input CSV and receiving all results.
# Set the LSTM_DATA_DIR environment variable to run on another machine;
# without it the original hard-coded location is used.
data_path = Path(os.environ.get('LSTM_DATA_DIR', 'C:/Users/ilkele4s/'))
print(data_path)

C:\Users\ilkele4s


In [None]:
# Name of the input CSV and its full path (kept as a plain string).
File = 'LSTM_data.csv'
data = os.fspath(Path(data_path) / File)

In [None]:
# Create the results folder; exist_ok keeps the cell re-runnable
# (os.mkdir raised FileExistsError on every run after the first).
os.makedirs(os.path.join(data_path,'New_LSTM_comp_results'), exist_ok=True)

In [None]:
# Output folder for saved splits, tuner results, figures and reports.
NewDirectory = os.path.join(data_path,'New_LSTM_comp_results')

In [6]:
# Read the sample matrix from the CSV; the file has no header row,
# so every line is treated as data.
data_samples = pd.read_csv(
    data,
    header=None,
)

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\ilkele4s\\LSTM_data.csv'

In [11]:
# Arrange the samples as (sample, time bin, feature) windows for the LSTM.

data_array = np.array(data_samples)
n_samples, n_features = data_array.shape[:2]

bin_cur = 1   # the current bin
bin_past = 3  # number of preceding bins fed alongside it
bins_predict = bin_cur + bin_past # 3 previous + 1 current

# Window for row j holds rows j-bin_past .. j of data_array. The first
# bin_past rows have no complete history and stay all-zero.
# NOTE(review): windows are taken over consecutive rows of the whole file;
# confirm that rows next to each other really are consecutive time bins.
formatted_data = np.zeros([n_samples, bins_predict, n_features])

for start_idx in range(n_samples - bin_past):
    end_idx = start_idx + bins_predict
    formatted_data[start_idx + bin_past, :, :] = data_array[start_idx:end_idx, :]

In [12]:
# Build the one-hot label matrix, one row per sample.

n_zones = 45
n_trials = 32

# One-hot encode zone ids 1..n_zones. to_categorical needs a slot for
# class 0, which is dropped again so that column k-1 stands for zone k.
cat_zones = keras.utils.to_categorical(np.r_[1:n_zones+1], num_classes=n_zones+1)
cat_zones = np.delete(cat_zones, 0, 1)

# np.tile with a scalar repeats along the last axis, giving shape
# (n_zones, n_zones*n_trials). After the row-major reshape each zone's
# one-hot row therefore appears n_trials times in a row
# (zone 1 x n_trials, zone 2 x n_trials, ...).
# NOTE(review): confirm this matches the row order of the data file. If the
# rows are ordered trial by trial (zones 1..n_zones, repeated), the matching
# layout would be np.tile(cat_zones, (n_trials, 1)).
labels_all_trials = np.tile(cat_zones, n_trials)

reshaped_labels_all_trials = np.reshape(labels_all_trials, [n_zones*n_trials, n_zones])



In [13]:
# test - validation - train split (20/20/60)

# Draw every sample index exactly once, in random order. Sampling with
# replacement (the np.random.choice default) put the same index into
# several splits and left other samples unused.
# NOTE(review): neighbouring windows share rows, so a purely random split
# still places overlapping windows in different splits; confirm acceptable.
all_ind = np.random.choice(np.arange(0, n_samples, 1), n_samples, replace=False)

test_end = int(n_samples/5)
validation_end = int(n_samples/5+n_samples/5)

test_ind = all_ind[:test_end] # 20

validation_ind = all_ind[test_end:validation_end] # 20

train_ind = all_ind[validation_end:] # 60


In [14]:
# Number of indices drawn for the split.
all_ind.shape

(1440,)

In [15]:
# Number of distinct indices; equals the count above when nothing repeats.
np.unique(all_ind).shape

(1440,)

In [19]:
# Sizes of the test / validation / train index sets, in that order.
for split_ind in (test_ind, validation_ind, train_ind):
    print(np.shape(split_ind))

(288,)
(288,)
(864,)


In [20]:
# Select the windows and labels of each split by index along the first axis
# (index-array selection returns copies, not views).
test_samples = formatted_data[test_ind]
test_labels = reshaped_labels_all_trials[test_ind]

train_samples = formatted_data[train_ind]
train_labels = reshaped_labels_all_trials[train_ind]

validation_samples = formatted_data[validation_ind]
validation_labels = reshaped_labels_all_trials[validation_ind]

In [21]:
# Replace NaNs by 0 in place in every split; 0 is also the value the
# Masking layer of the model is configured to skip.
for split_samples in (train_samples, test_samples, validation_samples):
    split_samples[np.isnan(split_samples)] = 0

In [22]:
# Shapes of the sample arrays, then of the label arrays
# (test, train, validation in both groups).
for split_array in (test_samples, train_samples, validation_samples,
                    test_labels, train_labels, validation_labels):
    print(np.shape(split_array))

(288, 4, 17400)
(864, 4, 17400)
(288, 4, 17400)
(288, 45)
(864, 45)
(288, 45)


In [19]:
# Persist every split to NewDirectory as '<name>.sav' with joblib,
# in the order train, test, validation (samples before labels).
splits_to_save = [
    ('train_samples', train_samples),
    ('train_labels', train_labels),
    ('test_samples', test_samples),
    ('test_labels', test_labels),
    ('validation_samples', validation_samples),
    ('validation_labels', validation_labels),
]
saved_paths = [
    joblib.dump(split_array, os.path.join(NewDirectory, split_name + '.sav'))
    for split_name, split_array in splits_to_save
]
# Echo the result of the last dump, as the cell did before.
saved_paths[-1]

['C:\\Users\\ilkele4s\\New_LSTM_comp_results\\validation_labels.sav']

In [23]:
class MyHyperModel(kt.HyperModel):
    """KerasTuner hypermodel: Masking -> LSTM -> softmax Dense classifier.

    Tuned hyperparameters: learning rate ("lr"), LSTM width ("LSTM_units"),
    input dropout ("Drop_rate"), recurrent dropout ("RecDrop_rate") and,
    during fitting, whether to shuffle each epoch ("shuffle").

    Args:
        input_shape: (time steps, features) of one sample. Defaults to the
            previously hard-coded (4, 17400).
        n_classes: number of output classes. Defaults to 45.
        **kwargs: forwarded to kt.HyperModel.
    """

    def __init__(self, input_shape=(4, 17400), n_classes=45, **kwargs):
        super().__init__(**kwargs)
        self.sample_shape = tuple(input_shape)
        self.n_classes = n_classes

    def build(self, hp):
        """Build and compile one candidate model from the values in `hp`."""
        LSTM_units = [30, 40, 50]
        Drop_rate = [.20, .30, .40]
        RecDrop_rate = [.20, .30, .40]
        learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")

        model = Sequential()
        # Time steps whose features are all 0 are skipped by the LSTM.
        model.add(Masking(mask_value=0, input_shape=self.sample_shape))
        model.add(LSTM(hp.Choice('LSTM_units', LSTM_units),
                       dropout=hp.Choice('Drop_rate', Drop_rate),
                       recurrent_dropout=hp.Choice('RecDrop_rate', RecDrop_rate)))

        model.add(Dense(self.n_classes, activation='softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                      metrics=['accuracy'])

        return model

    def fit(self, hp, model, *args, **kwargs):
        """Fit `model`, forwarding all arguments to `model.fit`."""
        return model.fit(
            *args,
            # Tune whether to shuffle the data in each epoch.
            shuffle=hp.Boolean("shuffle"),
            **kwargs,
        )




In [24]:
#tuner = kt.GridSearch(
#    MyHyperModel(),
#    objective =kt.Objective("val_loss", direction="min"),
#    overwrite=True,
#    directory=NewDirectory ,
#    project_name="tune_hypermodel",
#)



In [25]:
# Random search over the hypermodel's space, scored by validation loss
# (lower is better). overwrite=True discards earlier results stored under
# the same directory / project name.
search_objective = kt.Objective("val_loss", direction="min")
tuner = kt.RandomSearch(
    MyHyperModel(),
    objective=search_objective,
    max_trials=30,
    overwrite=True,
    directory=NewDirectory,
    project_name="tune_hypermodel_random",
)


In [26]:
# Print the hyperparameters registered on the tuner and their ranges.
tuner.search_space_summary()

Search space summary
Default search space size: 4
lr (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}
LSTM_units (Choice)
{'default': 30, 'conditions': [], 'values': [30, 40, 50], 'ordered': True}
Drop_rate (Choice)
{'default': 0.2, 'conditions': [], 'values': [0.2, 0.3, 0.4], 'ordered': True}
RecDrop_rate (Choice)
{'default': 0.2, 'conditions': [], 'values': [0.2, 0.3, 0.4], 'ordered': True}


In [27]:
# Run the search: each trial trains for 20 epochs on the training split
# and is scored on the validation split.
tuner.search(
    train_samples,
    train_labels,
    epochs=20,
    validation_data=(validation_samples, validation_labels),
)

Trial 30 Complete [00h 02m 03s]
val_loss: 1.5170698165893555

Best val_loss So Far: 0.8130357265472412
Total elapsed time: 01h 03m 28s
INFO:tensorflow:Oracle triggered exit


In [28]:
# Print the three best trials with their hyperparameters and scores.
tuner.results_summary(num_trials=3)


Results summary
Results in C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random
Showing 3 best trials
Objective(name="val_loss", direction="min")

Trial 11 summary
Hyperparameters:
lr: 0.0007425894066139745
LSTM_units: 50
Drop_rate: 0.3
RecDrop_rate: 0.4
shuffle: False
Score: 0.8130357265472412

Trial 28 summary
Hyperparameters:
lr: 0.0007352872452552378
LSTM_units: 40
Drop_rate: 0.2
RecDrop_rate: 0.4
shuffle: False
Score: 0.8525015115737915

Trial 00 summary
Hyperparameters:
lr: 0.001726082464062503
LSTM_units: 50
Drop_rate: 0.3
RecDrop_rate: 0.4
shuffle: False
Score: 0.9577274322509766


In [None]:
# results_summary() only prints, so redirect stdout into an in-memory
# buffer while it runs and keep the captured text.
with contextlib.redirect_stdout(io.StringIO()) as summary_output:
    tuner.results_summary(num_trials=3)

summary_text = summary_output.getvalue()


In [None]:
# Write the captured summary to disk. The with-block closes (and flushes)
# the file; the handle was previously left open.
with open(os.path.join(NewDirectory,'best_3params.txt'), 'w') as file:
    file.write(summary_text)

In [29]:
# List holding the single best HyperParameters object found by the search.
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)

In [None]:
def build_model(hp):
    """Build and compile the final Masking -> LSTM -> Dense(45) classifier.

    Args:
        hp: tuned hyperparameters. Accepts a KerasTuner HyperParameters
            object, a list/tuple of them (the first one is used, as returned
            by tuner.get_best_hyperparameters()), a plain dict, or None.
            Recognised keys: 'LSTM_units', 'Drop_rate', 'RecDrop_rate', 'lr'.

    Returns:
        A compiled Sequential model. Any key missing from `hp` falls back to
        the previous fixed setting (50 units, dropout .30, recurrent dropout
        .40, default 'adam' optimizer).
    """
    if isinstance(hp, (list, tuple)):
        hp = hp[0] if hp else None

    if isinstance(hp, dict):
        tuned = hp
    else:
        hp_values = getattr(hp, 'values', None)
        tuned = hp_values if isinstance(hp_values, dict) else {}

    model_opt = Sequential()
    # Time steps whose features are all 0 are skipped by the LSTM.
    model_opt.add(Masking(mask_value=0, input_shape=(4, n_features)))
    model_opt.add(LSTM(tuned.get('LSTM_units', 50),
                       dropout=tuned.get('Drop_rate', .30),
                       recurrent_dropout=tuned.get('RecDrop_rate', .40)))
    model_opt.add(Dense(45, activation='softmax'))

    # Use the tuned learning rate when available; it was previously dropped
    # in favour of Adam's default.
    if 'lr' in tuned:
        optimizer = keras.optimizers.Adam(learning_rate=tuned['lr'])
    else:
        optimizer = 'adam'

    model_opt.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    return model_opt


In [33]:
# Fresh, untrained model to be retrained below; note that
# best_hyperparameters is the list returned by get_best_hyperparameters().
best_model = build_model(best_hyperparameters)

In [31]:
# Remove the per-trial folders written by the tuner.
# NOTE(review): presumably done to free disk space; confirm that nothing
# later needs to reload anything stored in these folders.
directory_path = os.path.join(NewDirectory,'tune_hypermodel_random')

# Entries named "trial..." inside the project folder.
pattern = "trial*"
search_glob = os.path.join(directory_path, pattern)
matching_files = glob.glob(search_glob)

# Delete each match recursively; report failures instead of stopping.
for file_path in matching_files:
    try:
        shutil.rmtree(file_path)
    except OSError as e:
        print(f"Error deleting '{file_path}': {e}")
    else:
        print(f"File '{file_path}' deleted successfully.")

File 'C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random\trial_00' deleted successfully.
File 'C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random\trial_01' deleted successfully.
File 'C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random\trial_02' deleted successfully.
File 'C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random\trial_03' deleted successfully.
File 'C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random\trial_04' deleted successfully.
File 'C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random\trial_05' deleted successfully.
File 'C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random\trial_06' deleted successfully.
File 'C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random\trial_07' deleted successfully.
File 'C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random\trial_08' deleted successfully.
File 'C:\Users\ilkele4s\New_LSTM_comp_results\tune_hypermodel_random\trial_09' deleted succ

In [None]:

# Retrain the final model for 30 epochs; the returned History object
# feeds the loss / accuracy plots.
# NOTE(review): the search also tuned a `shuffle` flag (MyHyperModel.fit);
# this call leaves shuffling at the Keras default - confirm intended.
best_model_trained = best_model.fit(
    train_samples,
    train_labels,
    validation_data=(validation_samples, validation_labels),
    epochs=30,
    verbose=1,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30

In [None]:
# Training vs. validation loss per epoch, saved as Model_loss.png.
history = best_model_trained.history
plt.plot(history['loss'], label='train')
plt.plot(history['val_loss'], label='validation')
plt.legend(loc='upper left')

# Major y ticks every 0.5.
ay = plt.gca()
ay.yaxis.set_major_locator(MultipleLocator(base=0.50))

plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')

plt.savefig(os.path.join(NewDirectory,'Model_loss.png'))
plt.show()

In [29]:
# Training vs. validation accuracy per epoch, saved as Model_accuracy.png.
history = best_model_trained.history
plt.plot(history['accuracy'], label='train')
plt.plot(history['val_accuracy'], label='validation')

# Major y ticks every 0.1.
ay = plt.gca()
ay.yaxis.set_major_locator(MultipleLocator(base=0.10))

plt.legend(loc='upper left')

plt.title('Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')

plt.savefig(os.path.join(NewDirectory,'Model_accuracy.png'))
plt.show()

NameError: name 'best_model_trained' is not defined

In [None]:
# Class probabilities for the test split.
predictions = best_model.predict(test_samples)

# Column index of the largest entry, shifted by one: column k-1 <-> zone k.
predicted_labels = predictions.argmax(axis=1) + 1
true_label = test_labels.argmax(axis=1) + 1
#%%


In [None]:
# Row-normalised confusion matrix over all 45 zones.
# Passing labels= fixes the matrix at 45x45 even when a zone is absent from
# the test draw; otherwise the matrix shrinks and the tick labels below no
# longer line up with its rows and columns.
class_ids = np.arange(1, 46)
cm = confusion_matrix(true_label, predicted_labels, labels=class_ids, normalize='true')

class_labels = [str(i) for i in class_ids]

plt.figure(figsize=(20, 16))
sns.heatmap(cm, fmt='.2',annot=True, annot_kws={"size": 8}, xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')

plt.savefig(os.path.join(NewDirectory,'Cm_optim.png'))
plt.show()

In [None]:
# Per-class precision / recall / F1 on the test split.
# (classification_report is imported in the import cell at the top.)
print(classification_report(true_label, predicted_labels))