In [3]:
from joblib import load, dump
import numpy as np
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, StratifiedKFold
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from scikeras.wrappers import KerasClassifier
import optuna

def create_model(optimizer, num_lstm, lstm_1, dropout_1, lstm_2=0, drop_2=0, lstm_3=0, drop_3=0, input_shape=None):
    """Build and compile a stacked-LSTM binary classifier.

    The network consists of one to three LSTM layers, each followed by a
    Dropout layer, and a single sigmoid output unit. It is compiled with
    binary cross-entropy loss and the accuracy metric.

    Args:
        optimizer: Optimizer (name or instance) forwarded to ``model.compile``.
        num_lstm: Number of LSTM layers. A value above 1 adds the second
            layer and a value above 2 adds the third; any other value yields
            a single LSTM layer.
        lstm_1: Units of the first LSTM layer.
        dropout_1: Dropout rate applied after the first LSTM layer.
        lstm_2: Units of the second LSTM layer (used when ``num_lstm > 1``).
        drop_2: Dropout rate after the second LSTM layer.
        lstm_3: Units of the third LSTM layer (used when ``num_lstm > 2``).
        drop_3: Dropout rate after the third LSTM layer.
        input_shape: Optional ``(timesteps, features)`` tuple for the first
            layer. When omitted, it is taken from the notebook-level
            ``input_sh`` as ``(input_sh[1], input_sh[2])``, so ``input_sh``
            must already be defined at call time.

    Returns:
        The compiled ``Sequential`` model.
    """
    if input_shape is None:
        # Backward-compatible fallback: shape of the array most recently
        # assigned to the notebook global.
        input_shape = (input_sh[1], input_sh[2])

    model = Sequential()

    # Every LSTM except the last one must return the full sequence so that the
    # following LSTM receives 3-D input. Only the first layer declares the
    # input shape; Sequential infers it for the layers that follow.
    model.add(LSTM(units=lstm_1, input_shape=input_shape, return_sequences=bool(num_lstm > 1)))
    model.add(Dropout(dropout_1))

    if num_lstm > 1:
        # Uses the same threshold as the third-layer guard below, so the
        # second layer emits sequences exactly when a third LSTM is appended.
        model.add(LSTM(units=lstm_2, return_sequences=bool(num_lstm > 2)))
        model.add(Dropout(drop_2))

    if num_lstm > 2:
        model.add(LSTM(units=lstm_3, return_sequences=False))
        model.add(Dropout(drop_3))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [3]:
# Load the FFT feature arrays and their labels from disk.
X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib')

# Even-indexed training samples stay in the tuning set; odd-indexed samples
# are moved to the end of the test set.
half, half_labels = X_train_val[::2], y_train_val[::2]
second_half, second_half_labels = X_train_val[1::2], y_train_val[1::2]

X_test = np.concatenate((X_test, second_half), axis=0)
y_test = np.concatenate((y_test, second_half_labels), axis=0)
X_train_val, y_train_val = half, half_labels

# create_model reads input_sh[1] and input_sh[2] for the first LSTM layer.
input_sh = X_train_val.shape




# Optuna Objective Function
def objective(trial):
    """Optuna objective: mean 5-fold validation accuracy of one LSTM setup.

    Samples the number of LSTM layers (1-3), the units and dropout rate of
    each layer, and the optimizer name from ``trial``. A fresh model from
    ``create_model`` is then trained for 20 epochs on every stratified fold
    of the notebook-level ``X_train_val`` / ``y_train_val`` arrays and
    evaluated on the held-out part of that fold.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the per-fold accuracy values reported by ``model.evaluate``.
    """
    num_lstm = trial.suggest_int('num_lstm_layers', 1, 3)
    lstm_1 = trial.suggest_int('lstm_units_1', 16, 128)
    dropout_1 = trial.suggest_float('dropout_1', 0.0, 0.5)
    optimizer = trial.suggest_categorical('optimizer', ['SGD', 'RMSprop', 'Adam'])

    # Parameters of deeper layers are only sampled when those layers exist;
    # otherwise the zero placeholders are passed through to create_model.
    lstm_2 = drop_2 = lstm_3 = drop_3 = 0
    if num_lstm > 1:
        lstm_2 = trial.suggest_int('lstm_units_2', 16, 128)
        drop_2 = trial.suggest_float('dropout_2', 0.0, 0.5)
    if num_lstm > 2:
        lstm_3 = trial.suggest_int('lstm_units_3', 16, 128)
        drop_3 = trial.suggest_float('dropout_3', 0.0, 0.5)

    # Fixed seed so every trial is scored on the same fold assignment.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    scores = []
    for train_index, val_index in skf.split(X_train_val, y_train_val):
        # A study builds one model per fold per trial; clearing Keras' global
        # state first keeps memory from growing across the whole study.
        keras.backend.clear_session()

        model = create_model(optimizer, num_lstm, lstm_1, dropout_1, lstm_2, drop_2, lstm_3, drop_3)
        model.fit(X_train_val[train_index], y_train_val[train_index], epochs=20, verbose=0)

        # evaluate() returns [loss, accuracy] for the metrics compiled in create_model.
        fold_result = model.evaluate(X_train_val[val_index], y_train_val[val_index], verbose=0)
        scores.append(fold_result[1])
    return np.mean(scores)

# Hyperparameter Optimization
# Search for the configuration that maximizes the value returned by objective.
study = optuna.create_study(direction='maximize')
study.optimize(func=objective, n_trials=200)

# Keep and report the parameters of the best trial.
best_params = study.best_params
print("Best hyperparameters:", best_params)

[I 2024-05-24 20:52:41,746] A new study created in memory with name: no-name-89599ead-eb9d-4b6f-9520-894f282336b1
[I 2024-05-24 20:52:57,173] Trial 0 finished with value: 0.7397663474082947 and parameters: {'num_lstm_layers': 1, 'lstm_units_1': 107, 'dropout_1': 0.013702874590588887, 'optimizer': 'SGD'}. Best is trial 0 with value: 0.7397663474082947.
[I 2024-05-24 20:53:17,006] Trial 1 finished with value: 0.8619934320449829 and parameters: {'num_lstm_layers': 2, 'lstm_units_1': 119, 'dropout_1': 0.36537811871716036, 'optimizer': 'Adam', 'lstm_units_2': 76, 'dropout_2': 0.2518581599776218}. Best is trial 1 with value: 0.8619934320449829.
[I 2024-05-24 20:53:28,677] Trial 2 finished with value: 0.7446148157119751 and parameters: {'num_lstm_layers': 1, 'lstm_units_1': 69, 'dropout_1': 0.18881614580152317, 'optimizer': 'Adam'}. Best is trial 1 with value: 0.8619934320449829.
[I 2024-05-24 20:53:53,187] Trial 3 finished with value: 0.6949981689453125 and parameters: {'num_lstm_layers': 3,

Best hyperparameters: {'num_lstm_layers': 3, 'lstm_units_1': 124, 'dropout_1': 0.24829603755957766, 'optimizer': 'RMSprop', 'lstm_units_2': 119, 'dropout_2': 0.383528600256297, 'lstm_units_3': 24, 'dropout_3': 0.0017415629411977266}


In [4]:
# Load the FFT feature arrays and their labels from disk.
X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib')

# Interleaved quarters of the training data: every 4th sample starting at
# offsets 0, 1, 2 and 3.
quarter, quarter_labels = X_train_val[::4], y_train_val[::4]
second_qtr, second_qtr_labels = X_train_val[1::4], y_train_val[1::4]
third_qtr, third_qtr_labels = X_train_val[2::4], y_train_val[2::4]
fourth_qtr, fourth_qtr_labels = X_train_val[3::4], y_train_val[3::4]

# The first three quarters form the tuning set; the fourth is appended to the
# end of the test set.
X_train_val = np.concatenate((quarter, second_qtr, third_qtr), axis=0)
y_train_val = np.concatenate((quarter_labels, second_qtr_labels, third_qtr_labels), axis=0)
X_test = np.concatenate((X_test, fourth_qtr), axis=0)
y_test = np.concatenate((y_test, fourth_qtr_labels), axis=0)

# create_model reads input_sh[1] and input_sh[2] for the first LSTM layer.
input_sh = X_train_val.shape




# Optuna Objective Function
def objective(trial):
    """Optuna objective: mean 5-fold validation accuracy of one LSTM setup.

    Samples the number of LSTM layers (1-3), the units and dropout rate of
    each layer, and the optimizer name from ``trial``. A fresh model from
    ``create_model`` is then trained for 20 epochs on every stratified fold
    of the notebook-level ``X_train_val`` / ``y_train_val`` arrays and
    evaluated on the held-out part of that fold.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the per-fold accuracy values reported by ``model.evaluate``.
    """
    num_lstm = trial.suggest_int('num_lstm_layers', 1, 3)
    lstm_1 = trial.suggest_int('lstm_units_1', 16, 128)
    dropout_1 = trial.suggest_float('dropout_1', 0.0, 0.5)
    optimizer = trial.suggest_categorical('optimizer', ['SGD', 'RMSprop', 'Adam'])

    # Parameters of deeper layers are only sampled when those layers exist;
    # otherwise the zero placeholders are passed through to create_model.
    lstm_2 = drop_2 = lstm_3 = drop_3 = 0
    if num_lstm > 1:
        lstm_2 = trial.suggest_int('lstm_units_2', 16, 128)
        drop_2 = trial.suggest_float('dropout_2', 0.0, 0.5)
    if num_lstm > 2:
        lstm_3 = trial.suggest_int('lstm_units_3', 16, 128)
        drop_3 = trial.suggest_float('dropout_3', 0.0, 0.5)

    # Fixed seed so every trial is scored on the same fold assignment.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    scores = []
    for train_index, val_index in skf.split(X_train_val, y_train_val):
        # A study builds one model per fold per trial; clearing Keras' global
        # state first keeps memory from growing across the whole study.
        keras.backend.clear_session()

        model = create_model(optimizer, num_lstm, lstm_1, dropout_1, lstm_2, drop_2, lstm_3, drop_3)
        model.fit(X_train_val[train_index], y_train_val[train_index], epochs=20, verbose=0)

        # evaluate() returns [loss, accuracy] for the metrics compiled in create_model.
        fold_result = model.evaluate(X_train_val[val_index], y_train_val[val_index], verbose=0)
        scores.append(fold_result[1])
    return np.mean(scores)

# Hyperparameter Optimization
# Search for the configuration that maximizes the value returned by objective.
study = optuna.create_study(direction='maximize')
study.optimize(func=objective, n_trials=200)

# Keep and report the parameters of the best trial.
best_params = study.best_params
print("Best hyperparameters:", best_params)

[I 2024-05-24 22:05:35,448] A new study created in memory with name: no-name-a84b27a0-b0a2-4aff-bb87-324e2fe85066
[I 2024-05-24 22:05:48,474] Trial 0 finished with value: 0.7336358666419983 and parameters: {'num_lstm_layers': 1, 'lstm_units_1': 54, 'dropout_1': 0.10435671802174634, 'optimizer': 'SGD'}. Best is trial 0 with value: 0.7336358666419983.
[I 2024-05-24 22:06:18,364] Trial 1 finished with value: 0.8700763940811157 and parameters: {'num_lstm_layers': 3, 'lstm_units_1': 98, 'dropout_1': 0.041309228892223326, 'optimizer': 'RMSprop', 'lstm_units_2': 78, 'dropout_2': 0.26019508326620544, 'lstm_units_3': 29, 'dropout_3': 0.08726921059782766}. Best is trial 1 with value: 0.8700763940811157.
[I 2024-05-24 22:06:39,095] Trial 2 finished with value: 0.863608467578888 and parameters: {'num_lstm_layers': 2, 'lstm_units_1': 90, 'dropout_1': 0.05316720774925998, 'optimizer': 'Adam', 'lstm_units_2': 113, 'dropout_2': 0.12052146512450146}. Best is trial 1 with value: 0.8700763940811157.
[I 2

Best hyperparameters: {'num_lstm_layers': 2, 'lstm_units_1': 126, 'dropout_1': 0.3341380439081693, 'optimizer': 'RMSprop', 'lstm_units_2': 48, 'dropout_2': 0.41652198990743566}


In [5]:
# Load the FFT feature arrays and their labels from disk.
X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib')

# Interleaved quarters of the training data: every 4th sample starting at
# offsets 0, 1, 2 and 3.
quarter, quarter_labels = X_train_val[::4], y_train_val[::4]
second_qtr, second_qtr_labels = X_train_val[1::4], y_train_val[1::4]
third_qtr, third_qtr_labels = X_train_val[2::4], y_train_val[2::4]
fourth_qtr, fourth_qtr_labels = X_train_val[3::4], y_train_val[3::4]

# Only the first quarter is kept for tuning; the other three are appended to
# the end of the test set, in order.
X_test = np.concatenate((X_test, second_qtr, third_qtr, fourth_qtr), axis=0)
y_test = np.concatenate((y_test, second_qtr_labels, third_qtr_labels, fourth_qtr_labels), axis=0)
X_train_val, y_train_val = quarter, quarter_labels

# create_model reads input_sh[1] and input_sh[2] for the first LSTM layer.
input_sh = X_train_val.shape




# Optuna Objective Function
def objective(trial):
    """Optuna objective: mean 5-fold validation accuracy of one LSTM setup.

    Samples the number of LSTM layers (1-3), the units and dropout rate of
    each layer, and the optimizer name from ``trial``. A fresh model from
    ``create_model`` is then trained for 20 epochs on every stratified fold
    of the notebook-level ``X_train_val`` / ``y_train_val`` arrays and
    evaluated on the held-out part of that fold.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the per-fold accuracy values reported by ``model.evaluate``.
    """
    num_lstm = trial.suggest_int('num_lstm_layers', 1, 3)
    lstm_1 = trial.suggest_int('lstm_units_1', 16, 128)
    dropout_1 = trial.suggest_float('dropout_1', 0.0, 0.5)
    optimizer = trial.suggest_categorical('optimizer', ['SGD', 'RMSprop', 'Adam'])

    # Parameters of deeper layers are only sampled when those layers exist;
    # otherwise the zero placeholders are passed through to create_model.
    lstm_2 = drop_2 = lstm_3 = drop_3 = 0
    if num_lstm > 1:
        lstm_2 = trial.suggest_int('lstm_units_2', 16, 128)
        drop_2 = trial.suggest_float('dropout_2', 0.0, 0.5)
    if num_lstm > 2:
        lstm_3 = trial.suggest_int('lstm_units_3', 16, 128)
        drop_3 = trial.suggest_float('dropout_3', 0.0, 0.5)

    # Fixed seed so every trial is scored on the same fold assignment.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    scores = []
    for train_index, val_index in skf.split(X_train_val, y_train_val):
        # A study builds one model per fold per trial; clearing Keras' global
        # state first keeps memory from growing across the whole study.
        keras.backend.clear_session()

        model = create_model(optimizer, num_lstm, lstm_1, dropout_1, lstm_2, drop_2, lstm_3, drop_3)
        model.fit(X_train_val[train_index], y_train_val[train_index], epochs=20, verbose=0)

        # evaluate() returns [loss, accuracy] for the metrics compiled in create_model.
        fold_result = model.evaluate(X_train_val[val_index], y_train_val[val_index], verbose=0)
        scores.append(fold_result[1])
    return np.mean(scores)

# Hyperparameter Optimization
# Search for the configuration that maximizes the value returned by objective.
study = optuna.create_study(direction='maximize')
study.optimize(func=objective, n_trials=200)

# Keep and report the parameters of the best trial.
best_params = study.best_params
print("Best hyperparameters:", best_params)

[I 2024-05-24 23:28:04,614] A new study created in memory with name: no-name-caab8c09-db7b-4399-8a66-36f7b367e22d




[I 2024-05-24 23:28:13,769] Trial 0 finished with value: 0.745724356174469 and parameters: {'num_lstm_layers': 1, 'lstm_units_1': 126, 'dropout_1': 0.21172412254961664, 'optimizer': 'RMSprop'}. Best is trial 0 with value: 0.745724356174469.
[I 2024-05-24 23:28:33,243] Trial 1 finished with value: 0.8377607822418213 and parameters: {'num_lstm_layers': 3, 'lstm_units_1': 77, 'dropout_1': 0.2753449725798874, 'optimizer': 'RMSprop', 'lstm_units_2': 51, 'dropout_2': 0.3669421029662463, 'lstm_units_3': 115, 'dropout_3': 0.4033343386525616}. Best is trial 1 with value: 0.8377607822418213.
[I 2024-05-24 23:28:41,823] Trial 2 finished with value: 0.753158974647522 and parameters: {'num_lstm_layers': 1, 'lstm_units_1': 77, 'dropout_1': 0.15482883417819576, 'optimizer': 'RMSprop'}. Best is trial 1 with value: 0.8377607822418213.
[I 2024-05-24 23:28:50,053] Trial 3 finished with value: 0.656273889541626 and parameters: {'num_lstm_layers': 1, 'lstm_units_1': 51, 'dropout_1': 0.463373054563063, 'opt

Best hyperparameters: {'num_lstm_layers': 3, 'lstm_units_1': 126, 'dropout_1': 0.25314584250529726, 'optimizer': 'Adam', 'lstm_units_2': 68, 'dropout_2': 0.3938336292925794, 'lstm_units_3': 39, 'dropout_3': 0.2584061977691302}


In [20]:
# Scratch note: a bare string literal listing the same parameter names, in the
# same order, as create_model. The cell only evaluates to this string.
# NOTE(review): looks like a pasted signature hint rather than live code —
# confirm whether this cell can be removed.
''' optimizer: Any,
    num_lstm: Any,
    lstm_1: Any,
    dropout_1: Any,
    lstm_2: int = 0,
    drop_2: int = 0,
    lstm_3: int = 0,
    drop_3: int = 0 '''

SyntaxError: invalid syntax (435191884.py, line 1)

In [13]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Load the FFT feature arrays and their labels from disk.
X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib')

# Interleaved quarters of the training data: every 4th sample starting at
# offsets 0, 1, 2 and 3.
quarter, quarter_labels = X_train_val[::4], y_train_val[::4]
second_qtr, second_qtr_labels = X_train_val[1::4], y_train_val[1::4]
third_qtr, third_qtr_labels = X_train_val[2::4], y_train_val[2::4]
fourth_qtr, fourth_qtr_labels = X_train_val[3::4], y_train_val[3::4]

# The first three quarters form the training/validation pool; the fourth is
# appended to the end of the test set.
X_train_val = np.concatenate((quarter, second_qtr, third_qtr), axis=0)
y_train_val = np.concatenate((quarter_labels, second_qtr_labels, third_qtr_labels), axis=0)
X_test = np.concatenate((X_test, fourth_qtr), axis=0)
y_test = np.concatenate((y_test, fourth_qtr_labels), axis=0)

# create_model reads input_sh[1] and input_sh[2] for the first LSTM layer.
input_sh = X_train_val.shape


# NOTE(review): these values equal the "Best hyperparameters" printed by the
# study that tuned on the single-quarter split, while this cell trains on the
# three-quarter split — confirm which study they are meant to come from.
best_model = create_model(
    optimizer="Adam",
    num_lstm=3,
    lstm_1=126,
    dropout_1=0.25314584250529726,
    lstm_2=68,
    drop_2=0.3938336292925794,
    lstm_3=39,
    drop_3=0.2584061977691302,
)

# Hold out 25% of the pool as the validation set monitored during training.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.25,
    random_state=42,
)

# Write the model to disk each time validation accuracy reaches a new maximum.
checkpoint = ModelCheckpoint(
    filepath='test.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Train with the checkpoint callback attached.
history = best_model.fit(
    X_train,
    y_train,
    epochs=500,
    validation_data=(X_val, y_val),
    callbacks=[checkpoint],
)

# Restore the best checkpoint before scoring, so the test metrics reflect the
# epoch with the highest validation accuracy rather than the last epoch.
best_model.load_weights('test.h5')

loss, accuracy = best_model.evaluate(X_test, y_test)
print(f'Test loss: {loss}')
print(f'Test accuracy: {accuracy}')

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
print("Test Stats")

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels.
predictions_test = best_model.predict(X_test)
binary_predictions = (predictions_test >= 0.5).astype(int)

f1_scores = f1_score(y_test, binary_predictions)
print("F-Score on Test Set:", f1_scores)

accuracy_test = accuracy_score(y_test, binary_predictions)
print("Accuracy on Test Set:", accuracy_test)

# Per-class precision/recall/F1, followed by the confusion matrix.
print(classification_report(y_test, binary_predictions))
print(confusion_matrix(y_test, binary_predictions))


Epoch 1/500
Epoch 1: val_accuracy improved from -inf to 0.79032, saving model to test.h5
Epoch 2/500
Epoch 2: val_accuracy improved from 0.79032 to 0.86129, saving model to test.h5
Epoch 3/500
Epoch 3: val_accuracy improved from 0.86129 to 0.87419, saving model to test.h5
Epoch 4/500
Epoch 4: val_accuracy improved from 0.87419 to 0.88065, saving model to test.h5
Epoch 5/500
Epoch 5: val_accuracy did not improve from 0.88065
Epoch 6/500
Epoch 6: val_accuracy did not improve from 0.88065
Epoch 7/500
Epoch 7: val_accuracy did not improve from 0.88065
Epoch 8/500
Epoch 8: val_accuracy did not improve from 0.88065
Epoch 9/500
Epoch 9: val_accuracy did not improve from 0.88065
Epoch 10/500
Epoch 10: val_accuracy did not improve from 0.88065
Epoch 11/500
Epoch 11: val_accuracy did not improve from 0.88065
Epoch 12/500
Epoch 12: val_accuracy improved from 0.88065 to 0.88710, saving model to test.h5
Epoch 13/500
Epoch 13: val_accuracy improved from 0.88710 to 0.90000, saving model to test.h5
Ep

# Test on the international (INTL) dataset

In [8]:
from joblib import load, dump
import numpy as np
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, StratifiedKFold
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from scikeras.wrappers import KerasClassifier
import optuna
from tensorflow.keras.callbacks import ModelCheckpoint

# The training arrays are loaded only so that input_sh can be derived below;
# the international set replaces the usual test set in this cell.
X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/international_fft.joblib')
y_test = load('../../BEST SET/international_labels.joblib')

# create_model reads input_sh[1] and input_sh[2] for the first LSTM layer.
input_sh = X_train_val.shape

# NOTE(review): the architecture built here presumably has to match the one
# that produced the saved weights file — confirm against the run that wrote it.
best_model = create_model(
    optimizer="RMSprop",
    num_lstm=2,
    lstm_1=124,
    dropout_1=0.01786723081280845,
    lstm_2=76,
    drop_2=0.4373552242267542,
)
best_model.load_weights('./Model Dumps/LSTM-Base2.h5')

# Score the restored model on the international set.
loss, accuracy = best_model.evaluate(X_test, y_test)
print(f'Test loss: {loss}')
print(f'Test accuracy: {accuracy}')

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
print("Test Stats")

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels.
predictions_test = best_model.predict(X_test)
binary_predictions = (predictions_test >= 0.5).astype(int)

f1_scores = f1_score(y_test, binary_predictions)
print("F-Score on Test Set:", f1_scores)

accuracy_test = accuracy_score(y_test, binary_predictions)
print("Accuracy on Test Set:", accuracy_test)

# Per-class precision/recall/F1, followed by the confusion matrix.
print(classification_report(y_test, binary_predictions))
print(confusion_matrix(y_test, binary_predictions))

Test loss: 2.1696653366088867
Test accuracy: 0.723296046257019
Test Stats
F-Score on Test Set: 0.6777251184834123
Accuracy on Test Set: 0.7232960325534079
              precision    recall  f1-score   support

           0       0.67      0.88      0.76       484
           1       0.83      0.57      0.68       499

    accuracy                           0.72       983
   macro avg       0.75      0.73      0.72       983
weighted avg       0.75      0.72      0.72       983

[[425  59]
 [213 286]]
