# Test files for Recurrent Neural Network

In [16]:
import numpy as np
import tensorflow as tf
import keras
from keras import layers
import matplotlib.pyplot as plt

### Loading data

In [17]:
from load_data import load_and_transform_data

In [18]:
X_train, y_train, X_val, y_val, X_test, y_test = load_and_transform_data(SEED=42)

Dataset already exists at: chest_xray_data

Dataset ready at: c:\Users\eirik\Desktop\FYSSTK3155_Dataprocessing_and_MachineLearning\Project3_FYSSTK3155-4155\Code\chest_xray_data
Dataset already exists at: chest_xray_data_split



Loading train: 100%|██████████| 4684/4684 [00:52<00:00, 89.75it/s] 
Loading val: 100%|██████████| 585/585 [00:06<00:00, 89.33it/s] 
Loading test: 100%|██████████| 587/587 [00:05<00:00, 106.35it/s]


Train: (4684, 480, 480), Val: (585, 480, 480), Test: (587, 480, 480)


In [19]:
# Check shape of subsets
print(f"X train shape:\t", X_train.shape)
print(f"y train shape:\t", y_train.shape)
print(f"X val shape:\t", X_val.shape)
print(f"y val shape:\t", y_val.shape)
print(f"X test shape:\t", X_test.shape)
print(f"y test shape:\t", y_test.shape)

X train shape:	 (4684, 480, 480)
y train shape:	 (4684,)
X val shape:	 (585, 480, 480)
y val shape:	 (585,)
X test shape:	 (587, 480, 480)
y test shape:	 (587,)


### Initial testing for RNN, LSTM and GRU

In [None]:
RNN = keras.Sequential()                          # Create a base sequential model
RNN.add(keras.Input(shape=(480, 480)))            # Set the input shape
RNN.add(layers.SimpleRNN(units=240))      # Add a simple RNN layer
RNN.add(layers.Dense(units=1, activation="sigmoid")) # Output layer

# Get a summary of model configuration
RNN.summary()

In [None]:
LSTM = keras.Sequential()                          # Create a base sequential model
LSTM.add(keras.Input(shape=(480, 480)))            # Set the input shape
LSTM.add(layers.LSTM(units=240))      # Add a LSTM layer
LSTM.add(layers.Dense(units=1, activation="sigmoid")) # Output layer

# Get a summary of model configuration
LSTM.summary()

In [21]:
GRU = keras.Sequential()                          # Create a base sequential model
GRU.add(keras.Input(shape=(480, 480)))            # Set the input shape
GRU.add(layers.GRU(units=240))      # Add a GRU layer
GRU.add(layers.Dense(units=1, activation="sigmoid")) # Output layer

# Get a summary of model configuration
GRU.summary()

### Model Training

In [23]:
batch_size = 32
epochs = 10

In [None]:
RNN.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

RNN_trainer = RNN.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=batch_size, epochs=epochs)

RNN_test_loss, RNN_test_acc = RNN.evaluate(X_test, y_test)
print(f'RNN Test Accuracy: {RNN_test_acc}')

In [None]:
LSTM.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

LSTM_trainer = LSTM.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=batch_size, epochs=epochs)

LSTM_test_loss, LSTM_test_acc = LSTM.evaluate(X_test, y_test)
print(f'LSTM Test Accuracy: {LSTM_test_acc}')

In [24]:
GRU.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

GRU_trainer = GRU.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=batch_size, epochs=epochs)

GRU_test_loss, GRU_test_acc = GRU.evaluate(X_test, y_test)
print(f'GRU Test Accuracy: {GRU_test_acc}')

Epoch 1/10
[1m112/147[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m1:38[0m 3s/step - accuracy: 0.6978 - loss: 0.6745

KeyboardInterrupt: 

### Plotting Results

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# Accuracy plot
ax1.plot(RNN_trainer.history['val_accuracy'], label='RNN Val Accuracy')
ax1.plot(LSTM_trainer.history['val_accuracy'], label='LSTM Val Accuracy')
ax1.plot(GRU_trainer.history['val_accuracy'], label='GRU Val Accuracy')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Accuracy')
ax1.set_title('Model Accuracy')
ax1.legend()
ax1.grid(True)

# Loss plot
ax2.plot(RNN_trainer.history['val_loss'], label='RNN Val Loss')
ax2.plot(LSTM_trainer.history['val_loss'], label='LSTM Val Loss')
ax2.plot(GRU_trainer.history['val_loss'], label='GRU Val Loss')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss')
ax2.set_title('Model Loss')
ax2.legend()
ax2.grid(True)

plt.tight_layout()
plt.show()

In [None]:
y_pred = RNN.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

cm = confusion_matrix(y_test, y_pred_classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, 
                               display_labels=['NORMAL', 'PNEUMONIA'])
disp.plot(cmap='Blues')
plt.title('Confusion Matrix - Test Set')
plt.show()

# 3. Print test metrics
test_loss, test_acc = RNN.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_acc:.4f}')
print(f'Test Loss: {test_loss:.4f}')

In [None]:
y_pred = LSTM.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

cm = confusion_matrix(y_test, y_pred_classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, 
                               display_labels=['NORMAL', 'PNEUMONIA'])
disp.plot(cmap='Blues')
plt.title('Confusion Matrix - Test Set')
plt.show()

# 3. Print test metrics
test_loss, test_acc = LSTM.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_acc:.4f}')
print(f'Test Loss: {test_loss:.4f}')

In [None]:
y_pred = GRU.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

cm = confusion_matrix(y_test, y_pred_classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, 
                               display_labels=['NORMAL', 'PNEUMONIA'])
disp.plot(cmap='Blues')
plt.title('Confusion Matrix - Test Set')
plt.show()

# 3. Print test metrics
test_loss, test_acc = GRU.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_acc:.4f}')
print(f'Test Loss: {test_loss:.4f}')

## Initial Rough Grid Search

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import itertools
from tqdm import tqdm
import pandas as pd
import os
from load_data import load_and_transform_data

# Speed optimizations
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '1'


In [None]:

# Create results directory
os.makedirs("CSV results", exist_ok=True)

# Hyperparameters
units = [30, 120, 480]
epochs = [5]
n_layers = [1, 2, 3]
layer_types = [layers.SimpleRNN, layers.LSTM, layers.GRU]

# Generate all layer configurations
def generate_layer_configs(n_layers, layer_types, units_options):
    configs = []
    
    for layer_type in layer_types:
        # Generate all unit combinations for this layer type
        if n_layers == 1:
            for u in units_options:
                configs.append({
                    'type': layer_type,
                    'units': (u,)
                })
        elif n_layers == 2:
            for u1, u2 in itertools.product(units_options, units_options):
                configs.append({
                    'type': layer_type,
                    'units': (u1, u2)
                })
        elif n_layers == 3:
            for u1, u2, u3 in itertools.product(units_options, units_options, units_options):
                configs.append({
                    'type': layer_type,
                    'units': (u1, u2, u3)
                })
    
    return configs

In [3]:
# Load data
from load_data import load_and_transform_data
X_train, y_train, X_val, y_val, X_test, y_test = load_and_transform_data(SEED=42)

Dataset already exists at: chest_xray_data

Dataset ready at: c:\Users\eirik\Desktop\FYSSTK3155_Dataprocessing_and_MachineLearning\Project3_FYSSTK3155-4155\Code\chest_xray_data
Dataset already exists at: chest_xray_data_split



Loading train: 100%|██████████| 4684/4684 [00:36<00:00, 127.32it/s]
Loading val: 100%|██████████| 585/585 [00:04<00:00, 122.05it/s]
Loading test: 100%|██████████| 587/587 [00:04<00:00, 136.46it/s]


Train: (4684, 480, 480), Val: (585, 480, 480), Test: (587, 480, 480)


In [None]:
# Calculate total combinations
layer_configs = []
for n_lay in n_layers:
    configs = generate_layer_configs(n_lay, layer_types, units)
    for config in configs:
        layer_configs.append((n_lay, config))

total_combinations = len(epochs) * len(layer_configs)
print(f"Total combinations: {total_combinations}")

# Store results
results1 = []

# Train all models
for idx, (ep, (n_lay, layer_config)) in enumerate(tqdm(
    itertools.product(epochs, layer_configs), 
    total=total_combinations)):
    
    # Clear previous sessions
    tf.keras.backend.clear_session()
    
    # Build model based on layer configuration
    model = keras.Sequential()
    model.add(keras.Input(shape=(480, 480)))
    
    # Add layers with varying units
    layer_type = layer_config['type']
    layer_units = layer_config['units']
    
    for i, u in enumerate(layer_units):
        return_sequences = (i < len(layer_units) - 1)
        model.add(layer_type(units=u, return_sequences=return_sequences))
    
    # Output layer
    model.add(layers.Dense(units=1, activation="sigmoid"))
    
    # Compile
    model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    # Train with early stopping
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, verbose=0)
    
    trainer = model.fit(X_train, y_train, validation_data=(X_val, y_val), 
                       epochs=ep, verbose=0, callbacks=[early_stop])

    # Get metrics
    val_acc = trainer.history['val_accuracy'][-1]
    val_loss = trainer.history['val_loss'][-1]
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

    # Create readable string
    layer_name = layer_type.__name__
    layer_str = ' -> '.join([f"{layer_name}({u})" for u in layer_units])

    # Store results
    results1.append({
        'n-Layers': n_lay,
        'Layer-Config': layer_str,
        'Units': str(layer_units),
        'n-Epochs': ep,
        'Val Accuracy': val_acc,
        'Val Loss': val_loss,
        'Test Accuracy': test_acc,
        'Test Loss': test_loss
    })
    
    # Save progress
    pd.DataFrame(results1).to_csv("CSV results/GRU_results_rough_temp.csv", index=False)

# Save final results
df_results1 = pd.DataFrame(results1)
df_results1.to_csv("CSV results/GRU_results_rough.csv", index=False)

# Show top 20
top_20 = df_results1.nlargest(20, 'Val Accuracy')
print(f"\nComplete! {len(results1)} models trained")
print("\nTop 20:")
print(top_20[['Layer-Config', 'Val Accuracy', 'Test Accuracy']])

Total combinations: 234


 34%|███▍      | 80/234 [45:51:24<88:16:26, 2063.55s/it] 


KeyboardInterrupt: 

In [None]:
# Save final results
df_results1 = pd.DataFrame(results1)
df_results1.to_csv("CSV results/RNN_results_rough.csv", index=False)

# Show top 20
top_20 = df_results1.nlargest(20, 'Test Accuracy')
print(f"\nComplete! {len(results1)} models trained")
print("\nTop 20:")
print(top_20[['Layer-Config', 'Val Accuracy', 'Test Accuracy']])


Complete! 80 models trained

Top 20:
                           Layer-Config  Dropout  Val Accuracy  Test Accuracy
34                 GRU(480) -> GRU(120)      0.0      0.911111       0.909710
29                  GRU(30) -> GRU(480)      0.0      0.904274       0.908007
31                 GRU(120) -> GRU(120)      0.0      0.900855       0.904600
32                 GRU(120) -> GRU(480)      0.0      0.905983       0.902896
27                   GRU(30) -> GRU(30)      0.0      0.912821       0.901192
35                 GRU(480) -> GRU(480)      0.0      0.887179       0.901192
64    LSTM(30) -> LSTM(30) -> LSTM(120)      0.0      0.876923       0.890971
6                               GRU(30)      0.0      0.907692       0.889267
79  LSTM(120) -> LSTM(480) -> LSTM(120)      0.0      0.887179       0.889267
30                  GRU(120) -> GRU(30)      0.0      0.894017       0.885860
7                              GRU(120)      0.0      0.885470       0.884157
8                         

In [26]:
# Hyperparameters
units = [30, 120, 240, 480]
layer_types = [layers.GRU]
epochs = [5]

# Generate all layer configurations
def generate_layer_configs(layer_types, units_options):
    configs = []
    
    for layer_type in layer_types:
        # Generate all unit combinations for this layer type
        for u1, u2 in itertools.product(units_options, units_options):
            configs.append({
                'type': layer_type,
                'units': (u1, u2)
            })
    
    return configs

In [None]:
# Calculate total combinations
layer_configs = []
for n_lay in n_layers:
    configs = generate_layer_configs(layer_types, units)
    for config in configs:
        layer_configs.append((n_lay, config))

total_combinations = len(epochs) * len(layer_configs)
print(f"Total combinations: {total_combinations}")

# Store results
results2 = []

# Train all models
for idx, (ep, (n_lay, layer_config)) in enumerate(tqdm(
    itertools.product(epochs, layer_configs), 
    total=total_combinations)):
    
    # Clear previous sessions
    tf.keras.backend.clear_session()
    
    # Build model based on layer configuration
    model = keras.Sequential()
    model.add(keras.Input(shape=(480, 480)))
    
    # Add layers with varying units
    layer_type = layer_config['type']
    layer_units = layer_config['units']
    
    for i, u in enumerate(layer_units):
        return_sequences = (i < len(layer_units) - 1)
        model.add(layer_type(units=u, return_sequences=return_sequences))
    
    # Output layer
    model.add(layers.Dense(units=1, activation="sigmoid"))
    
    # Compile
    model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    # Train with early stopping
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, verbose=0)
    
    trainer = model.fit(X_train, y_train, validation_data=(X_val, y_val), 
                       epochs=ep, verbose=0, callbacks=[early_stop])

    # Get metrics
    val_acc = trainer.history['val_accuracy'][-1]
    val_loss = trainer.history['val_loss'][-1]
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

    # Create readable string
    layer_name = layer_type.__name__
    layer_str = ' -> '.join([f"{layer_name}({u})" for u in layer_units])

    # Store results
    results2.append({
        'n-Layers': n_lay,
        'Layer-Config': layer_str,
        'Units': str(layer_units),
        'n-Epochs': ep,
        'Val Accuracy': val_acc,
        'Val Loss': val_loss,
        'Test Accuracy': test_acc,
        'Test Loss': test_loss
    })
    
    # Save progress
    pd.DataFrame(results2).to_csv("CSV results/RNN_results_rough_temp2.csv", index=False)

# Save final results
df_results2 = pd.DataFrame(results2)
df_results2.to_csv("CSV results/RNN_results_rough2.csv", index=False)

# Show top 20
top_20 = df_results2.nlargest(20, 'Val Accuracy')
print(f"\nComplete! {len(results2)} models trained")
print("\nTop 20:")
print(top_20[['Layer-Config', 'Val Accuracy', 'Test Accuracy']])

Total combinations: 48


  0%|          | 0/48 [00:00<?, ?it/s]