In [None]:
import time
import copy
import numpy as np
import os
import zipfile
import timm
from sklearn.metrics import f1_score, precision_score, recall_score
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, regularizers, optimizers, callbacks

Dataset Loading

In [None]:
# Location of the zipped dataset and the directory it is extracted into
zip_path = "/home/en520-ldsilva1/scr4-en520-lmorove1/en520-ldsilva1/Project/augumented_final.zip"
extract_path = "/home/en520-ldsilva1/scr4-en520-lmorove1/en520-ldsilva1/Project"

# An existing "augumented_final" entry under extract_path is treated as
# evidence that the archive was already extracted, so extraction is skipped.
dataset_root = os.path.join(extract_path, "augumented_final")

if os.path.exists(dataset_root):
    print("Dataset already unzipped.")
else:
    print("Unzipping dataset...")
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_path)
    print("Unzipping completed.")

Dataset already unzipped.


In [None]:
BATCH_SIZE = 32 # Batch size
IMG_SIZE = (224, 224)  # Default input size of the model
EPOCHS = 10 # Number of epochs

# Paths to the train, validation and test sets, all derived from one dataset root
DATA_ROOT = "/home/en520-ldsilva1/scr4-en520-lmorove1/en520-ldsilva1/Project/augumented_final"
train_dir = os.path.join(DATA_ROOT, "train")
valid_dir = os.path.join(DATA_ROOT, "valid")
test_dir  = os.path.join(DATA_ROOT, "test")


def _load_split(directory, shuffle):
    """Load one image-folder split as a batched dataset with integer labels.

    Args:
        directory: Path to a folder passed to `image_dataset_from_directory`.
        shuffle: Whether the dataset should be shuffled.

    Returns:
        The dataset yielding (images, labels) batches of size BATCH_SIZE,
        with images resized to IMG_SIZE.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        label_mode="int",
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
    )


# Load the train, validation and test sets.
# Only the training split is shuffled; validation and test keep a fixed order.
print("Loading datasets...")
train_ds = _load_split(train_dir, shuffle=True)
val_ds = _load_split(valid_dir, shuffle=False)
test_ds = _load_split(test_dir, shuffle=False)

class_names = train_ds.class_names # Class names
num_classes = len(class_names) # Number of classes
print("Classes:", class_names)

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

# Count training samples per class, one vectorised bincount per batch
# instead of one Python iteration per image.
class_counts = np.zeros(num_classes, dtype=int)
for _, labels in train_ds:
    class_counts += np.bincount(labels.numpy(), minlength=num_classes)
print("Class counts:", class_counts)

# Balanced class weights: total / (num_classes * count).
# Rare classes get proportionally larger weights, and the mean weight over the
# training samples is 1 (when every class is present), so the weighted loss
# keeps the same scale as the unweighted loss and the L2 penalty used by the
# model is not inflated relative to it.
# A class with no training samples gets weight 0.0 rather than dividing by zero;
# the key is still present so the dict covers every class index.
total_samples = int(class_counts.sum())
class_weights = {
    i: float(total_samples / (num_classes * count)) if count > 0 else 0.0
    for i, count in enumerate(class_counts)
}
print("Class weights:", class_weights)

Loading datasets...
Found 8958 files belonging to 8 classes.


2025-04-23 07:34:59.498132: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 18250 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-80GB MIG 2g.20gb, pci bus id: 0000:01:00.0, compute capability: 8.0
2025-04-23 07:34:59.770643: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


Found 1282 files belonging to 8 classes.
Found 631 files belonging to 8 classes.
Classes: ['A', 'C', 'D', 'G', 'H', 'M', 'N', 'O']
Class counts: [ 370  414 2262  388  182  338 4032  972]
Class weights: {0: 0.021621621621621623, 1: 0.01932367149758454, 2: 0.0035366931918656055, 3: 0.020618556701030927, 4: 0.04395604395604396, 5: 0.023668639053254437, 6: 0.001984126984126984, 7: 0.00823045267489712}


Model Training and Evaluation

In [None]:
# Builder for the DenseNet121 transfer-learning classifier
def build_densenet_model(learning_rate, weight_decay):
    """Create and compile a DenseNet121-based image classifier.

    An ImageNet-pretrained DenseNet121 (without its top) is frozen and called
    in inference mode. Its output is globally average-pooled and fed to a
    Dense softmax layer with `num_classes` units.

    Args:
        learning_rate: Learning rate given to the Adam optimizer.
        weight_decay: L2 regularization factor applied to the kernel of the
            final Dense layer.

    Returns:
        The compiled `tf.keras.Model`, using sparse categorical cross-entropy
        loss and an accuracy metric.
    """
    input_shape = IMG_SIZE + (3,)

    backbone = tf.keras.applications.DenseNet121(
        include_top=False, weights='imagenet', input_shape=input_shape
    )
    # Freeze the pretrained backbone
    backbone.trainable = False

    image_input = tf.keras.Input(shape=input_shape)
    features = tf.keras.applications.densenet.preprocess_input(image_input)
    features = backbone(features, training=False)
    pooled = layers.GlobalAveragePooling2D()(features)

    head_regularizer = regularizers.l2(weight_decay)
    class_probs = layers.Dense(
        num_classes,
        activation="softmax",
        kernel_regularizer=head_regularizer,
    )(pooled)

    classifier = tf.keras.Model(inputs=image_input, outputs=class_probs)
    classifier.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        metrics=["accuracy"],
    )
    return classifier

# Define hyperparameters
learning_rates = [1e-3, 5e-4]
weight_decays = [0.0, 1e-4]

# Start below any achievable accuracy so the first trained configuration is
# always recorded, even if its validation accuracy is 0.0.
best_val_acc = float("-inf")
best_config = None
best_model = None

print("\nStarting hyperparameter tuning for DenseNet121...")

# Hyperparameter tuning: exhaustive grid search over (lr, weight_decay)
for lr in learning_rates:
    for wd in weight_decays:
        print(f"\nTraining configuration: lr={lr}, weight_decay={wd}")
        model = build_densenet_model(learning_rate=lr, weight_decay=wd)

        # Stop when validation accuracy has not improved for 3 epochs
        earlystop = callbacks.EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True)  # Early stopping

        # Train the model
        history = model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=EPOCHS,
            callbacks=[earlystop],
            class_weight=class_weights,
            verbose=1
        )

        # Score the weights the model actually holds after training.
        # The maximum of history['val_accuracy'] can come from an epoch whose
        # weights were not kept (e.g. when training runs through all epochs
        # without early stopping triggering; whether best weights are restored
        # in that case is Keras-version dependent), so the model that gets
        # saved is measured directly instead.
        _, max_val_acc = model.evaluate(val_ds, verbose=0)
        print(f"Config: lr={lr}, wd={wd} -- Best validation accuracy: {max_val_acc:.4f}")
        if max_val_acc > best_val_acc:
            best_val_acc = max_val_acc
            best_config = (lr, wd)
            best_model = model

# Fail with a clear message instead of an AttributeError on None below
if best_model is None:
    raise RuntimeError("No model was trained: the hyperparameter grid is empty.")

model_save_path = "best_densenet_model.h5"
best_model.save(model_save_path)  # Save the model with the best validation accuracy
print(f"\nBest model saved to: {model_save_path}")
print(f"Best hyperparameters: lr={best_config[0]}, weight_decay={best_config[1]}")
print(f"Best validation accuracy: {best_val_acc:.4f}")

# Evaluate the best model on the test set
test_loss, test_acc = best_model.evaluate(test_ds)
print(f"\nTest Accuracy: {test_acc:.4f}")

# Collect true labels and predicted classes in a single pass over the test set.
# predict_on_batch runs one forward pass per batch without the per-call setup
# and progress bar that predict() adds when invoked inside a loop, and taking
# labels and predictions from the same batch keeps them aligned.
true_batches = []
pred_batches = []

for images, labels in test_ds:
    preds = best_model.predict_on_batch(images)
    true_batches.append(labels.numpy()) # True labels
    pred_batches.append(np.argmax(preds, axis=1)) # Predictions

y_true = np.concatenate(true_batches)
y_pred = np.concatenate(pred_batches)

# Compute metrics (averaged over classes, weighted by each class's support)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')

# Print results
print(f"F1 Score (weighted): {f1:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")


Starting hyperparameter tuning for DenseNet121...

Training configuration: lr=0.001, weight_decay=0.0
Epoch 1/10


2025-04-23 06:56:42.813672: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
2025-04-23 06:56:43.093619: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


  2/280 [..............................] - ETA: 17s - loss: 0.0157 - accuracy: 0.0781  

2025-04-23 06:56:44.024569: I external/local_xla/xla/service/service.cc:168] XLA service 0x1552d8b95320 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-04-23 06:56:44.024611: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA A100-SXM4-80GB MIG 2g.20gb, Compute Capability 8.0
2025-04-23 06:56:44.029913: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1745405804.095863  544518 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Config: lr=0.001, wd=0.0 -- Best validation accuracy: 0.4813

Training configuration: lr=0.001, weight_decay=0.0001
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Config: lr=0.001, wd=0.0001 -- Best validation accuracy: 0.4493

Training configuration: lr=0.0005, weight_decay=0.0
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Config: lr=0.0005, wd=0.0 -- Best validation accuracy: 0.4103

Training configuration: lr=0.0005, weight_decay=0.0001
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Config: lr=0.0005, wd=0.0001 -- Best validation accuracy: 0.4415


  saving_api.save_model(



Best model saved to: best_densenet_model.h5
Best hyperparameters: lr=0.001, weight_decay=0.0
Best validation accuracy: 0.4813

Test Accuracy: 0.4992
F1 Score (weighted): 0.4950
Precision (weighted): 0.5136
Recall (weighted): 0.4992
