In [5]:
import os
import zipfile as zf

# Path to the repaired ZIP file
zip_file_path = "PartB_DFU_dataset - Copy.zip"
extract_path = "DFU_dataset"

if os.path.exists(zip_file_path):
    try:
        with zf.ZipFile(zip_file_path, 'r') as files:
            files.extractall(extract_path)
        print(f"Extraction completed successfully to '{extract_path}'")
    except zf.BadZipFile:
        print("Error: The ZIP file is corrupted.")
    except OSError as e:
        print(f"OS error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
else:
    print(f"Error: The file '{zip_file_path}' does not exist.")


Extraction completed successfully to 'DFU_dataset'


In [6]:
import numpy as np # linear algebra
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from PIL import Image
# Define the root directory where your image folders are located
root_directory = "DFU_dataset/PartB_DFU_dataset - Copy"

# Initialize lists to store image paths and corresponding class labels for both datasets
image_paths_ischaemia = []
categories_ischaemia = []
image_paths_infection = []
categories_infection = []

# Iterate over each class and its subdirectories
for class_name in ["Infection", "Ischaemia"]:
    for augmentation_type in ["Aug-Negative", "Aug-Positive"]:
        folder_path = os.path.join(root_directory, class_name, augmentation_type)
        category = f"{class_name.lower()}{'pov' if 'Positive' in augmentation_type else 'neg'}"
        
        # Iterate over image files in the current directory
        for file_name in os.listdir(folder_path):
            if file_name.endswith(".jpg"):  # Assuming images are jpg format
                image_path = os.path.join(folder_path, file_name)
                if class_name == "Ischaemia":
                    image_paths_ischaemia.append(image_path)
                    categories_ischaemia.append("ischemia" if "Positive" in augmentation_type else "non-ischemia")
                elif class_name == "Infection":
                    image_paths_infection.append(image_path)
                    categories_infection.append("infection" if "Positive" in augmentation_type else "non-infection")

# Create DataFrames for each dataset
df_ischaemia = pd.DataFrame({"category": categories_ischaemia, "image_path": image_paths_ischaemia})
df_infection = pd.DataFrame({"category": categories_infection, "image_path": image_paths_infection})

# Label encoding for Ischaemia dataset
label_encoder_ischaemia = LabelEncoder()
df_ischaemia['Class_Label'] = label_encoder_ischaemia.fit_transform(df_ischaemia['category'])
print("Ischaemia Class Mapping:")
for class_label, numerical_label in zip(df_ischaemia['category'].unique(), df_ischaemia['Class_Label'].unique()):
    print(f"{class_label}: {numerical_label}")

# Label encoding for Infection dataset
label_encoder_infection = LabelEncoder()
df_infection['Class_Label'] = label_encoder_infection.fit_transform(df_infection['category'])
print("Infection Class Mapping:")
for class_label, numerical_label in zip(df_infection['category'].unique(), df_infection['Class_Label'].unique()):
    print(f"{class_label}: {numerical_label}")

# Shuffle both DataFrames
df_ischaemia = df_ischaemia.sample(frac=1).reset_index(drop=True)
df_infection = df_infection.sample(frac=1).reset_index(drop=True)

# Helper function to load and process images
def load_images(df):
    images = []
    target_labels = []   
    for index, row in df.iterrows():
        image = Image.open(row['image_path'])
        image_array = np.array(image.resize((224, 224)))  # Resize image to fit MobileNet input size
        images.append(image_array)
        target_labels.append(row['Class_Label'])
    return np.array(images), np.array(target_labels)

# Load images for both datasets
images_ischaemia, target_labels_ischaemia = load_images(df_ischaemia)
images_infection, target_labels_infection = load_images(df_infection)

print("Shape of Ischaemia images array:", images_ischaemia.shape)
print("Shape of Ischaemia target labels array:", target_labels_ischaemia.shape)
print("Shape of Infection images array:", images_infection.shape)
print("Shape of Infection target labels array:", target_labels_infection.shape)

# Split the Ischaemia dataset
X_train_ischaemia, X_test_ischaemia, y_train_ischaemia, y_test_ischaemia = train_test_split(
    images_ischaemia, target_labels_ischaemia, test_size=0.3, random_state=42)
X_val_ischaemia, X_test_ischaemia, y_val_ischaemia, y_test_ischaemia = train_test_split(
    X_test_ischaemia, y_test_ischaemia, test_size=0.25, random_state=42)  # 0.25 * 0.3 = 0.075

# Split the Infection dataset
X_train_infection, X_test_infection, y_train_infection, y_test_infection = train_test_split(
    images_infection, target_labels_infection, test_size=0.3, random_state=42)
X_val_infection, X_test_infection, y_val_infection, y_test_infection = train_test_split(
    X_test_infection, y_test_infection, test_size=0.25, random_state=42)  # 0.25 * 0.3 = 0.075

print("Ischaemia Training set shape:", X_train_ischaemia.shape, y_train_ischaemia.shape)
print("Ischaemia Validation set shape:", X_val_ischaemia.shape, y_val_ischaemia.shape)
print("Ischaemia Test set shape:", X_test_ischaemia.shape, y_test_ischaemia.shape)
print("Infection Training set shape:", X_train_infection.shape, y_train_infection.shape)
print("Infection Validation set shape:", X_val_infection.shape, y_val_infection.shape)
print("Infection Test set shape:", X_test_infection.shape, y_test_infection.shape)

Ischaemia Class Mapping:
non-ischemia: 1
ischemia: 0
Infection Class Mapping:
non-infection: 1
infection: 0
Shape of Ischaemia images array: (9870, 224, 224, 3)
Shape of Ischaemia target labels array: (9870,)
Shape of Infection images array: (5890, 224, 224, 3)
Shape of Infection target labels array: (5890,)
Ischaemia Training set shape: (6909, 224, 224, 3) (6909,)
Ischaemia Validation set shape: (2220, 224, 224, 3) (2220,)
Ischaemia Test set shape: (741, 224, 224, 3) (741,)
Infection Training set shape: (4123, 224, 224, 3) (4123,)
Infection Validation set shape: (1325, 224, 224, 3) (1325,)
Infection Test set shape: (442, 224, 224, 3) (442,)


In [10]:
import numpy as np
from kerastuner import HyperModel, HyperParameters
from kerastuner.tuners import BayesianOptimization, GridSearch
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dropout, TimeDistributed, Flatten, LSTM, Dense, BatchNormalization, GlobalAveragePooling2D, Reshape
from tensorflow.keras.models import Sequential
import tensorflow as tf
from tensorflow import keras
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import precision_score, recall_score, f1_score

# Define the model-building function
def build_model(hp):
    base_model = EfficientNetB0(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
    
    # Unfreeze the top few layers of the base model
    for layer in base_model.layers[-20:]:
        layer.trainable = True
    
    model = Sequential([
        base_model,
        TimeDistributed(Flatten()),
        LSTM(hp.Int('lstm_units_1', min_value=120, max_value=200, step=30), 
             dropout=hp.Float('dropout_1', min_value=0.2, max_value=0.3, step=0.1), 
             return_sequences=True),
        LSTM(hp.Int('lstm_units_2', min_value=60, max_value=100, step=10), 
             dropout=hp.Float('dropout_2', min_value=0.2, max_value=0.3, step=0.1), 
             return_sequences=True),
        LSTM(hp.Int('lstm_units_3', min_value=30, max_value=50, step=10), 
             dropout=0.2, 
             return_sequences=False),
        Dense(148, activation='relu'),
        Dropout(0.4),
        BatchNormalization(),
        Dense(84, activation='relu'),
        Dropout(0.3),
        BatchNormalization(),
        Dense(32, activation='relu'),
        Dropout(0.2),
        BatchNormalization(),
        Dense(16, activation='relu'),
        Dropout(0.2),
        Dense(3, activation='softmax')
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Calculate class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train_ischaemia), y=y_train_ischaemia)
class_weights = dict(enumerate(class_weights))

# Instantiate the tuner
tuner = BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    directory='LSTM_EfficientNetB0_tunning',
    project_name='Bayesian_isechamia_tuning'
)

early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6)

tuner.search(X_train_ischaemia, y_train_ischaemia, epochs=10, batch_size=64, validation_data=(X_val_ischaemia, y_val_ischaemia), callbacks=[early_stopping, reduce_lr], class_weight=class_weights)

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the model with the optimal hyperparameters
model = tuner.hypermodel.build(best_hps)

# Train the final model
history = model.fit(X_train_ischaemia, y_train_ischaemia, epochs=30, batch_size=64, validation_data=(X_val_ischaemia, y_val_ischaemia), callbacks=[early_stopping, reduce_lr], class_weight=class_weights)


Trial 5 Complete [00h 04m 21s]
val_accuracy: 0.9923423528671265

Best val_accuracy So Far: 0.9932432174682617
Total elapsed time: 00h 21m 50s
Epoch 1/30


2024-07-30 11:45:32.573649: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:966] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/sequential_1_1/efficientnetb0_1/block2b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 219ms/step - accuracy: 0.6049 - loss: 0.8540 - val_accuracy: 0.5689 - val_loss: 0.8681 - learning_rate: 0.0010
Epoch 2/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 190ms/step - accuracy: 0.9093 - loss: 0.2823 - val_accuracy: 0.7937 - val_loss: 0.5152 - learning_rate: 0.0010
Epoch 3/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 195ms/step - accuracy: 0.9571 - loss: 0.1425 - val_accuracy: 0.9459 - val_loss: 0.1535 - learning_rate: 0.0010
Epoch 4/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 197ms/step - accuracy: 0.9572 - loss: 0.1329 - val_accuracy: 0.9450 - val_loss: 0.1870 - learning_rate: 0.0010
Epoch 5/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 195ms/step - accuracy: 0.9747 - loss: 0.0918 - val_accuracy: 0.9459 - val_loss: 0.1510 - learning_rate: 0.0010
Epoch 6/30
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [11]:
train_loss, train_accuracy = model.evaluate(X_train_ischaemia,y_train_ischaemia)
val_loss, val_accuracy = model.evaluate(X_val_ischaemia, y_val_ischaemia)
trest_loss,train_accuracy = model.evaluate(X_test_ischaemia, y_test_ischaemia)
y_pred = model.predict([X_test_ischaemia, y_test_ischaemia])
y_pred_classes = np.argmax(y_pred, axis=1)
precision = precision_score(y_test_ischaemia, y_pred_classes, average='macro')
recall = recall_score(y_test_ischaemia, y_pred_classes, average='macro')
f1 = f1_score(y_test_ischaemia, y_pred_classes, average='macro')

print("Training Accuracy:", train_accuracy)
print("Training Loss:", train_loss)
print("Validation Accuracy:", val_accuracy)
print("Validation Loss:", val_loss)
print("Test Accuracy:", train_accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

[1m  7/216[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4s[0m 20ms/step - accuracy: 1.0000 - loss: 2.4185e-04 

W0000 00:00:1722340525.286095     919 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340525.286596     919 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340525.286971     919 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340525.287389     919 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340525.287764     919 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340525.288165     919 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340525.288652     919 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340525.289189     919 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340525.289672     919 gp

[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 1.0000 - loss: 2.0944e-04


W0000 00:00:1722340529.526578     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340529.526973     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340529.527321     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340529.527645     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340529.527951     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340529.528287     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340529.528689     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340529.529127     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340529.529526     901 gp

[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.9961 - loss: 0.0140


W0000 00:00:1722340531.803421     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340531.803777     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340531.804040     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340531.804329     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340531.804618     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340531.804892     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340531.805223     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340531.805571     901 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340531.805888     901 gp

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.9969 - loss: 0.0169


W0000 00:00:1722340532.645266     898 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340532.645627     898 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340532.645864     898 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340532.646073     898 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340532.646281     898 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340532.646492     898 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340532.646758     898 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340532.646983     898 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1722340532.647271     898 gp

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 94ms/step
Training Accuracy: 0.9959514141082764
Training Loss: 0.0002089349291054532
Validation Accuracy: 0.9954954981803894
Validation Loss: 0.016047988086938858
Test Accuracy: 0.9959514141082764
Precision: 0.9946006995045176
Recall: 0.9946006995045176
F1 Score: 0.9946006995045176
