In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import KFold  # Import KFold for cross-validation
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications import ConvNeXtBase
from tensorflow.keras.layers import Dropout, GlobalAveragePooling2D, Dense, Add
from tensorflow.keras.optimizers import Adam
from tensorflow import keras
from tensorflow.keras.applications import ConvNeXtBase
import tensorflow as tf
import keras_tuner as kt

# Define data loading and preprocessing
PATH = 'AD Data'
max_samples_per_class = 5000  # Maximum samples per class

def load_samples_from_directory(directory_path, max_samples_per_class):
    class_data = {}  # Dictionary to hold samples for each class

    for class_name in os.listdir(directory_path):
        class_path = os.path.join(directory_path, class_name)
        
        if os.path.isdir(class_path):  # Check if it is a directory
            files = [f for f in os.listdir(class_path) if f.endswith(('.jpg', '.jpeg', '.png'))]
            if len(files) > max_samples_per_class:
                files = np.random.choice(files, max_samples_per_class, replace=False)  # Randomly select samples
            class_data[class_name] = files[:max_samples_per_class]  # Limit to max_samples_per_class
    
    return class_data

# Load samples from the directory
class_data = load_samples_from_directory(PATH, max_samples_per_class)

# Prepare data for K-Fold
all_images = []  # List to store images
all_labels = []  # List to store labels

for class_name, files in class_data.items():
    for filename in files:
        image_path = os.path.join(PATH, class_name, filename)
        img = tf.keras.utils.load_img(image_path, target_size=(160, 160))  # Adjust target_size based on your model input
        img_array = tf.keras.utils.img_to_array(img)
        all_images.append(img_array)
        all_labels.append(class_name)

# Convert to numpy arrays
all_images = np.array(all_images)
all_labels = np.array(all_labels)

# Convert labels to numerical values
class_names = np.unique(all_labels)
label_to_index = {class_name: index for index, class_name in enumerate(class_names)}
all_labels = np.array([label_to_index[label] for label in all_labels])

# Define K-Fold cross-validation
num_folds = 5
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Function to build the model
def build_model(hp):
    base_model = ConvNeXtBase(weights='imagenet', include_top=False, input_shape=(160, 160, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    
    dropout_rate = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.1)
    x = Dropout(rate=dropout_rate)(x)
    
    units = hp.Int('units', min_value=64, max_value=256, step=64)
    x = Dense(units, activation='relu')(x)
    
    skip = Dense(units, activation='relu')(x)
    x = Add()([x, skip])
    
    x = Dropout(rate=dropout_rate)(x)
    
    predictions = Dense(len(class_names), activation='softmax')(x)
    model = keras.Model(inputs=base_model.input, outputs=predictions)
    
    for layer in base_model.layers:
        layer.trainable = False
        
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model




2024-10-16 16:59:15.508205: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-16 16:59:15.516832: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-16 16:59:15.526037: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-16 16:59:15.528808: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-16 16:59:15.536377: I tensorflow/core/platform/cpu_feature_guar

In [2]:
from tensorflow.keras import backend as K

# K-Fold Cross-Validation Loop
validation_scores = []
best_overall_accuracy = 0
best_overall_model = None
best_model_path = 'best_overall_model_ConvNeXtSmall.keras'  # Path to save the best model

tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=1,
    executions_per_trial=1,
    directory='ConvNeXtSmall_tuning',
    project_name='ConvNeXtSmall_tuning'
)

for fold, (train_index, val_index) in enumerate(kf.split(all_images)):
    print(f'Training fold {fold + 1}/{num_folds}...')
    
    X_train, X_val = all_images[train_index], all_images[val_index]
    y_train, y_val = all_labels[train_index], all_labels[val_index]
    
    # Create TensorFlow datasets for the current fold
    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32).shuffle(buffer_size=1000)
    val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(32)

    # Find the best model using the tuner
    tuner.search(train_ds, epochs=10, validation_data=val_ds)
    best_models = tuner.get_best_models(num_models=1)

    if len(best_models) > 0:
        best_model = best_models[0]

        # Fit the best model on the current training fold
        history = best_model.fit(train_ds, epochs=3, validation_data=val_ds, verbose=1)
        
        # Evaluate on the validation fold
        val_loss, val_accuracy = best_model.evaluate(val_ds)
        validation_scores.append(val_accuracy)
        print(f'Fold {fold + 1} - Validation Accuracy: {val_accuracy}')
        
        # Check if this is the best model so far
        if val_accuracy > best_overall_accuracy:
            best_overall_accuracy = val_accuracy
            best_overall_model = best_model
            # Save the best model
            print(f'New best model found in fold {fold + 1} with accuracy {val_accuracy}. Saving the model...')
            best_model.save(best_model_path)

    else:
        print("No models were found by the tuner for this fold.")
    
    # Clear the Keras session after each fold
    K.clear_session()

# Summary of validation scores
print(f'Mean Validation Accuracy across {num_folds} folds: {np.mean(validation_scores)}')
print(f'Standard Deviation of Validation Accuracy: {np.std(validation_scores)}')




Trial 1 Complete [00h 02m 00s]
val_accuracy: 0.8389999866485596

Best val_accuracy So Far: 0.8389999866485596
Total elapsed time: 00h 02m 00s
Epoch 1/3


  saveable.load_own_variables(weights_store.get(inner_path))


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 50ms/step - accuracy: 0.7676 - loss: 0.4906 - val_accuracy: 0.8350 - val_loss: 0.4051
Epoch 2/3
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.7780 - loss: 0.4596 - val_accuracy: 0.8415 - val_loss: 0.3975
Epoch 3/3
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accuracy: 0.7994 - loss: 0.4269 - val_accuracy: 0.8490 - val_loss: 0.3717
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - accuracy: 0.8396 - loss: 0.3508
Fold 1 - Validation Accuracy: 0.8489999771118164
New best model found in fold 1 with accuracy 0.8489999771118164. Saving the model...
Training fold 2/5...
Epoch 1/3
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 50ms/step - accuracy: 0.7809 - loss: 0.4646 - val_accuracy: 0.8425 - val_loss: 0.3797
Epoch 2/3
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 38ms/step - accura