In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2, EfficientNetB0, ResNet50V2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.utils.class_weight import compute_class_weight

# Verify GPU availability
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

base_dir = 'C:\\Users\\Θάνος\\Desktop\\Thesis Thanasis\\data_aug_3'
subfolders = ['clear', 'clouds']
categories = ['Healthy_augmented', 'Damaged_augmented']
IMG_HEIGHT = 64
IMG_WIDTH = 64
BATCH_SIZE = 32

def load_data(base_dir, subfolders, categories, img_height, img_width):
    data = []
    labels = []
    image_paths = []
    for category in categories:
        class_num = categories.index(category)
        for subfolder in subfolders:
            folder_path = os.path.join(base_dir, subfolder, category)
            images = sorted(os.listdir(folder_path))
            for img_name in images:
                if img_name.endswith('.png'):
                    img_path = os.path.join(folder_path, img_name)
                    img = tf.keras.preprocessing.image.load_img(img_path, target_size=(img_height, img_width))
                    img_array = tf.keras.preprocessing.image.img_to_array(img)
                    data.append(img_array)
                    labels.append(class_num)
                    image_paths.append((subfolder, category, img_name))
    return np.array(data), np.array(labels), image_paths

data, labels, image_paths = load_data(base_dir, subfolders, categories, IMG_HEIGHT, IMG_WIDTH)
data = data / 255.0

# Split data ensuring twins are in the same split
def split_data(image_paths):
    unique_image_ids = list(set([img_name for subfolder, category, img_name in image_paths]))
    train_ids, test_ids = train_test_split(unique_image_ids, test_size=0.2, random_state=42)
    return train_ids, test_ids

def get_split_indices(image_paths, split_ids):
    split_indices = [i for i, (subfolder, category, img_name) in enumerate(image_paths) if img_name in split_ids]
    return split_indices

train_ids, test_ids = split_data(image_paths)
train_indices = get_split_indices(image_paths, train_ids)
test_indices = get_split_indices(image_paths, test_ids)

X_train_val, y_train_val = data[train_indices], labels[train_indices]
X_test, y_test = data[test_indices], labels[test_indices]

# Convert labels to one-hot encoding
y_train_val = to_categorical(y_train_val, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

print(f"Training data shape: {X_train_val.shape}")
print(f"Test data shape: {X_test.shape}")
print(f"Training labels shape: {y_train_val.shape}")
print(f"Test labels shape: {y_test.shape}")

# Define data augmentation with seed
def create_datagen(seed=None):
    return ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.3,
        height_shift_range=0.3,
        shear_range=0.3,
        zoom_range=0.3,
        horizontal_flip=True,
        fill_mode='nearest'
    ), seed

datagen, seed = create_datagen(seed=42)  # Set the seed for reproducibility
datagen.fit(X_train_val)
train_val_generator = datagen.flow(X_train_val, y_train_val, batch_size=BATCH_SIZE, seed=seed)  # Use the seed here too

# Compute class weights using the training set
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(np.argmax(y_train_val, axis=1)), y=np.argmax(y_train_val, axis=1))
class_weights = dict(enumerate(class_weights))

print(f"Class weights: {class_weights}")

# Define model building functions
def build_mobilenetv2_model(input_shape):
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False
    inputs = Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    outputs = Dense(2, activation='softmax')(x)
    model = Model(inputs, outputs)
    return model

def build_efficientnetb0_model(input_shape):
    base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False
    inputs = Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    outputs = Dense(2, activation='softmax')(x)
    model = Model(inputs, outputs)
    return model

def build_resnet50v2_model(input_shape):
    base_model = ResNet50V2(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False
    inputs = Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    outputs = Dense(2, activation='softmax')(x)
    model = Model(inputs, outputs)
    return model

input_shape = (IMG_HEIGHT, IMG_WIDTH, 3)
models = {
    "MobileNetV2": build_mobilenetv2_model(input_shape),
    #"EfficientNetB0": build_efficientnetb0_model(input_shape),
    "ResNet50V2": build_resnet50v2_model(input_shape)
}

# Compile models
for name, model in models.items():
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

# Define callbacks
early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2, patience=5, min_lr=1e-7, verbose=1)

# Train and evaluate each model
histories = {}
test_results = {}
for name, model in models.items():
    print(f"Training {name} model...")
    history = model.fit(
        train_val_generator,
        steps_per_epoch=len(X_train_val) // BATCH_SIZE,
        epochs=200,
        class_weight=class_weights,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )
    histories[name] = history

    # Save the fine-tuned model
    model_path = f'best_{name}_model.h5'
    model.save(model_path)
    print(f"Best {name} fine-tuned model saved to {model_path}")

    # Evaluate the model on the test set
    test_predictions = model.predict(X_test)
    y_test_true = np.argmax(y_test, axis=1)
    y_test_pred = np.argmax(test_predictions, axis=1)

    test_conf_matrix = confusion_matrix(y_test_true, y_test_pred)
    test_class_report = classification_report(y_test_true, y_test_pred, target_names=categories)

    test_results[name] = {
        "confusion_matrix": test_conf_matrix,
        "classification_report": test_class_report
    }

# Print the evaluation results for each model
for name, results in test_results.items():
    print(f"Confusion Matrix (Test) for {name}:")
    print(results["confusion_matrix"])
    print(f"Classification Report (Test) for {name}:")
    print(results["classification_report"])


TensorFlow version: 2.10.0
Num GPUs Available:  1
Training data shape: (9664, 64, 64, 3)
Test data shape: (2416, 64, 64, 3)
Training labels shape: (9664, 2)
Test labels shape: (2416, 2)
Class weights: {0: 0.8619336425258651, 1: 1.1907343518974864}
Training MobileNetV2 model...
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 40: ReduceLROnPlateau reducing learning rate to 1.9999999494757503e-05.
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epo