# Get data

In [1]:
# To run this notebook, follow the next steps:
# 1. You need to download the archive.zip file from https://www.kaggle.com/datasets/sonainjamil/bleached-corals-detection
# 2. Create a folder named 'Bleached_and_Unbleached_Corals_Classification/' inside the 'raw_data' folder
# 3. Copy the archive.zip file into the 'Bleached_and_Unbleached_Corals_Classification/' folder
# 4. Define the 'path_data' variable below with the path to the 'raw_data/Bleached_and_Unbleached_Corals_Classification/' folder on your local machine.

# path_data = '/Users/carloschutz/code/Lucia-Cordero/ReefSight-Project/raw_data/Bleached_and_Unbleached_Corals_Classification'

In [2]:
import os
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

# ==============================================================================
# 1. Data Loading and Preprocessing (Unchanged)
# ==============================================================================

data_dir = "../code/Lucia-Cordero/ReefSight-Project/raw_data/Bleached_and_Unbleached_Corals_Classification:"

print("Contents of data_dir:", os.listdir(data_dir))
image_size = (224, 224)
batch_size = 32

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=42,
    image_size=image_size,
    batch_size=batch_size,
    label_mode="binary",
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=42,
    image_size=image_size,
    batch_size=batch_size,
    label_mode="binary",
)

class_names = train_ds.class_names
print("Classes:", class_names)

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(AUTOTUNE)
val_ds = val_ds.prefetch(AUTOTUNE)

# --- Hyperparameters ---
input_shape = image_size + (3,)
# We will use a lower learning rate for fine-tuning to prevent feature damage.
initial_learning_rate = 1e-4
fine_tune_learning_rate = 1e-5
epochs_initial_train = 10  # Reduced for faster demonstration, adjust as needed
epochs_fine_tune = 20      # Additional epochs for fine-tuning

# ==============================================================================
# 2. Data Augmentation and Model Definition (Modified)
# ==============================================================================

# Data Augmentation layer (This is applied directly to the model input)
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
], name="data_augmentation")

def create_vgg16_frozen_model(input_shape, learning_rate):
    """Creates the VGG16 model with the base frozen for initial training."""

    inputs = layers.Input(shape=input_shape)

    # Note: Augmentation is applied to input *before* VGG preprocessing
    x = data_augmentation(inputs)
    x = preprocess_input(x) # VGG-specific preprocessing

    base_model = VGG16(
        include_top=False,
        weights="imagenet",
        input_tensor=x # The input tensor is the output of VGG preprocessing
    )
    # CRITICAL: Freeze the base model for the first training phase
    base_model.trainable = False

    x = base_model.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation="relu")(x)
    x = layers.Dropout(0.5)(x)

    outputs = layers.Dense(1, activation="sigmoid")(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model

# ==============================================================================
# 3. Initial Training (Frozen Base)
# ==============================================================================

vgg_aug_model = create_vgg16_frozen_model(input_shape, initial_learning_rate)
vgg_aug_model.summary()

# Define paths and callbacks for the INITIAL phase
model_save_path = os.path.join(data_dir, "models", "vgg16_augmented_initial_best.keras")

initial_callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor="val_accuracy",
        patience=5, # Shorter patience for initial phase
        restore_best_weights=True
    ),
    tf.keras.callbacks.ModelCheckpoint(
        model_save_path,
        monitor="val_accuracy",
        save_best_only=True
    ),
]

print("\n--- Starting Initial Training (Frozen Base) ---")
history_initial = vgg_aug_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs_initial_train,
    callbacks=initial_callbacks,
)

# Load the best weights from the initial training run
vgg_aug_model.load_weights(model_save_path)


# ==============================================================================
# 4. Fine-Tuning (Unfrozen Base with Augmentation)
# ==============================================================================

print("\n--- Starting Fine-Tuning Phase (Unfrozen Base) ---")

# Step 4a: Unfreeze the base model
vgg_aug_model.layers[2].trainable = True # Index 2 is the VGG16 model

# Step 4b: Re-compile the model with a lower learning rate
vgg_aug_model.compile(
    optimizer=tf.keras.optimizers.Adam(fine_tune_learning_rate), # CRITICAL: Lower LR
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
# vgg_aug_model.summary(expand_nested=True) # Optional: Show which layers are now trainable

# Define callbacks for the FINE-TUNING phase
model_fine_tune_save_path = os.path.join(data_dir, "models", "vgg16_augmented_final_best.keras")

fine_tune_callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor="val_accuracy",
        patience=10, # Longer patience for fine-tuning
        restore_best_weights=True
    ),
    tf.keras.callbacks.ModelCheckpoint(
        model_fine_tune_save_path,
        monitor="val_accuracy",
        save_best_only=True
    ),
]

# Step 4c: Continue training from the saved initial weights
history_fine_tune = vgg_aug_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs_initial_train + epochs_fine_tune, # Run for the total number of desired epochs
    initial_epoch=history_initial.epoch[-1],        # Start from where the initial training stopped
    callbacks=fine_tune_callbacks,
)

# Load the FINAL best weights
vgg_aug_model.load_weights(model_fine_tune_save_path)


# ==============================================================================
# 5. Evaluation (Final Model)
# ==============================================================================

val_loss, val_acc = vgg_aug_model.evaluate(val_ds)
print("\nValidation loss (Final Augmented Model):", val_loss)
print("Validation accuracy (Final Augmented Model):", val_acc)


# ==============================================================================
# 6. Plotting and Visualization (Adapted to combine history)
# ==============================================================================

# Concatenate histories for combined plotting
def combine_histories(h1, h2):
    """Combines two history objects for continuous plotting."""
    h1_df = pd.DataFrame(h1.history)
    h2_df = pd.DataFrame(h2.history)
    return pd.concat([h1_df, h2_df], ignore_index=True)

combined_history_df = combine_histories(history_initial, history_fine_tune)

def plot_combined_history(df):
    df["epoch"] = range(1, len(df) + 1)

    plt.figure(figsize=(12, 4))

    # Accuracy
    plt.subplot(1, 2, 1)
    plt.plot(df["epoch"], df["accuracy"], label="Train")
    plt.plot(df["epoch"], df["val_accuracy"], label="Val")
    plt.axvline(x=history_initial.epoch[-1] + 1, color='r', linestyle='--', label='Fine-Tune Start')
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.title("Accuracy over epochs (Augmented/Fine-Tuned)")
    plt.grid(True, alpha=0.3)
    plt.legend()

    # Loss
    plt.subplot(1, 2, 2)
    plt.plot(df["epoch"], df["loss"], label="Train")
    plt.plot(df["epoch"], df["val_loss"], label="Val")
    plt.axvline(x=history_initial.epoch[-1] + 1, color='r', linestyle='--')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Loss over epochs (Augmented/Fine-Tuned)")
    plt.grid(True, alpha=0.3)
    plt.legend()

    plt.tight_layout()
    plt.show()

plot_combined_history(combined_history_df)


# ==============================================================================
# 7. Single Prediction Example (Unchanged)
# ==============================================================================
# This section uses the FINAL best model saved to vgg16_augmented_final_best.keras

best_model_path = model_fine_tune_save_path # Use the final saved path
model = tf.keras.models.load_model(best_model_path)


if 'class_names' not in globals():
    # ... (logic to determine class_names) ...
    class_names = train_ds.class_names # Assuming train_ds is still available


images, labels = next(iter(val_ds))
idx = random.randint(0, len(images) - 1)
img = images[idx]
true_label = int(labels[idx].numpy())

prob = float(model.predict(tf.expand_dims(img, 0))[0, 0])
pred_label = int(prob >= 0.5)


bleached_idx = class_names.index("Bleached") if "Bleached" in class_names else 1

if bleached_idx == 1:
    p_bleached = prob
else:
    p_bleached = 1 - prob

print("\n--- Final Model Prediction Sample ---")
print("True:", class_names[true_label])
print("Pred:", class_names[pred_label])
print(f"Bleached probability: {p_bleached*100:.2f}%")
print(f"Unbleached probability: {(1 - p_bleached)*100:.2f}%")

plt.imshow(img.numpy().astype("uint8"))
plt.axis("off")
plt.show()

2025-12-11 14:34:33.360974: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-11 14:34:33.475850: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-11 14:34:33.615340: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-12-11 14:34:33.757199: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-12-11 14:34:33.757987: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-12-11 14:34:33.991862: I tensorflow/core/platform/cpu_feature_guard.cc:

FileNotFoundError: [Errno 2] No such file or directory: '../code/Lucia-Cordero/ReefSight-Project/raw_data/Bleached_and_Unbleached_Corals_Classification:'