# In [None]:
# Install dependencies
!pip install numpy pillow scikit-learn seaborn tensorflow

# Imports
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Dataset locations: one folder of images per class (bleached vs. healthy).
bleached_path, healthy_path = (
    "UW-Madison-GI-Tract-Image-Segmentation/data/bleached_corals",
    "UW-Madison-GI-Tract-Image-Segmentation/data/healthy_corals",
)

def get_image_paths(folder_path, extensions=('.jpg',)):
    """Return the sorted paths of the image files directly inside a folder.

    Args:
        folder_path: Directory to scan (not recursive).
        extensions: File-name suffixes to accept. Matching is
            case-insensitive, so ``'.jpg'`` also accepts ``'.JPG'``.

    Returns:
        A list of ``os.path.join(folder_path, name)`` strings in sorted order.

    Raises:
        OSError: If ``folder_path`` cannot be listed (e.g. it does not exist).
    """
    wanted_suffixes = tuple(ext.lower() for ext in extensions)
    # os.listdir returns entries in arbitrary order; sorting keeps any seeded
    # shuffle or split that consumes this list reproducible across machines.
    return sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.lower().endswith(wanted_suffixes)
    )

# Get all image paths
bleached_image_paths = get_image_paths(bleached_path)
print(f"Bleached Corals - Total Images: {len(bleached_image_paths)}")
healthy_image_paths = get_image_paths(healthy_path)
print(f"Healthy Corals - Total Images: {len(healthy_image_paths)}")

# Fail fast with a readable message: an empty class folder would otherwise
# surface much later as an obscure error or a silently one-class dataset.
if not bleached_image_paths or not healthy_image_paths:
    raise ValueError(
        "Both class folders must contain images: "
        f"found {len(bleached_image_paths)} in {bleached_path!r} and "
        f"{len(healthy_image_paths)} in {healthy_path!r}."
    )

# Prepare dataset: one string label per path, in the same order as image_paths
image_paths = bleached_image_paths + healthy_image_paths
labels = ['bleached'] * len(bleached_image_paths) + ['healthy'] * len(healthy_image_paths)

# Encode labels as integers (LabelEncoder assigns ids in sorted class order)
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split dataset: 80% train / 20% test. Stratifying keeps the
# bleached/healthy ratio the same in both parts, so an imbalanced dataset
# cannot end up with a skewed test set by chance.
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths,
    encoded_labels,
    test_size=0.2,
    random_state=42,
    stratify=encoded_labels,
)

# Image loading
def preprocess_image(image_path, target_size=(299, 299)):
    """Load an image file as a fixed-size RGB pixel array.

    Args:
        image_path: Path of the image file to read.
        target_size: ``(width, height)`` passed to ``Image.resize``.

    Returns:
        A NumPy array with three colour channels in the last axis.

    Raises:
        OSError: If the file cannot be opened or decoded by Pillow.
    """
    # The context manager closes the underlying file once pixels are loaded.
    with Image.open(image_path) as img:
        # convert('RGB') gives every image exactly three channels, whatever
        # its stored mode: it drops an alpha channel and expands grayscale
        # or palette images, which have no third array axis to index.
        rgb = img.convert('RGB').resize(target_size)
        return np.array(rgb)

# Load every image into memory. Building the arrays directly as float32
# avoids the float64 copy that dividing a uint8 array by a Python float
# creates (twice the memory of float32 for the whole dataset).
train_images = np.array([preprocess_image(path) for path in train_paths], dtype=np.float32)
test_images = np.array([preprocess_image(path) for path in test_paths], dtype=np.float32)

# Scale pixels the way the pretrained InceptionV3 weights expect: the
# network's own preprocess_input maps [0, 255] to [-1, 1], not [0, 1].
train_images = tf.keras.applications.inception_v3.preprocess_input(train_images)
test_images = tf.keras.applications.inception_v3.preprocess_input(test_images)

# Random augmentation applied to training batches as they are generated
datagen = ImageDataGenerator(
    horizontal_flip=True,      # mirror images left-right at random
    rotation_range=20,         # rotation range, in degrees
    zoom_range=0.2,            # zoom range
    shear_range=0.2,           # shear intensity
    width_shift_range=0.2,     # horizontal shift, as a fraction of width
    height_shift_range=0.2,    # vertical shift, as a fraction of height
    fill_mode='nearest',       # how pixels exposed by a transform are filled
)

# Load InceptionV3 with ImageNet weights, without its original classifier
base_model = InceptionV3(include_top=False, weights='imagenet')

# Freeze the pretrained layers so that only the new head is trained at first
for layer in base_model.layers:
    layer.trainable = False

# New head: global average pooling, one hidden layer, and a single sigmoid
# unit that outputs one value per image for the binary label
pooled_features = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(1024, activation='relu')(pooled_features)
predictions = Dense(1, activation='sigmoid')(hidden)

# Assemble the full network from the backbone input to the new output
model = Model(inputs=base_model.input, outputs=predictions)

# Compile for binary classification
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Hold out part of the training data for validation. Early stopping,
# learning-rate scheduling and best-weight restoration all select on the
# validation data, so using the test set here would bias the final test
# metrics upwards. train_test_split shuffled the training data already, so a
# contiguous slice is a random sample and, unlike a second split, does not
# copy the image array.
val_count = max(1, int(len(train_images) * 0.2))
val_images, val_labels = train_images[:val_count], train_labels[:val_count]
fit_images, fit_labels = train_images[val_count:], train_labels[val_count:]

# Callbacks for the initial (frozen-backbone) stage
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)

# Train the new classification head
history = model.fit(
    datagen.flow(fit_images, fit_labels, batch_size=32),
    epochs=80,
    validation_data=(val_images, val_labels),
    callbacks=[early_stopping, reduce_lr]
)

# Unfreeze the last layers of the base model for fine-tuning.
# BatchNormalization layers stay frozen: letting them update their moving
# statistics on a small dataset can undo what the pretrained backbone learned.
for layer in base_model.layers[-50:]:
    if not isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = True

# Recompile the model with a lower learning rate for fine-tuning
# (changes to `trainable` only take effect after compiling again)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

# Fine-tuning needs its own scheduler: the stage-one floor of 1e-4 is above
# the 1e-5 starting rate used here, so that callback could never lower it.
fine_tune_callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-7),
]

# Continue training the model
history_fine = model.fit(
    datagen.flow(fit_images, fit_labels, batch_size=32),
    epochs=80,
    validation_data=(val_images, val_labels),
    callbacks=fine_tune_callbacks
)

# Evaluate the model on the held-out test images
loss, accuracy = model.evaluate(test_images, test_labels)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Make predictions: the sigmoid output is one probability per image, returned
# as an (n, 1) column; threshold at 0.5 and flatten to a 1-D label vector.
predictions = model.predict(test_images)
predicted_labels = (predictions > 0.5).astype(int).ravel()

# Pass the full set of class ids explicitly so the report and the matrix keep
# one row/column per class even if a class is missing from the test labels
# or from the predictions (otherwise target_names would not line up).
class_ids = np.arange(len(label_encoder.classes_))

# Classification report
print(classification_report(
    test_labels,
    predicted_labels,
    labels=class_ids,
    target_names=label_encoder.classes_,
))

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(test_labels, predicted_labels, labels=class_ids)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# Accuracy curves over both stages: the fine-tuning epochs are appended to
# the initial-training epochs so each curve is one continuous line.
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_name] + history_fine.history[metric_name])
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()