In [None]:
# Install required libraries
!pip install keras scikit-learn seaborn matplotlib tensorflow

# Core libraries
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Computer vision
import cv2
from PIL import Image

# Machine learning
import keras
from keras.applications import ResNet50
from keras.layers import Dense, GlobalAveragePooling2D, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
# ImageDataGenerator is imported via the full `tensorflow.keras` package path;
# the `tf` alias cannot be used inside an import statement.
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef

In [None]:
# Mount Google Drive into the Colab filesystem; the dataset paths used by this
# notebook live under this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# Dataset locations: the train / val / test splits all sit under one experiment folder.
experiment_root = "/content/drive/MyDrive/MIT URTC 2025/Experiments/experiment_C"

train_dir = experiment_root + "/train"
validation_dir = experiment_root + "/val"
test_dir = experiment_root + "/test"

In [None]:
# Data preprocessing parameters
image_size = (224, 224)   # (height, width) every image is resized to
batch_size = 16
num_classes = 3           # must equal the number of class sub-folders (checked below)

# Training pipeline: multiply pixel values by 1/255 and apply random augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=10,
    zoom_range=0.2,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

# Validation / test pipeline: rescaling only, no augmentation
test_datagen = ImageDataGenerator(rescale=1./255)

# Load datasets (one sub-folder per class inside each split directory)
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

# shuffle=False keeps the sample order fixed so predictions can be matched to labels
val_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

print(f"Training samples: {train_generator.samples}")
print(f"Validation samples: {val_generator.samples}")
print(f"Test samples: {test_generator.samples}")
print(f"Number of classes: {train_generator.num_classes}")
print(f"Classes: {list(train_generator.class_indices.keys())}")

# Fail fast on a folder layout that disagrees with the configuration instead of
# hitting a shape error during training or silently mis-indexed labels later.
if train_generator.num_classes != num_classes:
    raise ValueError(
        f"num_classes is {num_classes} but the training directory contains "
        f"{train_generator.num_classes} class folders"
    )
for split_name, split_generator in (("validation", val_generator), ("test", test_generator)):
    if split_generator.class_indices != train_generator.class_indices:
        raise ValueError(
            f"Class folders of the {split_name} split {split_generator.class_indices} "
            f"do not match the training split {train_generator.class_indices}"
        )

In [None]:
# Build ResNet50 model
# Import necessary components from Keras/TensorFlow
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam

def create_resnet50_model(input_shape, num_classes):
    """Assemble a ResNet50 classifier that is trained from scratch.

    Args:
        input_shape: Shape of one input image, passed straight to ResNet50.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled Sequential model: ResNet50 backbone followed by a
        softmax Dense layer named 'predictions'.
    """
    # Backbone without the ImageNet top and without pre-trained weights;
    # pooling='avg' makes it end in global average pooling.
    backbone = ResNet50(
        weights=None,
        include_top=False,
        pooling='avg',
        input_shape=input_shape
    )

    # Classification head on top of the pooled features
    classifier_head = layers.Dense(num_classes, activation='softmax', name='predictions')

    network = models.Sequential()
    network.add(backbone)
    network.add(classifier_head)
    return network

# Instantiate the network for RGB inputs: image_size plus a 3-channel axis
input_shape = (*image_size, 3)
model = create_resnet50_model(input_shape, num_classes)

# Compile with Adam (from tensorflow.keras.optimizers) and categorical cross-entropy,
# tracking accuracy during training
adam_optimizer = Adam(learning_rate=0.001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=adam_optimizer,
    metrics=['accuracy']
)

print("Model Summary:")
model.summary()

In [None]:
from tensorflow.keras import callbacks

# Callbacks -- all three watch the validation loss

# Stop after 5 epochs without improvement and roll back to the best weights
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    restore_best_weights=True,
    patience=5,
    verbose=1
)

# Halve the learning rate after 3 stagnant epochs, never going below 1e-7
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.5,
    min_lr=1e-7,
    verbose=1
)

# Keep only the best model seen so far on disk
checkpoint = callbacks.ModelCheckpoint(
    'best_model.h5',
    save_best_only=True,
    monitor='val_loss',
    verbose=1
)

# Custom callback to track metrics
class MetricsCallback(callbacks.Callback):
    """Record per-epoch loss/accuracy and print a one-line summary each epoch.

    The four lists grow by one entry per epoch. An entry is None when the
    corresponding metric is missing from `logs` (e.g. no validation data).
    """

    def __init__(self):
        # Let the Keras base class set up its own attributes first
        super().__init__()
        self.train_losses = []
        self.val_losses = []
        self.train_accuracies = []
        self.val_accuracies = []

    @staticmethod
    def _format_metric(value):
        """Format a metric with 4 decimals, or 'n/a' when it was not reported."""
        return "n/a" if value is None else f"{value:.4f}"

    def on_epoch_end(self, epoch, logs=None):
        """Store this epoch's metrics and print them.

        Args:
            epoch: Zero-based epoch index (printed as epoch + 1).
            logs: Metric dict supplied by Keras; may be None.
        """
        logs = logs or {}  # the signature allows None, so guard before .get()
        loss = logs.get('loss')
        val_loss = logs.get('val_loss')
        accuracy = logs.get('accuracy')
        val_accuracy = logs.get('val_accuracy')

        self.train_losses.append(loss)
        self.val_losses.append(val_loss)
        self.train_accuracies.append(accuracy)
        self.val_accuracies.append(val_accuracy)

        fmt = self._format_metric
        print(f"Epoch {epoch+1} | Train Loss: {fmt(loss)} | Val Loss: {fmt(val_loss)} | Train Acc: {fmt(accuracy)} | Val Acc: {fmt(val_accuracy)}")

metrics_callback = MetricsCallback()

In [None]:
# Training
epochs = 100  # upper bound; early_stopping is in the callback list
training_callbacks = [early_stopping, reduce_lr, checkpoint, metrics_callback]

print("Starting training...")
# verbose=0 silences the built-in progress output of fit(); the callbacks print their own lines
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=epochs,
    callbacks=training_callbacks,
    verbose=0
)

print("Training Completed")

In [None]:
# Restore the weights stored in the checkpoint file written during training
best_weights_path = 'best_model.h5'
model.load_weights(best_weights_path)

In [None]:
# Testing phase
print("Evaluating on test set...")
test_generator.reset()  # Reset generator to start from beginning

# Get predictions for exactly one pass over the test set.
# len(test_generator) is the number of batches in one pass, so no sample is
# requested twice even when the sample count is a multiple of the batch size.
test_steps = len(test_generator)
predictions = model.predict(test_generator, steps=test_steps, verbose=1)

# Get true labels straight from the generator. It was created with
# shuffle=False, so `classes` is in the same order as the predictions.
true_labels = test_generator.classes
if len(predictions) != len(true_labels):
    raise RuntimeError(
        f"Got {len(predictions)} predictions for {len(true_labels)} test samples"
    )
predicted_labels = np.argmax(predictions, axis=1)

# Calculate metrics (support-weighted averages over the classes)
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='weighted')
recall = recall_score(true_labels, predicted_labels, average='weighted')
f1 = f1_score(true_labels, predicted_labels, average='weighted')

# For ROC AUC, we need the probability scores and one-hot encoded labels
true_labels_categorical = keras.utils.to_categorical(true_labels, num_classes)
roc_auc = roc_auc_score(true_labels_categorical, predictions, multi_class='ovr')
mcc = matthews_corrcoef(true_labels, predicted_labels)

# Get class names (ordered by class index as assigned by the training generator)
class_names = list(train_generator.class_indices.keys())

In [None]:
# Per-class precision / recall / F1 table
report_text = classification_report(true_labels, predicted_labels, target_names=class_names)
print(report_text)

# Single-line summary of the aggregate test metrics
summary_line = f"Test Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, ROC AUC: {roc_auc:.4f}, MCC: {mcc:.4f}"
print(summary_line)

In [None]:
# Confusion matrix: rows are true classes, columns are predicted classes
conf_matrix = confusion_matrix(true_labels, predicted_labels)

cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax
)
cm_ax.set_xlabel('Predicted Label')
cm_ax.set_ylabel('True Label')
cm_ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# Plot training history: loss on the left panel, accuracy on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel -- loss curves recorded by metrics_callback
ax1.plot(metrics_callback.train_losses, label='Training Loss')
ax1.plot(metrics_callback.val_losses, label='Validation Loss')
ax1.set(title='Model Loss', xlabel='Epoch', ylabel='Loss')
ax1.legend()

# Right panel -- accuracy curves recorded by metrics_callback
ax2.plot(metrics_callback.train_accuracies, label='Training Accuracy')
ax2.plot(metrics_callback.val_accuracies, label='Validation Accuracy')
ax2.set(title='Model Accuracy', xlabel='Epoch', ylabel='Accuracy')
ax2.legend()

plt.tight_layout()
plt.show()