In [None]:
import os
import sys
import shutil
import time
import yaml
import json
import glob
import random
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import torch
import torchvision

# Install dependencies if missing
def install_dependencies():
    """Install the third-party packages this notebook imports into the running kernel.

    pip is invoked through the interpreter that is executing the notebook
    (``sys.executable -m pip``) so the packages land in the kernel's own
    environment rather than in whichever ``pip`` is first on PATH.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    import subprocess  # local import: only needed on the fallback install path

    subprocess.check_call([
        sys.executable, "-m", "pip", "install", "-q",
        "ultralytics", "torchmetrics", "albumentations",
    ])

try:
    # Fast path: the optional packages are already importable in this kernel.
    import torchmetrics
    from torchmetrics.detection.mean_ap import MeanAveragePrecision
    import albumentations as A
except ImportError:
    # At least one import failed: install the packages, then retry the same imports.
    print("Installing dependencies...")
    install_dependencies()
    import torchmetrics
    from torchmetrics.detection.mean_ap import MeanAveragePrecision
    import albumentations as A

from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import ssd300_vgg16
from torchvision.models.detection.ssd import SSD300_VGG16_Weights, SSDHead
from google.colab import drive

# Mount Google Drive at /content/drive; the call is skipped when that path already exists
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# Reproducibility Setup
def set_seed(seed=42):
    """Seed Python, NumPy and PyTorch RNGs with the same value.

    When CUDA is available, every CUDA device is seeded too and cuDNN is
    switched to deterministic mode with auto-tuning (benchmark) disabled.

    Args:
        seed: integer seed applied to all generators.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed every RNG once, before any model or data loader is created
set_seed(42)

# Configuration
# Dataset location on Google Drive and the local-runtime copy that training reads from
BASE_DRIVE_DIR = "/content/drive/MyDrive/"
DRIVE_YAML_PATH = os.path.join(BASE_DRIVE_DIR, "Dataset/FINAL_YOLO_SPLIT/dataset.yaml")
LOCAL_DATA_DIR = "/content/local_dataset"
DATASET_YAML = os.path.join(LOCAL_DATA_DIR, "dataset.yaml")

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Computation Device: {DEVICE}")

# Hyperparameters
BATCH_SIZE = 16
NUM_EPOCHS = 100
CLASS_NAMES = ['CSP', 'LV']
NUM_CLASSES = len(CLASS_NAMES) + 1  # +1 for Background
# Maps class ids read from the YOLO label files to detector labels; label rows
# whose id is not a key here are skipped by the dataset classes.
# NOTE(review): YOLO label ids are conventionally 0-based, so this mapping drops
# id 0 — confirm that matches the class list in dataset.yaml.
ID_MAPPING = {1: 1, 2: 2}

In [None]:
# Stage the dataset on the local runtime disk. The copy is skipped when
# LOCAL_DATA_DIR already exists, so a failed copy must not leave that
# directory behind or the next run would treat a partial dataset as complete.
if not os.path.exists(LOCAL_DATA_DIR):
    print(f"Copying dataset to local runtime: {LOCAL_DATA_DIR}...")
    try:
        drive_data_dir = os.path.dirname(DRIVE_YAML_PATH)
        shutil.copytree(drive_data_dir, LOCAL_DATA_DIR)

        # Update local dataset configuration so 'path' points at the local copy
        if os.path.exists(DATASET_YAML):
            with open(DATASET_YAML, 'r') as f:
                # An empty YAML file loads as None; fall back to an empty mapping
                data_conf = yaml.safe_load(f) or {}
            data_conf['path'] = LOCAL_DATA_DIR
            with open(DATASET_YAML, 'w') as f:
                yaml.dump(data_conf, f)
        print("Dataset setup complete.")
    except Exception as e:
        print(f"Error setting up dataset: {e}")
        # Remove the partial copy so a re-run starts from scratch, then surface
        # the failure instead of letting training start on incomplete data.
        shutil.rmtree(LOCAL_DATA_DIR, ignore_errors=True)
        raise
else:
    print(f"Local dataset found at {LOCAL_DATA_DIR}.")

In [None]:
class YOLODatasetForSSD_Raw(Dataset):
    """Dataset that reads YOLO-format labels and yields SSD-style samples, without augmentation.

    Each item is ``(image_tensor, target)``. The image is converted BGR->RGB,
    resized to ``(width, height)``, divided by 255 and laid out channel-first.
    ``target`` holds ``boxes`` (N x 4, ``[x_min, y_min, x_max, y_max]`` in
    resized-image pixels, float32) and ``labels`` (N, int64). Images without
    usable labels get empty ``(0, 4)`` / ``(0,)`` tensors.

    Args:
        img_dir: directory searched for ``*.jpg`` and ``*.png`` files.
        label_dir: directory holding one ``<image stem>.txt`` file per image.
        width: output image width in pixels.
        height: output image height in pixels.
        mapping: optional ``{raw_id: label}`` dict. Rows whose class id is not
            a key are dropped. When falsy, the label is ``raw_id + 1``.
    """

    def __init__(self, img_dir, label_dir, width=300, height=300, mapping=None):
        self.img_paths = sorted(glob.glob(os.path.join(img_dir, "*.jpg")) +
                                glob.glob(os.path.join(img_dir, "*.png")))
        self.label_dir = label_dir
        self.width = width
        self.height = height
        self.mapping = mapping

    def _read_image(self, idx):
        """Load the image at ``idx``, moving on to following images if it cannot be read.

        Returns:
            ``(img_path, image)`` for the first readable image.

        Raises:
            IndexError: if ``idx`` is out of range.
            RuntimeError: if no image in the dataset can be read (the original
                recursive fallback would recurse without bound in that case).
        """
        n_images = len(self.img_paths)
        img_path = self.img_paths[idx]  # out-of-range idx still raises IndexError
        image = cv2.imread(img_path)
        attempts = 1
        while image is None:
            if attempts >= n_images:
                raise RuntimeError("None of the dataset images could be read.")
            idx = (idx + 1) % n_images
            img_path = self.img_paths[idx]
            image = cv2.imread(img_path)
            attempts += 1
        return img_path, image

    def _load_boxes(self, label_path, orig_w, orig_h):
        """Parse one YOLO label file into resized-pixel boxes and labels.

        Rows that are blank, do not have exactly five fields, fail numeric
        parsing, are filtered out by ``mapping`` or describe a box with
        non-positive width/height are skipped.

        Returns:
            ``(boxes, labels)`` as plain Python lists; both empty when the
            file is missing.
        """
        boxes, labels = [], []
        if not os.path.exists(label_path):
            return boxes, labels

        with open(label_path, 'r') as f:
            lines = f.readlines()

        for line in lines:
            parts = line.strip().split()
            # Blank lines used to raise IndexError on parts[0]; any row that is
            # not "<id> <xc> <yc> <w> <h>" is skipped.
            if len(parts) != 5:
                continue
            try:
                raw_id = int(parts[0])
                x_c, y_c, w, h = map(float, parts[1:])
            except ValueError:
                continue

            if self.mapping:
                if raw_id not in self.mapping:
                    continue
                final_id = self.mapping[raw_id]
            else:
                final_id = raw_id + 1

            # Normalised centre/size -> original pixels -> resized-image pixels
            x_c *= orig_w; y_c *= orig_h; w *= orig_w; h *= orig_h
            x_min = (x_c - w/2) * (self.width / orig_w)
            y_min = (y_c - h/2) * (self.height / orig_h)
            x_max = (x_c + w/2) * (self.width / orig_w)
            y_max = (y_c + h/2) * (self.height / orig_h)

            # torchvision detection models reject boxes without positive area
            if x_max <= x_min or y_max <= y_min:
                continue

            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(final_id)

        return boxes, labels

    def __getitem__(self, idx):
        img_path, image = self._read_image(idx)

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        orig_h, orig_w, _ = image.shape

        image_resized = cv2.resize(image, (self.width, self.height))
        image_tensor = torch.from_numpy(image_resized / 255.0).permute(2, 0, 1)

        # The label file shares the image's stem
        label_name = os.path.basename(img_path).rsplit('.', 1)[0] + ".txt"
        label_path = os.path.join(self.label_dir, label_name)
        boxes, labels = self._load_boxes(label_path, orig_w, orig_h)

        target = {}
        if len(boxes) > 0:
            target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
            target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
        else:
            # Explicit empty shapes so images without objects still batch correctly
            target["boxes"] = torch.zeros((0, 4), dtype=torch.float32)
            target["labels"] = torch.zeros((0,), dtype=torch.int64)

        return image_tensor, target

    def __len__(self):
        return len(self.img_paths)


class YOLODatasetForSSD_Tuned(Dataset):
    """Dataset that reads YOLO-format labels and applies an albumentations pipeline.

    Each item is ``(image_tensor, target)``. Boxes are built in original-image
    pixels (clamped to the image), passed with the image through
    ``self.transform`` and returned in transformed-image pixels. The image is
    divided by 255 and laid out channel-first. ``target`` holds ``boxes``
    (N x 4, float32) and ``labels`` (N, int64); images without usable boxes
    get empty ``(0, 4)`` / ``(0,)`` tensors.

    Args:
        img_dir: directory searched for ``*.jpg`` and ``*.png`` files.
        label_dir: directory holding one ``<image stem>.txt`` file per image.
        width: output image width in pixels.
        height: output image height in pixels.
        mapping: optional ``{raw_id: label}`` dict. Rows whose class id is not
            a key are dropped. When falsy, the label is ``raw_id + 1``.
        augment: when True, geometric and colour augmentations run before the
            final resize; otherwise only the resize is applied.
    """

    def __init__(self, img_dir, label_dir, width=300, height=300, mapping=None, augment=False):
        self.img_paths = sorted(glob.glob(os.path.join(img_dir, "*.jpg")) +
                                glob.glob(os.path.join(img_dir, "*.png")))
        self.label_dir = label_dir
        self.width = width
        self.height = height
        self.mapping = mapping
        self.augment = augment

        if self.augment:
            self.transform = A.Compose([
                A.Affine(rotate=(-45, 45), shear=(-5, 5), translate_percent=(-0.2, 0.2), scale=(0.4, 1.6), p=1.0),
                A.Perspective(scale=(0.01, 0.05), keep_size=True, p=0.5),
                A.HorizontalFlip(p=0.5),
                A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5),
                A.Resize(height=self.height, width=self.width)
            ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels'], min_visibility=0.3))
        else:
            self.transform = A.Compose([
                A.Resize(height=self.height, width=self.width)
            ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))

    def _read_image(self, idx):
        """Load the image at ``idx``, moving on to following images if it cannot be read.

        Returns:
            ``(resolved_idx, img_path, image)`` for the first readable image.

        Raises:
            IndexError: if ``idx`` is out of range.
            RuntimeError: if no image in the dataset can be read (the original
                recursive fallback would recurse without bound in that case).
        """
        n_images = len(self.img_paths)
        img_path = self.img_paths[idx]  # out-of-range idx still raises IndexError
        image = cv2.imread(img_path)
        attempts = 1
        while image is None:
            if attempts >= n_images:
                raise RuntimeError("None of the dataset images could be read.")
            idx = (idx + 1) % n_images
            img_path = self.img_paths[idx]
            image = cv2.imread(img_path)
            attempts += 1
        return idx, img_path, image

    def _load_boxes(self, label_path, orig_w, orig_h):
        """Parse one YOLO label file into clamped original-pixel boxes and labels.

        Rows that are blank, do not have exactly five fields, fail numeric
        parsing, are filtered out by ``mapping`` or collapse to a non-positive
        width/height after clamping are skipped.

        Returns:
            ``(boxes, labels)`` as plain Python lists; both empty when the
            file is missing.
        """
        boxes, labels = [], []
        if not os.path.exists(label_path):
            return boxes, labels

        with open(label_path, 'r') as f:
            lines = f.readlines()

        for line in lines:
            parts = line.strip().split()
            # Blank lines used to raise IndexError on parts[0]; any row that is
            # not "<id> <xc> <yc> <w> <h>" is skipped.
            if len(parts) != 5:
                continue
            try:
                raw_id = int(parts[0])
                x_c, y_c, w, h = map(float, parts[1:])
            except ValueError:
                continue

            if self.mapping:
                if raw_id not in self.mapping:
                    continue
                final_id = self.mapping[raw_id]
            else:
                final_id = raw_id + 1

            # Normalised centre/size -> original pixels, clamped to the image
            x_c *= orig_w; y_c *= orig_h; w *= orig_w; h *= orig_h
            x_min = max(0, x_c - (w/2))
            y_min = max(0, y_c - (h/2))
            x_max = min(orig_w, x_c + (w/2))
            y_max = min(orig_h, y_c + (h/2))

            if x_max <= x_min or y_max <= y_min:
                continue
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(final_id)

        return boxes, labels

    def __getitem__(self, idx):
        idx, img_path, image = self._read_image(idx)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        orig_h, orig_w, _ = image.shape

        # The label file shares the image's stem
        label_name = os.path.basename(img_path).rsplit('.', 1)[0] + ".txt"
        label_path = os.path.join(self.label_dir, label_name)
        boxes_raw, labels_raw = self._load_boxes(label_path, orig_w, orig_h)

        if len(boxes_raw) == 0:
            augmented = self.transform(image=image, bboxes=[], class_labels=[])
        else:
            try:
                augmented = self.transform(image=image, bboxes=boxes_raw, class_labels=labels_raw)
            except ValueError:
                # The transform rejected this sample's boxes: serve the next image instead
                return self.__getitem__((idx + 1) % len(self.img_paths))

        image_tensor = torch.from_numpy(augmented['image'].astype(np.float32) / 255.0).permute(2, 0, 1)
        target = {}
        if len(augmented['bboxes']) > 0:
            target["boxes"] = torch.as_tensor(augmented['bboxes'], dtype=torch.float32)
            target["labels"] = torch.as_tensor(augmented['class_labels'], dtype=torch.int64)
        else:
            # Explicit empty shapes so images without objects still batch correctly
            target["boxes"] = torch.zeros((0, 4), dtype=torch.float32)
            target["labels"] = torch.zeros((0,), dtype=torch.int64)

        return image_tensor, target

    def __len__(self):
        return len(self.img_paths)

def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    For ``(image, target)`` samples this yields ``(images, targets)``, each a
    tuple with one entry per sample, without stacking anything.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
def evaluate_map_complete(model, dataloader, device):
    """Compute mAP metrics for ``model`` over ``dataloader``.

    Two ``MeanAveragePrecision`` metrics are accumulated over the same
    predictions: one with the metric's default IoU thresholds and one
    restricted to IoU 0.5, so that per-class values are available for both.

    Args:
        model: detection model; it is put in eval mode and called as
            ``model(images)``.
        dataloader: iterable of ``(images, targets)`` batches.
        device: device that images, targets and metrics are moved to.

    Returns:
        dict with
        ``map`` / ``map_50``: floats from the default-threshold metric;
        ``map_per_class``: per-class tensor from the default-threshold metric;
        ``map_50_per_class``: per-class tensor from the IoU-0.5 metric;
        ``classes``: the metric's ``classes`` entry (``None`` if it is absent),
        returned so callers can tell which class id each per-class value
        belongs to instead of assuming positional order.
    """
    model.eval()
    metric_global = MeanAveragePrecision(class_metrics=True).to(device)
    metric_50 = MeanAveragePrecision(class_metrics=True, iou_thresholds=[0.5]).to(device)

    with torch.no_grad():
        for images, targets in dataloader:
            images = [img.to(device) for img in images]
            # The metric only needs boxes and labels; any other target keys are dropped
            t_clean = [{k: v.to(device) for k, v in t.items() if k in ('boxes', 'labels')} for t in targets]
            outputs = model(images)
            metric_global.update(outputs, t_clean)
            metric_50.update(outputs, t_clean)

    res_global = metric_global.compute()
    res_50 = metric_50.compute()

    return {
        'map': res_global['map'].item(),
        'map_50': res_global['map_50'].item(),
        'map_per_class': res_global['map_per_class'],
        'map_50_per_class': res_50['map_per_class'],
        'classes': res_global.get('classes'),
    }

def evaluate_best_f1(model, dataloader, device, num_classes, iou_threshold=0.5):
    """Find, per class, the confidence threshold that maximises F1.

    Predictions are matched to ground truth image by image in descending
    score order. A prediction is a true positive when its best-overlapping
    ground-truth box *of the same class that has not been matched yet* has
    IoU above ``iou_threshold``. (Previously the single highest-IoU box was
    used regardless of class or prior matches, so a valid same-class match
    was counted as a false positive whenever another box overlapped more.)

    Args:
        model: detection model; it is put in eval mode and called as
            ``model(images)``, returning dicts with ``boxes``, ``scores`` and
            ``labels``.
        dataloader: iterable of ``(images, targets)`` batches.
        device: device that images and targets are moved to.
        num_classes: number of classes including background; classes
            ``1 .. num_classes - 1`` are evaluated.
        iou_threshold: IoU a match must exceed to count as a true positive.

    Returns:
        ``{cls_id: {'p', 'r', 'f1', 'thres'}}`` at the best-F1 operating point.
        Classes without any prediction get all zeros.
    """
    model.eval()
    class_preds = {i: [] for i in range(1, num_classes)}
    class_gt_counts = {i: 0 for i in range(1, num_classes)}

    with torch.no_grad():
        for images, targets in dataloader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            for i, output in enumerate(outputs):
                pred_boxes = output['boxes']
                pred_scores = output['scores']
                pred_labels = output['labels']
                gt_boxes = targets[i]['boxes'].to(device)
                gt_labels = targets[i]['labels'].to(device)

                for cls_id in range(1, num_classes):
                    class_gt_counts[cls_id] += (gt_labels == cls_id).sum().item()

                # Highest-confidence predictions get first pick of the ground truth
                if len(pred_scores) > 0:
                    order = torch.argsort(pred_scores, descending=True)
                    pred_boxes = pred_boxes[order]
                    pred_scores = pred_scores[order]
                    pred_labels = pred_labels[order]

                iou_matrix = None
                if len(gt_boxes) > 0 and len(pred_boxes) > 0:
                    iou_matrix = torchvision.ops.box_iou(pred_boxes, gt_boxes)
                gt_matched = torch.zeros(len(gt_boxes), dtype=torch.bool, device=gt_labels.device)

                for p_idx in range(len(pred_boxes)):
                    p_label = pred_labels[p_idx].item()
                    p_score = pred_scores[p_idx].item()
                    # Skips background (0) and any label outside the evaluated range
                    if p_label not in class_preds:
                        continue

                    is_tp = False
                    if iou_matrix is not None:
                        candidates = (gt_labels == p_label) & ~gt_matched
                        if candidates.any():
                            # Mask out other-class / already-matched boxes before taking the max
                            masked_ious = iou_matrix[p_idx].masked_fill(~candidates, -1.0)
                            best_iou, best_gt = torch.max(masked_ious, dim=0)
                            if best_iou.item() > iou_threshold:
                                is_tp = True
                                gt_matched[best_gt] = True

                    class_preds[p_label].append((p_score, is_tp))

    results = {}
    for cls_id in range(1, num_classes):
        preds = class_preds[cls_id]
        total_gt = class_gt_counts[cls_id]

        if len(preds) == 0:
            results[cls_id] = {'p': 0.0, 'r': 0.0, 'f1': 0.0, 'thres': 0.0}
            continue

        # Sweep the threshold from the most to the least confident prediction
        preds.sort(key=lambda x: x[0], reverse=True)
        scores = np.array([score for score, _ in preds], dtype=float)
        tp_status = np.array([int(hit) for _, hit in preds])

        tp_cumsum = np.cumsum(tp_status)
        fp_cumsum = np.cumsum(1 - tp_status)

        precisions = tp_cumsum / (tp_cumsum + fp_cumsum + 1e-16)
        if total_gt > 0:
            recalls = tp_cumsum / total_gt
        else:
            recalls = np.zeros_like(tp_cumsum, dtype=float)
        f1_scores = 2 * (precisions * recalls) / (precisions + recalls + 1e-16)

        best_idx = np.argmax(f1_scores)
        results[cls_id] = {
            'p': precisions[best_idx],
            'r': recalls[best_idx],
            'f1': f1_scores[best_idx],
            'thres': scores[best_idx]
        }
    return results

def _lookup_class_metric(per_class_values, observed_classes, cls_id, position):
    """Return the per-class metric for ``cls_id``, or 0.0 when it is unavailable.

    When ``observed_classes`` is given, the value is looked up by class id,
    because the per-class tensor only has entries for the listed ids. Without
    it, the lookup falls back to ``position``.
    """
    values = torch.as_tensor(per_class_values).reshape(-1)  # also accepts 0-dim tensors
    if observed_classes is None:
        index = position
    else:
        ids = torch.as_tensor(observed_classes).reshape(-1).tolist()
        if cls_id not in ids:
            return 0.0
        index = ids.index(cls_id)
    return values[index].item() if index < len(values) else 0.0


def evaluate_and_print(model, dataloader, device, class_names, current_epoch, num_epochs):
    """Run both evaluations, print a per-class results table and return the summary.

    Args:
        model: detection model passed through to the evaluation functions.
        dataloader: validation batches of ``(images, targets)``.
        device: device used for evaluation.
        class_names: foreground class names; class ``i`` has label ``i + 1``.
        current_epoch: epoch number shown in the printed header.
        num_epochs: total number of epochs shown in the printed header.

    Returns:
        dict with ``map50`` and ``map`` (global mAP values) and ``precision`` /
        ``recall`` averaged over all classes at each class's best-F1 threshold.
    """
    print(f"\nEvaluating Epoch {current_epoch}/{num_epochs}...")
    num_classes = len(class_names) + 1

    map_results = evaluate_map_complete(model, dataloader, device)
    best_f1_results = evaluate_best_f1(model, dataloader, device, num_classes)

    csv_data = []
    total_p, total_r, valid_classes = 0, 0, 0

    map_50_tensor = map_results['map_50_per_class']
    map_50_95_tensor = map_results['map_per_class']
    # Class ids the per-class tensors are aligned to; None means "not provided"
    observed_classes = map_results.get('classes')

    for i, class_name in enumerate(class_names):
        cls_id = i + 1
        if cls_id in best_f1_results:
            res = best_f1_results[cls_id]
            precision, recall, f1, thres = res['p'], res['r'], res['f1'], res['thres']
        else:
            precision, recall, f1, thres = 0.0, 0.0, 0.0, 0.0

        total_p += precision
        total_r += recall
        valid_classes += 1

        map50 = _lookup_class_metric(map_50_tensor, observed_classes, cls_id, i)
        map5095 = _lookup_class_metric(map_50_95_tensor, observed_classes, cls_id, i)

        csv_data.append({
            "Class": class_name,
            "mAP 50": round(map50, 4),
            "mAP 50-95": round(map5095, 4),
            "Best F1": round(f1, 4),
            "Best Conf": round(thres, 3),
            "Precision": round(precision, 4),
            "Recall": round(recall, 4)
        })

    avg_p = total_p / valid_classes if valid_classes > 0 else 0.0
    avg_r = total_r / valid_classes if valid_classes > 0 else 0.0

    csv_data.append({
        "Class": "GLOBAL (ALL)",
        "mAP 50": round(map_results['map_50'], 4),
        "mAP 50-95": round(map_results['map'], 4),
        "Best F1": "-",
        "Best Conf": "-",
        "Precision": round(avg_p, 4),
        "Recall": round(avg_r, 4)
    })

    df_results = pd.DataFrame(csv_data)
    print(f"\nVALIDATION RESULTS (Epoch {current_epoch}/{num_epochs})")
    print("="*95)
    print(df_results.to_string(index=False))
    print("="*95)

    return {
        'map50': map_results['map_50'],
        'map': map_results['map'],
        'precision': avg_p,
        'recall': avg_r
    }

In [None]:
def get_ssd_model(num_classes):
    """Build a pretrained SSD300-VGG16 whose head predicts ``num_classes`` classes.

    The model is created with ``SSD300_VGG16_Weights.DEFAULT`` and its head is
    replaced by a new ``SSDHead`` sized for ``num_classes``, reusing the
    model's anchor counts per location.

    Args:
        num_classes: number of classes passed to the new head.

    Returns:
        The model with the replaced head.
    """
    print("Loading SSD300 VGG16 Pretrained Model...")
    detector = ssd300_vgg16(weights=SSD300_VGG16_Weights.DEFAULT)

    anchors_per_location = detector.anchor_generator.num_anchors_per_location()
    # Channel count of each feature map fed to the head
    feature_channels = [512, 1024, 512, 256, 256, 256]

    detector.head = SSDHead(feature_channels, anchors_per_location, num_classes)
    return detector

def plot_history(history, save_path):
    """Plot training loss against validation metrics, save the figure and show it.

    Training loss is drawn on the left y-axis; validation mAP 50 (and recall,
    when ``history`` has a ``'recall'`` entry) on a twin right y-axis. A single
    legend covers both axes so the curves can be told apart, and the figure is
    closed after display so repeated sessions do not accumulate open figures.

    Args:
        history: dict of per-epoch lists; must contain ``'loss'`` and
            ``'map50'``, may contain ``'recall'``.
        save_path: file path the figure is written to.
    """
    epochs_range = range(1, len(history['loss']) + 1)
    fig, ax1 = plt.subplots(figsize=(12, 6))

    color = 'tab:red'
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Training Loss', color=color, fontweight='bold')
    ax1.plot(epochs_range, history['loss'], color=color, linewidth=2, label='Train Loss')
    ax1.tick_params(axis='y', labelcolor=color)
    ax1.grid(True, linestyle='--', alpha=0.5)

    ax2 = ax1.twinx()
    color = 'tab:blue'
    ax2.set_ylabel('Validation mAP 50', color=color, fontweight='bold')
    ax2.plot(epochs_range, history['map50'], color=color, linewidth=2, marker='o', markersize=4, label='Val mAP 50')

    if 'recall' in history:
        ax2.plot(epochs_range, history['recall'], color='tab:green', linestyle='--', alpha=0.7, label='Val Recall')

    ax2.tick_params(axis='y', labelcolor=color)

    # The curves live on two axes, so merge their handles into one legend
    handles_left, labels_left = ax1.get_legend_handles_labels()
    handles_right, labels_right = ax2.get_legend_handles_labels()
    ax2.legend(handles_left + handles_right, labels_left + labels_right, loc='best')

    plt.title('Training Analysis: Loss vs Accuracy')
    fig.tight_layout()
    fig.savefig(save_path)
    print(f"Graph saved to: {save_path}")
    plt.show()
    plt.close(fig)

def run_training_session(session_name, dataset_train, dataset_val, save_dir):
    """Train an SSD300-VGG16 detector on one dataset pair and persist its artifacts.

    Runs up to ``NUM_EPOCHS`` epochs of AdamW with cosine-annealed learning
    rate and gradient-norm clipping, evaluating on ``dataset_val`` after every
    epoch. The checkpoint with the highest validation mAP 50 is saved, and
    training stops early after 15 consecutive epochs without improvement.

    Files written to ``save_dir``:
        ``training_log.csv`` (rewritten every epoch), ``best_model.pth``,
        ``training_history_finished.json`` and ``training_history_finished.png``.

    Args:
        session_name: label used in the printed banners.
        dataset_train: training dataset yielding ``(image, target)`` samples.
        dataset_val: validation dataset yielding ``(image, target)`` samples.
        save_dir: output directory; created if missing.

    Raises:
        ValueError: if ``dataset_train`` is empty.
    """
    if len(dataset_train) == 0:
        # Fail with an explicit message instead of an opaque sampler error
        raise ValueError(
            f"{session_name}: training dataset is empty; check that the dataset "
            f"was copied and that the image directory contains .jpg/.png files."
        )

    csv_log_path = os.path.join(save_dir, 'training_log.csv')
    print(f"\n{'#'*40}")
    print(f"STARTING SESSION: {session_name}")
    print(f"Save Directory: {save_dir}")
    print(f"{'#'*40}\n")

    os.makedirs(save_dir, exist_ok=True)

    train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn, num_workers=2)
    val_loader = DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn, num_workers=2)

    model = get_ssd_model(NUM_CLASSES)
    model.to(DEVICE)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(params, lr=0.0001, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS, eta_min=1e-6)

    history = {
        'epoch': [], 'loss': [], 'map': [], 'map50': [],
        'precision': [], 'recall': [], 'time': []
    }
    best_map50 = 0.0
    patience = 15  # epochs without mAP 50 improvement before stopping
    patience_counter = 0
    best_model_path = os.path.join(save_dir, 'best_model.pth')

    print(f"Logging metrics to: {csv_log_path}")

    for epoch in range(NUM_EPOCHS):
        start_time = time.time()
        model.train()  # evaluation leaves the model in eval mode
        epoch_loss = 0

        for images, targets in train_loader:
            images = list(image.to(DEVICE) for image in images)
            targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

            # In training mode the model returns a dict of loss terms
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad(set_to_none=True)
            losses.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
            optimizer.step()
            epoch_loss += losses.item()

        avg_loss = epoch_loss / len(train_loader)
        # Read the LR before stepping the scheduler so the log shows the rate
        # this epoch was actually trained with, not the next epoch's.
        current_lr = optimizer.param_groups[0]['lr']
        lr_scheduler.step()

        metrics = evaluate_and_print(model, val_loader, DEVICE, CLASS_NAMES, epoch+1, NUM_EPOCHS)
        duration = time.time() - start_time

        history['epoch'].append(epoch + 1)
        history['loss'].append(avg_loss)
        history['map'].append(metrics['map'])
        history['map50'].append(metrics['map50'])
        history['precision'].append(metrics['precision'])
        history['recall'].append(metrics['recall'])
        history['time'].append(duration)

        # Rewrite the full log every epoch so an interrupted run keeps its history
        pd.DataFrame(history).to_csv(csv_log_path, index=False)

        print(f"Epoch Summary: Loss: {avg_loss:.4f} | LR: {current_lr:.6f} | Time: {duration:.1f}s")

        if metrics['map50'] > best_map50:
            best_map50 = metrics['map50']
            patience_counter = 0
            torch.save(model.state_dict(), best_model_path)
            print(f"Model Saved. Best mAP50: {best_map50:.4f}")
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    print(f"{session_name} Finished.")

    history_path = os.path.join(save_dir, "training_history_finished.json")
    with open(history_path, 'w') as f:
        json.dump(history, f)

    plot_path = os.path.join(save_dir, "training_history_finished.png")
    plot_history(history, plot_path)

In [None]:
# Both experiments read the same train/val folders under LOCAL_DATA_DIR and
# differ only in the dataset class (and its augmentation) used to load them.
def _split_dirs(split):
    """Return the (images, labels) directories of one dataset split."""
    split_root = os.path.join(LOCAL_DATA_DIR, split)
    return os.path.join(split_root, 'images'), os.path.join(split_root, 'labels')


# Experiment 1: Raw Dataset
SAVE_DIR_RAW = os.path.join(BASE_DRIVE_DIR, "Model/SSD_VGG16_Raw")

train_ds_raw = YOLODatasetForSSD_Raw(*_split_dirs('train'), mapping=ID_MAPPING)
val_ds_raw = YOLODatasetForSSD_Raw(*_split_dirs('val'), mapping=ID_MAPPING)

run_training_session("RAW_TRAINING", train_ds_raw, val_ds_raw, SAVE_DIR_RAW)

# Experiment 2: Tuned Dataset (augmentation on the training split only)
SAVE_DIR_TUNED = os.path.join(BASE_DRIVE_DIR, "Model/SSD_VGG16_Tuned")

train_ds_tuned = YOLODatasetForSSD_Tuned(*_split_dirs('train'), mapping=ID_MAPPING, augment=True)
val_ds_tuned = YOLODatasetForSSD_Tuned(*_split_dirs('val'), mapping=ID_MAPPING, augment=False)

run_training_session("TUNED_TRAINING", train_ds_tuned, val_ds_tuned, SAVE_DIR_TUNED)

print("All experiments completed successfully.")