In [None]:
# -*- coding: utf-8 -*-
"""yolo_licenseplate.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1GUjd24TTc85Ol2BsMT3TlxoATde7Qef8
"""

!pip install ultralytics kaggle opencv-python matplotlib
!pip install roboflow

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from google.colab import files, drive
import json
import shutil
from pathlib import Path
import yaml

# Reached only if every import above succeeded.
print("All dependencies installed successfully!")

# CELL 2 - Setup paths and mount Drive
# Mount Google Drive so the dataset backup path below becomes reachable
drive.mount('/content/drive')

# Working copies live on the Colab VM disk; the Drive path is only used as a
# backup source/destination by the dataset preparation cell.
local_dataset_path = '/content/Bangla_License_Plate_Dataset'  # raw dataset (img/ + masks/ per split)
yolo_dataset_path = '/content/yolo_dataset'  # converted YOLO images/labels, kept local for speed
drive_backup_path = '/content/drive/MyDrive/Bangla_License_Plate_Dataset'  # backup of the raw dataset

print("Drive mounted successfully!")
print(f"Local dataset path: {local_dataset_path}")
print(f"YOLO dataset path: {yolo_dataset_path}")

# CELL 3 - Download and prepare dataset
# Resolution order: existing local copy -> Drive backup -> fresh Kaggle download.
# `dataset_ready` ends up True only if the raw dataset is present at
# `local_dataset_path`.
dataset_ready = False

if os.path.exists(local_dataset_path):
    print("Dataset already exists locally")
    dataset_ready = True
elif os.path.exists(drive_backup_path):
    print("Copying dataset from Drive to local storage...")
    shutil.copytree(drive_backup_path, local_dataset_path)
    print("Dataset copied to local storage")
    dataset_ready = True
else:
    print("Dataset not found. Please upload your kaggle.json file:")
    uploaded = files.upload()

    # Setup Kaggle API: the CLI reads credentials from /root/.kaggle/kaggle.json
    os.makedirs('/root/.kaggle', exist_ok=True)
    shutil.move('kaggle.json', '/root/.kaggle/kaggle.json')
    os.chmod('/root/.kaggle/kaggle.json', 0o600)

    print("Downloading dataset from Kaggle...")
    # os.system returns the command's exit status; report failures instead of
    # silently continuing to the next step.
    download_status = os.system('kaggle datasets download -d nishat99/bangla-license-plate-detection -p /content')
    if download_status != 0:
        print(f"ERROR: Kaggle download failed (exit status {download_status})")
    else:
        unzip_status = os.system('unzip -q /content/bangla-license-plate-detection.zip -d /content')
        if unzip_status != 0:
            print(f"ERROR: Unzip failed (exit status {unzip_status})")

    # Move to standard location (the archive extracts to a folder name with spaces)
    if os.path.exists('/content/Bangla License Plate Dataset'):
        shutil.move('/content/Bangla License Plate Dataset', local_dataset_path)
        dataset_ready = True

        # Backup to Drive for future use; best-effort, a failure here must not
        # stop the run because the local copy is already usable.
        try:
            shutil.copytree(local_dataset_path, drive_backup_path)
            print("Dataset backed up to Drive")
        except OSError as e:  # shutil.Error is a subclass of OSError
            print(f"Drive backup failed ({e}), continuing with local dataset")

if not dataset_ready:
    print("ERROR: Dataset preparation failed!")
else:
    print(f"Dataset ready at: {local_dataset_path}")

# CELL 4 - Convert annotations to YOLO format with progress tracking
# Check if annotations already converted: the flag file is written at the end
# of a conversion run and stores that run's summary text.
annotations_complete_flag = os.path.join(yolo_dataset_path, 'annotations_complete.txt')

if os.path.exists(annotations_complete_flag):
    # Flag present: reuse the existing conversion and just echo its summary.
    print("Annotations already exist! Skipping annotation generation...")
    with open(annotations_complete_flag, 'r') as f:
        stats = f.read()
    print("\nExisting annotation stats:")
    print(stats)
else:
    # No flag: define the conversion helpers below and run them for each split.
    print("Creating YOLO annotations from masks...")
    print("="*50)

    def mask_to_bbox(mask):
        """Convert a binary mask to a list of bounding boxes.

        Pixels with a value above 127 count as foreground. All foreground
        pixels are enclosed by a single box.

        Args:
            mask: 2-D array of pixel intensities.

        Returns:
            An empty list when the mask has no foreground, otherwise a
            one-element list holding ``(x_min, y_min, x_max, y_max)``. The
            minima are inclusive and the maxima exclusive, so
            ``x_max - x_min`` is the box width in pixels.
        """
        coords = np.where(mask > 127)
        if len(coords[0]) == 0:
            return []
        # np.where yields inclusive pixel indices; add 1 to the maxima so the
        # last foreground row/column is included in the box extent (otherwise
        # a one-pixel mask would produce a zero-size box).
        y_min, y_max = int(coords[0].min()), int(coords[0].max()) + 1
        x_min, x_max = int(coords[1].min()), int(coords[1].max()) + 1
        return [(x_min, y_min, x_max, y_max)]

    def bbox_to_yolo_format(bbox, img_width, img_height):
        """Render a pixel-space box as one normalized YOLO label line.

        Args:
            bbox: ``(x_min, y_min, x_max, y_max)`` in pixels.
            img_width: Width used to normalize the x values.
            img_height: Height used to normalize the y values.

        Returns:
            ``"0 <x_center> <y_center> <width> <height>"`` with class id 0 and
            each value formatted to six decimal places.
        """
        left, top, right, bottom = bbox

        # Box centre in pixels.
        centre_x = (left + right) / 2.0
        centre_y = (top + bottom) / 2.0

        # Centre and extent, each divided by the matching image dimension.
        normalized = (
            centre_x / img_width,
            centre_y / img_height,
            (right - left) / img_width,
            (bottom - top) / img_height,
        )
        return "0 " + " ".join(f"{value:.6f}" for value in normalized)

    def process_dataset_split(split_name, local_dataset_path, yolo_dataset_path):
        """Convert one split from image/mask pairs into YOLO images + labels.

        Reads ``<local_dataset_path>/<split_name>/img`` and ``.../masks``,
        copies each image to ``<yolo_dataset_path>/<split_name>/images`` and
        writes a same-named ``.txt`` label file to ``.../labels``. An image
        whose mask has no foreground gets an empty label file.

        Args:
            split_name: Sub-folder name of the split (e.g. ``'train'``).
            local_dataset_path: Root of the raw dataset.
            yolo_dataset_path: Root of the converted dataset.

        Returns:
            Number of images converted; 0 if the split's folders are missing.
        """
        img_folder = os.path.join(local_dataset_path, split_name, 'img')
        mask_folder = os.path.join(local_dataset_path, split_name, 'masks')

        print(f"\nProcessing {split_name}:")

        if not os.path.exists(img_folder) or not os.path.exists(mask_folder):
            print(f"ERROR: Missing folders for {split_name}")
            return 0

        yolo_img_dir = os.path.join(yolo_dataset_path, split_name, 'images')
        yolo_label_dir = os.path.join(yolo_dataset_path, split_name, 'labels')
        os.makedirs(yolo_img_dir, exist_ok=True)
        os.makedirs(yolo_label_dir, exist_ok=True)

        img_files = [f for f in os.listdir(img_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
        print(f"Found {len(img_files)} images in {split_name}")

        processed_count = 0
        no_plate_count = 0
        multi_plate_count = 0
        skipped_count = 0  # missing/unreadable mask or image, or a processing error

        for i, img_file in enumerate(img_files):
            # Progress update every 500 files
            if i % 500 == 0 and i > 0:
                print(f"  Processed {i}/{len(img_files)} images ({i/len(img_files)*100:.1f}%)")

            try:
                stem = os.path.splitext(img_file)[0]
                # Masks are looked up as PNG files sharing the image's base name.
                mask_path = os.path.join(mask_folder, stem + '.png')
                img_path = os.path.join(img_folder, img_file)

                mask = cv2.imread(mask_path, 0) if os.path.exists(mask_path) else None
                # The image is decoded only to confirm it is readable before
                # it is copied into the converted dataset.
                if mask is None or cv2.imread(img_path) is None:
                    skipped_count += 1
                    continue

                # Boxes are measured in mask pixels, so normalize by the mask's
                # own size; this stays correct even if image and mask
                # resolutions differ.
                mask_height, mask_width = mask.shape[:2]
                bboxes = mask_to_bbox(mask)

                # Copy image
                shutil.copy2(img_path, os.path.join(yolo_img_dir, img_file))

                # Create label file (left empty when no plate was found)
                label_path = os.path.join(yolo_label_dir, stem + '.txt')
                with open(label_path, 'w') as f:
                    for bbox in bboxes:
                        f.write(bbox_to_yolo_format(bbox, mask_width, mask_height) + '\n')

                if not bboxes:
                    no_plate_count += 1
                elif len(bboxes) > 1:
                    multi_plate_count += 1

                processed_count += 1

            except Exception as e:
                # Keep converting the rest of the split; report and count the failure.
                skipped_count += 1
                print(f"Error processing {img_file}: {e}")
                continue

        print(f"{split_name.upper()} Summary:")
        print(f"Total processed: {processed_count}")
        print(f"No license plate: {no_plate_count}")
        print(f"Multiple license plates: {multi_plate_count}")
        print(f"Single license plate: {processed_count - no_plate_count - multi_plate_count}")
        if skipped_count:
            print(f"Skipped (missing/unreadable mask or image, or error): {skipped_count}")

        return processed_count

    # Process all splits
    train_count = process_dataset_split('train', local_dataset_path, yolo_dataset_path)
    val_count = process_dataset_split('validation', local_dataset_path, yolo_dataset_path)
    test_count = process_dataset_split('test', local_dataset_path, yolo_dataset_path)

    total_count = train_count + val_count + test_count
    print(f"\nANNOTATION CONVERSION COMPLETED!")
    print(f"Train: {train_count} images")
    print(f"Validation: {val_count} images")
    print(f"Test: {test_count} images")
    print(f"Total: {total_count} images")

    # Save completion flag
    stats_text = f"""ANNOTATION CONVERSION COMPLETED!
Train: {train_count} images
Validation: {val_count} images
Test: {test_count} images
Total: {total_count} images"""

    with open(annotations_complete_flag, 'w') as f:
        f.write(stats_text)

    print(f"Completion flag saved: {annotations_complete_flag}")

# CELL 5 - Create dataset configuration and verify structure
# Create dataset.yaml configuration file: split image folders are given
# relative to 'path'; there is a single class, 'license_plate'.
dataset_config = {
    'path': yolo_dataset_path,
    'train': 'train/images',
    'val': 'validation/images',
    'test': 'test/images',
    'nc': 1,
    'names': ['license_plate']
}

# The output root does not exist if the conversion cell produced nothing;
# create it so writing the config cannot fail on a missing directory.
os.makedirs(yolo_dataset_path, exist_ok=True)

config_path = os.path.join(yolo_dataset_path, 'dataset.yaml')
with open(config_path, 'w') as f:
    yaml.dump(dataset_config, f)

print("Dataset configuration created!")
print(f"Config saved at: {config_path}")

# Verify dataset structure
def verify_dataset_structure():
    """Print image/label counts for each split of the converted dataset.

    For 'train', 'validation' and 'test' under the module-level
    ``yolo_dataset_path``, counts image files in ``images/`` and ``.txt``
    files in ``labels/``, warns when the two counts differ and reports an
    error when either directory is missing. Returns nothing.
    """
    # Matched case-insensitively, like the filter used when the images were
    # converted, so e.g. '.JPG' files are not reported as a mismatch.
    image_exts = ('.jpg', '.jpeg', '.png')

    print("\nDataset Structure Verification:")
    print("-" * 40)

    for split in ['train', 'validation', 'test']:
        img_dir = os.path.join(yolo_dataset_path, split, 'images')
        label_dir = os.path.join(yolo_dataset_path, split, 'labels')

        if not (os.path.exists(img_dir) and os.path.exists(label_dir)):
            print(f"  ERROR: Missing directories for {split}")
            continue

        img_count = sum(1 for f in os.listdir(img_dir) if f.lower().endswith(image_exts))
        label_count = sum(1 for f in os.listdir(label_dir) if f.endswith('.txt'))
        print(f"{split:>12}: {img_count:>4} images, {label_count:>4} labels")

        if img_count != label_count:
            print(f"  WARNING: Image/label count mismatch in {split}")

    print("-" * 40)

# Print per-split image/label counts for the converted dataset.
verify_dataset_structure()

# Display sample annotations for verification
def show_sample_annotations(split_name, num_samples=2):
    """Plot the first few images of a split with their YOLO boxes overlaid.

    Args:
        split_name: Split sub-folder under the module-level
            ``yolo_dataset_path`` (e.g. ``'train'``).
        num_samples: Maximum number of images to show; also the number of
            subplot columns.

    Returns:
        None. Prints a message and returns early if the split's image
        directory does not exist.
    """
    import matplotlib.patches as patches

    img_dir = os.path.join(yolo_dataset_path, split_name, 'images')
    label_dir = os.path.join(yolo_dataset_path, split_name, 'labels')

    if not os.path.exists(img_dir):
        print(f"Cannot show samples for {split_name} - directory not found")
        return

    # Only sample real image files; the directory listing may contain other entries.
    image_exts = ('.jpg', '.jpeg', '.png')
    img_files = [f for f in os.listdir(img_dir) if f.lower().endswith(image_exts)][:num_samples]
    plt.figure(figsize=(15, 5))

    for i, img_file in enumerate(img_files):
        img = cv2.imread(os.path.join(img_dir, img_file))
        if img is None:
            # cv2.imread returns None for unreadable files; cvtColor would fail on it.
            print(f"Skipping unreadable image: {img_file}")
            continue
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]

        label_path = os.path.join(label_dir, os.path.splitext(img_file)[0] + '.txt')

        plt.subplot(1, num_samples, i+1)
        plt.imshow(img_rgb)
        plt.title(f'{split_name}: {img_file}')
        plt.axis('off')

        if not os.path.exists(label_path):
            continue

        with open(label_path, 'r') as f:
            lines = f.readlines()

        for line in lines:
            parts = line.split()
            if len(parts) != 5:
                continue
            try:
                _, x_center, y_center, width, height = map(float, parts)
            except ValueError:
                # Skip label lines that do not parse as five numbers.
                continue

            # Scale the normalized centre/size back to pixels.
            width_px = width * w
            height_px = height * h
            x_min = int(x_center * w - width_px/2)
            y_min = int(y_center * h - height_px/2)

            rect = patches.Rectangle((x_min, y_min), width_px, height_px,
                                   linewidth=2, edgecolor='red', facecolor='none')
            plt.gca().add_patch(rect)

    plt.tight_layout()
    plt.show()

# Visual sanity check: plot two annotated samples from each of the train and
# validation splits.
print("\nSample annotations visualization:")
show_sample_annotations('train', 2)
show_sample_annotations('validation', 2)

print(f"\nDataset ready for YOLO training!")

In [None]:
# CELL 6 - Enhanced YOLO Training with Optimized Hyperparameters
from ultralytics import YOLO
import torch
import shutil
import os
import json
import threading
import time
from datetime import datetime

# Setup paths: training runs are written locally, checkpoints and the progress
# file live on Drive.
model_save_dir = '/content/yolo_models'
checkpoint_dir = '/content/drive/MyDrive/yolo_checkpoints'
progress_file = os.path.join(checkpoint_dir, 'training_progress.json')

os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(checkpoint_dir, exist_ok=True)

print(f"GPU Available: {torch.cuda.is_available()}")
print(f"Checkpoint directory: {checkpoint_dir}")

# Pick the batch size from the total memory of GPU 0 (in units of 1e9 bytes):
# below 8 -> 4, below 12 -> 6, otherwise 8; without a GPU -> 4.
if not torch.cuda.is_available():
    recommended_batch = 4
    print("No GPU detected. Using batch size 4.")
else:
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU Memory: {gpu_memory:.1f} GB")

    if gpu_memory >= 12:
        recommended_batch = 8
        print("High GPU memory. Using batch size 8.")
    elif gpu_memory >= 8:
        recommended_batch = 6
        print("Moderate GPU memory. Using batch size 6.")
    else:
        recommended_batch = 4
        print("WARNING: Low GPU memory detected. Using batch size 4.")

def save_training_progress(stage, epoch, status="in_progress"):
    """Write a training progress snapshot to the module-level ``progress_file``.

    Args:
        stage: Training stage number being recorded.
        epoch: Number of epochs completed in that stage.
        status: Free-form state label; defaults to ``"in_progress"``.

    The JSON snapshot also carries the current local timestamp and the
    planned epoch counts of both stages (20 and 50). Returns nothing.
    """
    snapshot = dict(
        stage=stage,
        completed_epochs=epoch,
        status=status,
        timestamp=datetime.now().isoformat(),
        stage1_epochs=20,
        stage2_epochs=50,
    )

    # Overwrites any previous snapshot.
    with open(progress_file, 'w') as handle:
        json.dump(snapshot, handle, indent=2)
    print(f"Progress saved: Stage {stage}, Epoch {epoch}")

def load_training_progress():
    """Read the progress snapshot from the module-level ``progress_file``.

    Returns:
        The stored dict, or ``None`` when the file does not exist, cannot be
        read, or does not contain a JSON object (for example after a write
        that was cut short). A warning is printed in the unreadable case so
        the caller starts without resume information instead of crashing.
    """
    if not os.path.exists(progress_file):
        return None

    try:
        with open(progress_file, 'r') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:  # json.JSONDecodeError is a ValueError
        print(f"WARNING: Could not read training progress ({e}); ignoring it")
        return None

    if not isinstance(data, dict):
        print("WARNING: Training progress file has an unexpected format; ignoring it")
        return None
    return data

def get_best_checkpoint(stage):
    """Locate the weights saved on Drive for a training stage.

    Looks in ``<checkpoint_dir>/stage<stage>/weights`` and prefers
    ``best.pt`` over ``last.pt``.

    Returns:
        Path of the first file that exists, or ``None`` if neither does.
    """
    weights_dir = os.path.join(checkpoint_dir, f'stage{stage}', 'weights')

    for filename in ('best.pt', 'last.pt'):
        candidate = os.path.join(weights_dir, filename)
        if os.path.exists(candidate):
            return candidate
    return None

def copy_checkpoint_to_drive(stage, epoch):
    """Mirror a stage's local run directory to Drive and record progress.

    Copies ``<model_save_dir>/stage<stage>`` to
    ``<checkpoint_dir>/stage<stage>``, replacing any previous copy, then
    saves a progress snapshot for ``(stage, epoch)`` and prints the copied
    size.

    Args:
        stage: Training stage number.
        epoch: Epoch count to record in the progress snapshot.

    Returns:
        True on success; False if the local directory does not exist or any
        step raised (the error is printed, not propagated).
    """
    local_stage_dir = os.path.join(model_save_dir, f'stage{stage}')
    drive_stage_dir = os.path.join(checkpoint_dir, f'stage{stage}')

    if not os.path.exists(local_stage_dir):
        return False

    # Copy into a staging directory first so the previous Drive checkpoint is
    # only removed once the new copy has fully succeeded.
    staging_dir = drive_stage_dir + '.tmp'

    try:
        if os.path.exists(staging_dir):
            shutil.rmtree(staging_dir)  # leftover from an interrupted copy
        shutil.copytree(local_stage_dir, staging_dir)

        if os.path.exists(drive_stage_dir):
            shutil.rmtree(drive_stage_dir)
        shutil.move(staging_dir, drive_stage_dir)

        # Save progress
        save_training_progress(stage, epoch)
        print(f"Checkpoint saved to Drive: Stage {stage}, Epoch {epoch}")

        # Log checkpoint size for verification
        checkpoint_size = sum(os.path.getsize(os.path.join(dirpath, filename))
                            for dirpath, dirnames, filenames in os.walk(drive_stage_dir)
                            for filename in filenames) / (1024**2)  # MB
        print(f"   Checkpoint size: {checkpoint_size:.1f} MB")

        return True
    except Exception as e:
        # Best-effort: a failed backup must not interrupt training.
        print(f"Failed to save checkpoint: {e}")
        return False

def get_training_epoch_from_logs(stage_dir):
    """Estimate the number of finished epochs from a run's ``results.csv``.

    Counts the non-blank lines of ``<stage_dir>/results.csv`` and subtracts
    one for the header row.

    Args:
        stage_dir: Directory that holds the run's ``results.csv``.

    Returns:
        The estimated epoch count, never negative; 0 when the file is
        missing, empty or unreadable.
    """
    results_file = os.path.join(stage_dir, 'results.csv')
    if not os.path.exists(results_file):
        return 0

    try:
        with open(results_file, 'r') as f:
            # Ignore blank lines so trailing newlines do not inflate the count.
            row_count = sum(1 for line in f if line.strip())
    except (OSError, UnicodeDecodeError):
        # The file may be mid-write by the trainer; treat as "no epochs yet".
        return 0

    # Subtract header; an empty file must not yield -1.
    return max(row_count - 1, 0)

# Load previous progress and decide where training starts.
# start_stage: 1 = run both stages, 2 = run Stage 2 only, 3 = skip training
# (both "if start_stage <= N" guards below are then false).
# resume_model_path: weights to load for the first stage that runs, if any.
progress = load_training_progress()
start_stage = 1
resume_model_path = None

if progress:
    print(f"\nFound previous training progress:")
    print(f"   Last stage: {progress['stage']}")
    print(f"   Last epoch: {progress['completed_epochs']}")
    print(f"   Status: {progress['status']}")
    print(f"   Timestamp: {progress['timestamp']}")

    # Consolidated final weights written at the end of Stage 2
    final_model_path = os.path.join(checkpoint_dir, 'final_model.pt')

    if progress['status'] == 'completed':
        print("Training already completed!")
        # Load the final model
        if os.path.exists(final_model_path):
            trained_model = YOLO(final_model_path)
            print("Loaded final trained model")
            # Without this the stages below would retrain from scratch.
            start_stage = 3
        else:
            print("Final model not found, will retrain")
    else:
        # Determine where to resume
        if progress['stage'] == 1:
            if progress['completed_epochs'] >= 20:
                # Stage 1 completed, start Stage 2
                start_stage = 2
                resume_model_path = get_best_checkpoint(1)
                print(f"Resuming from Stage 2 with model: {resume_model_path}")
            else:
                # Resume Stage 1 (the stage reloads these weights and then
                # runs its full epoch count again)
                start_stage = 1
                resume_model_path = get_best_checkpoint(1)
                if resume_model_path:
                    print(f"Resuming Stage 1 from epoch {progress['completed_epochs']} with model: {resume_model_path}")
                else:
                    print("Restarting Stage 1 (no valid checkpoint found)")
        elif progress['stage'] == 2:
            if progress['completed_epochs'] >= 50:
                print("Training already completed!")
                stage2_checkpoint = get_best_checkpoint(2)
                if os.path.exists(final_model_path):
                    start_stage = 3
                elif stage2_checkpoint:
                    # All epochs ran but the final copy never happened:
                    # promote the Stage 2 checkpoint instead of retraining.
                    shutil.copy2(stage2_checkpoint, final_model_path)
                    print(f"Promoted Stage 2 checkpoint to final model: {stage2_checkpoint}")
                    start_stage = 3
                else:
                    # Nothing usable from Stage 2; rerun it from Stage 1 weights.
                    start_stage = 2
                    resume_model_path = get_best_checkpoint(1)
                    print(f"No Stage 2 weights found, rerunning Stage 2 with Stage 1 model: {resume_model_path}")
            else:
                # Resume Stage 2
                start_stage = 2
                resume_model_path = get_best_checkpoint(2)
                if resume_model_path:
                    print(f"Resuming Stage 2 from epoch {progress['completed_epochs']} with model: {resume_model_path}")
                else:
                    # Fall back to Stage 1 model
                    resume_model_path = get_best_checkpoint(1)
                    print(f"Restarting Stage 2 with Stage 1 model: {resume_model_path}")

# Global variable to control monitoring thread: the training cell sets it to
# True before starting the thread and to False to request a stop.
monitoring_active = False

def monitor_training_stage(stage, max_epochs):
    """Poll a stage's training log and back up checkpoints at milestones.

    Intended to run in a background thread while ``monitoring_active`` is
    True. Every 30 seconds it reads the epoch count from the stage's
    ``results.csv`` and copies the run directory to Drive whenever a
    milestone epoch has been reached since the last successful save.
    Milestones are every 10th epoch plus stage-specific safety epochs
    (5 and 15 for stage 1; 12, 25 and 37 for stage 2).

    Args:
        stage: Training stage number being monitored.
        max_epochs: Epoch count at which the monitor stops on its own.
    """
    global monitoring_active
    stage_dir = os.path.join(model_save_dir, f'stage{stage}')
    last_saved_epoch = 0
    check_interval = 30  # Check every 30 seconds

    # Additional safety checkpoints
    safety_epochs = {
        1: [5, 15],        # 25% and 75% of 20 epochs
        2: [12, 25, 37],   # 25%, 50%, 75% of 50 epochs
    }.get(stage, [])
    milestones = sorted(set(range(10, max_epochs + 1, 10)) | set(safety_epochs))

    print(f"Starting training monitor for Stage {stage}")
    print(f"   Will save checkpoints every 10 epochs")

    while monitoring_active:
        try:
            # Sleep in one-second slices so a stop request is noticed quickly
            # instead of after a full polling interval.
            for _ in range(check_interval):
                if not monitoring_active:
                    break
                time.sleep(1)

            if not monitoring_active:
                break

            if os.path.exists(stage_dir):
                # Get current epoch from logs
                current_epoch = get_training_epoch_from_logs(stage_dir)

                # A milestone counts as reached once the log has passed it,
                # so fast epochs that finish between two polls are not missed.
                reached = [m for m in milestones if last_saved_epoch < m <= current_epoch]
                if reached:
                    success = copy_checkpoint_to_drive(stage, current_epoch)
                    if success:
                        last_saved_epoch = current_epoch
                        if reached[-1] % 10 == 0:
                            print(f"Auto-checkpoint saved: Stage {stage}, Epoch {current_epoch}")
                        else:
                            print(f"Safety checkpoint: Stage {stage}, Epoch {current_epoch}")

                # Check if training completed
                if current_epoch >= max_epochs:
                    print(f"Training appears complete for Stage {stage}")
                    break

        except Exception as e:
            # Keep the monitor alive; back off before the next attempt.
            print(f"Monitor error: {e}")
            time.sleep(60)

    print(f"Training monitor stopped for Stage {stage}")

# STAGE 1: Warm-up with optimized hyperparameters
# Runs 20 epochs with the first layers frozen, while a background thread
# backs up checkpoints to Drive.
if start_stage <= 1:
    print("\n" + "="*60)
    print("STAGE 1: Warm-up Training (20 epochs)")
    print("="*60)

    # Load appropriate model
    if resume_model_path and start_stage == 1:
        model = YOLO(resume_model_path)
        print(f"Loaded checkpoint: {resume_model_path}")
    else:
        # Start from the pretrained YOLOv8m (medium) weights
        model = YOLO('yolov8m.pt')
        print("Loaded YOLOv8m model (optimized for small datasets)")

    # Start monitoring thread
    monitoring_active = True
    monitor_thread = threading.Thread(target=monitor_training_stage, args=(1, 20), daemon=True)
    monitor_thread.start()

    try:
        results = model.train(
            data=config_path,
            epochs=20,
            batch=recommended_batch,
            name='stage1',
            project=model_save_dir,
            save_period=10,
            freeze=8,        # Freeze fewer layers (was 10)
            lr0=0.002,       # Much lower learning rate (was 0.01)
            lrf=0.1,         # Final learning rate factor
            momentum=0.937,  # Standard momentum
            weight_decay=0.0005,  # L2 regularization
            warmup_epochs=3, # Warmup for stability
            warmup_momentum=0.8,
            warmup_bias_lr=0.1,
            box=7.5,         # Box loss weight
            cls=0.5,         # Class loss weight
            dfl=1.5,         # DFL loss weight
            # Enhanced augmentations for small dataset
            hsv_h=0.015,     # Hue augmentation
            hsv_s=0.7,       # Saturation augmentation
            hsv_v=0.4,       # Value augmentation
            degrees=10,      # Rotation degrees
            translate=0.1,   # Translation
            scale=0.5,       # Scale augmentation
            shear=2,         # Shear degrees
            perspective=0.0, # Perspective (disable for license plates)
            flipud=0.0,      # Disable vertical flip (bad for text)
            fliplr=0.5,      # Horizontal flip
            mosaic=1.0,      # Mosaic augmentation
            mixup=0.1,       # Mixup augmentation
            copy_paste=0.3,  # Copy-paste augmentation
            patience=15,     # Increased patience
            exist_ok=True,
            verbose=True
        )

        # Stop monitoring and wait for the thread to finish, so its Drive copy
        # cannot overlap with the final copy below (the monitor polls every
        # 30 seconds, hence the timeout).
        monitoring_active = False
        monitor_thread.join(timeout=35)

        # Save final Stage 1 checkpoint
        copy_checkpoint_to_drive(1, 20)
        print("Stage 1 completed successfully!")

    except Exception as e:
        # Stop monitoring before touching the Drive copy
        monitoring_active = False
        monitor_thread.join(timeout=35)
        print(f"Stage 1 failed: {e}")

        # Try to save current progress
        try:
            stage1_dir = os.path.join(model_save_dir, 'stage1')
            current_epoch = get_training_epoch_from_logs(stage1_dir)
            if current_epoch > 0:
                copy_checkpoint_to_drive(1, current_epoch)
            save_training_progress(1, current_epoch, "failed")
        except Exception:
            save_training_progress(1, 0, "failed")
        raise  # re-raise with the original traceback

    finally:
        # Also clears the flag when the cell is interrupted (KeyboardInterrupt
        # is not an Exception), so the monitor thread does not keep running.
        monitoring_active = False

# STAGE 2: Full training with fine-tuned parameters
# Runs 50 epochs with all layers unfrozen, then publishes the best weights as
# final_model.pt on Drive and marks training as completed.
if start_stage <= 2:
    print("\n" + "="*60)
    print("STAGE 2: Full Training (50 epochs)")
    print("="*60)

    # Determine which model to load for Stage 2
    if start_stage == 2 and resume_model_path:
        # Resuming Stage 2
        model = YOLO(resume_model_path)
        print(f"Resuming with: {resume_model_path}")
    else:
        # Starting Stage 2 fresh
        stage1_best = get_best_checkpoint(1)
        if stage1_best:
            model = YOLO(stage1_best)
            print(f"Loading Stage 1 best: {stage1_best}")
        else:
            # Fallback: local Stage 1 weights, then the pretrained model
            stage1_local = os.path.join(model_save_dir, 'stage1', 'weights', 'best.pt')
            if os.path.exists(stage1_local):
                model = YOLO(stage1_local)
                print(f"Loading local Stage 1: {stage1_local}")
            else:
                model = YOLO('yolov8m.pt')
                print("No Stage 1 model found, using fresh YOLOv8m")

    # Start enhanced monitoring for Stage 2
    monitoring_active = True
    monitor_thread = threading.Thread(target=monitor_training_stage, args=(2, 50), daemon=True)
    monitor_thread.start()

    try:
        results = model.train(
            data=config_path,
            epochs=50,
            batch=recommended_batch,
            name='stage2',
            project=model_save_dir,
            save_period=10,
            freeze=0,        # Unfreeze all layers
            lr0=0.0005,      # Very low learning rate for fine-tuning (was 0.001)
            lrf=0.01,        # Final learning rate factor
            momentum=0.937,
            weight_decay=0.0005,
            warmup_epochs=0, # No warmup needed in stage 2
            box=7.5,
            cls=0.5,
            dfl=1.5,
            # Reduced augmentation for fine-tuning
            hsv_h=0.01,      # Reduced hue augmentation
            hsv_s=0.5,       # Reduced saturation
            hsv_v=0.3,       # Reduced value
            degrees=5,       # Reduced rotation
            translate=0.05,  # Reduced translation
            scale=0.3,       # Reduced scale
            shear=1,         # Reduced shear
            perspective=0.0,
            flipud=0.0,
            fliplr=0.3,      # Reduced horizontal flip
            mosaic=0.8,      # Reduced mosaic
            mixup=0.05,      # Reduced mixup
            copy_paste=0.1,  # Reduced copy-paste
            patience=25,     # Higher patience for longer training
            exist_ok=True,
            verbose=True
        )

        # Stop monitoring and wait for the thread to finish, so its Drive copy
        # cannot overlap with the final copy below (the monitor polls every
        # 30 seconds, hence the timeout).
        monitoring_active = False
        monitor_thread.join(timeout=35)

        # Save final Stage 2 checkpoint and mark as completed
        copy_checkpoint_to_drive(2, 50)

        # Save final model
        stage2_best = os.path.join(model_save_dir, 'stage2', 'weights', 'best.pt')
        final_model_drive = os.path.join(checkpoint_dir, 'final_model.pt')

        if os.path.exists(stage2_best):
            shutil.copy2(stage2_best, final_model_drive)
            print("Final model saved to Drive")
        else:
            print(f"WARNING: {stage2_best} not found; final model was not saved to Drive")

        # Mark training as completed
        save_training_progress(2, 50, "completed")
        print("Stage 2 completed successfully!")

    except Exception as e:
        # Stop monitoring before touching the Drive copy
        monitoring_active = False
        monitor_thread.join(timeout=35)
        print(f"Stage 2 failed: {e}")

        # Try to save current progress
        try:
            stage2_dir = os.path.join(model_save_dir, 'stage2')
            current_epoch = get_training_epoch_from_logs(stage2_dir)
            if current_epoch > 0:
                copy_checkpoint_to_drive(2, current_epoch)
            save_training_progress(2, current_epoch, "failed")
        except Exception:
            save_training_progress(2, 0, "failed")
        raise  # re-raise with the original traceback

    finally:
        # Also clears the flag when the cell is interrupted (KeyboardInterrupt
        # is not an Exception), so the monitor thread does not keep running.
        monitoring_active = False

# Pick the model used by the evaluation cell: the consolidated Drive copy if
# present, otherwise the local Stage 2 best weights.
final_model_path = os.path.join(checkpoint_dir, 'final_model.pt')
if not os.path.exists(final_model_path):
    # Fallback to local model
    local_best = os.path.join(model_save_dir, 'stage2', 'weights', 'best.pt')
    if not os.path.exists(local_best):
        print("No trained model found!")
    else:
        trained_model = YOLO(local_best)
        print("Loaded local trained model")
else:
    trained_model = YOLO(final_model_path)
    print("Final trained model loaded successfully!")

# Closing banner with the locations of the run's artifacts
print("\n" + "=" * 60)
print("TRAINING PIPELINE COMPLETED!")
print("=" * 60)
print(f"Check training progress: {progress_file}")
print(f"Models saved in: {checkpoint_dir}")
print(f"Final model: {final_model_path}")

# Make sure no monitor thread is left running
monitoring_active = False

In [None]:
# CELL 7 - Simple Comprehensive Evaluation
# Helper functions used by evaluate_model; they are defined first so they exist when it runs.

def load_ground_truth_boxes(label_path, img_width, img_height):
    """Read a YOLO label file and return its boxes in pixel corner form.

    Args:
        label_path: Path of the label file; a missing file yields no boxes.
        img_width: Image width used to scale normalized x values.
        img_height: Image height used to scale normalized y values.

    Returns:
        A list of ``[x1, y1, x2, y2]`` lists. Lines that do not consist of
        exactly five numeric fields are ignored; the class id is discarded.
    """
    if not os.path.exists(label_path):
        return []

    with open(label_path, 'r') as label_file:
        raw_lines = label_file.readlines()

    corners = []
    for raw in raw_lines:
        fields = raw.strip().split()
        if len(fields) != 5:
            continue
        try:
            _, cx, cy, box_w, box_h = map(float, fields)
        except ValueError:
            continue

        # Scale the normalized centre/size to pixels.
        cx_px = cx * img_width
        cy_px = cy * img_height
        w_px = box_w * img_width
        h_px = box_h * img_height

        # Centre/size -> top-left and bottom-right corners.
        corners.append([
            cx_px - w_px / 2,
            cy_px - h_px / 2,
            cx_px + w_px / 2,
            cy_px + h_px / 2,
        ])
    return corners

def calculate_box_iou(box1, box2):
    """Return the intersection-over-union of two ``[x1, y1, x2, y2]`` boxes.

    Yields 0.0 when the boxes do not overlap (touching edges count as no
    overlap) or when the union area is zero.
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Corners of the overlapping rectangle, if any.
    left, top = max(ax1, bx1), max(ay1, by1)
    right, bottom = min(ax2, bx2), min(ay2, by2)

    if right <= left or bottom <= top:
        return 0.0

    overlap = (right - left) * (bottom - top)
    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    union = area_a + area_b - overlap

    return overlap / union if union != 0 else 0.0

def evaluate_model(model, split_name, conf_thresh=0.5):
    """Evaluate a detector on one split with image-level metrics.

    Each readable image contributes exactly one outcome and one IoU score:
    - ground truth and predictions present: the best IoU over all GT/pred
      pairs; counted as a true positive if it exceeds 0.5, otherwise as a
      false negative;
    - ground truth only: false negative, score 0;
    - predictions only: false positive, score 0;
    - neither: true negative, score 1.

    Args:
        model: Callable as ``model(img_path, conf=..., verbose=False)``; the
            first result's ``boxes`` entries must expose ``xyxy``.
        split_name: Split sub-folder under the module-level
            ``yolo_dataset_path``.
        conf_thresh: Confidence threshold passed to the model.

    Returns:
        Dict with 'precision', 'recall', 'f1_score', 'accuracy', 'avg_iou',
        'binary_detection' (share of scores above 0.7) and the raw
        'iou_scores' list. All ratios are 0 when nothing could be evaluated.
    """
    img_dir = f'{yolo_dataset_path}/{split_name}/images'
    label_dir = f'{yolo_dataset_path}/{split_name}/labels'

    # Same case-insensitive extension set as the conversion step, so '.jpeg'
    # and upper-case names are not silently left out of the evaluation.
    image_exts = ('.jpg', '.jpeg', '.png')
    img_files = [f for f in os.listdir(img_dir) if f.lower().endswith(image_exts)]

    tp, fp, fn, tn = 0, 0, 0, 0
    iou_scores = []

    print(f"Evaluating {len(img_files)} images...")

    for i, img_file in enumerate(img_files):
        if i % 500 == 0 and i > 0:
            print(f"Progress: {i}/{len(img_files)}")

        img_path = f'{img_dir}/{img_file}'
        label_path = f'{label_dir}/{os.path.splitext(img_file)[0]}.txt'

        # Load image and get dimensions; unreadable images are not evaluated
        img = cv2.imread(img_path)
        if img is None:
            continue
        h, w = img.shape[:2]

        # Load ground truth
        gt_boxes = load_ground_truth_boxes(label_path, w, h)
        has_gt = len(gt_boxes) > 0

        # Get predictions
        results = model(img_path, conf=conf_thresh, verbose=False)
        pred_boxes = []
        if results[0].boxes is not None:
            for box in results[0].boxes:
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                pred_boxes.append([x1, y1, x2, y2])
        has_pred = len(pred_boxes) > 0

        # Calculate IoU and metrics
        if has_gt and has_pred:
            max_iou = 0
            for gt_box in gt_boxes:
                for pred_box in pred_boxes:
                    max_iou = max(max_iou, calculate_box_iou(gt_box, pred_box))

            iou_scores.append(max_iou)
            if max_iou > 0.5:
                tp += 1
            else:
                fn += 1
        elif has_gt:
            fn += 1
            iou_scores.append(0)
        elif has_pred:
            fp += 1
            iou_scores.append(0)
        else:
            tn += 1
            iou_scores.append(1)

    # Every evaluated image appended exactly one score, so this is the number
    # of images that actually contributed (skipped unreadable ones excluded).
    evaluated = len(iou_scores)

    # Calculate final metrics (all guarded against empty denominators)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    accuracy = (tp + tn) / evaluated if evaluated else 0
    avg_iou = sum(iou_scores) / evaluated if evaluated else 0

    # Binary detection (IoU > 0.7, same as U-Net)
    binary_detection = sum(1 for iou in iou_scores if iou > 0.7) / evaluated if evaluated else 0

    return {
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'accuracy': accuracy,
        'avg_iou': avg_iou,
        'binary_detection': binary_detection,
        'iou_scores': iou_scores
    }

# Evaluate on test and validation with the default confidence threshold (0.5).
# Requires `trained_model` to have been set by the training cell.
print("Evaluating model...")
val_results = evaluate_model(trained_model, 'validation')
test_results = evaluate_model(trained_model, 'test')

print("\nVALIDATION RESULTS:")
print(f"IoU: {val_results['avg_iou']:.4f}")
print(f"Precision: {val_results['precision']:.4f}")
print(f"Recall: {val_results['recall']:.4f}")
print(f"F1-Score: {val_results['f1_score']:.4f}")
print(f"Binary Detection (IoU>0.7): {val_results['binary_detection']:.4f}")

print("\nTEST RESULTS:")
print(f"IoU: {test_results['avg_iou']:.4f}")
print(f"Precision: {test_results['precision']:.4f}")
print(f"Recall: {test_results['recall']:.4f}")
print(f"F1-Score: {test_results['f1_score']:.4f}")
print(f"Binary Detection (IoU>0.7): {test_results['binary_detection']:.4f}")
# Same value as the line above, shown as a percentage.
print(f"Boundary Box Accuracy: {test_results['binary_detection']*100:.1f}%")

In [None]:
# CELL 8 - Simple Training Visualization (Updated for new epoch counts)
import matplotlib.pyplot as plt
import pandas as pd

# Load the per-epoch training logs written for each of the two training stages.
stage1_csv = f'{model_save_dir}/stage1/results.csv'
stage2_csv = f'{model_save_dir}/stage2/results.csv'

if os.path.exists(stage1_csv) and os.path.exists(stage2_csv):
    stage1_data = pd.read_csv(stage1_csv)
    stage2_data = pd.read_csv(stage2_csv)

    # results.csv headers may be whitespace-padded (seen in some Ultralytics
    # releases); strip them so lookups such as 'train/box_loss' do not raise
    # KeyError. Stripping is a no-op when the headers are already clean.
    for _stage_df in (stage1_data, stage2_data):
        _stage_df.columns = _stage_df.columns.str.strip()

    # Stage 2 rows are appended after stage 1, so the x-axis is a continuous
    # 1-based epoch index across both stages.
    all_data = pd.concat([stage1_data, stage2_data], ignore_index=True)

    fig, axes = plt.subplots(2, 3, figsize=(15, 8))
    epochs = range(1, len(all_data) + 1)
    stage1_end = len(stage1_data)
    stage2_len = len(stage2_data)

    def _finish_panel(ax, title, mark_boundary=True, **boundary_kwargs):
        """Add the stage-1 boundary line (optional), title, legend and grid."""
        if mark_boundary:
            ax.axvline(x=stage1_end, color='green', linestyle='--', alpha=0.5,
                       **boundary_kwargs)
        ax.set_title(title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Loss
    axes[0,0].plot(epochs, all_data['train/box_loss'], 'b-', label='Train')
    if 'val/box_loss' in all_data.columns:
        axes[0,0].plot(epochs, all_data['val/box_loss'], 'r-', label='Val')
    # Only this panel labels the boundary line so it shows up in a legend once.
    _finish_panel(axes[0,0], 'Box Loss',
                  label=f'Stage 1 End (Epoch {stage1_end})')

    # mAP (IoU equivalent)
    axes[0,1].plot(epochs, all_data['metrics/mAP50(B)'], 'g-', label='mAP@0.5')
    _finish_panel(axes[0,1], 'mAP@0.5 (IoU Equivalent)')

    # Precision & Recall
    precision = all_data['metrics/precision(B)']
    recall = all_data['metrics/recall(B)']
    axes[0,2].plot(epochs, precision, 'purple', label='Precision')
    axes[0,2].plot(epochs, recall, 'brown', label='Recall')
    _finish_panel(axes[0,2], 'Precision & Recall')

    # F1-Score: epochs where precision + recall == 0 yield NaN (0/0), which is
    # mapped to 0 so the curve has no gaps.
    f1 = 2 * (precision * recall) / (precision + recall)
    f1 = f1.fillna(0)
    axes[1,0].plot(epochs, f1, 'cyan', label='F1-Score')
    _finish_panel(axes[1,0], 'F1-Score')

    # Learning Rate
    axes[1,1].plot(epochs, all_data['lr/pg0'], 'magenta', label='Learning Rate')
    _finish_panel(axes[1,1], 'Learning Rate')

    # Final Performance Bar: validation vs. test, side by side per metric.
    metrics = ['IoU', 'Precision', 'Recall', 'F1', 'Binary Det']
    result_keys = ['avg_iou', 'precision', 'recall', 'f1_score', 'binary_detection']
    val_scores = [val_results[k] for k in result_keys]
    test_scores = [test_results[k] for k in result_keys]

    x = range(len(metrics))
    width = 0.35
    axes[1,2].bar([i - width/2 for i in x], val_scores, width, label='Val', alpha=0.7)
    axes[1,2].bar([i + width/2 for i in x], test_scores, width, label='Test', alpha=0.7)
    axes[1,2].set_xticks(x)
    axes[1,2].set_xticklabels(metrics, rotation=45)
    # The bar chart has no epoch axis, so no stage boundary is drawn.
    _finish_panel(axes[1,2], 'Final Performance', mark_boundary=False)

    plt.tight_layout()
    plt.show()

    # Summary. The per-stage epoch counts come from the logs themselves so the
    # breakdown always adds up to the reported total (e.g. after early stopping).
    print("\nTRAINING SUMMARY:")
    print(f"Total epochs: {len(all_data)} (Stage 1: {stage1_end}, Stage 2: {stage2_len})")
    # NOTE: batch size and checkpoint frequency are hard-coded here, not read
    # from the logs; keep them in sync with the training cell.
    print("Batch size used: 8")
    print("Checkpoint frequency: Every 10 epochs")
    print(f"Best mAP@0.5: {all_data['metrics/mAP50(B)'].max():.4f}")
    print(f"Best Precision: {all_data['metrics/precision(B)'].max():.4f}")
    print(f"Best Recall: {all_data['metrics/recall(B)'].max():.4f}")
    print(f"Final Test IoU: {test_results['avg_iou']:.4f}")
    print(f"Stage 1 completed at epoch: {stage1_end}")
    print(f"Total training epochs: {len(all_data)}")
else:
    # Make the skip visible instead of silently producing no plots or summary.
    _missing_csvs = [p for p in (stage1_csv, stage2_csv) if not os.path.exists(p)]
    print(f"Skipping training visualization; results file(s) not found: {', '.join(_missing_csvs)}")

print("Enhanced YOLO training completed!")