In [12]:
from ultralytics import YOLO
import torch
import os

In [26]:
# --- Configuration ---
DATA_YAML_PATH = r"D:\VScodefiles\DeepLearningProject\sonar_dataset_10k_3k_3k.yaml"

# Model selection 
MODEL_NAME = "yolo11s.pt"  

# Training parameters optimized for RTX 3060 6GB
IMG_SIZE = 640    
BATCH_SIZE = 16    
EPOCHS = 100
WORKERS = 8       

# Memory optimization settings
PRECISION = 'fp16'  
PERSISTENT_WORKERS = True  

In [3]:
print("=== Optimized Training Configuration ===")
print(f"Using device: {'cuda' if torch.cuda.is_available() else 'cpu'}")
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU: {gpu_name}")
    print(f"GPU Memory: {gpu_memory:.1f} GB")

print(f"\n=== Performance Settings ===")
print(f"Batch size: {BATCH_SIZE} (optimized for 6GB VRAM)")
print(f"Workers: {WORKERS} (increased for i7-12th gen)")
print(f"Image size: {IMG_SIZE}")
print(f"Persistent workers: {PERSISTENT_WORKERS}")

=== Optimized Training Configuration ===
Using device: cuda
GPU: NVIDIA GeForce RTX 3060 Laptop GPU
GPU Memory: 6.4 GB

=== Performance Settings ===
Batch size: 8 (optimized for 6GB VRAM)
Workers: 8 (increased for i7-12th gen)
Image size: 640
Persistent workers: True


In [27]:
# --- Load a model ---
print(f"\nLoading model: {MODEL_NAME}")
try:
    model = YOLO(MODEL_NAME)  # Load a pretrained model
    print(f"Model loaded successfully: {type(model)}")
except Exception as e:
    print(f"Failed to load model {MODEL_NAME}: {e}")
    # Potentially try downloading the model again or check MODEL_NAME
    raise e


Loading model: yolo11s.pt
Model loaded successfully: <class 'ultralytics.models.yolo.model.YOLO'>


In [28]:
try:
    dataset_info = model.model.yaml 
    print("Dataset YAML structure seems valid (basic check passed).")
except Exception as e:
    print(f"Warning: Potential issue validating dataset structure: {e}")

Dataset YAML structure seems valid (basic check passed).


In [None]:
# --- Start Training ---
print("\nStarting training with optimized settings...")
try:
    results = model.train(
        data=DATA_YAML_PATH,
        epochs=EPOCHS,
        imgsz=IMG_SIZE,
        batch=BATCH_SIZE,           
        workers=WORKERS,            
        device=0,                                   
        patience=20,                
        save=True,
        exist_ok=True,              
        pretrained=True,
        optimizer='auto',           
        lr0=0.01,                   
        weight_decay=0.0005,
        warmup_epochs=3.0,          
        warmup_momentum=0.8,
        box=7.5,                    
        cls=0.5,                    
        dfl=1.5,                    
        close_mosaic=10,            
        overlap_mask=False,         
    )

    
    print("Training completed successfully!")
    
    # Save the final model path
    print(f"Best model saved at: {model.trainer.best}")
    print(f"Best model saved at: {results.save_dir / 'weights/best.pt'}") # Or print(results) to see its attributes
    print(f"Training results saved in: {results.save_dir}")
    print("Check the 'results.png', 'confusion_matrix.png', 'labels.jpg', etc., in the save directory for training plots.")
    
    
except RuntimeError as e:
    if "out of memory" in str(e).lower():
        print("\n⚠️  GPU OUT OF MEMORY ERROR!")
        print("Solutions to try:")
        print("1. Reduce batch_size to 4")
        print("2. Reduce image size to 512")
        print("3. Close other applications using GPU")
        print("4. Use 'yolo11n.pt' instead of 'yolo11s.pt'")
    raise e

except Exception as e:
    print(f"Training failed with error: {e}")
    raise e


Starting training with optimized settings...
Ultralytics 8.3.223  Python-3.13.5 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=D:\VScodefiles\DeepLearningProject\sonar_dataset_10k_3k_3k.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False

KeyboardInterrupt: 

In [31]:
from ultralytics import YOLO
import torch

CHECKPOINT_PATH = r"D:\VScodefiles\DeepLearningProject\runs\detect\train\weights\last.pt" # Adjust path if needed

DATA_YAML_PATH = r"D:\VScodefiles\DeepLearningProject\sonar_dataset_10k_3k_3k.yaml" # Ensure this is correct

print(f"\nLoading model from checkpoint: {CHECKPOINT_PATH}")
try:
    model = YOLO(CHECKPOINT_PATH) 
    print(f"Model loaded successfully from checkpoint: {type(model)}")
except Exception as e:
    print(f"Failed to load model from checkpoint {CHECKPOINT_PATH}: {e}")
    raise e

print("\nResuming training from the checkpoint...")
try:
    results = model.train(
        resume=True, # This tells the trainer to resume from the loaded checkpoint
        data=DATA_YAML_PATH, # Path to your dataset YAML (should be the same as original)
        # epochs=EPOCHS, # Optional: You can adjust the total epochs if needed, e.g., if you want to train for 100 total, set this to 100 now, and it will train for 100-58 more epochs
        # imgsz=IMG_SIZE, # Usually not needed to specify again if saved in checkpoint, but can if changed
        # batch=BATCH_SIZE, # Usually not needed to specify again if saved in checkpoint, but can if changed
        # device=0, # Can specify again, or let it auto-detect
        # ... other arguments if you need to change them from the checkpoint's original settings ...
        # Note: Arguments like optimizer state, learning rate schedule, current epoch count are loaded from the checkpoint
    )

    print("Training resumed and completed successfully!")

    # The best model path is available after training finishes
    print(f"Best model saved at: {results.save_dir / 'weights/best.pt'}")
    print(f"Training results saved in: {results.save_dir}")

except RuntimeError as e:
    if "out of memory" in str(e).lower():
        print("\n⚠️  GPU OUT OF MEMORY ERROR!")
        print("Solutions to try:")
        print("1. Reduce batch_size")
        print("2. Reduce image size")
        print("3. Close other applications using GPU")
        print("4. Use a smaller model")
    raise e

except Exception as e:
    print(f"Training resumption failed with error: {e}")
    raise e

print("\nTraining resumption pipeline completed!")


Loading model from checkpoint: D:\VScodefiles\DeepLearningProject\runs\detect\train\weights\last.pt
Model loaded successfully from checkpoint: <class 'ultralytics.models.yolo.model.YOLO'>

Resuming training from the checkpoint...
Ultralytics 8.3.223  Python-3.13.5 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=D:\VScodefiles\DeepLearningProject\sonar_dataset_10k_3k_3k.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1

In [32]:
# --- Validation ---
print("\n--- Running Validation ---")
try:
    # Validate the model
    metrics = model.val()
    print(f"Validation mAP50-95: {metrics.box.map:.4f}")
    print(f"Validation mAP50: {metrics.box.map50:.4f}")
    
except Exception as e:
    print(f"Validation error: {e}")


--- Running Validation ---
Ultralytics 8.3.223  Python-3.13.5 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)
YOLO11s summary (fused): 100 layers, 9,413,574 parameters, 0 gradients, 21.3 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.10.1 ms, read: 587.7462.3 MB/s, size: 398.8 KB)
[K[34m[1mval: [0mScanning D:\VScodefiles\DeepLearningProject\sonardataset_10k_3k_3k\val\labels.cache... 3000 images, 2030 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 3000/3000 6.4Mit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 188/188 5.9it/s 31.8s0.2ss
                   all       3000       2173       0.98      0.914      0.953      0.814
                 MILCO        843       1420      0.977      0.891       0.94      0.787
                 NOMBO        392        753      0.984      0.938      0.967       0.84
Speed: 1.7ms preprocess, 7.3ms inference, 0.0ms loss, 0.4ms postprocess per image
Results s

In [35]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ultralytics import YOLO
import torch
import os

TRAIN_RUN_DIR = r"D:\VScodefiles\DeepLearningProject\runs\detect\train" 

OUTPUT_PLOTS_DIR = os.path.join(TRAIN_RUN_DIR, "custom_plots")
os.makedirs(OUTPUT_PLOTS_DIR, exist_ok=True)

results_csv_path = os.path.join(TRAIN_RUN_DIR, "results.csv")

if os.path.exists(results_csv_path):
    print(f"Loading training history from {results_csv_path}")
    results_df = pd.read_csv(results_csv_path)
    print(f"Loaded {len(results_df)} epochs of training history.")
    print(f"Available columns in results.csv:")
    for i, col in enumerate(results_df.columns):
        print(f"  [{i}] '{col}'") 
else:
    print(f"Warning: Could not find results.csv at {results_csv_path}. Cannot generate training curve plots.")
    results_df = pd.DataFrame() 

def save_high_quality_plot(fig, filename, dpi=300):
    """Saves a matplotlib figure with high DPI."""
    filepath = os.path.join(OUTPUT_PLOTS_DIR, filename)
    fig.savefig(filepath, dpi=dpi, bbox_inches='tight', facecolor='white')
    print(f"Saved high-quality plot: {filepath}")
    plt.close(fig) 

# --- Generate Plots ---

if not results_df.empty:
    box_loss_col = 'train/box_loss' 
    cls_loss_col = 'train/cls_loss' 
    dfl_loss_col = 'train/dfl_loss' 
    val_box_loss_col = 'val/box_loss' 
    val_cls_loss_col = 'val/cls_loss' 
    val_dfl_loss_col = 'val/dfl_loss' 
    map50_col = 'metrics/mAP50(B)' 
    map50_95_col = 'metrics/mAP50-95(B)' 
    lr0_col = 'x/lr0' 
    precision_col = 'metrics/precision(B)' 
    recall_col = 'metrics/recall(B)' 

    missing_cols = []
    for col_name in [box_loss_col, cls_loss_col, dfl_loss_col, map50_col, map50_95_col, lr0_col]:
        if col_name not in results_df.columns:
            missing_cols.append(col_name)
    if missing_cols:
        print(f"\nWarning: Some expected columns not found: {missing_cols}")
        print("Please check the printed column list above and update the variable names in the script.")
        if not any(col in results_df.columns for col in [box_loss_col, cls_loss_col, dfl_loss_col]):
             print("Essential loss columns not found. Skipping loss plots.")
             essential_loss_found = False
        else:
            essential_loss_found = True
        if not any(col in results_df.columns for col in [map50_col, map50_95_col]):
             print("Essential mAP columns not found. Skipping mAP plots.")
             essential_mAP_found = False
        else:
            essential_mAP_found = True
    else:
        essential_loss_found = True
        essential_mAP_found = True

    if essential_loss_found or essential_mAP_found:
        print("\nGenerating Training/Validation Curves...")
        fig, axes = plt.subplots(2, 2, figsize=(12, 10)) # 2x2 grid
        fig.suptitle('YOLOv11 Training History', fontsize=16)

        if box_loss_col in results_df.columns:
            axes[0, 0].plot(results_df['epoch'], results_df[box_loss_col], label='Train Box Loss', marker='o', markersize=3)
            if val_box_loss_col in results_df.columns:
                 axes[0, 0].plot(results_df['epoch'], results_df[val_box_loss_col], label='Val Box Loss', marker='s', markersize=3)
            axes[0, 0].set_title('Box Loss')
            axes[0, 0].set_xlabel('Epoch')
            axes[0, 0].set_ylabel('Loss')
            axes[0, 0].grid(True, linestyle='--', alpha=0.6)
            axes[0, 0].legend()
        else:
            axes[0, 0].text(0.5, 0.5, 'Box Loss Not Found', horizontalalignment='center', verticalalignment='center', transform=axes[0, 0].transAxes)
            axes[0, 0].set_title('Box Loss (Not Available)')

        if cls_loss_col in results_df.columns:
            axes[0, 1].plot(results_df['epoch'], results_df[cls_loss_col], label='Train CLS Loss', marker='o', markersize=3, color='orange')
            if val_cls_loss_col in results_df.columns:
                 axes[0, 1].plot(results_df['epoch'], results_df[val_cls_loss_col], label='Val CLS Loss', marker='s', markersize=3, color='darkorange')
            axes[0, 1].set_title('Classification Loss')
            axes[0, 1].set_xlabel('Epoch')
            axes[0, 1].set_ylabel('Loss')
            axes[0, 1].grid(True, linestyle='--', alpha=0.6)
            axes[0, 1].legend()
        else:
            axes[0, 1].text(0.5, 0.5, 'CLS Loss Not Found', horizontalalignment='center', verticalalignment='center', transform=axes[0, 1].transAxes)
            axes[0, 1].set_title('Classification Loss (Not Available)')

        if dfl_loss_col in results_df.columns:
            axes[1, 0].plot(results_df['epoch'], results_df[dfl_loss_col], label='Train DFL Loss', marker='o', markersize=3, color='green')
            if val_dfl_loss_col in results_df.columns:
                 axes[1, 0].plot(results_df['epoch'], results_df[val_dfl_loss_col], label='Val DFL Loss', marker='s', markersize=3, color='darkgreen')
            axes[1, 0].set_title('DFL Loss')
            axes[1, 0].set_xlabel('Epoch')
            axes[1, 0].set_ylabel('Loss')
            axes[1, 0].grid(True, linestyle='--', alpha=0.6)
            axes[1, 0].legend()
        else:
            axes[1, 0].text(0.5, 0.5, 'DFL Loss Not Found', horizontalalignment='center', verticalalignment='center', transform=axes[1, 0].transAxes)
            axes[1, 0].set_title('DFL Loss (Not Available)')

        if essential_mAP_found:
            axes[1, 1].plot(results_df['epoch'], results_df[map50_col], label='mAP50', marker='o', markersize=3, color='red')
            axes[1, 1].plot(results_df['epoch'], results_df[map50_95_col], label='mAP50-95', marker='o', markersize=3, color='purple')
            axes[1, 1].set_title('Mean Average Precision')
            axes[1, 1].set_xlabel('Epoch')
            axes[1, 1].set_ylabel('mAP')
            axes[1, 1].grid(True, linestyle='--', alpha=0.6)
            axes[1, 1].legend()
        else:
            axes[1, 1].text(0.5, 0.5, 'mAP columns not found', horizontalalignment='center', verticalalignment='center', transform=axes[1, 1].transAxes)
            axes[1, 1].set_title('Mean Average Precision (Not Available)')

        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        save_high_quality_plot(fig, "training_curves.png", dpi=300)


    if lr0_col in results_df.columns:
        print("\nGenerating Learning Rate Schedule Plot...")
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(results_df['epoch'], results_df[lr0_col], label='Learning Rate (LR0)', marker='o', markersize=3, color='brown')
        ax.set_title('Learning Rate Schedule Over Epochs')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Learning Rate')
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend()
        save_high_quality_plot(fig, "learning_rate_schedule.png", dpi=300)
    else:
        print(f"\nWarning: Learning Rate column ({lr0_col}) not found in results.csv. Plotting skipped.")

    if precision_col in results_df.columns and recall_col in results_df.columns:
        print("\nGenerating Precision-Recall over Epochs Plot...")
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(results_df['epoch'], results_df[precision_col], label='Precision', marker='o', markersize=3, color='blue')
        ax.plot(results_df['epoch'], results_df[recall_col], label='Recall', marker='s', markersize=3, color='magenta')
        ax.set_title('Precision and Recall over Epochs (Validation)')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Score')
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend()
        save_high_quality_plot(fig, "precision_recall_over_epochs.png", dpi=300)
    else:
        print(f"\nWarning: Precision or Recall columns ({precision_col}, {recall_col}) not found in results.csv. Plotting skipped.")


print(f"\nCustom high-quality plots (300 DPI) saved in: {OUTPUT_PLOTS_DIR}")
print("Remember to also check the automatically generated plots in the training and validation directories.")

Loading training history from D:\VScodefiles\DeepLearningProject\runs\detect\train\results.csv
Loaded 42 epochs of training history.
Available columns in results.csv:
  [0] 'epoch'
  [1] 'time'
  [2] 'train/box_loss'
  [3] 'train/cls_loss'
  [4] 'train/dfl_loss'
  [5] 'metrics/precision(B)'
  [6] 'metrics/recall(B)'
  [7] 'metrics/mAP50(B)'
  [8] 'metrics/mAP50-95(B)'
  [9] 'val/box_loss'
  [10] 'val/cls_loss'
  [11] 'val/dfl_loss'
  [12] 'lr/pg0'
  [13] 'lr/pg1'
  [14] 'lr/pg2'

Please check the printed column list above and update the variable names in the script.

Generating Training/Validation Curves...
Saved high-quality plot: D:\VScodefiles\DeepLearningProject\runs\detect\train\custom_plots\training_curves.png


Generating Precision-Recall over Epochs Plot...
Saved high-quality plot: D:\VScodefiles\DeepLearningProject\runs\detect\train\custom_plots\precision_recall_over_epochs.png

Custom high-quality plots (300 DPI) saved in: D:\VScodefiles\DeepLearningProject\runs\detect\train\

In [37]:
# --- Export for Deployment ---
print("\n--- Model Export Options ---")
export_choice = input("Export model to ONNX/TensorRT? (y/n): ").lower()
if export_choice == 'y':
    try:
        # Export to ONNX
        model.export(format='onnx', imgsz=IMG_SIZE)
        print("Model exported to ONNX format")
    except Exception as e:
        print(f"Export failed: {e}")

print("\nTraining pipeline completed!")


--- Model Export Options ---
Ultralytics 8.3.223  Python-3.13.5 torch-2.7.1+cu118 CPU (12th Gen Intel Core(TM) i7-12700H)

[34m[1mPyTorch:[0m starting from 'D:\VScodefiles\DeepLearningProject\runs\detect\train\weights\best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 6, 8400) (18.3 MB)

[34m[1mONNX:[0m starting export with onnx 1.19.1 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.72...
[34m[1mONNX:[0m export success  1.5s, saved as 'D:\VScodefiles\DeepLearningProject\runs\detect\train\weights\best.onnx' (36.2 MB)

Export complete (1.9s)
Results saved to [1mD:\VScodefiles\DeepLearningProject\runs\detect\train\weights[0m
Predict:         yolo predict task=detect model=D:\VScodefiles\DeepLearningProject\runs\detect\train\weights\best.onnx imgsz=640  
Validate:        yolo val task=detect model=D:\VScodefiles\DeepLearningProject\runs\detect\train\weights\best.onnx imgsz=640 data=D:\VScodefiles\DeepLearningProject\sonar_dataset_10k_3k_3k.yaml  
