## 0- Imports

In [None]:
import os
import shutil
from PIL import Image
import matplotlib.pyplot as plt
import yaml

from ultralytics import YOLO

---

## 1- Preparing the Dataset

I use a YAML file since it is the best practice with the YOLO models. To use such file, first, I create a convenient version of the dataset

In [2]:
base_dir = os.getcwd()
annotations_dir = os.path.join(base_dir, 'Annotations')
images_dir = os.path.join(base_dir, 'Images')
imagesets_dir = os.path.join(base_dir, 'ImageSets')

In [3]:
dataset_dir = os.path.join(base_dir, 'dataset')
os.makedirs(os.path.join(dataset_dir, 'train', 'images'), exist_ok=True)
os.makedirs(os.path.join(dataset_dir, 'train', 'labels'), exist_ok=True)
os.makedirs(os.path.join(dataset_dir, 'val', 'images'), exist_ok=True)
os.makedirs(os.path.join(dataset_dir, 'val', 'labels'), exist_ok=True)
os.makedirs(os.path.join(dataset_dir, 'test', 'images'), exist_ok=True)
os.makedirs(os.path.join(dataset_dir, 'test', 'labels'), exist_ok=True)

In [4]:
def convert_to_yolo_format(bbox, img_width, img_height):
    x_min, y_min, x_max, y_max, class_id = bbox
    
    # Convert to center format
    x_center = (x_min + x_max) / 2
    y_center = (y_min + y_max) / 2
    width = x_max - x_min
    height = y_max - y_min
    
    # Normalize to 0-1
    x_center /= img_width
    y_center /= img_height
    width /= img_width
    height /= img_height
    
    # YOLO uses zero-indexed classes
    class_id = int(class_id) - 1
    
    return [class_id, x_center, y_center, width, height]

In [None]:
# Process each split (train, val, test)
for split in ['train', 'val', 'test']:
    # Read filenames from split files
    with open(os.path.join(imagesets_dir, f'{split}.txt'), 'r') as f:
        filenames = [line.strip() for line in f.readlines()]
    
    for filename in filenames:
        # Image path
        image_path = os.path.join(images_dir, f'{filename}.png')
            
        # Get image dimensions
        with Image.open(image_path) as img:
            img_width, img_height = img.size
        
        # Annotation path
        annotation_path = os.path.join(annotations_dir, f'{filename}.txt')
        
        # Destination paths
        dest_img_path = os.path.join(dataset_dir, split, 'images', os.path.basename(image_path))
        dest_label_path = os.path.join(dataset_dir, split, 'labels', f'{filename}.txt')
        
        # Copy image
        shutil.copy(image_path, dest_img_path)
        
        # Convert and save annotation
        with open(annotation_path, 'r') as f:
            bbox_lines = f.readlines()
            
        with open(dest_label_path, 'w') as f:
            for line in bbox_lines:
                bbox = list(map(float, line.strip().split()))
                yolo_bbox = convert_to_yolo_format(bbox, img_width, img_height)
                f.write(' '.join(map(str, yolo_bbox)) + '\n')

Dataset conversion complete!


In [None]:
data_yaml = {
    'path': dataset_dir,  # Absolute path to dataset directory
    'train': os.path.join(dataset_dir, 'train', 'images'),
    'val': os.path.join(dataset_dir, 'val', 'images'),
    'test': os.path.join(dataset_dir, 'test', 'images'),
    'names': {
        0: 'car'
    }
}

# Write to file
with open('dataset.yaml', 'w') as f:
    yaml.dump(data_yaml, f, sort_keys=False)

Created dataset.yaml with path: c:\Users\Bora\Desktop\HU\AIN433\Assignment 3\dataset


---

## 2- Training

First, I define a global training function that can be used for all settings.

In [28]:
def train_yolo_frozen_blocks(blocks_to_freeze=3, optimizer='Adam', lr=0.001, batch_size=16, epochs=30):
    """
    Train YOLOv8n with specific blocks frozen and hyperparameters.
    
    Args:
        blocks_to_freeze: Number of blocks to freeze from the start
        optimizer: Optimizer to use ('Adam', 'SGD', 'AdamW')
        lr: Learning rate
        batch_size: Batch size for training
        epochs: Number of epochs
    
    Returns:
        Trained model and training results
    """
    # Load a pretrained YOLOv8n model
    model = YOLO(os.path.join(base_dir, 'yolov8n.pt'))
    
    # Freeze the first blocks_to_freeze blocks
    for i in range(blocks_to_freeze):
        for param in model.model.model[i].parameters():
            param.requires_grad = False
        print(f"Block {i} frozen")
    
    # Define training hyperparameters
    training_args = {
        'data': 'dataset.yaml',
        'epochs': epochs,
        'imgsz': 640,
        'batch': batch_size,
        'optimizer': optimizer.lower(),
        'lr0': lr,
        'name': f'freeze_{blocks_to_freeze}_opt_{optimizer}_lr_{lr}_batch_{batch_size}'
    }
    
    # Train the model
    results = model.train(**training_args)
    
    return model, results

Plotting is automatically done by YOLO trainer, making our lives much easier :)

In [64]:
def visualize_results(results):
    """
    Visualize training results and prediction comparisons from YOLOv8 output.
    
    Args:
        results: The results object returned by model.train()
    """
    # Get the save directory
    save_dir = results.save_dir
    print(f"Loading visualizations from: {save_dir}")
    
    # 1. Display the results.png which contains all the metrics and curves
    results_img_path = os.path.join(save_dir, 'results.png')
    if os.path.exists(results_img_path):
        plt.figure(figsize=(12, 8))
        img = plt.imread(results_img_path)
        plt.imshow(img)
        plt.axis('off')
        plt.title('Training Results and Metrics')
        plt.tight_layout()
        plt.show()
    else:
        print(f"Warning: Results image not found at {results_img_path}")
    
    # 2. Display validation batch images side by side (ground truth vs predictions)
    labels_img_path = os.path.join(save_dir, 'val_batch0_labels.jpg')
    preds_img_path = os.path.join(save_dir, 'val_batch0_pred.jpg')
    
    if os.path.exists(labels_img_path) and os.path.exists(preds_img_path):
        plt.figure(figsize=(18, 8))
        
        # Ground truth
        plt.subplot(1, 2, 1)
        img_labels = plt.imread(labels_img_path)
        plt.imshow(img_labels)
        plt.axis('off')
        plt.title('Ground Truth Labels')
        
        # Predictions
        plt.subplot(1, 2, 2)
        img_preds = plt.imread(preds_img_path)
        plt.imshow(img_preds)
        plt.axis('off')
        plt.title('Model Predictions')
        
        plt.tight_layout()
        plt.show()
    else:
        print(f"Warning: Validation images not found at {labels_img_path} or {preds_img_path}")

### 2.1- Freezing the First 3 Blocks

In [None]:
# Train with first 3 blocks frozen (stem + first two backbone blocks)
model, results = train_yolo_frozen_blocks(
    blocks_to_freeze=3,
    optimizer='Adam',
    lr=0.001,
    batch_size=16,
    epochs=30
)

visualize_results(results)

Block 0 frozen
Block 1 frozen
Block 2 frozen
New https://pypi.org/project/ultralytics/8.3.126 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.49  Python-3.10.0 torch-2.4.1+cu124 CUDA:0 (NVIDIA GeForce RTX 3080 Laptop GPU, 8192MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=c:\Users\Bora\Desktop\HU\AIN433\Assignment 3\yolov8n.pt, data=dataset.yaml, epochs=30, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=freeze_3_opt_Adam_lr_0.001_batch_168, exist_ok=False, pretrained=True, optimizer=adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, 

[34m[1mtrain: [0mScanning C:\Users\Bora\Desktop\HU\AIN433\Assignment 3\dataset\train\labels.cache... 1000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1000/1000 [00:00<?, ?it/s]
[34m[1mval: [0mScanning C:\Users\Bora\Desktop\HU\AIN433\Assignment 3\dataset\val\labels.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<?, ?it/s]


Plotting labels to runs\detect\freeze_3_opt_Adam_lr_0.001_batch_168\labels.jpg... 
[34m[1moptimizer:[0m Adam(lr=0.001, momentum=0.937) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns\detect\freeze_3_opt_Adam_lr_0.001_batch_168[0m
Starting training for 30 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/30      5.69G      1.336      1.432     0.9808       1084        640: 100%|██████████| 63/63 [00:18<00:00,  3.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:02<00:00,  2.76it/s]

                   all        200      11906      0.963      0.876      0.957      0.659






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/30      6.94G      1.125     0.7389      0.914        742        640: 100%|██████████| 63/63 [00:28<00:00,  2.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:05<00:00,  1.21it/s]

                   all        200      11906      0.964       0.94      0.978      0.743






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/30      6.93G      1.032     0.6497     0.8939        952        640: 100%|██████████| 63/63 [00:30<00:00,  2.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:04<00:00,  1.49it/s]

                   all        200      11906      0.986       0.95      0.984      0.762






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/30      5.87G     0.9779     0.6038     0.8808        993        640: 100%|██████████| 63/63 [00:38<00:00,  1.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:03<00:00,  1.97it/s]


                   all        200      11906      0.986      0.944      0.986      0.778

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/30       4.5G     0.9485     0.5696     0.8724        731        640: 100%|██████████| 63/63 [00:35<00:00,  1.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:04<00:00,  1.75it/s]

                   all        200      11906      0.988      0.964       0.99      0.766






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/30      5.76G     0.9378     0.5531     0.8696        619        640: 100%|██████████| 63/63 [00:37<00:00,  1.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:05<00:00,  1.34it/s]

                   all        200      11906       0.99      0.968      0.991      0.781






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/30      8.75G     0.9265      0.531     0.8636        919        640: 100%|██████████| 63/63 [27:02<00:00, 25.76s/it] 
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [03:46<00:00, 32.34s/it]

                   all        200      11906       0.99      0.975      0.993      0.789






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/30      5.79G     0.8974      0.516     0.8608        533        640: 100%|██████████| 63/63 [00:44<00:00,  1.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:06<00:00,  1.14it/s]

                   all        200      11906      0.991      0.973      0.993      0.799






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/30      5.97G     0.8933     0.5065      0.859       1004        640: 100%|██████████| 63/63 [00:55<00:00,  1.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:08<00:00,  1.16s/it]


                   all        200      11906      0.992      0.974      0.993      0.794

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/30       8.4G     0.8773     0.4921     0.8554       1161        640: 100%|██████████| 63/63 [05:16<00:00,  5.03s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:28<00:00,  4.08s/it]

                   all        200      11906      0.988      0.977      0.993      0.798






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/30       5.4G     0.8631     0.4827     0.8528        917        640: 100%|██████████| 63/63 [00:46<00:00,  1.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:06<00:00,  1.09it/s]

                   all        200      11906      0.988      0.974      0.993      0.777






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/30      5.09G     0.8788     0.4827      0.855        697        640: 100%|██████████| 63/63 [00:48<00:00,  1.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:07<00:00,  1.11s/it]


                   all        200      11906      0.991      0.978      0.994      0.804

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/30      6.62G     0.8532     0.4738     0.8512        613        640: 100%|██████████| 63/63 [00:48<00:00,  1.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:06<00:00,  1.06it/s]

                   all        200      11906      0.991      0.979      0.994      0.807






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/30      4.32G      0.846     0.4637     0.8496        608        640: 100%|██████████| 63/63 [00:49<00:00,  1.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:06<00:00,  1.12it/s]

                   all        200      11906       0.99      0.982      0.994      0.806






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/30       5.5G     0.8379     0.4545     0.8479        904        640: 100%|██████████| 63/63 [00:45<00:00,  1.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:09<00:00,  1.42s/it]

                   all        200      11906       0.99      0.979      0.994      0.799






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/30      8.29G     0.8274     0.4496     0.8465        941        640: 100%|██████████| 63/63 [01:03<00:00,  1.01s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:05<00:00,  1.32it/s]

                   all        200      11906      0.991      0.981      0.994      0.808






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/30      6.31G     0.8232     0.4482     0.8465        656        640: 100%|██████████| 63/63 [00:48<00:00,  1.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:11<00:00,  1.65s/it]

                   all        200      11906       0.99      0.981      0.994      0.813






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/30      4.95G     0.8241     0.4444     0.8445       1115        640: 100%|██████████| 63/63 [00:51<00:00,  1.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:05<00:00,  1.18it/s]

                   all        200      11906      0.992      0.982      0.994      0.814






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/30      6.21G     0.8168      0.436     0.8425        719        640: 100%|██████████| 63/63 [00:48<00:00,  1.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:07<00:00,  1.11s/it]

                   all        200      11906      0.991      0.983      0.994      0.815






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/30      4.78G      0.805     0.4289     0.8411       1216        640: 100%|██████████| 63/63 [00:51<00:00,  1.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:06<00:00,  1.09it/s]

                   all        200      11906      0.992      0.982      0.994      0.812





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/30      3.73G     0.7865     0.4559     0.8442        372        640: 100%|██████████| 63/63 [00:35<00:00,  1.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:04<00:00,  1.40it/s]

                   all        200      11906      0.991      0.981      0.994      0.808






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/30      3.89G     0.7671     0.4247     0.8382        667        640: 100%|██████████| 63/63 [00:36<00:00,  1.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:05<00:00,  1.39it/s]

                   all        200      11906      0.992       0.98      0.994      0.814






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/30      3.96G     0.7686     0.4168     0.8371        503        640: 100%|██████████| 63/63 [00:37<00:00,  1.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:06<00:00,  1.05it/s]

                   all        200      11906      0.993       0.98      0.994      0.819






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/30      3.57G     0.7552     0.4058      0.836        555        640: 100%|██████████| 63/63 [00:41<00:00,  1.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:05<00:00,  1.17it/s]

                   all        200      11906      0.993      0.981      0.994       0.82






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/30      4.38G     0.7416      0.398     0.8335        523        640: 100%|██████████| 63/63 [00:45<00:00,  1.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:06<00:00,  1.14it/s]

                   all        200      11906      0.993      0.981      0.994       0.82






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/30      4.13G     0.7384      0.394     0.8328        477        640: 100%|██████████| 63/63 [00:41<00:00,  1.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:06<00:00,  1.09it/s]

                   all        200      11906      0.994      0.981      0.994      0.821






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/30      3.48G     0.7392     0.3921     0.8324        506        640: 100%|██████████| 63/63 [00:41<00:00,  1.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:07<00:00,  1.05s/it]

                   all        200      11906      0.994       0.98      0.994      0.819






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/30       3.6G     0.7346      0.387     0.8328        365        640: 100%|██████████| 63/63 [00:49<00:00,  1.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:06<00:00,  1.14it/s]

                   all        200      11906      0.995      0.981      0.994      0.823






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/30      3.91G     0.7357     0.3873     0.8311        460        640: 100%|██████████| 63/63 [00:44<00:00,  1.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:08<00:00,  1.16s/it]

                   all        200      11906      0.994      0.981      0.994      0.823






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/30      4.79G     0.7246     0.3812     0.8297        479        640: 100%|██████████| 63/63 [00:42<00:00,  1.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:06<00:00,  1.01it/s]

                   all        200      11906      0.993      0.983      0.994      0.824






30 epochs completed in 1.024 hours.
Optimizer stripped from runs\detect\freeze_3_opt_Adam_lr_0.001_batch_168\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\freeze_3_opt_Adam_lr_0.001_batch_168\weights\best.pt, 6.2MB

Validating runs\detect\freeze_3_opt_Adam_lr_0.001_batch_168\weights\best.pt...
Ultralytics 8.3.49  Python-3.10.0 torch-2.4.1+cu124 CUDA:0 (NVIDIA GeForce RTX 3080 Laptop GPU, 8192MiB)
Model summary (fused): 168 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:16<00:00,  2.38s/it]


                   all        200      11906      0.993      0.983      0.994      0.824
Speed: 1.3ms preprocess, 7.0ms inference, 0.0ms loss, 8.6ms postprocess per image
Results saved to [1mruns\detect\freeze_3_opt_Adam_lr_0.001_batch_168[0m


KeyError: 'train/box_loss'

In [None]:
model2, results2 = train_yolo_frozen_blocks(
    blocks_to_freeze=3,
    optimizer='AdamW',  
    lr=0.001,
    batch_size=16,
    epochs=30
)

visualize_results(results2)

In [None]:
model3, results3 = train_yolo_frozen_blocks(
    blocks_to_freeze=3,
    optimizer='Adam',
    lr=0.003,  
    batch_size=32,  
    epochs=30
)

visualize_results(results3)