# Deep Learning group assignment
Group name: Angry Birds

Group members:
- Nienke Reijnen: 2117034
- Andrea Ciavatti: 2115635
- Niels Boonstra: 1451294
- Yannick Lankhorst: 2052754
- Thom Zoomer: 2059225
- Anne Barnasconi: 2053988

## Setting up the environment

Before running, make sure the following packages are also installed (according to the lab 8 instructions):
- pip install imageio
- pip install future
- pip install tensorboard

In [7]:
import os
import json
import shutil
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset
import json
from PIL import Image
from tqdm import tqdm
import random
from torchvision.transforms import functional
from torch.utils.data import ConcatDataset
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import itertools
# from ultralytics import YOLO
# %matplotlib inline

In [None]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    device_name = torch.cuda.get_device_name(0)
else:
    device = torch.device("cpu")
    device_name = "CPU"
print(f"Device: {device}, {device_name}")

## Data loading & preprocessing

- DONE Correct implementation of data loaders for images and annotations for your specific object detection model
- DONE Use of data augmentation techniques
- DONE Appropriate shuffling and batching of data
- TO DO: Conduct an online search for relevant open-source datasets, and if you can find them, use them in your application as additional training data (to improve generalization)

### The code below needs to be run on your own laptop to rename the images of the scarecrow dataset

In [9]:
# Define base directory and dataset splits
base_dir = "datasets/scarecrow_dataset"
splits = ["train", "val", "test"]

# Allowed image extensions (all lowercased for matching)
image_extensions = (".png", ".jpg", ".jpeg")


def rename_split_images(base_dir, split, image_extensions=image_extensions):
    """
    Rename the images of one dataset split to "<split>_original_<idx><ext>" and
    rewrite the split's annotations.json so that it points at the new names.

    Parameters:
        base_dir: dataset root that contains one folder per split.
        split: name of the split folder (e.g. "train").
        image_extensions: lowercase extensions that are allowed to be renamed.

    Returns:
        The number of image files that were actually renamed.

    The function is safe to run more than once: a file that already carries its
    target name is left alone, several annotation entries that refer to the same
    file all receive the same new name, and an existing file is never overwritten.
    """
    print(f"\nProcessing {split} split...")

    image_dir = os.path.join(base_dir, split, "images")
    annotation_path = os.path.join(base_dir, split, "annotations.json")

    # A missing split should not abort the remaining splits
    if not os.path.isfile(annotation_path):
        print(f"WARNING: {annotation_path} not found, skipping {split} split")
        return 0

    # Load annotations
    with open(annotation_path, "r") as f:
        annotations = json.load(f)

    print(f"Found {len(annotations)} annotation entries in annotations.json")

    # old file name -> new file name, for every file handled during this run
    handled = {}
    renamed_count = 0

    for idx, annotation in enumerate(annotations, 1):
        old_name = annotation["OriginalFileName"]

        # Same file referenced again: reuse the name it already received instead of
        # looking for a file that no longer exists under its old name
        if old_name in handled:
            annotation["OriginalFileName"] = handled[old_name]
            continue

        ext = os.path.splitext(old_name)[1].lower()  # Preserve the file extension
        if ext not in image_extensions:
            print(f"WARNING: Skipping {old_name}: extension {ext!r} is not an allowed image extension")
            continue

        new_name = f"{split}_original_{idx}{ext}"
        old_path = os.path.join(image_dir, old_name)
        new_path = os.path.join(image_dir, new_name)

        # Check if the image file exists before renaming
        if not os.path.exists(old_path):
            print(f"WARNING: Image file {old_name} not found in {image_dir}")
            continue

        if old_name != new_name:
            # Never clobber another image that already occupies the target name
            if os.path.exists(new_path):
                print(f"WARNING: {new_name} already exists in {image_dir}, leaving {old_name} untouched")
                continue
            os.rename(old_path, new_path)
            renamed_count += 1
            print(f"Renamed {old_name} to {new_name}")

        # Update the annotation with the new image name
        handled[old_name] = new_name
        annotation["OriginalFileName"] = new_name

    # Save updated annotations
    with open(annotation_path, "w") as f:
        json.dump(annotations, f, indent=4)

    print(f"{split} renamed and annotations updated.")
    return renamed_count


for split in splits:
    rename_split_images(base_dir, split)


Processing train split...
Found 263 annotation entries in annotations.json
Renamed Schermafbeelding 2024-06-04 105530.png to train_original_1.png
Renamed Schermafbeelding 2024-06-04 102707.png to train_original_2.png
Renamed Schermafbeelding 2024-06-04 102122.png to train_original_3.png
Renamed Schermafbeelding 2024-06-04 105217.png to train_original_4.png
Renamed Schermafbeelding 2024-06-04 104532.png to train_original_5.png
Renamed Schermafbeelding 2024-06-04 102227.png to train_original_6.png
Renamed Schermafbeelding 2024-06-04 102744.png to train_original_7.png
Renamed Schermafbeelding 2024-06-04 104541.png to train_original_8.png
Renamed Schermafbeelding 2024-06-04 101800.png to train_original_9.png
Renamed Schermafbeelding 2024-06-04 102731.png to train_original_10.png
Renamed Schermafbeelding 2024-06-04 105154.png to train_original_11.png
Renamed Schermafbeelding 2024-06-04 105244.png to train_original_12.png
Renamed Schermafbeelding 2024-06-04 101719.png to train_original_13.p

### Data loading & augmentation

In [10]:
######################################
### Defining a CustomDataset class ###
######################################


class CustomDataset(Dataset):
    """
    Object-detection dataset backed by an "images" folder and an "annotations.json" file.

    Every sample is an (image, target) pair, where target is a dictionary with
    'boxes' (float32 tensor of shape (num_birds, 4) holding [x_min, y_min, x_max, y_max])
    and 'labels' (int64 tensor of ones, because 'Bird' is the only class that is kept).
    """

    def __init__(self, data_path, transform = None):
        """
        Initialize the custom dataset.
        Works for both the train data and the test data.

        data_path: folder that contains "images/" and "annotations.json".
        transform: optional callable taking (image, target) and returning (image, target).
        """
        self.images_dir = os.path.join(data_path, "images")
        self.transform = transform
        annotations_file = os.path.join(data_path, "annotations.json")
        with open(annotations_file, 'r') as f:
            annotations_list = json.load(f)

        # Extract the bounding boxes from the JSON entries once and keep them as
        # [x_min, y_min, x_max, y_max] tensors next to the image file name
        self.data = [
            {
                'imagename': entry['OriginalFileName'],
                'bird_boxes_tensor': self.extract_bird_boxes(entry['AnnotationData']),
            }
            for entry in annotations_list
        ]

        # Note: we should not load all the images into a tensor here, as it would take too much memory. We load images into a tensor in the __getitem__ method.


    def extract_bird_boxes(self, annotation_data):
        """
        Extract the coordinates of the birds from the annotation data in the JSON file and return it as a tensor.

        Only entries labelled 'Bird' are used; each one is reduced to the axis-aligned
        box around its coordinate points. The result always has shape (num_birds, 4),
        also when there are no birds, i.e. (0, 4).
        """
        bird_boxes = []
        for entry in annotation_data:
            if entry['Label'] != 'Bird':
                continue
            coordinates_list = entry['Coordinates']
            if not coordinates_list:
                # No points, so there is no box to build (min()/max() would raise)
                continue
            x_coordinates = [point['X'] for point in coordinates_list]
            y_coordinates = [point['Y'] for point in coordinates_list]
            bird_boxes.append([min(x_coordinates), min(y_coordinates),
                               max(x_coordinates), max(y_coordinates)])

        # reshape keeps the (N, 4) layout even for N == 0, where torch.tensor([]) is 1-D
        return torch.tensor(bird_boxes, dtype=torch.float32).reshape(-1, 4) # Shape: (num_birds, 4)


    def __len__(self):
        """
        Return the size of the dataset, i.e. the number of images.
        """
        return len(self.data)


    def __getitem__(self, index):
        """
        Load an image and its corresponding annotations.
        Returns the image and a target dictionary with bounding boxes and labels (we need this for compatibility with object detection models like Faster R-CNN)
        """
        item = self.data[index]
        image_path = os.path.join(self.images_dir, item['imagename'])
        image = Image.open(image_path).convert("RGB")

        # Hand out a copy: transforms may edit the boxes in place and must never
        # change the annotations stored in self.data
        bird_boxes = item['bird_boxes_tensor'].clone()
        labels = torch.ones((bird_boxes.shape[0],), dtype=torch.int64) # All kept boxes are 'Bird' --> class 1
        target = {'boxes': bird_boxes, 'labels': labels}

        # Apply data augmentations
        if self.transform is not None:
            image, target = self.transform(image, target)

        return image, target


In [12]:
###############################################
### Finding the mean and std of the dataset ###
###############################################

def calculate_mean_and_std(dataset):
    """
    Estimate per-channel normalization statistics for a dataset of 3-channel images.

    dataset: iterable of (image, target) pairs; the image may be a PIL image
             (converted with ToTensor) or an already converted (C, H, W) tensor.

    Returns (mean, std), two tensors of shape (3,). Each image contributes its own
    per-channel mean and standard deviation and these are averaged over the images,
    so every image weighs the same regardless of its size and the std is the average
    per-image std rather than the pooled std over all pixels.

    Raises ValueError when the dataset yields no images.
    """
    to_tensor = transforms.ToTensor()  # built once instead of once per image

    # Running sums of the per-image statistics
    mean = torch.zeros(3)
    std = torch.zeros(3)
    num_images = 0

    # Use tqdm to add a progress bar
    for image, _ in tqdm(dataset, desc="Calculating Mean and Std", unit="image"):
        # Convert image to tensor if it is in PIL format
        if not isinstance(image, torch.Tensor):
            image = to_tensor(image)  # shape: (C, H, W)

        mean += image.mean([1, 2])  # mean per channel (C,)
        std += image.std([1, 2])    # std per channel (C,)
        num_images += 1

    if num_images == 0:
        # Dividing by zero would silently produce NaN statistics
        raise ValueError("Cannot compute mean and std of an empty dataset")

    # Average the sums to get the mean and std
    mean /= num_images
    std /= num_images

    return mean, std

# Load both training sets without any transformation, so the statistics describe the raw images
train_data_original, train_data_extra = (
    CustomDataset(train_dir, transform=None)
    for train_dir in ("datasets/scarecrow_dataset/train", "datasets/bird-detection-farm/train")
)

# Treat the two sources as one training set
train_data = ConcatDataset([train_data_original, train_data_extra])

# Per-channel statistics that are used for normalization further down
mean, std = calculate_mean_and_std(train_data)
print(f"Dataset Mean: {mean}")
print(f"Dataset Std: {std}")


Calculating Mean and Std: 100%|██████████| 381/381 [01:55<00:00,  3.31image/s]


Dataset Mean: tensor([0.5409, 0.5505, 0.3894])
Dataset Std: tensor([0.1674, 0.1557, 0.1689])


Dataset Mean: tensor([0.5390, 0.5306, 0.4421])

Dataset Std: tensor([0.1624, 0.1527, 0.1647])

### With the extra data

Dataset Mean: tensor([0.5409, 0.5505, 0.3894])

Dataset Std: tensor([0.1674, 0.1557, 0.1689])

In [13]:
#############################################
### Defining a CustomTransformation class ###
##############################################
import math
import torchvision.transforms.functional as TF

# Seed Python's `random` module and PyTorch with a fixed value before any random augmentation is drawn
random.seed(7)
torch.manual_seed(7)
# Per-channel normalization constants; they equal the "Dataset Mean" / "Dataset Std" values
# printed by the calculate_mean_and_std cell for the combined training data
mean = [0.5409, 0.5505, 0.3894]
std = [0.1674, 0.1557, 0.1689]

class CustomTransformation:
    """
    Joint augmentation for a PIL image and its detection target.

    Calling an instance applies, in this order: random horizontal flip, random
    vertical flip, random rotation by a multiple of 90 degrees, random resize, and
    finally colour jitter + tensor conversion + normalization on the image.
    target['boxes'] holds [x_min, y_min, x_max, y_max] pixel boxes and is kept in
    sync with the image. The geometric steps always work on a copy of the boxes, so
    the tensor passed in by the caller is never modified in place.
    """

    def __init__(self):
        # Image-only steps; `mean` and `std` are the module-level normalization constants
        self.transforms = transforms.Compose([transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
                                              transforms.ToTensor(),
                                              transforms.Normalize(mean, std)])


    @staticmethod
    def _copy_boxes(target):
        """
        Return target['boxes'] as a fresh float32 tensor of shape (N, 4), also for N == 0.
        """
        return torch.as_tensor(target['boxes'], dtype=torch.float32).reshape(-1, 4).clone()


    def perform_horizontal_flip(self, image, target, prob = 0.25):
        """
        Horizontally flips the image with a given probability, default is 0.25
        """
        if random.random() < prob:
            # Flip the image
            image = functional.hflip(image)

            # Mirror the boxes: the old right edge becomes the new left edge
            width, _ = image.size
            boxes = self._copy_boxes(target)
            x_min = boxes[:, 0].clone()
            x_max = boxes[:, 2].clone()
            boxes[:, 0] = width - x_max
            boxes[:, 2] = width - x_min
            target['boxes'] = boxes

        return image, target


    def perform_vertical_flip(self, image, target, prob = 0.25):
        """
        Vertically flips the image with a given probability, default is 0.25
        """
        if random.random() < prob:
            # Flip the image
            image = functional.vflip(image)

            # Mirror the boxes: the old bottom edge becomes the new top edge
            _, height = image.size
            boxes = self._copy_boxes(target)
            y_min = boxes[:, 1].clone()
            y_max = boxes[:, 3].clone()
            boxes[:, 1] = height - y_max
            boxes[:, 3] = height - y_min
            target['boxes'] = boxes

        return image, target


    def rotate_box(self, boxes, angle, img_width, img_height, expand=False):
        """
        Rotate boxes by `angle` degrees counter-clockwise (as displayed) around the image centre.

        boxes: tensor of shape (N, 4) with [x_min, y_min, x_max, y_max]; not modified.
        angle: rotation angle in degrees, same convention as torchvision's rotate.
        img_width, img_height: size of the image BEFORE the rotation.
        expand: False keeps the original canvas (the original behaviour); True places
                the result on the canvas that is enlarged to hold the whole rotated
                image, i.e. it matches rotate(..., expand=True). The enlarged canvas
                size is exact for multiples of 90 degrees.

        Returns a float32 tensor of shape (N, 4): the axis-aligned box around the four
        rotated corners of every input box.
        """
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        # The y axis of an image points down, so a counter-clockwise rotation on
        # screen is a rotation by -angle in pixel coordinates
        angle_rad = math.radians(-angle)
        cos_a, sin_a = math.cos(angle_rad), math.sin(angle_rad)

        cx, cy = img_width / 2, img_height / 2
        if expand:
            # Centre of the enlarged canvas that holds the whole rotated image
            new_cx = (abs(img_width * cos_a) + abs(img_height * sin_a)) / 2
            new_cy = (abs(img_width * sin_a) + abs(img_height * cos_a)) / 2
        else:
            new_cx, new_cy = cx, cy

        x0, y0, x1, y1 = boxes.unbind(dim=1)
        # Four corners per box, translated so that the image centre is the origin
        xs = torch.stack([x0, x1, x1, x0], dim=1) - cx
        ys = torch.stack([y0, y0, y1, y1], dim=1) - cy

        # Rotate, then translate back onto the (possibly enlarged) canvas
        rot_x = xs * cos_a - ys * sin_a + new_cx
        rot_y = xs * sin_a + ys * cos_a + new_cy

        return torch.stack([rot_x.min(dim=1).values, rot_y.min(dim=1).values,
                            rot_x.max(dim=1).values, rot_y.max(dim=1).values], dim=1)


    def perform_random_rotation(self, image, target, prob=0.25, rotations=(90, 180, 270)):
        """
        With probability `prob`, rotate image and boxes by an angle drawn from `rotations`.

        The canvas is expanded so that non-square images are not cropped by a 90 or
        270 degree rotation and every box stays inside the rotated image.
        """
        if random.random() < prob:
            angle = random.choice(rotations)
            w, h = image.size
            image = functional.rotate(image, angle, expand=True)  # CCW rotation
            target['boxes'] = self.rotate_box(self._copy_boxes(target), angle, w, h, expand=True)
        return image, target



    def perform_random_resize(self, image, target, scale_range=(0.75, 1.25)):
        """
        Perform a random resize within the specified scale range, default scale range is (0.75, 1.25)
        """
        scale = random.uniform(*scale_range)

        # Resize the image (never below one pixel per side)
        width, height = image.size
        new_height = max(1, int(height * scale))
        new_width = max(1, int(width * scale))
        image = functional.resize(image, [new_height, new_width])

        # Scale the boxes with the factors that were really applied; they differ
        # slightly from `scale` because the new size is truncated to whole pixels
        scale_x = new_width / width
        scale_y = new_height / height
        boxes = self._copy_boxes(target)
        target['boxes'] = boxes * torch.tensor([scale_x, scale_y, scale_x, scale_y])

        return image, target


    def __call__(self, image, target):
        """
        Apply the transformations to an image and its target
        """
        image, target = self.perform_horizontal_flip(image, target)
        image, target = self.perform_vertical_flip(image, target)
        image, target = self.perform_random_rotation(image, target)
        image, target = self.perform_random_resize(image, target)
        image = self.transforms(image)
        return image, target

In [15]:
batch_size = 32

# Define paths
train_data_original_path = "datasets/scarecrow_dataset/train"
test_data_original_path = "datasets/scarecrow_dataset/test"
val_data_original_path = "datasets/scarecrow_dataset/val"
train_data_extra_path = "datasets/bird-detection-farm/train"
valid_data_extra_path = "datasets/bird-detection-farm/valid"
test_data_extra_path = "datasets/bird-detection-farm/test"

# Image-only preprocessing for evaluation data: tensor conversion + normalization, nothing random
_to_normalized_tensor = transforms.Compose([transforms.ToTensor(),
                                            transforms.Normalize(mean, std)])


def eval_transform(image, target):
    """
    Deterministic transform for validation and test samples.
    The boxes are left untouched because the image geometry does not change.
    """
    return _to_normalized_tensor(image), target


def detection_collate(batch):
    """
    Turn a list of (image, target) samples into (images, targets) tuples.
    Images differ in size and every image has its own number of boxes, so the
    samples cannot be stacked into one tensor the way the default collate does.
    """
    return tuple(zip(*batch))


# Random augmentation is only meant for the training data
transform = CustomTransformation()

train_data_raw = CustomDataset(train_data_original_path)
train_data_original = CustomDataset(train_data_original_path, transform)
valid_data_original = CustomDataset(val_data_original_path, eval_transform)
test_data_original = CustomDataset(test_data_original_path, eval_transform)

train_data_extra = CustomDataset(train_data_extra_path, transform)
valid_data_extra = CustomDataset(valid_data_extra_path, eval_transform)
test_data_extra = CustomDataset(test_data_extra_path, eval_transform)

# Split old training set into train/val
#train_data_original, valid_data_original = torch.utils.data.random_split(train_data_original, [0.8, 0.2])

# Combine datasets
train_data = torch.utils.data.ConcatDataset([train_data_original, train_data_extra])
valid_data = torch.utils.data.ConcatDataset([valid_data_original, valid_data_extra])
test_data = torch.utils.data.ConcatDataset([test_data_original, test_data_extra])

# Loaders
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size, collate_fn=detection_collate)
# No shuffling (and no random augmentation) for validation and test data because we want a consistent, reproducible evaluation:
val_loader = DataLoader(valid_data, batch_size=batch_size, collate_fn=detection_collate)
test_loader = DataLoader(test_data, batch_size=batch_size, collate_fn=detection_collate)


In [16]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Take a single (image, target) sample from the training set; change the index to look at another one
image, target = train_data_original[88]

# Bring the tensor from (C, H, W) into the (H, W, C) layout that matplotlib expects
image_np = image.permute(1, 2, 0).numpy()
# Undo the normalization and keep the values inside the displayable [0, 1] range
image_np = (image_np * std + mean).clip(0, 1)

# Show the image and draw every bounding box on top of it
fig, ax = plt.subplots(1, figsize=(10, 10))
ax.imshow(image_np)

for box in target['boxes']:
    x_min, y_min, x_max, y_max = box
    box_width = x_max - x_min
    box_height = y_max - y_min
    rect = patches.Rectangle((x_min, y_min), box_width, box_height,
                             linewidth=2, edgecolor='r', facecolor='none')
    ax.add_patch(rect)

plt.axis('off')
plt.show()

<Figure size 1000x1000 with 1 Axes>

In [None]:
###################################
### Debugging data augmentation ###
###################################

# import copy
# import numpy as np

# random.seed(42)
# torch.manual_seed(42)

# # Choose the index of an image to test
# idx = 55
# image_raw, target_raw = train_data_raw[idx]

# def visualize(image, boxes, title="Image"):
#     # Convert PIL image to NumPy
#     if isinstance(image, torch.Tensor):
#         image = image.permute(1, 2, 0).numpy()
#     elif hasattr(image, 'size') and not isinstance(image, np.ndarray):
#         # It's a PIL image — convert to NumPy and scale to [0, 1]
#         image = np.array(image).astype('float32') / 255.0

#     image = np.clip(image, 0, 1)

#     fig, ax = plt.subplots(1, figsize=(8, 8))
#     ax.imshow(image)

#     for box in boxes:
#         x_min, y_min, x_max, y_max = box
#         rect = patches.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
#                                  linewidth=2, edgecolor='red', facecolor='none')
#         ax.add_patch(rect)

#     plt.title(title)
#     plt.axis('off')
#     plt.show()

# transform = CustomTransformation()

# print(image_raw, target_raw)

# # Test individual transformations manually
# image_aug, target_aug = transform.perform_horizontal_flip(
#     copy.deepcopy(image_raw), {'boxes': target_raw['boxes'].clone()}, prob=1.0
# )

# image_aug3, target_aug3 = transform.perform_vertical_flip(
#     copy.deepcopy(image_raw), {'boxes': target_raw['boxes'].clone()}, prob=1.0
# )

# image_aug2, target_aug2 = transform.perform_random_rotation(
#     copy.deepcopy(image_raw), {'boxes': target_raw['boxes'].clone()}, prob=1.0
# )

# image_aug4, target_aug4 = transform.perform_random_rotation(
#     copy.deepcopy(image_raw), {'boxes': target_raw['boxes'].clone()}, prob=1.0, rotations = [90]
# )

# image_aug5, target_aug5 = transform.perform_random_rotation(
#     copy.deepcopy(image_raw), {'boxes': target_raw['boxes'].clone()}, prob=1.0, rotations = [180]
# )

# image_aug6, target_aug6 = transform.perform_random_rotation(
#     copy.deepcopy(image_raw), {'boxes': target_raw['boxes'].clone()}, prob=1.0, rotations = [270])

# # Visualize before/after
# visualize(image_raw, target_raw['boxes'], title="Original")
# visualize(image_aug, target_aug['boxes'], title="Horizontally Flipped")
# visualize(image_aug3, target_aug3['boxes'], title="Vertically Flipped")
# visualize(image_aug4, target_aug4['boxes'], title="90 degree rotation")
# visualize(image_aug5, target_aug5['boxes'], title="180 degree rotation")
# visualize(image_aug6, target_aug6['boxes'], title="270 degree rotation")

In [18]:
import json
import os

#this changes the annations format to Txt which Yolo can read. 
def convert_to_yolo_format(data_path, annotations_file, output_dir, class_map=None):
    """
    Convert the JSON annotations of one dataset split into YOLO label files.

    For every annotated image a text file with the same base name is written to
    "<output_dir>/labels". Each line has the form
    "class_id x_center y_center width height", with all four values normalized by
    the image width/height.

    data_path: split folder that contains the "images" directory.
    annotations_file: path to the annotations.json of that split.
    output_dir: folder in which the "labels" directory is created.
    class_map: mapping from annotation label to YOLO class id; defaults to {"Bird": 0}.
               Objects with a label that is not in the map are ignored.

    Boxes are clipped to the image, boxes without any area after clipping are
    dropped, and identical lines are written only once.
    """
    if class_map is None:
        class_map = {"Bird": 0}

    # Load annotations JSON file
    with open(annotations_file, 'r') as f:
        annotations_list = json.load(f)

    # Ensure the output directory exists
    labels_dir = os.path.join(output_dir, 'labels')
    os.makedirs(labels_dir, exist_ok=True)

    # Process each image
    for entry in annotations_list:
        image_name = entry['OriginalFileName']
        annotation_data = entry['AnnotationData']

        # Load image to get width and height
        image_path = os.path.join(data_path, 'images', image_name)
        with Image.open(image_path) as img:
            img_width, img_height = img.size

        label_lines = []
        for obj in annotation_data:
            class_name = obj['Label']  # 'Label' field in the annotation data
            if class_name not in class_map:
                continue

            coordinates = obj['Coordinates']
            if not coordinates:
                # Nothing to build a box from
                continue

            x_values = [coord['X'] for coord in coordinates]
            y_values = [coord['Y'] for coord in coordinates]

            # Axis-aligned box around the points, clipped to the image so that the
            # normalized values stay inside [0, 1]
            x_min = min(max(min(x_values), 0), img_width)
            y_min = min(max(min(y_values), 0), img_height)
            x_max = min(max(max(x_values), 0), img_width)
            y_max = min(max(max(y_values), 0), img_height)
            if x_max <= x_min or y_max <= y_min:
                # Box has no area (or lies completely outside the image)
                continue

            # YOLO format: class_id x_center y_center width height (all normalized)
            class_id = class_map[class_name]
            x_center = (x_min + x_max) / 2 / img_width
            y_center = (y_min + y_max) / 2 / img_height
            norm_width = (x_max - x_min) / img_width
            norm_height = (y_max - y_min) / img_height

            line = f"{class_id} {x_center} {y_center} {norm_width} {norm_height}\n"
            if line not in label_lines:
                label_lines.append(line)

        # Create the label file for this image (an empty file marks a background image)
        label_file = os.path.join(labels_dir, os.path.splitext(image_name)[0] + '.txt')
        with open(label_file, 'w') as label_f:
            label_f.writelines(label_lines)

        print("Created txt for:"+image_name)
        
    
    

# Write the YOLO label files for every split of the scarecrow dataset (val, train, test)
for split_name in ("val", "train", "test"):
    data_path = f"datasets/scarecrow_dataset/{split_name}"  # Path to the split folder
    annotations_file = f"datasets/scarecrow_dataset/{split_name}/annotations.json"  # Path to annotations.json
    output_dir = f"datasets/scarecrow_dataset/{split_name}"  # Output directory for YOLO annotations
    convert_to_yolo_format(data_path, annotations_file, output_dir)


Created txt for:val_original_1.png
Created txt for:val_original_2.png
Created txt for:val_original_3.png
Created txt for:val_original_4.png
Created txt for:val_original_5.png
Created txt for:val_original_6.png
Created txt for:val_original_7.png
Created txt for:val_original_8.png
Created txt for:val_original_9.png
Created txt for:val_original_10.png
Created txt for:val_original_11.png
Created txt for:val_original_12.png
Created txt for:val_original_13.png
Created txt for:val_original_14.png
Created txt for:val_original_15.png
Created txt for:val_original_16.png
Created txt for:val_original_17.png
Created txt for:val_original_18.png
Created txt for:val_original_19.png
Created txt for:val_original_20.png
Created txt for:val_original_21.png
Created txt for:val_original_22.png
Created txt for:val_original_23.png
Created txt for:val_original_24.jpg
Created txt for:val_original_25.jpg
Created txt for:val_original_26.jpg
Created txt for:val_original_27.png
Created txt for:val_original_28.png
C

In [None]:
from ultralytics import YOLO

# Start from the pretrained YOLO11 nano weights
model = YOLO("yolo11n.pt")

model.train(
    data='data.yaml',                   # Path to dataset YAML file
    epochs=50,
    imgsz=640,
    batch=32,
    # Use the first GPU when there is one; the recorded CPU run needed about
    # 21 seconds per batch and was interrupted during the first epoch
    device=0 if torch.cuda.is_available() else 'cpu',
)

# Evaluate the model's performance on the validation set
metrics = model.val()

# Perform object detection on an image
# NOTE(review): the rename cell gives the test images names like "test_original_<n>.<ext>";
# confirm that this file still exists under its original name
results = model("datasets/scarecrow_dataset/test/images/20240901120856_0268_D_frame_1020 - kopie.png")  # Predict on an image
results[0].show()  # Display results
#model.save('yolov8_trained.pt') # would save the trained model

Ultralytics 8.3.128  Python-3.12.7 torch-2.6.0+cpu CPU (11th Gen Intel Core(TM) i7-1195G7 2.90GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train4, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0, 

100%|██████████| 755k/755k [00:00<00:00, 12.9MB/s]

Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      6640  ultralytics.nn.modules.block.C3k2            [32, 64, 1, False, 0.25]      
  3                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
  4                  -1  1     26080  ultralytics.nn.modules.block.C3k2            [64, 128, 1, False, 0.25]     
  5                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
  6                  -1  1     87040  ultralytics.nn.modules.block.C3k2            [128, 128, 1, True]           
  7                  -1  1    295424  ultralytics




 22                  -1  1    378880  ultralytics.nn.modules.block.C3k2            [384, 256, 1, True]           
 23        [16, 19, 22]  1    430867  ultralytics.nn.modules.head.Detect           [1, [64, 128, 256]]           
YOLO11n summary: 181 layers, 2,590,035 parameters, 2,590,019 gradients, 6.4 GFLOPs

Transferred 448/499 items from pretrained weights
Freezing layer 'model.23.dfl.conv.weight'
[34m[1mtrain: [0mFast image access  (ping: 0.20.1 ms, read: 601.177.4 MB/s, size: 10263.1 KB)


[34m[1mtrain: [0mScanning C:\Users\thomz\Documents\JADS\Master\Deep Learning\DL_angrybirds\datasets\scarecrow_dataset\train\labels... 263 images, 20 backgrounds, 0 corrupt: 100%|██████████| 263/263 [00:10<00:00, 24.43it/s] 

[34m[1mtrain: [0mNew cache created: C:\Users\thomz\Documents\JADS\Master\Deep Learning\DL_angrybirds\datasets\scarecrow_dataset\train\labels.cache
[34m[1mval: [0mFast image access  (ping: 0.30.5 ms, read: 482.753.3 MB/s, size: 7541.8 KB)



[34m[1mval: [0mScanning C:\Users\thomz\Documents\JADS\Master\Deep Learning\DL_angrybirds\datasets\scarecrow_dataset\val\labels... 32 images, 5 backgrounds, 0 corrupt: 100%|██████████| 32/32 [00:01<00:00, 24.28it/s]

[34m[1mval: [0mC:\Users\thomz\Documents\JADS\Master\Deep Learning\DL_angrybirds\datasets\scarecrow_dataset\val\images\DJI_0319.JPG: 1 duplicate labels removed
[34m[1mval: [0mNew cache created: C:\Users\thomz\Documents\JADS\Master\Deep Learning\DL_angrybirds\datasets\scarecrow_dataset\val\labels.cache





Plotting labels to c:\Users\thomz\Documents\JADS\Master\Deep Learning\angry_birds\DL_angrybirds\runs\detect\train4\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mc:\Users\thomz\Documents\JADS\Master\Deep Learning\angry_birds\DL_angrybirds\runs\detect\train4[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G      2.674      8.067      1.044        212        640:  56%|█████▌    | 5/9 [01:46<01:25, 21.26s/it]


KeyboardInterrupt: 

In [None]:
import model_utils
import importlib
# Reload so that edits made to model_utils.py are picked up without restarting the kernel
importlib.reload(model_utils)

# Create the YOLOModel wrapper from model_utils with the model path and device (e.g., 'cpu' or 'cuda')
trainer = model_utils.YOLOModel(model_path='yolov8n.pt', device='cpu')  # Use 'cuda' if you want to use a GPU

# Train the model; the dataset is passed through the data.yaml file, not through the DataLoaders
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'YOLOModel' object has no attribute 'train_loop'" - presumably raised inside
# model_utils, which is not part of this notebook; verify there
trainer.train(data_yaml='data.yaml', epochs=10, imgsz=640, batch_size=16)


# After training, test the model on the test dataset
# NOTE(review): assumes YOLOModel.test accepts a DataLoader and returns a number - confirm in model_utils
test_accuracy = trainer.test(test_loader)
print(f'Test Accuracy: {test_accuracy:.4f}')

AttributeError: 'YOLOModel' object has no attribute 'train_loop'

In [None]:
# Grid search: train one model per hyperparameter combination and keep the combination
# with the highest validation mAP@50
from ultralytics import YOLO  # imported here so the cell does not depend on an earlier cell

learning_rates = [1e-5, 1e-4, 1e-3]
batch_sizes = [32]
optimizers = ['SGD', 'Adam']
momentums = [0.9, 0.99]  # Only used for SGD
weight_decays = [1e-6, 1e-5, 1e-4]
image_sizes = [480, 640]

# Pretrained weights and dataset description
model_path = 'yolo11n.pt'
data_path = './data.yaml'

# Train on the first GPU when there is one, otherwise on the CPU
train_device = 0 if torch.cuda.is_available() else 'cpu'

# Iterate over all combinations of hyperparameters
best_map = 0
best_params = None

for lr, batch_size, optimizer, img_size, weight_decay in itertools.product(
    learning_rates, batch_sizes, optimizers, image_sizes, weight_decays
):
    # Momentum is only searched for SGD; for Adam the argument is left out entirely so
    # that the library default is used (passing None would not be a valid value)
    momentum_options = momentums if optimizer == 'SGD' else [None]

    for momentum in momentum_options:
        train_kwargs = dict(
            data=data_path,
            epochs=15,
            imgsz=img_size,
            batch=batch_size,
            device=train_device,
            lr0=lr,
            optimizer=optimizer,
            weight_decay=weight_decay,
        )
        if momentum is not None:
            train_kwargs['momentum'] = momentum

        # Start every run from the same pretrained weights
        model = YOLO(model_path)

        # Train the model
        results = model.train(**train_kwargs)

        # Metric to optimize: mAP at IoU 0.5 on the validation set
        # (results.maps holds the per-class mAP50-95 values, which is a different metric)
        map50 = float(results.box.map50)

        # Log the results
        print(f"lr: {lr}, batch_size: {batch_size}, optimizer: {optimizer}, momentum: {momentum}, img_size: {img_size}, weight_decay: {weight_decay}, mAP@50: {map50}")

        # Update the best parameters
        if map50 > best_map:
            best_map = map50
            best_params = {
                'lr': lr,
                'batch_size': batch_size,
                'optimizer': optimizer,
                'momentum': momentum,
                'weight_decay': weight_decay,
                'img_size': img_size
            }

# Print the best hyperparameters
print("Best hyperparameters:", best_params)
print("Best mAP@50:", best_map)
#Application of regularization techniques (e.g., dropout, batch normalization) 

In [None]:
#This is there to check if the Annotations are working correctly.
def visualize_yolo_annotations(image_path, label_path, class_names=None):
    """
    Show an image with the boxes of its YOLO label file drawn on top, to check that
    the converted annotations are correct.

    image_path: path of the image file.
    label_path: path of the YOLO label file; each line is
                "class_id x_center y_center width height" with normalized values.
    class_names: optional list of class names indexed by class id; when given, the
                 name is printed above each box.

    Blank lines and lines with fewer than five fields are skipped; fields after the
    fifth one are ignored.
    """
    # Load the image
    image = Image.open(image_path).convert("RGB")
    img_width, img_height = image.size

    # Create a plot
    fig, ax = plt.subplots(1, figsize=(10, 10))
    ax.imshow(image)

    # Read the YOLO label file
    with open(label_path, 'r') as f:
        lines = f.readlines()

    # Parse each line in the label file
    for line in lines:
        parts = line.split()
        if len(parts) < 5:
            # Blank or incomplete line: there is no box to draw
            continue
        class_id = int(parts[0])
        x_center, y_center, width, height = map(float, parts[1:5])

        # Convert normalized coordinates to absolute pixel values
        x_center *= img_width
        y_center *= img_height
        width *= img_width
        height *= img_height

        # Calculate the top-left corner of the bounding box
        x_min = x_center - (width / 2)
        y_min = y_center - (height / 2)

        # Create a rectangle patch
        rect = patches.Rectangle(
            (x_min, y_min), width, height,
            linewidth=2, edgecolor='r', facecolor='none'
        )
        ax.add_patch(rect)

        # Add class label (if provided and known for this class id)
        if class_names and 0 <= class_id < len(class_names):
            ax.text(
                x_min, y_min - 5, class_names[class_id],
                color='red', fontsize=12, backgroundcolor='white'
            )

    plt.axis('off')
    plt.show()

# Example usage
image_path = "scarecrow_dataset/train/images/train_original_89.png"
label_path = "scarecrow_dataset/train/labels/train_original_89.txt"
class_names = ["Bird"]  #

visualize_yolo_annotations(image_path, label_path, class_names)