In [None]:
import json
import os

import torch
import torch.nn as nn
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from yolov5 import YOLOv5

In [None]:
class CustomDataset(Dataset):
    """Detection dataset pairing ``.jpeg`` images with same-named ``.json`` files.

    Every ``<name>.jpeg`` in ``directory`` that has a sibling ``<name>.json``
    becomes one sample; images without an annotation file are skipped.

    Each annotation file must contain an ``'annotations'`` list whose entries
    provide ``'category_id'`` and ``'bbox'``. A ``bbox`` is unpacked as
    ``[x, y, w, h]`` and treated as a top-left corner plus size in pixels of
    the original image.

    Args:
        directory: Folder holding both the images and their annotation files.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, directory, transform=None):
        self.directory = directory
        self.transform = transform
        self.images = []
        self.annotations = []

        image_ext = '.jpeg'
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping identical across runs and machines.
        for filename in sorted(os.listdir(directory)):
            if not filename.endswith(image_ext):
                continue

            img_path = os.path.join(directory, filename)
            # Swap only the trailing extension so that names containing
            # '.jpeg' elsewhere (e.g. 'x.jpeg.jpeg') still resolve correctly.
            ann_name = filename[:-len(image_ext)] + '.json'
            ann_path = os.path.join(directory, ann_name)

            if os.path.exists(ann_path):
                self.images.append(img_path)
                self.annotations.append(ann_path)

    def __len__(self):
        """Return the number of image/annotation pairs found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, target)`` where ``image`` is the RGB image (after
            ``transform`` if one was given) and ``target`` is a dict with
            ``'boxes'``, a float tensor of shape ``(N, 4)`` holding
            ``[x_center, y_center, width, height]`` as fractions of the
            original image size, and ``'labels'``, a long tensor of shape
            ``(N,)`` holding the category ids.
        """
        img_path = self.images[idx]
        ann_path = self.annotations[idx]

        # The context manager releases the file handle as soon as the pixel
        # data has been copied by convert().
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        with open(ann_path, 'r', encoding='utf-8') as f:
            annotation = json.load(f)

        objects = annotation['annotations']
        categories = [obj['category_id'] for obj in objects]

        # Use the size *before* any transform: the boxes are stored relative
        # to the original image, and the normalized form survives resizing.
        image_width, image_height = image.size
        bboxes = [
            [
                (x + w / 2) / image_width,
                (y + h / 2) / image_height,
                w / image_width,
                h / image_height,
            ]
            for x, y, w, h in (obj['bbox'] for obj in objects)
        ]

        target = {
            # An explicit (N, 4) shape keeps images without any object
            # consistent with the rest (an empty list would give shape (0,)).
            'boxes': torch.tensor(bboxes, dtype=torch.float32).reshape(len(bboxes), 4),
            # Build the integer tensor directly instead of via a float tensor.
            'labels': torch.tensor(categories, dtype=torch.long),
        }

        if self.transform:
            image = self.transform(image)

        return image, target


In [None]:
# Resize every image to a fixed 640x640 input and convert it to a tensor.
# CustomDataset expresses boxes as fractions of the image size, so the resize
# does not invalidate them.
transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
])


def detection_collate(batch):
    """Collate ``(image, target)`` samples whose targets differ in length.

    The default collate function tries to stack every tensor in the target
    dicts and fails as soon as two images hold a different number of boxes.
    Here only the images are stacked; the targets stay a tuple with one dict
    per image.

    Args:
        batch: List of ``(image_tensor, target_dict)`` pairs from the dataset.

    Returns:
        ``(images, targets)`` with ``images`` stacked along a new first
        dimension and ``targets`` a tuple of the untouched target dicts.
    """
    images, targets = zip(*batch)
    return torch.stack(images, dim=0), targets


dataset = CustomDataset(directory="train", transform=transform)
data_loader = DataLoader(
    dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
    collate_fn=detection_collate,
)

# NOTE(review): an "autoshape" wrapper is usually an inference-time convenience;
# confirm that this object accepts batched tensors and supports .train() /
# gradient updates before relying on it for training.
model = YOLOv5('yolov5s.pt', autoshape=True)
model.train()

In [None]:
# Loss function
class YOLOv5Loss(nn.Module):
    """Unweighted sum of objectness, no-objectness, class and box losses.

    Both ``predictions`` and ``targets`` passed to ``forward`` must already be
    4-tuples ordered as ``(objectness, no_objectness, classes, boxes)``.
    """

    def __init__(self):
        super().__init__()
        self.mse_loss = nn.MSELoss()  # box regression; GIoU is another option
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.ce_loss = nn.CrossEntropyLoss()

    def forward(self, predictions, targets):
        """Return the scalar total loss for one batch.

        Args:
            predictions: 4-tuple ``(obj, no_obj, class, box)`` of predictions.
            targets: 4-tuple of matching targets in the same order.
        """
        # TODO: parsing raw model output / dataset targets into these
        # 4-tuples is not implemented here.
        pred_obj, pred_noobj, pred_cls, pred_box = predictions
        tgt_obj, tgt_noobj, tgt_cls, tgt_box = targets

        loss_terms = (
            self.bce_loss(pred_obj, tgt_obj),      # objectness
            self.bce_loss(pred_noobj, tgt_noobj),  # no-objectness
            self.ce_loss(pred_cls, tgt_cls),       # classification
            self.mse_loss(pred_box, tgt_box),      # box regression
        )

        # Accumulate left to right so every term carries the same weight.
        total_loss = loss_terms[0]
        for term in loss_terms[1:]:
            total_loss = total_loss + term
        return total_loss

In [None]:
# Optimizer: Adam over every parameter exposed by the model.
LEARNING_RATE = 0.001
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
# Training Loop
num_epochs = 10

# Loss criterion used by the loop below.
loss_function = YOLOv5Loss()

for epoch in range(num_epochs):
    running_loss = 0.0
    num_batches = 0

    for images, targets in data_loader:
        optimizer.zero_grad()

        outputs = model(images)

        # NOTE(review): YOLOv5Loss.forward unpacks both arguments into four
        # parts (objectness, no-objectness, classes, boxes). The raw model
        # output and the dataset targets still have to be converted into that
        # layout - TODO confirm / implement.
        loss = loss_function(outputs, targets)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # max(..., 1) guards against an empty data loader.
    mean_loss = running_loss / max(num_batches, 1)
    print(f"Epoch {epoch + 1}/{num_epochs} - mean loss: {mean_loss:.4f}")