In [2]:
# Training a custom object-detection model with PyTorch

In [3]:
# Install PyTorch (CUDA 12.1 wheels) and ipykernel.
# %pip (unlike !pip / !pip3) always installs into the environment of the
# interpreter that is running this notebook's kernel.
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
%pip install ipykernel
# Register a Jupyter kernelspec named "minke" for the `python` found on PATH.
!python -m ipykernel install --user --name=minke

Looking in indexes: https://download.pytorch.org/whl/cu121
Installed kernelspec minke in /home/minkescanor/snap/code/176/.local/share/jupyter/kernels/minke


In [4]:
# PyTorch packages for building and training the model
# torchvision, PIL and os for locating and loading the dataset images
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset
from PIL import Image

In [5]:
class YOLOv1(nn.Module):
    """Small YOLOv1-style detector.

    Four convolution/ReLU/max-pool stages feed a flatten layer and two linear
    layers. ``forward`` returns a tensor of shape ``(batch, S, S, C + B * 5)``.
    """

    def __init__(self, S=7, B=2, C=1, input_size=128):
        """
        S: number of grid cells along each side of the image
        B: number of bounding boxes per grid cell
        C: number of classes
        input_size: spatial size of the images fed to the network, either an
            int (square images) or a ``(height, width)`` pair. The default of
            128 gives the 32 * 58 * 58 flattened features the network was
            originally hard-coded for.
        """
        super(YOLOv1, self).__init__()
        self.S = S
        self.B = B
        self.C = C

        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size

        feature_layers = [
            nn.Conv2d(3, 8, kernel_size=7, stride=2, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),

            nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),

            nn.Conv2d(16, 16, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),

            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),
        ]

        # Push a dummy image through the convolutional stack to learn how many
        # features reach the first linear layer, instead of hard-coding a value
        # that is only valid for one input resolution.
        with torch.no_grad():
            probe = torch.zeros(1, 3, height, width)
            for layer in feature_layers:
                probe = layer(probe)
        flat_features = probe.numel()

        # Layer order (and therefore the state_dict keys) is the same as when
        # the layers were listed inline.
        self.model = nn.Sequential(
            *feature_layers,
            nn.Flatten(),
            nn.Linear(flat_features, 32),
            nn.ReLU(),
            nn.Linear(32, self.S * self.S * (self.C + self.B * 5)),
        )

    def forward(self, x):
        """Run the network and reshape the flat output into the S x S grid."""
        return self.model(x).reshape(-1, self.S, self.S, self.C + self.B * 5)


In [6]:
def _yolo_cell_iou(pred_boxes, target_boxes, S):
    """Intersection-over-union of boxes that belong to the same grid cell.

    Both inputs hold ``(x_offset, y_offset, w, h)`` in their last dimension and
    must have the same shape; the result drops that last dimension.
    """
    # Offsets are expressed in grid-cell units (YOLODataset stores x * S - cell
    # index). Dividing by S brings the centers to image-relative units.
    # NOTE(review): this assumes w/h are image-relative fractions, as in
    # standard YOLO label files - confirm against the label data.
    pred_cx, pred_cy = pred_boxes[..., 0] / S, pred_boxes[..., 1] / S
    tgt_cx, tgt_cy = target_boxes[..., 0] / S, target_boxes[..., 1] / S
    # The network output is unconstrained, so negative sizes are treated as 0.
    pred_w, pred_h = pred_boxes[..., 2].clamp(min=0), pred_boxes[..., 3].clamp(min=0)
    tgt_w, tgt_h = target_boxes[..., 2].clamp(min=0), target_boxes[..., 3].clamp(min=0)

    inter_w = (torch.min(pred_cx + pred_w / 2, tgt_cx + tgt_w / 2)
               - torch.max(pred_cx - pred_w / 2, tgt_cx - tgt_w / 2)).clamp(min=0)
    inter_h = (torch.min(pred_cy + pred_h / 2, tgt_cy + tgt_h / 2)
               - torch.max(pred_cy - pred_h / 2, tgt_cy - tgt_h / 2)).clamp(min=0)
    intersection = inter_w * inter_h
    union = pred_w * pred_h + tgt_w * tgt_h - intersection
    # Clamping the denominator yields IoU 0 (not NaN) for two empty boxes.
    return (intersection / union.clamp(min=1e-9)).clamp(min=0.0, max=1.0)


def yolov1_loss(predictions, target, S=7, B=2, C=1, lambda_coord=5, lambda_noobj=0.5):
    """
    Compute the loss function for YOLOv1.

    The last dimension of both tensors is read with the layout that
    YOLODataset writes and calculate_mAP reads: B consecutive box slots of
    ``[confidence, x_offset, y_offset, w, h]`` followed by C class channels.

    predictions: tensor reshapeable to (batch, S, S, C + B * 5)
    target: tensor of the same layout, or a list/tuple of per-image tensors
        (as produced by collate_fn), which is stacked into a batch
    lambda_coord: weight of the box-coordinate term
    lambda_noobj: weight of the confidence term for slots without an object

    Returns a scalar tensor: coordinate loss + object-confidence loss +
    no-object-confidence loss. The class channels are not part of the loss.
    """
    # Ensure target is a tensor living on the same device/dtype as the predictions
    if isinstance(target, (list, tuple)):
        target = torch.stack(target)
    target = target.to(device=predictions.device, dtype=predictions.dtype)

    depth = C + B * 5
    predictions = predictions.reshape(-1, S, S, depth)
    target = target.reshape(-1, S, S, depth)

    # View the first B * 5 channels as B slots of [confidence, x, y, w, h]
    pred_slots = predictions[..., :B * 5].reshape(-1, S, S, B, 5)
    target_slots = target[..., :B * 5].reshape(-1, S, S, B, 5)

    pred_scores = pred_slots[..., 0]       # confidence score
    pred_boxes = pred_slots[..., 1:5]      # x, y, w, h
    target_scores = target_slots[..., 0]   # 1 where a slot holds an object, else 0
    target_boxes = target_slots[..., 1:5]  # x, y, w, h

    # IoU between each predicted box and the ground-truth box of its slot. It is
    # the regression target for the confidence, so no gradient flows through it.
    ious = _yolo_cell_iou(pred_boxes.detach(), target_boxes, S)

    # Loss for coordinate prediction (x, y, w, h), only where an object exists
    coord_loss = lambda_coord * torch.sum(
        target_scores * torch.sum((pred_boxes - target_boxes) ** 2, dim=-1)
    )

    # Loss for confidence score prediction
    obj_loss = torch.sum(target_scores * ((pred_scores - ious) ** 2))
    noobj_loss = lambda_noobj * torch.sum((1 - target_scores) * (pred_scores ** 2))

    return coord_loss + obj_loss + noobj_loss

In [34]:
def _grid_to_corner_boxes(grid):
    """Turn one image's (S, S, D) grid into (S * S, 4) boxes as (x1, y1, x2, y2).

    Channels 1..4 of every cell are read as (x_offset, y_offset, w, h); the
    first grid axis is the row (y) and the second the column (x).
    """
    grid = grid.float()
    rows, cols = grid.shape[0], grid.shape[1]
    row_index = torch.arange(rows, dtype=grid.dtype).unsqueeze(1)
    col_index = torch.arange(cols, dtype=grid.dtype).unsqueeze(0)
    # Cell index + in-cell offset, divided by the grid size, gives the center
    # as a fraction of the image.
    center_x = (col_index + grid[..., 1]) / cols
    center_y = (row_index + grid[..., 2]) / rows
    width = grid[..., 3].clamp(min=0)
    height = grid[..., 4].clamp(min=0)
    corners = torch.stack(
        [center_x - width / 2, center_y - height / 2,
         center_x + width / 2, center_y + height / 2],
        dim=-1,
    )
    return corners.reshape(-1, 4)


def _corner_iou(box, others):
    """IoU of one (4,) corner box against an (n, 4) tensor of corner boxes."""
    inter_w = (torch.min(box[2], others[:, 2]) - torch.max(box[0], others[:, 0])).clamp(min=0)
    inter_h = (torch.min(box[3], others[:, 3]) - torch.max(box[1], others[:, 1])).clamp(min=0)
    intersection = inter_w * inter_h
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    other_areas = (others[:, 2] - others[:, 0]) * (others[:, 3] - others[:, 1])
    union = box_area + other_areas - intersection
    return intersection / union.clamp(min=1e-9)


def calculate_mAP(model, dataloader, iou_threshold=0.5):
    """
    Calculate mean Average Precision (mAP) for the YOLOv1 model.

    The first box slot of every grid cell (channel 0 = confidence, channels
    1..4 = x, y, w, h) is treated as one detection. Ground-truth boxes are the
    target cells whose confidence channel is positive. Detections are ranked by
    confidence over the whole dataloader and matched greedily to ground truth
    at ``iou_threshold``; the area under the interpolated precision/recall
    curve is returned. Classes are not distinguished, so for a single-class
    problem the value is that class's AP.

    model: module mapping an image batch to (batch, S, S, D) predictions;
        it is switched to eval mode and left there
    dataloader: iterable of ``(images, targets)`` batches
    iou_threshold: minimum IoU for a detection to count as a true positive

    Returns a float in [0, 1]; 0.0 when there are no ground-truth boxes.
    """
    model.eval()
    detections = []     # (confidence, image index, corner box)
    ground_truths = []  # one (n, 4) tensor of corner boxes per image

    with torch.no_grad():
        for images, targets in dataloader:
            predictions = model(images)

            for i in range(len(images)):
                pred_grid = predictions[i].detach().cpu().float()
                gt_grid = targets[i] if isinstance(targets[i], torch.Tensor) else torch.tensor(targets[i])
                gt_grid = gt_grid.detach().cpu().float()

                image_index = len(ground_truths)
                has_object = gt_grid[..., 0].reshape(-1) > 0
                ground_truths.append(_grid_to_corner_boxes(gt_grid)[has_object])

                pred_boxes = _grid_to_corner_boxes(pred_grid)
                pred_scores = pred_grid[..., 0].reshape(-1).tolist()
                for score, box in zip(pred_scores, pred_boxes):
                    detections.append((score, image_index, box))

    total_gt = sum(len(boxes) for boxes in ground_truths)
    if total_gt == 0 or not detections:
        return 0.0

    # Most confident detections get first pick of the ground-truth boxes.
    detections.sort(key=lambda detection: detection[0], reverse=True)
    claimed = [[False] * len(boxes) for boxes in ground_truths]

    true_positives = 0
    false_positives = 0
    recalls = []
    precisions = []
    for _, image_index, box in detections:
        gt_boxes = ground_truths[image_index]
        is_hit = False
        if len(gt_boxes) > 0:
            best_iou, best_index = _corner_iou(box, gt_boxes).max(dim=0)
            best_index = int(best_index)
            # A ground-truth box may be claimed once; duplicates are false positives.
            if best_iou.item() >= iou_threshold and not claimed[image_index][best_index]:
                claimed[image_index][best_index] = True
                is_hit = True
        if is_hit:
            true_positives += 1
        else:
            false_positives += 1
        recalls.append(true_positives / total_gt)
        precisions.append(true_positives / (true_positives + false_positives))

    # Replace each precision by the best precision at any higher recall
    # (all-point interpolation), then integrate precision over recall.
    for k in range(len(precisions) - 2, -1, -1):
        precisions[k] = max(precisions[k], precisions[k + 1])

    mean_ap = 0.0
    previous_recall = 0.0
    for recall, precision in zip(recalls, precisions):
        mean_ap += (recall - previous_recall) * precision
        previous_recall = recall

    return mean_ap

In [None]:
def collate_fn(batch):
    """Collate ``(image, target)`` samples into one batch.

    The images are stacked along a new leading dimension; the targets are
    returned unstacked, as a tuple in sample order.
    """
    image_group, target_group = zip(*batch)
    return torch.stack(image_group, dim=0), target_group
    
class YOLODataset(Dataset):
    """Images with YOLO-format label files.

    Files are looked up in ``<root_dir>/<phase>/images`` and
    ``<root_dir>/<phase>/labels``; the label of ``name.jpg`` / ``name.png`` is
    ``name.txt``. Each item is ``(image, target)`` where ``target`` has shape
    ``(S, S, C + B * 5)``: every one of the B box slots of the cell containing
    an object's center holds ``[1, x_offset, y_offset, width, height]`` and the
    class id is written to channel ``B * 5``.
    """

    def __init__(self, root_dir, transform=None, S=7, B=2, C=1,
                 phases=("train", "val", "test")):
        """
        root_dir: dataset root containing one sub-directory per phase
        transform: callable applied to the RGB PIL image; it must return a tensor
        S: number of grid cells along each side of the image
        B: number of box slots per grid cell
        C: number of class channels
        phases: sub-directories of root_dir whose samples are gathered; the
            default merges all three splits into one dataset
        """
        self.root_dir = root_dir
        self.transform = transform
        self.S = S
        self.B = B
        self.C = C
        self.image_paths = []
        self.label_paths = []

        for phase in phases:
            image_dir = os.path.join(root_dir, phase, "images")
            label_dir = os.path.join(root_dir, phase, "labels")

            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample order stable between runs and machines.
            for filename in sorted(os.listdir(image_dir)):
                if filename.endswith((".jpg", ".png")):
                    # Swap only the final extension, so a ".jpg"/".png" elsewhere
                    # in the file name is left untouched.
                    label_name = os.path.splitext(filename)[0] + ".txt"
                    self.image_paths.append(os.path.join(image_dir, filename))
                    self.label_paths.append(os.path.join(label_dir, label_name))

    def __len__(self):
        """Number of images found across all phases."""
        return len(self.image_paths)

    def _encode_labels(self, label_path):
        """Read one label file and encode it as an (S, S, C + B * 5) target.

        Every non-blank line must hold ``class x y width height``.
        """
        target = torch.zeros((self.S, self.S, self.C + self.B * 5))
        with open(label_path, 'r') as file:
            for line in file:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no box.
                    continue
                class_label, x, y, width, height = map(float, fields)

                # Convert to grid cell; a center lying exactly on the right or
                # bottom edge (x or y == 1.0) belongs to the last cell.
                grid_x = min(max(int(x * self.S), 0), self.S - 1)
                grid_y = min(max(int(y * self.S), 0), self.S - 1)

                x_offset = x * self.S - grid_x
                y_offset = y * self.S - grid_y

                box = torch.tensor([1, x_offset, y_offset, width, height])
                for slot in range(self.B):
                    target[grid_y, grid_x, 5 * slot:5 * slot + 5] = box
                if self.C > 0:
                    target[grid_y, grid_x, self.B * 5] = class_label
        return target

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        Raises TypeError when the transform does not produce a tensor.
        """
        # convert() loads the pixel data, so the file can be closed right away.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform:
            image = self.transform(image)
        if not isinstance(image, torch.Tensor):
            raise TypeError(
                "YOLODataset needs a transform that returns a torch.Tensor "
                "(e.g. transforms.ToTensor())"
            )
        # transforms.ToTensor() already yields floats in [0, 1]; only raw
        # integer pixel tensors (0..255) still need to be rescaled.
        if not image.is_floating_point():
            image = image.float() / 255

        target = self._encode_labels(self.label_paths[idx])
        return image, target

In [36]:
def train_model(model, dataloader, epochs=10, learning_rate=0.001):
    """
    Fit the YOLOv1 model with Adam.

    Every epoch makes one pass over ``dataloader`` minimising ``yolov1_loss``,
    then evaluates ``calculate_mAP`` on the same dataloader and prints the mean
    batch loss together with the mAP.
    """
    adam = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        # calculate_mAP leaves the model in eval mode, so re-enable training here.
        model.train()
        batch_losses = []

        for images, targets in dataloader:
            adam.zero_grad()
            batch_loss = yolov1_loss(model(images), targets)
            batch_loss.backward()
            adam.step()
            batch_losses.append(batch_loss.item())

        epoch_loss = sum(batch_losses)
        mean_ap = calculate_mAP(model, dataloader)
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / len(dataloader)}, mAP: {mean_ap}")

In [1]:
# Driver cell: build the dataset and data loader, create the model, train it.
# NOTE: the import cell and the cells defining YOLODataset, collate_fn, YOLOv1
# and train_model must be executed first; on a fresh kernel this cell fails
# with a NameError (e.g. for `transforms`).

# Absolute path to the dataset root on the author's machine. YOLODataset looks
# for <root_dir>/<phase>/images and <root_dir>/<phase>/labels below it.
root_dir = "/home/minkescanor/Desktop/WORKPLACE/Hust/AI/Object_Detection/dataset"
# Resize every image to 128x128 (the input size the YOLOv1 network is sized
# for) and convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

# As constructed here, the dataset gathers the train, val and test
# sub-directories into a single dataset.
dataset = YOLODataset(root_dir, transform=transform)
# collate_fn stacks the images and keeps the per-image targets as a tuple.
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)

# 7x7 grid, 2 boxes per cell, 1 class -> 11 output channels per cell.
model = YOLOv1(S=7, B=2, C=1)
train_model(model, dataloader, epochs=10)

NameError: name 'transforms' is not defined