In [45]:
from ultralytics import YOLO

In [46]:
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import os

def custom_collate_fn(batch):
    images, labels = zip(*batch)  # Separate images and labels

    # Stack images into a single tensor
    images = torch.stack(images)

    # Prepare targets (handle empty label cases)
    batch_idx = []
    cls_labels = []
    bboxes = []

    for i, label in enumerate(labels):
        if len(label) > 0:
            batch_idx.append(torch.full((label.shape[0], 1), i, dtype=torch.float32))  # Batch index
            cls_labels.append(label[:, 0].unsqueeze(1))  # Class ID
            bboxes.append(label[:, 1:])  # Bounding boxes (cx, cy, w, h)

    if len(batch_idx) > 0:
        batch_idx = torch.cat(batch_idx).to(images.device)
        cls_labels = torch.cat(cls_labels).to(images.device)
        bboxes = torch.cat(bboxes).to(images.device)
    else:
        batch_idx = torch.empty((0, 1), dtype=torch.float32).to(images.device)
        cls_labels = torch.empty((0, 1), dtype=torch.float32).to(images.device)
        bboxes = torch.empty((0, 4), dtype=torch.float32).to(images.device)

    batch_dict = {
        "img": images,
        "batch_idx": batch_idx,
        "cls": cls_labels,
        "bboxes": bboxes
    }
    
    return batch_dict


class YoloDataset(Dataset):
    def __init__(self, img_dir, label_dir, img_size=640, transform=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.img_size = img_size
        self.transform = transform
        self.image_files = [f for f in os.listdir(img_dir) if f.endswith(('.jpg', '.png'))]
    
    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        # Load image
        img_path = os.path.join(self.img_dir, self.image_files[index])
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.img_size, self.img_size))
        img = torch.tensor(img, dtype=torch.float32).permute(2, 0, 1) / 255.0  # Normalize to [0,1]

        # Load label (NO CONVERSION)
        label_path = os.path.join(self.label_dir, self.image_files[index].replace(".jpg", ".txt").replace(".png", ".txt"))
        labels = torch.zeros((0, 5), dtype=torch.float32)  # Default empty tensor for images without labels

        if os.path.exists(label_path):
            labels = torch.tensor([list(map(float, line.strip().split())) for line in open(label_path)], dtype=torch.float32)

        return img, labels  # Labels remain in (class_id, x_center, y_center, width, height) format

# Paths
train_img_dir = "../datasets/cocoa_diseases/images/train"
train_label_dir = "../datasets/cocoa_diseases/labels/train"

# Create DataLoader
train_dataset = YoloDataset(train_img_dir, train_label_dir)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=custom_collate_fn)


In [82]:
def match_targets_to_grid(targets, grid_size, num_classes):
    """
    Converts target bounding boxes to the YOLO feature map format.

    Args:
        targets: (num_objects, 6) tensor with (batch_idx, class_id, x_center, y_center, width, height).
        grid_size: The grid size (e.g., 80 for 80x80 feature maps).
        num_classes: Total number of object classes.

    Returns:
        target_map: Tensor shaped (batch_size, num_classes + 5, grid_size, grid_size)
    """

    batch_size = int(targets[:, 0].max().item()) + 1  # Get batch size from batch index
    target_map = torch.zeros((batch_size, num_classes + 5, grid_size, grid_size), dtype=torch.float32)

    for obj in targets:
        batch_idx, class_id, x, y, w, h = obj.tolist()
        batch_idx = int(batch_idx)
        class_id = int(class_id)

        # Convert to grid coordinates
        grid_x = int(x * grid_size)
        grid_y = int(y * grid_size)

        # Normalize box coordinates to the cell level
        x_cell = (x * grid_size) - grid_x
        y_cell = (y * grid_size) - grid_y
        w_cell = w * grid_size
        h_cell = h * grid_size

        # Set the target values in the grid cell
        target_map[batch_idx, class_id, grid_y, grid_x] = 1  # One-hot class label
        target_map[batch_idx, -5:, grid_y, grid_x] = torch.tensor([1, x_cell, y_cell, w_cell, h_cell])  # Objectness + bbox

    return target_map


In [83]:
import torch.nn.functional as F

class CustomYOLOLoss(nn.Module):
    def __init__(self, num_classes, grid_size=80):
        super().__init__()
        self.num_classes = num_classes
        self.grid_size = grid_size

    def forward(self, preds, targets):
        """
        Compute loss between model predictions and ground truth.
        Args:
            preds: List of 3 tensors (YOLO outputs at different scales)
            targets: (num_objects, 6) tensor with (batch_idx, class_id, x_center, y_center, width, height)
        """
        # Extract first scale output for simplicity
        preds = preds[0]  # Shape: (batch, 67, 80, 80)
        batch_size = preds.shape[0]

        # Convert targets to grid format
        target_map = match_targets_to_grid(targets, self.grid_size, self.num_classes).to(preds.device)

        # Split predictions into class scores, objectness, and bounding boxes
        pred_cls = preds[:, :-5, :, :]  # Class predictions
        pred_obj = preds[:, -5, :, :]   # Objectness score
        pred_boxes = preds[:, -4:, :, :]  # Bounding box predictions

        # Split targets the same way
        target_cls = target_map[:, :-5, :, :]
        target_obj = target_map[:, -5, :, :]
        target_boxes = target_map[:, -4:, :, :]

        # Compute losses
        class_loss = F.binary_cross_entropy_with_logits(pred_cls, target_cls)
        objectness_loss = F.binary_cross_entropy_with_logits(pred_obj, target_obj)
        box_loss = F.l1_loss(pred_boxes, target_boxes)

        return class_loss + objectness_loss + box_loss



In [96]:
import torch
import torch.nn.utils.prune as prune

# Apply pruning to YOLOv11 model
def apply_pruning(model, amount=0.5):
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d) or isinstance(module, torch.nn.Linear):
            prune.l1_unstructured(module, name="weight", amount=amount)
            module.weight_mask.requires_grad = False  # Ensure mask is not updated

# Function to enforce sparsity during training
def enforce_sparsity(model):
    with torch.no_grad():
        for name, module in model.named_modules():
            if isinstance(module, torch.nn.Conv2d) or isinstance(module, torch.nn.Linear):
                if hasattr(module, "weight_mask"):  # Check if pruning was applied
                    module.weight *= module.weight_mask  # Reapply sparsity


        


    
    
# def train_sparse_yolo_l1(model, dataloader, optimizer, device, num_epochs=5, l1_lambda=0.0001):
    
#     model = model.model.train()

#     for epoch in range(num_epochs):
#         for batch in dataloader:
#             # Move entire batch to device
#             batch = {k: v.to(device) for k, v in batch.items()}
#             images = batch["img"]  # Extract images from batch

#             optimizer.zero_grad()

#             # Forward pass
#             preds = model(images)
            
#             print(preds)
            

#             # Compute loss (including L1 regularization)
#             loss, _ = model.loss(tuple(preds), batch)

#             # L1 Regularization on model weights
#             l1_loss = sum(param.abs().sum() for param in model.parameters())
#             total_loss = loss + l1_lambda * l1_loss

#             # Backpropagation
#             total_loss.backward()
#             optimizer.step()


            
#            print(f"feats:{feats}")
            
            # for xi in feats:
            #     print(xi.shape)
            
            # print(f"Preds type: {type(preds)}")  # Check type of preds
            # if isinstance(preds, list) or isinstance(preds, tuple):
            #     print(f"Preds length: {len(preds)}")
            #     print(f"First element type: {type(preds[0])}")
            #     if isinstance(preds[0], torch.Tensor):
            #         print(f"Preds first shape: {preds[0].shape}")
            #     else:
            #         print(f"Preds first value: {preds[0]}")
            
            
            
            
def train_sparse_yolo_l1(model, dataloader, optimizer, device, num_epochs=5, l1_lambda=0.0001):
    
    model= model.model.train()
    model.dfl = False
    print(f"model_dfl:{model.dfl}")
    
    print("Model num_classes:", model.nc)

    
    custom_loss_fn = CustomYOLOLoss(num_classes=3).to(device)

    for epoch in range(num_epochs):
        for batch in dataloader:
            images = batch["img"].to(device)
            targets = torch.cat([batch["batch_idx"], batch["cls"], batch["bboxes"]], dim=1).to(device)

            print(targets)

            optimizer.zero_grad()

            # Forward pass
            preds = model(images)
            print(preds[0].shape)
            
            # # Ensure preds is a tensor, not a list
            # if isinstance(preds, list):
            #     preds = preds[0]

            # Compute loss using custom function
            loss = custom_loss_fn(preds, targets)

            # L1 Regularization on model weights
            l1_loss = sum(param.abs().sum() for param in model.parameters())
            total_loss = loss + l1_lambda * l1_loss

            # Backpropagation
            total_loss.backward()
            optimizer.step()


In [49]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [41]:
lst=list(range(3))
print(tuple(lst))

(0, 1, 2)


In [53]:
yolo_model=YOLO("models/yolo11x_trained_pruned_local_structured_30_channel.pt").to(device)
optimizer = torch.optim.AdamW(yolo_model.parameters(), lr=1e-4, weight_decay=1e-4)


In [97]:
train_sparse_yolo_l1(yolo_model,train_loader,optimizer=optimizer,device=device)

model_dfl:True
Model num_classes: 3
tensor([[0.0000, 1.0000, 0.5691, 0.5179, 0.3337, 0.4950]], device='cuda:0')
torch.Size([1, 67, 80, 80])


ValueError: Target size (torch.Size([1, 3, 80, 80])) must be the same as input size (torch.Size([1, 62, 80, 80]))

In [8]:
model=yolo_model.model.train()

for batch in train_loader:
    images = batch["img"]
    cls=batch["cls"]

In [12]:
preds=model(images)

print(preds)

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor