In [1]:
import torch
import torch.nn as nn
from torchvision import models
import warnings
warnings.filterwarnings("ignore") 

In [8]:
class Yolov1(nn.Module):
    def __init__(self,S=7, B=2, C=20):
        super().__init__()
        resnet = models.resnet152(pretrained=True)
        self.backbone = nn.Sequential(*list(resnet.children())[:-2]) 

        # YOLO Detection Head (Example)
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(2048*7*7, 4096),
            nn.Dropout(0.0),
            nn.LeakyReLU(0.1),
            nn.Linear(4096, S*S*(C+B*5))
        )
    def forward(self,x):
        x = self.backbone(x)
        x = self.head(x)
        return x
    

In [9]:
x = torch.rand(1,3,224,224)
yolo = Yolov1()
yolo(x).shape

torch.Size([1, 1470])

#=============================================================================================#

In [10]:
class CNNBlock(nn.Module):
    def __init__(self, in_channels, out_channels, bn_act = True, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias= not bn_act, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.LeakyReLU(0.1)
        self.use_bn_act = bn_act

    def forward(self, x):
        if self.use_bn_act:
            return self.act(self.bn(self.conv(x)))
        else:
            return self.conv(x)             
        

In [18]:
class ScalePrediction(nn.Module):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.head = nn.Sequential(
            CNNBlock(in_channels, 2*in_channels,kernel_size=3, padding=1),
            CNNBlock(2*in_channels, (num_classes +5) *3, bn_act=False, kernel_size=1), #[po, x, y, w,h]
        )
        self.num_classes = num_classes
    def forward(self,x):
        x = self.head(x)
        x = x.reshape(x.shape[0], 3, self.num_classes + 5, x.shape[2], x.shape[3])
        x = x.permute(0,1,3,4,2)
        return x

In [22]:
conv = CNNBlock(3,64,kernel_size=3)
conv(x).shape

torch.Size([1, 64, 222, 222])

In [23]:
class Yolov3(nn.Module):
    def __init__(self):
        super().__init__()
        resnet = models.resnet152(pretrained=True)
        self.backbone = nn.Sequential(*list(resnet.children())[:-2]) 

        # YOLO Detection Head (Example)
        self.head = ScalePrediction(2048, 20)
    def forward(self,x):
        x = self.backbone(x)
        x = self.head(x)
        return x

In [24]:
x = torch.rand(1,3,224,224)
yolo = Yolov3()
yolo(x).shape

torch.Size([1, 3, 7, 7, 25])

#==================================================================================================#

In [None]:
class DFL(nn.Module):
    """
    Integral module of Distribution Focal Loss (DFL).

    Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
    """

    def __init__(self, c1=16):
        """Initialize a convolutional layer with a given number of input channels."""
        super().__init__()
        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
        x = torch.arange(c1, dtype=torch.float)
        self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
        self.c1 = c1

    def forward(self, x):
        """Applies a transformer layer on input tensor 'x' and returns a tensor."""
        b, _, a = x.shape  # batch, channels, anchors
        return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
        # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)

In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque

In [26]:
memory = deque(maxlen=10000)

In [27]:
memory

deque([], maxlen=10000)

In [31]:
memory.extend([1,2,3])
memory

deque([10, 10, 11, 1, 2, 3], maxlen=10000)

In [32]:
type(memory)

collections.deque

In [6]:
import torch
import torch.nn as nn

class YOLOv8Detect(nn.Module):
    def __init__(self, num_classes=80, num_features=3, ch_in=[256, 512, 1024]):
        """
        YOLOv8 Detection Head
        :param num_classes: Number of object classes
        :param num_features: Number of feature levels (e.g., P3, P4, P5)
        :param ch_in: Input channels from backbone/FPN (P3, P4, P5)
        """
        super(YOLOv8Detect, self).__init__()
        self.num_classes = num_classes
        self.num_features = num_features
        
        self.reg_heads = nn.ModuleList()  # Regression heads (box prediction)
        self.cls_heads = nn.ModuleList()  # Classification heads
        self.obj_heads = nn.ModuleList()  # Objectness score heads

        for ch in ch_in:
            self.reg_heads.append(self._conv_block(ch, 4))       # Box regression: (x, y, w, h)
            self.cls_heads.append(self._conv_block(ch, num_classes))  # Classification
            self.obj_heads.append(self._conv_block(ch, 1))       # Objectness score
        
    def _conv_block(self, in_channels, out_channels):
        """Basic Conv Block for Detection Head"""
        return nn.Sequential(
            nn.Conv2d(in_channels, in_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, 1)
        )

    def forward(self, x):
        """
        Forward pass of YOLOv8 detection head
        :param x: List of feature maps from backbone (P3, P4, P5)
        :return: Bounding boxes, class scores, and objectness scores
        """
        outputs = []
        for i in range(self.num_features):
            reg = self.reg_heads[i](x[i])  # (B, 4, H, W)
            cls = self.cls_heads[i](x[i])  # (B, num_classes, H, W)
            obj = self.obj_heads[i](x[i])  # (B, 1, H, W)
            
            # Reshape to match YOLO format
            reg = reg.permute(0, 2, 3, 1).contiguous()  # (B, H, W, 4)
            cls = cls.permute(0, 2, 3, 1).contiguous()  # (B, H, W, num_classes)
            obj = obj.permute(0, 2, 3, 1).contiguous()  # (B, H, W, 1)
            
            # Concatenate all predictions
            out = torch.cat([reg, obj, cls], dim=-1)  # (B, H, W, 4 + 1 + num_classes)
            outputs.append(out)
        
        return outputs  # List of feature maps with predictions

# Example usage
if __name__ == "__main__":
    model = YOLOv8Detect(num_classes=80)
    P3, P4, P5 = torch.randn(1, 256, 80, 80), torch.randn(1, 512, 40, 40), torch.randn(1, 1024, 20, 20)
    preds = model([P3, P4, P5])
    for i, p in enumerate(preds):
        print(f"Output {i} Shape: {p.shape}")  # Expecting (1, H, W, num_classes + 5)


Output 0 Shape: torch.Size([1, 80, 80, 85])
Output 1 Shape: torch.Size([1, 40, 40, 85])
Output 2 Shape: torch.Size([1, 20, 20, 85])


In [3]:
import torch
import torch.nn as nn

class YOLOv8Detect(nn.Module):
    def __init__(self, num_classes=80, num_features=3, ch_in=[256, 512, 1024], inference = False):
        """
        YOLOv8 Detection Head with Post-Processing
        :param num_classes: Number of object classes
        :param num_features: Number of feature levels (e.g., P3, P4, P5)
        :param ch_in: Input channels from backbone/FPN (P3, P4, P5)
        """
        super(YOLOv8Detect, self).__init__()
        self.num_classes = num_classes
        self.num_features = num_features
        
        self.reg_heads = nn.ModuleList()
        self.cls_heads = nn.ModuleList()
        self.obj_heads = nn.ModuleList()

        for ch in ch_in:
            self.reg_heads.append(self._conv_block(ch, 4))       # Box regression: (x, y, w, h)
            self.cls_heads.append(self._conv_block(ch, num_classes))  # Classification
            self.obj_heads.append(self._conv_block(ch, 1))       # Objectness score
        
    def _conv_block(self, in_channels, out_channels):
        """Basic Conv Block for Detection Head"""
        return nn.Sequential(
            nn.Conv2d(in_channels, in_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, 1)
        )

    def forward(self, x):
        """
        Forward pass of YOLOv8 detection head
        :param x: List of feature maps from backbone (P3, P4, P5)
        :return: List of bounding boxes, class scores, and objectness scores
        """
        outputs = []
        for i in range(self.num_features):
            reg = self.reg_heads[i](x[i])  # (B, 4, H, W)
            cls = self.cls_heads[i](x[i])  # (B, num_classes, H, W)
            obj = self.obj_heads[i](x[i])  # (B, 1, H, W)
            
            # Reshape to match YOLO format
            reg = reg.permute(0, 2, 3, 1).contiguous()  # (B, H, W, 4)
            cls = cls.permute(0, 2, 3, 1).contiguous()  # (B, H, W, num_classes)
            obj = obj.permute(0, 2, 3, 1).contiguous()  # (B, H, W, 1)
            
            # Concatenate all predictions
            out = torch.cat([reg, obj, cls], dim=-1)  # (B, H, W, 4 + 1 + num_classes)
            outputs.append(out)
        
        return self.decode_predictions(outputs)

    def decode_predictions(self, outputs, conf_thresh=0.3, iou_thresh=0.5):
        """
        Decode predictions and apply Non-Maximum Suppression (NMS)
        :param outputs: Raw predictions from the model
        :param conf_thresh: Confidence threshold
        :param iou_thresh: IOU threshold for NMS
        :return: Filtered detections
        """
        batch_detections = []
        
        for output in outputs:  # Loop over P3, P4, P5 feature maps
            B, H, W, C = output.shape  # Get shape
            output = output.view(B, H * W, C)  # Flatten spatial dimensions
            
            # Split predictions
            reg = output[..., :4]  # Bounding box (x, y, w, h)
            obj = output[..., 4:5]  # Objectness score
            cls = output[..., 5:]  # Class scores
            
            # Convert objectness and class scores
            obj = obj.sigmoid()  # Normalize objectness
            cls = cls.softmax(dim=-1)  # Normalize class probabilities
            
            # Compute final confidence score
            scores, labels = cls.max(dim=-1)  # Get highest confidence class
            scores = scores * obj.squeeze(-1)  # Multiply by objectness score
            
            # Filter by confidence threshold
            mask = scores > conf_thresh
            reg, scores, labels = reg[mask], scores[mask], labels[mask]

            # Apply Non-Maximum Suppression (NMS)
            if reg.shape[0] > 0:
                keep = self.nms(reg, scores, iou_thresh)
                batch_detections.append((reg[keep], scores[keep], labels[keep]))

        return batch_detections

    def nms(self, boxes, scores, iou_thresh):
        """
        Apply Non-Maximum Suppression (NMS)
        :param boxes: Bounding boxes
        :param scores: Confidence scores
        :param iou_thresh: IOU threshold
        :return: Indices of kept boxes
        """
        keep = []
        _, idxs = scores.sort(descending=True)  # Sort by score

        while idxs.numel() > 0:
            max_idx = idxs[0]  # Highest confidence box
            keep.append(max_idx)

            if idxs.numel() == 1:
                break

            ious = self.iou(boxes[max_idx], boxes[idxs[1:]])
            idxs = idxs[1:][ious < iou_thresh]  # Keep boxes with low IOU

        return torch.tensor(keep, dtype=torch.long)

    def iou(self, box1, boxes):
        """
        Compute Intersection over Union (IoU)
        :param box1: Single bounding box
        :param boxes: Multiple bounding boxes
        :return: IoU values
        """
        # Compute intersection
        x1 = torch.max(box1[0], boxes[:, 0])
        y1 = torch.max(box1[1], boxes[:, 1])
        x2 = torch.min(box1[2], boxes[:, 2])
        y2 = torch.min(box1[3], boxes[:, 3])

        inter_area = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

        # Compute union
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union_area = box1_area + boxes_area - inter_area

        return inter_area / (union_area + 1e-6)  # Avoid division by zero

# Example usage
if __name__ == "__main__":
    model = YOLOv8Detect(num_classes=80)
    P3, P4, P5 = torch.randn(1, 256, 80, 80), torch.randn(1, 512, 40, 40), torch.randn(1, 1024, 20, 20)
    detections = model([P3, P4, P5])

    # Print detections
    for i, (boxes, scores, labels) in enumerate(detections):
        print(f"Feature Map {i}: {len(boxes)} detections")


In [5]:
model = YOLOv8Detect(num_classes=80)
P3, P4, P5 = torch.randn(1, 256, 80, 80), torch.randn(1, 512, 40, 40), torch.randn(1, 1024, 20, 20)
detections = model([P3, P4, P5])

# Print detections
for i, (boxes, scores, labels) in enumerate(detections):
    print(f"Feature Map {i}: {len(boxes)} detections")

In [8]:
python--version

NameError: name 'python' is not defined

In [9]:
from platform import python_version

print(python_version())

3.9.18
