In [None]:
! pip install torch torchvision torchaudio


Collecting torch
  Using cached torch-2.9.0-cp310-none-macosx_11_0_arm64.whl.metadata (30 kB)
Collecting torchvision
  Using cached torchvision-0.24.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.9 kB)
Collecting torchaudio
  Using cached torchaudio-2.9.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.9 kB)
Collecting filelock (from torch)
  Using cached filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx>=2.5.1 (from torch)
  Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch)
  Using cached jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec>=0.8.5 (from torch)
  Using cached fsspec-2025.10.0-py3-none-any.whl.metadata (10 kB)
Collecting numpy (from torchvision)
  Using cached numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting pillow!=8.3.*,>=5.3.0 (from torchvision)
  Using cached pillow-1

In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F


In [26]:
import torch
import torch.nn as nn

class ConvBlock(nn.Module):
    """Convolution, then batch normalisation, then an in-place ReLU.

    The convolution is built without a bias term and is followed by a
    ``BatchNorm2d`` over ``out_channels`` feature maps.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Convolution kernel size (default 3).
        stride: Convolution stride (default 1).
        padding: Zero padding applied by the convolution (default 1).
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Run ``x`` through conv -> batch norm -> ReLU and return the result."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.relu(normalised)


In [27]:
class ResidualBlock(nn.Module):
    """Two stacked ``ConvBlock`` layers wrapped in an identity skip connection.

    Both inner blocks map ``channels`` to ``channels`` using ``ConvBlock``'s
    default kernel size, stride and padding; the block input is added to
    their output.

    Args:
        channels: Channel count of both the input and the output.
    """

    def __init__(self, channels):
        super().__init__()
        self.layer1 = ConvBlock(channels, channels)
        self.layer2 = ConvBlock(channels, channels)

    def forward(self, x):
        """Return ``layer2(layer1(x)) + x``."""
        residual = self.layer1(x)
        residual = self.layer2(residual)
        # Identity shortcut: add the untouched input back onto the branch output.
        return residual + x


In [28]:
class MyBackbone(nn.Module):
    """Five-stage convolutional backbone that returns its last three stage outputs.

    Every stage starts with a stride-2 ``ConvBlock``. Stage 2 adds one
    ``ResidualBlock``; stages 3-5 add two each. ``forward`` returns the
    outputs of stages 3, 4 and 5.
    """

    @staticmethod
    def _make_stage(in_channels, out_channels, num_residual):
        """Build a stride-2 ConvBlock followed by ``num_residual`` ResidualBlocks."""
        blocks = [ConvBlock(in_channels, out_channels, stride=2)]
        for _ in range(num_residual):
            blocks.append(ResidualBlock(out_channels))
        return nn.Sequential(*blocks)

    def __init__(self):
        super().__init__()
        # Stage 1 is a single ConvBlock (not wrapped in nn.Sequential).
        self.layer1 = ConvBlock(3, 32, stride=2)
        # Stages 2-5: downsampling ConvBlock + residual blocks.
        self.layer2 = self._make_stage(32, 64, num_residual=1)
        self.layer3 = self._make_stage(64, 128, num_residual=2)
        self.layer4 = self._make_stage(128, 256, num_residual=2)
        self.layer5 = self._make_stage(256, 512, num_residual=2)

    def forward(self, x):
        """Return the stage-3, stage-4 and stage-5 feature maps for ``x``.

        The sizes in the comments below are for a 640x640 input, as used by
        the demo cells in this notebook.
        """
        stage1 = self.layer1(x)        # 32 channels, 320x320
        stage2 = self.layer2(stage1)   # 64 channels, 160x160
        stage3 = self.layer3(stage2)   # 128 channels, 80x80
        stage4 = self.layer4(stage3)   # 256 channels, 40x40
        stage5 = self.layer5(stage4)   # 512 channels, 20x20
        # Multiple scales are handed to the detection neck/head.
        return stage3, stage4, stage5


In [29]:

# Smoke test: push one random 640x640 RGB sample through the backbone.
x = torch.randn(1, 3, 640, 640)
model = MyBackbone()
f3, f4, f5 = model(x)

# Recorded output: (1, 128, 80, 80), (1, 256, 40, 40), (1, 512, 20, 20)
for feature_map in (f3, f4, f5):
    print(feature_map.shape)


torch.Size([1, 128, 80, 80])
torch.Size([1, 256, 40, 40])
torch.Size([1, 512, 20, 20])


In [30]:
# Neck (FPN) Code

In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvBlock(nn.Module):
    """Bias-free convolution followed by BatchNorm2d and an in-place ReLU.

    NOTE(review): this re-declares the ``ConvBlock`` already defined in an
    earlier cell of this notebook with the same layers; the later definition
    shadows the earlier one. Consider keeping a single definition.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()
        conv_kwargs = dict(kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
        self.conv = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the three layers in order: conv, batch norm, ReLU."""
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        return out


class FPNNeck(nn.Module):
    """Top-down feature-pyramid neck that fuses three backbone scales.

    Each input is first passed through a 1x1 convolution. The coarsest map is
    then upsampled and concatenated with the middle map, fused by a
    ``ConvBlock``, and the result is upsampled and fused with the finest map
    in the same way.

    Args:
        channels: Channel counts of the three inputs ``(c3, c4, c5)``. They
            only set the input widths of the 1x1 projections; the projected
            widths are fixed at 128, 256 and 512.
    """

    def __init__(self, channels=(128, 256, 512)):
        super().__init__()

        # 1x1 projections applied before fusion. With the default `channels`
        # the widths are unchanged (128 -> 128, 256 -> 256, 512 -> 512).
        self.reduce_c3 = nn.Conv2d(channels[0], 128, 1)
        self.reduce_c4 = nn.Conv2d(channels[1], 256, 1)
        self.reduce_c5 = nn.Conv2d(channels[2], 512, 1)

        # Fusion convs: input width is the sum of the concatenated maps.
        self.conv_c4 = ConvBlock(512 + 256, 256)  # up_c5(512) + c4(256)
        self.conv_c3 = ConvBlock(256 + 128, 128)  # up_p4(256) + c3(128)

    def forward(self, c3, c4, c5):
        """Fuse the three scales and return ``(p3, p4, p5)``.

        ``p5`` is the projected ``c5``; ``p4`` and ``p3`` are the fused maps
        at the resolutions of ``c4`` and ``c3`` respectively.
        """
        # Step 1: 1x1 projections
        c3 = self.reduce_c3(c3)  # (128 channels)
        c4 = self.reduce_c4(c4)  # (256 channels)
        c5 = self.reduce_c5(c5)  # (512 channels)

        # Step 2: top-down fusion.
        # Upsample to the lateral map's actual spatial size rather than by a
        # fixed factor of 2: a stride-2 convolution on an odd-sized map makes
        # the coarser level (n + 1) / 2 wide, so doubling it would not match
        # and torch.cat would fail. For exact 2x pyramids this is identical.
        up_c5 = F.interpolate(c5, size=c4.shape[-2:], mode='nearest')
        fused_c4 = torch.cat([up_c5, c4], dim=1)
        p4 = self.conv_c4(fused_c4)

        up_p4 = F.interpolate(p4, size=c3.shape[-2:], mode='nearest')
        fused_c3 = torch.cat([up_p4, c3], dim=1)
        p3 = self.conv_c3(fused_c3)

        # Step 3: multi-scale outputs, finest first
        return p3, p4, c5


In [32]:
# Smoke test: backbone features -> FPN neck, then report the fused shapes.
backbone = MyBackbone()
neck = FPNNeck(channels=[128, 256, 512])

x = torch.randn(1, 3, 640, 640)
f3, f4, f5 = backbone(x)
p3, p4, p5 = neck(f3, f4, f5)

for label, fused_map in (("p3:", p3), ("p4:", p4), ("p5:", p5)):
    print(label, fused_map.shape)


p3: torch.Size([1, 128, 80, 80])
p4: torch.Size([1, 256, 40, 40])
p5: torch.Size([1, 512, 20, 20])


In [33]:
import torch
import torch.nn as nn

class DetectionHead(nn.Module):
    """Per-scale 1x1 convolutional prediction head.

    For every scale the head emits ``anchors_per_scale * (5 + num_classes)``
    channels, i.e. ``[x, y, w, h, obj, class...]`` for each anchor.

    Args:
        num_classes: Number of object classes (default 1).
        anchors_per_scale: Number of anchors predicted at each location.
        in_channels: Channel counts of the three inputs ``(p3, p4, p5)``.
            Defaults to ``(128, 256, 512)``, the widths this head was
            originally hard-coded for.

    Raises:
        ValueError: If ``in_channels`` does not contain exactly three entries.
    """

    def __init__(self, num_classes=1, anchors_per_scale=3, in_channels=(128, 256, 512)):
        super().__init__()
        if len(in_channels) != 3:
            raise ValueError(
                f"in_channels must have exactly 3 entries, got {len(in_channels)}"
            )
        self.num_classes = num_classes
        self.anchors_per_scale = anchors_per_scale
        self.num_outputs = 5 + num_classes  # [x, y, w, h, obj, class]

        out_channels = anchors_per_scale * self.num_outputs
        small_c, medium_c, large_c = in_channels

        # One 1x1 conv per scale
        self.head_small = nn.Conv2d(small_c, out_channels, 1)
        self.head_medium = nn.Conv2d(medium_c, out_channels, 1)
        self.head_large = nn.Conv2d(large_c, out_channels, 1)

    def forward(self, p3, p4, p5):
        """Return ``[out_small, out_medium, out_large]`` for the three scales."""
        out_small = self.head_small(p3)
        out_medium = self.head_medium(p4)
        out_large = self.head_large(p5)
        return [out_small, out_medium, out_large]


In [34]:
class BacteriaDetector(nn.Module):
    """Full detector: ``MyBackbone`` -> ``FPNNeck`` -> ``DetectionHead``.

    Args:
        num_classes: Number of classes forwarded to ``DetectionHead``.
    """

    def __init__(self, num_classes=1):
        super().__init__()
        self.backbone = MyBackbone()
        self.neck = FPNNeck(channels=[128, 256, 512])
        self.head = DetectionHead(num_classes=num_classes)

    def forward(self, x):
        """Return the detection head's per-scale predictions for ``x``."""
        # 1. Extract the three backbone feature maps.
        backbone_features = self.backbone(x)
        # 2. Fuse them in the neck.
        pyramid_features = self.neck(*backbone_features)
        # 3. Predict at every scale.
        return self.head(*pyramid_features)


In [35]:
# Smoke test: run the full detector on one random 640x640 sample.
x = torch.randn(1, 3, 640, 640)
model = BacteriaDetector(num_classes=1)
preds = model(x)

# Report the prediction tensor shape of each scale, numbered from 1.
for scale_number, scale_pred in enumerate(preds, start=1):
    print(f"Scale {scale_number}: {scale_pred.shape}")


Scale 1: torch.Size([1, 18, 80, 80])
Scale 2: torch.Size([1, 18, 40, 40])
Scale 3: torch.Size([1, 18, 20, 20])


In [36]:
import torch

def bbox_ciou(box1, box2, eps=1e-7):
    """Complete-IoU (CIoU) loss between corresponding boxes.

    Args:
        box1, box2: Tensors of shape ``[..., 4]`` (typically ``[N, 4]``) in
            ``(x_center, y_center, width, height)`` format. Row ``i`` of
            ``box1`` is compared with row ``i`` of ``box2``.
        eps: Small constant guarding the divisions.

    Returns:
        Tensor of shape ``[...]`` holding ``1 - CIoU`` for each pair, where
        ``CIoU = IoU - (center_dist^2 / enclosing_diag^2 + alpha * v)``.
    """
    import math  # local import keeps this cell self-contained

    cx1, cy1, w1, h1 = box1[..., 0], box1[..., 1], box1[..., 2], box1[..., 3]
    cx2, cy2, w2, h2 = box2[..., 0], box2[..., 1], box2[..., 2], box2[..., 3]

    # Convert boxes to corner coordinates
    b1_x1, b1_y1 = cx1 - w1 / 2, cy1 - h1 / 2
    b1_x2, b1_y2 = cx1 + w1 / 2, cy1 + h1 / 2
    b2_x1, b2_y1 = cx2 - w2 / 2, cy2 - h2 / 2
    b2_x2, b2_y2 = cx2 + w2 / 2, cy2 + h2 / 2

    # Intersection area (clamped so disjoint boxes give 0)
    inter_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    inter_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    inter = inter_w * inter_h

    # Union area
    union = (b1_x2 - b1_x1) * (b1_y2 - b1_y1) + (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter + eps
    iou = inter / union

    # Squared distance between box centers
    cdist = (cx1 - cx2) ** 2 + (cy1 - cy2) ** 2

    # Squared diagonal of the smallest box enclosing both
    c_x1, c_y1 = torch.min(b1_x1, b2_x1), torch.min(b1_y1, b2_y1)
    c_x2, c_y2 = torch.max(b1_x2, b2_x2), torch.max(b1_y2, b2_y2)
    c_diag = (c_x2 - c_x1) ** 2 + (c_y2 - c_y1) ** 2 + eps

    # Aspect-ratio consistency term. It must compare width/height ratios;
    # using corner coordinates here would make the loss depend on where the
    # boxes sit in the image rather than on their shapes.
    v = (4 / math.pi ** 2) * torch.pow(
        torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2
    )
    # alpha is a trade-off weight and is treated as a constant for autograd.
    with torch.no_grad():
        alpha = v / (1 - iou + v + eps)

    ciou = iou - (cdist / c_diag + alpha * v)
    return 1 - ciou  # CIoU loss


In [37]:
import torch.nn as nn
# Summed (not averaged) binary cross-entropy. nn.BCELoss operates on
# probabilities, so raw logits must be passed through a sigmoid before use.
bce = nn.BCELoss(reduction='sum')


In [38]:
def detection_loss(preds, targets, anchors, device, lambda_box=5.0, lambda_obj=1.0, lambda_cls=1.0):
    """Weighted sum of box, objectness and class losses over all scales.

    This is still a structural placeholder: no target assignment is done, so
    the box loss is zero and objectness/class logits are scored against
    all-zero targets.

    Args:
        preds: List of prediction tensors from the detection head, each of
            shape ``[B, A * (5 + num_classes), H, W]``.
        targets: List of ground truth ``[class, x, y, w, h]`` (currently unused).
        anchors: Anchor sizes per scale. Only the number of anchors per scale
            is read here; when no entry exists for a scale, 3 is assumed.
        device: Device on which the returned loss tensor lives.
        lambda_box, lambda_obj, lambda_cls: Weights of the three loss terms.

    Returns:
        Scalar tensor with the total loss (also when ``preds`` is empty).

    Raises:
        ValueError: If a prediction's channel count cannot be split evenly
            across the anchors, or leaves fewer than 5 values per anchor.
    """
    default_anchors_per_scale = 3

    # Tensor accumulators, so an empty `preds` still yields a tensor on which
    # callers can call `.item()` / `.backward()`.
    total_box_loss = torch.zeros((), device=device)
    total_obj_loss = torch.zeros((), device=device)
    total_cls_loss = torch.zeros((), device=device)

    for scale_i, pred in enumerate(preds):
        B, C, H, W = pred.shape

        # Derive the layout from the inputs instead of hard-coding 3 anchors
        # and a single class.
        if anchors and scale_i < len(anchors):
            num_anchors = len(anchors[scale_i])
        else:
            num_anchors = default_anchors_per_scale
        if num_anchors <= 0 or C % num_anchors != 0:
            raise ValueError(
                f"scale {scale_i}: {C} channels cannot be split across {num_anchors} anchors"
            )
        num_outputs = C // num_anchors
        if num_outputs < 5:
            raise ValueError(
                f"scale {scale_i}: expected at least 5 values per anchor, got {num_outputs}"
            )

        pred = pred.reshape(B, num_anchors, num_outputs, H, W).permute(0, 1, 3, 4, 2)
        # pred shape -> [B, A, H, W, 5 + num_classes] (x, y, w, h, obj, class...)

        obj_logits = pred[..., 4]
        cls_logits = pred[..., 5:]

        # Here you'd match targets to grid cells, compute CIoU and BCE losses.
        # For demo purposes only the basic structure is simulated (no targets).
        # Targets are created on the prediction's own device so they always match it.
        obj_target = torch.zeros_like(obj_logits)
        cls_target = torch.zeros_like(cls_logits)

        box_loss = torch.zeros((), device=pred.device)
        # The logits form is numerically stable; sigmoid followed by BCELoss
        # saturates (and is clamped) for large-magnitude logits.
        obj_loss = F.binary_cross_entropy_with_logits(obj_logits, obj_target, reduction='sum')
        cls_loss = F.binary_cross_entropy_with_logits(cls_logits, cls_target, reduction='sum')

        total_box_loss = total_box_loss + box_loss.to(device)
        total_obj_loss = total_obj_loss + obj_loss.to(device)
        total_cls_loss = total_cls_loss + cls_loss.to(device)

    total_loss = lambda_box * total_box_loss + lambda_obj * total_obj_loss + lambda_cls * total_cls_loss
    return total_loss


In [39]:
# Use the MPS backend when PyTorch reports it as available, otherwise the CPU.
use_mps = torch.backends.mps.is_available()
device = torch.device("mps" if use_mps else "cpu")

x = torch.randn(1, 3, 640, 640).to(device)
model = BacteriaDetector(num_classes=1).to(device)

# Three anchor size pairs for each of the three prediction scales.
anchors = [
    [(10, 13), (16, 30), (33, 23)],
    [(30, 61), (62, 45), (59, 119)],
    [(116, 90), (156, 198), (373, 326)],
]

# Forward pass followed by the (no-target) loss.
preds = model(x)
loss = detection_loss(preds, targets=[], anchors=anchors, device=device)
print("Total loss:", loss.item())


Total loss: 35150.421875


In [44]:
from model import BacteriaDetector, detection_loss



ImportError: cannot import name 'BacteriaDetector' from 'model' (/Users/ashithrai/Documents/div/model.py)

In [43]:
import os
# Show the kernel's current working directory.
# NOTE(review): presumably a debugging aid for the failing `from model import ...`
# cell; confirm, and clear the output before sharing since it prints an absolute local path.
print(os.getcwd())


/Users/ashithrai/Documents/div
