### Housepital Object Detection Model

In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
import torchvision.transforms as T
from torch.utils.data import DataLoader
from pycocotools.coco import COCO
import os
from PIL import Image

def get_transform():
    return T.Compose([
        T.ToTensor(),
        T.Resize((800, 800)),  # Ensure consistent size
    ])

def validate_and_fix_boxes(target):
    boxes = target["boxes"]
    
    # Ensure boxes is 2D with shape [num_boxes, 4]
    if boxes.dim() == 1:
        boxes = boxes.view(-1, 4)  # Reshape to 2D if it's 1D
    
    # Ensure width and height are >= 1
    x_min, y_min, x_max, y_max = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    x_max = torch.max(x_min + 1, x_max)  # Ensure width is >= 1
    y_max = torch.max(y_min + 1, y_max)  # Ensure height is >= 1
    
    # Fix boxes and return them
    target["boxes"] = boxes
    return target


# Define a custom dataset class for COCO
class COCODataset(torch.utils.data.Dataset):
    def __init__(self, img_dir, annotation_file, transforms=None):
        self.img_dir = img_dir
        self.coco = COCO(annotation_file)
        self.transforms = transforms
        self.image_ids = list(self.coco.imgs.keys())

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        image_info = self.coco.loadImgs(image_id)[0]
        image_path = os.path.join(self.img_dir, image_info['file_name'])
        image = Image.open(image_path).convert("RGB")
        
        # Load annotations
        ann_ids = self.coco.getAnnIds(imgIds=image_id)
        annotations = self.coco.loadAnns(ann_ids)
        
        # Prepare targets
        boxes = []
        labels = []
        for ann in annotations:
            bbox = ann['bbox']
            # COCO uses [x, y, width, height], but PyTorch uses [x_min, y_min, x_max, y_max]
            boxes.append([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
            labels.append(ann['category_id'])
        
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        target = {"boxes": boxes, "labels": labels, "image_id": torch.tensor([image_id])}

        # Apply transforms if provided
        if self.transforms:
            image = self.transforms(image)
        target = validate_and_fix_boxes(target)

        return image, target

# Paths
train_dir = "dataset/objectDetection/train"
train_annotations = "dataset/objectDetection/train/_annotations.coco.json"
valid_dir = "dataset/objectDetection/valid"
valid_annotations = "dataset/objectDetection/valid/_annotations.coco.json"

# Datasets and loaders
batch_size = 2
train_dataset = COCODataset(train_dir, train_annotations, transforms=get_transform())
valid_dataset = COCODataset(valid_dir, valid_annotations, transforms=get_transform())
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))


loading annotations into memory...
Done (t=0.06s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [8]:
import os

# Function to get the model
def get_model(num_classes):
    # Load a pre-trained Faster R-CNN model
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    
    # Modify the classifier to match the number of classes
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    return model

# Set custom directory for model downloads
os.environ["TORCH_HOME"] = "D:\Practice Housepital Back\model\pretrain"

# Hyperparameters
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize model
num_classes = 4  # 3 pressure ulcer stages + 1 background
model = get_model(num_classes)
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [9]:
# Define optimizer and learning rate scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
num_epochs = 10

# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for images, targets in train_loader:
        targets = [validate_and_fix_boxes(target) for target in targets]
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Forward pass
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backpropagation
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()

    lr_scheduler.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

print("Training Complete!")

torch.save(model.state_dict(), "model/v1.pth")
print("Model Saved Successfully")

Epoch [1/10], Loss: 377.9687
Epoch [2/10], Loss: 459.7818
Epoch [3/10], Loss: 373.9950
Epoch [4/10], Loss: 363.8139
Epoch [5/10], Loss: 330.3437
Epoch [6/10], Loss: 327.3789
Epoch [7/10], Loss: 316.0093
Epoch [8/10], Loss: 304.4459
Epoch [9/10], Loss: 354.5105
Epoch [10/10], Loss: 291.0981
Training Complete!
Model Saved Successfully


In [12]:
# Test the model
from torchvision.ops import box_iou

model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

with torch.no_grad():
    total_images = 0
    total_correct = 0
    total_ground_truth = 0
    iou_threshold = 0.5  # Threshold for a "correct" detection

    for images, targets in valid_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Predictions
        outputs = model(images)

        for i, output in enumerate(outputs):
            # Predicted boxes and labels
            pred_boxes = output["boxes"]
            pred_labels = output["labels"]

            # Ground truth boxes and labels
            gt_boxes = targets[i]["boxes"]
            gt_labels = targets[i]["labels"]

            # IoU matching
            iou_matrix = box_iou(pred_boxes, gt_boxes)

            matched_preds = 0
            for pred_idx, pred_label in enumerate(pred_labels):
                # Find the ground truth box with the highest IoU
                max_iou, gt_idx = iou_matrix[pred_idx].max(0)

                if max_iou > iou_threshold and pred_label == gt_labels[gt_idx]:
                    matched_preds += 1
                    iou_matrix[:, gt_idx] = 0  # Mark this GT as "used"

            total_correct += matched_preds
            total_ground_truth += len(gt_boxes)
            total_images += 1

    # Precision and Recall
    precision = 100.0 * total_correct / (total_ground_truth + 1e-6)
    recall = 100.0 * total_correct / total_images

    print(f"Precision: {precision:.2f}%")
    print(f"Recall: {recall:.2f}%")


Precision: 86.92%
Recall: 86.92%


In [13]:
# Test with one data

test_dir = "dataset/objectDetection/test"
test_annotations = "dataset/objectDetection/test/_annotations.coco.json"
batch_size = 2
test_dataset = COCODataset(train_dir, train_annotations, transforms=get_transform())
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))


model.eval()
with torch.no_grad():
    for images, targets in test_loader:
        images = list(image.to(device) for image in images)
        predictions = model(images)

        for i, prediction in enumerate(predictions):
            print(f"Image {i+1}:")
            print("Predicted Boxes:", prediction["boxes"])
            print("Predicted Labels:", prediction["labels"])
            print("Confidence Scores:", prediction["scores"])
            print("----------")
        break  # Test on only the first batch


loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
Image 1:
Predicted Boxes: tensor([[377.1245,  69.6408, 631.0519, 324.8787],
        [315.0803,  33.4269, 513.2801, 334.7354],
        [465.1820,  60.8906, 664.0154, 376.1479],
        [390.5856,   3.4256, 585.0671, 252.5499],
        [361.6699,  63.4436, 467.8331, 246.0789],
        [392.9091,  37.2813, 500.4172, 223.9323],
        [331.9286,  87.2681, 438.3279, 271.7551],
        [137.9843,  40.3085, 553.5405, 383.8879],
        [314.5021,  32.8614, 509.9504, 337.5176],
        [228.8996,  68.5165, 479.6242, 303.5168],
        [436.4688,   5.6550, 565.3013, 144.9261],
        [293.9172, 106.3294, 674.0815, 298.0029],
        [300.6774,  79.2104, 403.6188, 296.5781],
        [361.9664,  64.5456, 466.9549, 245.4873],
        [415.6458,  72.3263, 527.0638, 254.8941],
        [ 82.3062,   8.6074, 551.1861, 371.3118],
        [392.6172,   4.9982, 581.4680, 250.4277],
        [573.8082,  54.3184, 686.4206, 28

In [None]:
# Save the model

torch.save(model.state_dict(), "model/v1.pth")
print("Model Saved Successfully")

Model Saved Successfully


In [6]:
# Load the model

num_classes = 4  # 3 pressure ulcer stages + 1 background
model = get_model(num_classes)
model.load_state_dict(torch.load("model/v1.pth"))
print("Model Loaded Successfully")

  model.load_state_dict(torch.load("model/v1.pth"))


Model Loaded Successfully


In [23]:
a = "dataset/train"
b = "045069bee69b9a6b457a69c564589895_0_png.rf.60f1b96146bdbb6713259dec4a7b224a.jpg"
image_path = os.path.join(a, b)
print(image_path)

image = Image.open(image_path).convert("RGB")

dataset/train\045069bee69b9a6b457a69c564589895_0_png.rf.60f1b96146bdbb6713259dec4a7b224a.jpg
