In [None]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.datasets import VOCDetection
import torchvision.transforms as T
from torch.utils.data import DataLoader
import random
import matplotlib.pyplot as plt
import numpy as np



In [None]:
# Run on the GPU when CUDA reports one, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the detector with its pretrained weights and place it on the chosen device.
# NOTE(review): `pretrained=` is reported as deprecated in newer torchvision
# releases in favour of `weights=` -- confirm the installed version before changing.
model = fasterrcnn_resnet50_fpn(pretrained=True).to(device)

# Switch to evaluation mode; left as the final expression so the cell still
# displays the module.
model.eval()



In [None]:
# ToTensor is the only preprocessing step applied to the dataset images.
transform = T.Compose([T.ToTensor()])

# Pascal VOC 2012 detection data, 'train' split, downloaded under ./data.
dataset = VOCDetection(
    root='./data',
    year='2012',
    image_set='train',
    download=True,
    transform=transform,
)

# One sample per batch, drawn in shuffled order. No seed is set in the visible
# cells, so the sample picked can differ from run to run.
dataloader = DataLoader(dataset, shuffle=True, batch_size=1)

# Take just the first batch; `targets` is not used by the visible cells.
images, targets = next(iter(dataloader))

# The model is called with a list of per-image tensors, so split the batch
# while moving each image to the model's device.
images = [img.to(device) for img in images]

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    predictions = model(images)

def visualize_prediction(image, prediction, score_threshold=0.5):
    """Show an image with the detections whose score exceeds a threshold.

    Args:
        image: Image tensor in channel-first layout. It is moved to the CPU
            and transposed to channel-last before being handed to ``imshow``.
        prediction: Mapping with ``'boxes'``, ``'labels'`` and ``'scores'``
            tensors holding one entry per detection. Each box is unpacked as
            ``(x1, y1, x2, y2)``.
        score_threshold: A detection is drawn only when its score is strictly
            greater than this value. Defaults to 0.5.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image.detach().cpu().numpy().transpose(1, 2, 0))

    # Bring each result tensor to the host once, instead of issuing a
    # device-to-host copy for every individual detection inside the loop.
    boxes = prediction['boxes'].detach().cpu().numpy()
    labels = prediction['labels'].detach().cpu().tolist()
    scores = prediction['scores'].detach().cpu().tolist()

    for (x1, y1, x2, y2), label, score in zip(boxes, labels, scores):
        if score > score_threshold:
            # Rectangle takes the top-left corner plus width and height.
            ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                       fill=False, color='red', linewidth=2))
            ax.text(x1, y1, f'Label: {label} Score: {score:.2f}', color='yellow', fontsize=12)

    plt.show()


# Display the first image of the batch together with the model's output for it.
visualize_prediction(image=images[0], prediction=predictions[0])
