FCOS Dataset Testing 

Importing Libraries

In [1]:
import cv2
import torch
import torchvision
from torchvision import transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

Loading the Model and Defining the Image Transformation

In [2]:
# Pretrained FCOS detector, put straight into evaluation (inference) mode.
# nn.Module.eval() returns the module itself, so the call can be chained.
model = torchvision.models.detection.fcos_resnet50_fpn(pretrained=True).eval()

# Preprocessing pipeline: a single step that converts an image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])



Detect Objects in a Frame

In [6]:
def detect_objects_in_frame(frame, score_threshold=0.5):
    """Run the detector on one video frame and draw the detections on it.

    Args:
        frame: Image array in OpenCV's BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before inference). The array is annotated
            IN PLACE.
        score_threshold: Only detections whose confidence score is strictly
            greater than this value are drawn. Defaults to 0.5.

    Returns:
        The same ``frame`` object, with a green rectangle and a
        ``"<label id>:<score>"`` caption drawn for every kept detection.
    """
    # The pipeline works on RGB PIL images, while OpenCV frames are BGR.
    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    img_tensor = transform(img).unsqueeze(0)  # Add batch dimension

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        predictions = model(img_tensor)[0]  # single-image batch -> first result

    # Convert the result tensors to plain Python numbers once, instead of
    # calling int()/.item() on individual tensor elements inside the loop.
    boxes = predictions['boxes'].tolist()
    scores = predictions['scores'].tolist()
    labels = predictions['labels'].tolist()

    for box, score, label in zip(boxes, scores, labels):
        if score > score_threshold:
            xmin, ymin, xmax, ymax = (int(coord) for coord in box)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

            # Caption normally sits just above the box. When the box touches
            # the top of the image that position is off-frame, so the caption
            # is moved just inside the box instead of being clipped.
            label_y = ymin - 10
            if label_y < 10:
                label_y = ymin + 20
            label_text = f'{label}:{score:.2f}'
            cv2.putText(frame, label_text, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return frame

Process the Video

In [7]:
def process_video(input_video_path, output_video_path, codec=None):
    """Run object detection on every frame of a video and save the result.

    Each frame is read from ``input_video_path``, passed through
    ``detect_objects_in_frame`` and written to ``output_video_path`` with the
    input's frame size and frame rate. Progress is printed every 25 frames.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the annotated video to write.
        codec: Optional four-character FourCC code (e.g. ``'XVID'``,
            ``'mp4v'``). When omitted, ``'mp4v'`` is used for ``.mp4`` /
            ``.m4v`` / ``.mov`` outputs and ``'XVID'`` for everything else.

    Returns:
        None. If the input cannot be opened or the output writer cannot be
        created, an error message is printed and the function returns early.
    """
    # Open video file
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        print("Error opening video file.")
        return

    out = None
    try:
        # Get video properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some sources report 0 (or NaN) FPS, which a VideoWriter cannot
            # use; fall back to a conventional frame rate.
            fps = 25.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Pick a codec that matches the output container unless the caller
        # chose one explicitly.
        if codec is None:
            mp4_like = str(output_video_path).lower().endswith(('.mp4', '.m4v', '.mov'))
            codec = 'mp4v' if mp4_like else 'XVID'
        fourcc = cv2.VideoWriter_fourcc(*codec)

        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
        if not out.isOpened():
            # Without this check a failed writer silently drops every frame.
            print("Error opening output video file.")
            return

        frame_count = 0  # Number of frames processed so far
        while True:
            ret, frame = cap.read()
            if not ret:  # End of stream or read failure
                break

            frame_count += 1

            # Annotate the frame and append it to the output video
            processed_frame = detect_objects_in_frame(frame)
            out.write(processed_frame)

            if frame_count % 25 == 0:
                print(f'Processing frame {frame_count}/{total_frames}')
    finally:
        # Always release the capture and writer, even if inference raises,
        # so file handles are not leaked and the output file is finalized.
        # No GUI window is opened here, so cv2.destroyAllWindows() is not
        # called (it raises on headless OpenCV builds).
        cap.release()
        if out is not None:
            out.release()

Testing Example on Video

In [None]:
# Source clip to annotate and destination for the annotated copy.
input_video_path = 'C:/Users/Maazi/Desktop/Final Year Project/FaisalTown.mp4'
output_video_path = 'C:/Users/Maazi/Desktop/Final Year Project/output_video.mp4'

# Run detection over the whole clip and write the result to disk.
process_video(
    input_video_path=input_video_path,
    output_video_path=output_video_path,
)