# In [None]:
import torch
import torch.nn.functional as F
import cv2
import numpy as np
import time
from torchvision import transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the pretrained Faster R-CNN detector, move it to the chosen device and
# put it in evaluation mode.
# NOTE(review): `pretrained=True` is the legacy way of requesting weights in
# torchvision; confirm against the installed version before changing it.
model = fasterrcnn_resnet50_fpn(pretrained=True)
model = model.to(device)
model.eval()

# True when this OpenCV build reports at least one CUDA-enabled device
use_cuda_opencv = cv2.cuda.getCudaEnabledDeviceCount() > 0

# Function to preprocess frames with CUDA acceleration
def preprocess_frame(frame, target_size=(800, 800)):
    """Convert one OpenCV frame into an RGB float tensor on ``device``.

    Args:
        frame: Image array as produced by ``cv2.VideoCapture.read``. Colour
            frames from OpenCV are in BGR channel order.
        target_size: Size handed to ``cv2.cuda.resize``. It is only applied
            when ``use_cuda_opencv`` is true; on the CPU path the frame keeps
            its original size.

    Returns:
        The tensor produced by ``transforms.ToTensor`` for the (possibly
        resized) frame, with colour channels in RGB order, moved to
        ``device``.
    """
    if use_cuda_opencv:
        # Upload, resize on the GPU, then bring the result back as a numpy array
        gpu_frame = cv2.cuda_GpuMat()
        gpu_frame.upload(frame)
        gpu_frame = cv2.cuda.resize(gpu_frame, target_size)
        frame = gpu_frame.download()

    # The pretrained torchvision detector expects RGB input, whereas OpenCV
    # decodes video frames as BGR; without this swap the red and blue channels
    # are exchanged at inference time. Only 3-channel frames are converted so
    # that other layouts pass through unchanged, as they did before.
    if getattr(frame, "ndim", 0) == 3 and frame.shape[2] == 3:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # ToTensor is the only transform needed, so call it directly instead of
    # rebuilding a one-element Compose pipeline for every frame.
    return transforms.ToTensor()(frame).to(device)

# Function to process batch of frames
@torch.no_grad()
def process_batch(frames_batch, model):
    """Call ``model`` on ``frames_batch`` with gradient tracking disabled.

    Args:
        frames_batch: Input passed straight through to ``model``.
        model: Callable that performs the forward pass.

    Returns:
        Whatever ``model(frames_batch)`` returns.
    """
    return model(frames_batch)

# Function to draw detections on the frame
def draw_detections(frame, detections, threshold=0.5):
    """Overlay a box and caption for every detection scoring >= ``threshold``.

    Args:
        frame: Image that ``cv2.rectangle`` and ``cv2.putText`` draw onto.
        detections: Mapping with ``"boxes"``, ``"scores"`` and ``"labels"``
            entries that are indexed in parallel.
        threshold: Minimum score a detection needs in order to be drawn.

    Returns:
        The ``frame`` object that was passed in.
    """
    green = (0, 255, 0)
    for idx, box in enumerate(detections["boxes"]):
        confidence = detections["scores"][idx].item()
        # Written as a negated ">=" so a NaN score is skipped as well
        if not confidence >= threshold:
            continue

        left, top, right, bottom = (int(coord) for coord in box.tolist())
        class_id = detections["labels"][idx].item()
        caption = f"Object {class_id} ({confidence:.2f})"

        # Box outline first, then the caption just above its top-left corner
        cv2.rectangle(frame, (left, top), (right, bottom), green, 2)
        cv2.putText(frame, caption, (left, top - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)
    return frame

# Main function to process video with FPS calculation
def _rescale_boxes(detections, frame, frame_tensor):
    """Map detection boxes from the tensor's pixel grid onto ``frame``'s grid."""
    tensor_h, tensor_w = frame_tensor.shape[-2:]
    frame_h, frame_w = frame.shape[:2]
    if (tensor_h, tensor_w) == (frame_h, frame_w):
        return detections

    boxes = detections["boxes"]
    scale_x = frame_w / tensor_w
    scale_y = frame_h / tensor_h
    # Boxes are unpacked as (x1, y1, x2, y2) by draw_detections
    scale = boxes.new_tensor([scale_x, scale_y, scale_x, scale_y])
    return {**detections, "boxes": boxes * scale}


def _detect_and_show(frames_batch, original_frames, model, batch_start):
    """Run one batch through the model, then annotate and display its frames.

    Returns ``(elapsed_seconds, quit_requested)`` where ``elapsed_seconds`` is
    the time from ``batch_start`` until all detections have been drawn.
    """
    batch_tensor = torch.stack(frames_batch).to(device)
    outputs = process_batch(batch_tensor, model)

    for frame, frame_tensor, detections in zip(
            original_frames, frames_batch, outputs):
        draw_detections(frame, _rescale_boxes(detections, frame, frame_tensor))

    # One throughput figure per batch: all its frames were produced together
    elapsed = time.perf_counter() - batch_start
    fps = len(original_frames) / elapsed if elapsed > 0 else 0.0

    for frame in original_frames:
        cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
        cv2.imshow("Real-Time Object Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            return elapsed, True
    return elapsed, False


def process_video(video_path, model, batch_size=4):
    """Run object detection over a video file and display annotated frames.

    Frames are read with OpenCV, preprocessed with ``preprocess_frame`` and
    sent to ``model`` in groups of ``batch_size``. A final, smaller group is
    processed when the video ends part-way through a batch. Pressing ``q`` in
    the display window stops processing. The average frames-per-second figure
    is printed when processing finishes normally.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        model: Detection model forwarded to ``process_batch``.
        batch_size: Number of frames per inference call; must be at least 1.

    Returns:
        None. If the video cannot be opened an error message is printed and
        the function returns immediately.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A non-positive batch size would buffer frames forever without ever
    # running inference, so reject it before touching the video.
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    video_capture = cv2.VideoCapture(video_path)

    if not video_capture.isOpened():
        print("Error: Could not open video file.")
        return

    frames_batch = []
    original_frames = []  # Untouched frames, kept for drawing and display
    frame_count = 0
    total_time = 0.0  # Accumulated per-batch processing time for the average

    try:
        batch_start = time.perf_counter()
        while True:
            ret, frame = video_capture.read()
            if ret:
                original_frames.append(frame)
                frames_batch.append(preprocess_frame(frame))

            # Flush on a full batch, or on end-of-video with frames pending
            batch_ready = len(frames_batch) >= batch_size
            if frames_batch and (batch_ready or not ret):
                elapsed, quit_requested = _detect_and_show(
                    frames_batch, original_frames, model, batch_start)
                frame_count += len(frames_batch)
                total_time += elapsed

                frames_batch = []
                original_frames = []

                if quit_requested:
                    break
                # Restart the clock after display so the next batch measures
                # only its own read, preprocess, inference and drawing time.
                batch_start = time.perf_counter()

            if not ret:
                break  # End of video
    finally:
        # Release the capture and windows even if processing raised
        video_capture.release()
        cv2.destroyAllWindows()

    avg_fps = frame_count / total_time if total_time > 0 else 0
    print(f"Average FPS: {avg_fps:.2f}")

# Entry point of the script: run detection on the sample clip with the
# module-level model, four frames per inference batch.
video_path = "VID_20240320164919_F (online-video-cutter.com).mp4"
process_video(video_path=video_path, model=model, batch_size=4)
