In [3]:
import torch
import torchvision
import cv2
import numpy as np
import torchvision.transforms as T
import time

# Load the pre-trained Faster R-CNN model.
# torchvision >= 0.13 selects checkpoints through the ``weights`` argument and
# deprecates ``pretrained=True``; older releases only understand ``pretrained``.
# Feature-detect the weights enum so the cell runs on either side of that change.
_detection_models = torchvision.models.detection
_frcnn_weights = getattr(_detection_models, "FasterRCNN_ResNet50_FPN_Weights", None)
if _frcnn_weights is not None:
    # COCO_V1 is the checkpoint the legacy ``pretrained=True`` flag maps to.
    model = _detection_models.fasterrcnn_resnet50_fpn(weights=_frcnn_weights.COCO_V1)
else:
    model = _detection_models.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()  # Set the model to evaluation mode

# Function to preprocess frames
def preprocess_frame(frame):
    """Turn a single image into a tensor by applying ``T.ToTensor``.

    No colour-space conversion happens here; the caller is responsible for
    handing in the channel order the model expects.

    Args:
        frame: Image to convert. Presumably an H x W x C uint8 array, in which
            case ``ToTensor`` yields a C x H x W float tensor scaled to
            [0, 1] -- confirm against the caller.

    Returns:
        Whatever ``T.ToTensor()`` produces for ``frame``.
    """
    # A one-element Compose is just that element, so call ToTensor directly.
    to_tensor = T.ToTensor()
    return to_tensor(frame)

# Function to display FPS
def display_fps(frame, fps):
    """Draw the frames-per-second reading near the top-left corner of ``frame``.

    Args:
        frame: Image passed straight to ``cv2.putText``, which draws onto it.
        fps: Numeric frame rate; rendered with two decimal places.

    Returns:
        None.
    """
    origin = (10, 30)      # position handed to cv2.putText as the text origin
    colour = (255, 0, 0)   # blue if the frame is in OpenCV's BGR channel order
    font_scale = 1
    thickness = 2
    cv2.putText(
        frame,
        f"FPS: {fps:.2f}",
        origin,
        cv2.FONT_HERSHEY_SIMPLEX,
        font_scale,
        colour,
        thickness,
    )

# Initialize webcam
cap = cv2.VideoCapture(0)  # Use 0 for the default camera

if not cap.isOpened():
    # Raise instead of calling exit(): in an IPython/Jupyter kernel exit() asks
    # the kernel to shut down (losing all notebook state), and in a plain
    # script it terminates with status 0 even though this is a failure.
    cap.release()
    raise RuntimeError("Error: Could not open webcam.")

print("Press 'q' to exit the real-time detection.")

# State shared with the capture loop
confidence_threshold = 0.5  # Initial confidence threshold
start_time = time.time()    # Reference point for the running FPS average
frame_count = 0             # Frames read so far

# Main capture / detect / draw loop.
# Keys: 'q' quits, '+' raises and '-' lowers the confidence threshold by 0.1.
# The loop is wrapped in try/finally so the camera handle and the OpenCV
# window are released even when inference raises or the kernel is interrupted.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame.")
            break

        frame_count += 1

        # Convert the frame from BGR (OpenCV format) to RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Preprocess the frame for the model
        frame_tensor = preprocess_frame(rgb_frame)

        # Perform detection; no_grad because this is inference only
        with torch.no_grad():
            predictions = model([frame_tensor])

        # A single frame was passed in, so only the first result is used.
        detections = predictions[0]
        # Convert to plain Python lists once per frame rather than indexing
        # the tensors and calling .item()/.tolist() for every detection.
        boxes = detections['boxes'].tolist()
        scores = detections['scores'].tolist()

        # Draw bounding boxes for predictions above the threshold
        for box, score in zip(boxes, scores):
            if score > confidence_threshold:
                x_min, y_min, x_max, y_max = map(int, box)
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)  # Green box
                label = f"Object: {score:.2f}"
                cv2.putText(frame, label, (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Running average FPS since start_time (not an instantaneous rate)
        elapsed_time = time.time() - start_time
        fps = frame_count / elapsed_time
        display_fps(frame, fps)

        # Display the frame with detections
        cv2.imshow("Real-Time Object Detection", frame)

        # Check for user input
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):  # Exit when 'q' is pressed
            break
        elif key == ord('+'):  # Increase confidence threshold
            # round() keeps repeated 0.1 steps from drifting (e.g. 0.7999999999999999)
            confidence_threshold = round(min(confidence_threshold + 0.1, 1.0), 1)
            print(f"Confidence Threshold: {confidence_threshold}")
        elif key == ord('-'):  # Decrease confidence threshold
            confidence_threshold = round(max(confidence_threshold - 0.1, 0.1), 1)
            print(f"Confidence Threshold: {confidence_threshold}")
finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()

    
    


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to C:\Users\Amritanshu Bhardwaj/.cache\torch\hub\checkpoints\fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:06<00:00, 26.6MB/s] 


Press 'q' to exit the real-time detection.
