In [None]:
import torch
import numpy as np
import cv2
from torchvision import transforms as T
import cvzone
import warnings

warnings.filterwarnings('ignore')

In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Build Faster R-CNN with pretrained weights and put it straight into
# evaluation mode (`eval()` returns the module itself, so the call can be chained).
# NOTE(review): newer torchvision releases prefer a `weights=` argument over
# `pretrained=True` - confirm against the installed version before switching.
FasterRCNN_model = fasterrcnn_resnet50_fpn(pretrained=True).eval()
FasterRCNN_model

In [None]:

# Set the device to GPU if available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
FasterRCNN_model.to(device)

# Define the transformation to apply to each frame.
# torchvision.transforms is imported under the alias `T` in the import cell,
# so the bare name `transforms` does not exist and would raise a NameError.
transform = T.Compose([T.ToTensor()])

# Define function to detect birds in a frame
def detect_birds(frame, score_threshold=0.5):
    """Detect birds in a single video frame using the Faster R-CNN model.

    Args:
        frame: Image as returned by ``cv2.VideoCapture.read`` (BGR channel
            order).
        score_threshold: Minimum confidence a detection must have to be
            returned. Pass ``0.0`` to keep every detection the model emits.

    Returns:
        A numpy array with one row ``[x1, y1, x2, y2]`` per detected bird
        (empty when no bird is found).
    """
    # Label id used for 'bird' by the COCO-trained torchvision detection models
    bird_label = 16

    # OpenCV decodes frames as BGR, while the pretrained model expects RGB;
    # feeding BGR directly swaps the red and blue channels and hurts accuracy.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Convert to a tensor, add the batch dimension the model expects and
    # move it to the same device as the model
    frame_tensor = transform(rgb_frame).unsqueeze(0).to(device)

    # Perform inference without tracking gradients
    with torch.no_grad():
        prediction = FasterRCNN_model(frame_tensor)[0]

    # Keep only detections that are birds AND confident enough; without the
    # score filter every low-confidence candidate box would be drawn.
    keep = (prediction['labels'] == bird_label) & (prediction['scores'] >= score_threshold)
    bird_boxes = prediction['boxes'][keep]

    return bird_boxes.cpu().numpy()

# Load the video and fail loudly if it cannot be opened; otherwise the loop
# below would exit immediately and leave an empty output file behind.
video_path = 'test video.mp4'
video_capture = cv2.VideoCapture(video_path)
if not video_capture.isOpened():
    raise IOError(f'Could not open video file: {video_path}')

# Get video properties
frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the output
# play at the wrong speed. Some containers report 0 (or NaN), which would
# produce an invalid writer, so fall back to a default in that case.
fps = video_capture.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.0  # fallback guess when the source does not report a frame rate

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video = cv2.VideoWriter('output.avi', fourcc, fps, (frame_width, frame_height))

# try/finally guarantees the capture, the writer and the preview window are
# released even if inference raises or the kernel is interrupted mid-video.
try:
    if not output_video.isOpened():
        raise IOError('Could not open output video for writing: output.avi')

    # Process each frame of the video
    while True:
        ret, frame = video_capture.read()
        if not ret:
            # End of stream (or a read error): stop processing
            break

        # Detect birds in the frame
        bird_boxes = detect_birds(frame)

        # Draw bounding boxes around the birds and add label 'bird'
        for box in bird_boxes:
            x1, y1, x2, y2 = map(int, box)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, 'bird', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 2)

        # Write the annotated frame to the output video
        output_video.write(frame)

        # Display the frame with bounding boxes
        cv2.imshow('Object_detect', frame)

        # Allow the user to stop early by pressing 'q'
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the VideoCapture and VideoWriter objects
    video_capture.release()
    output_video.release()

    # Close all OpenCV windows
    cv2.destroyAllWindows()