# In [3]:
import torch
import torchvision.transforms as transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
import cv2
import numpy as np

# Build torchvision's Faster R-CNN (ResNet-50 FPN variant) with pre-trained
# weights and switch it to evaluation mode in one expression; Module.eval()
# returns the module itself, so the name is bound to the ready-to-use model.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm the installed version before changing the argument.
FasterRCNN_model = fasterrcnn_resnet50_fpn(pretrained=True).eval()

# Label-id -> class-name lookup table (assumed to follow the COCO category
# ids emitted by the detector). The position in the list is the label id;
# 'N/A' entries are placeholders that keep the remaining names at their ids.
class_labels = [
    "__background__", "person", "bicycle", "car", "motorcycle",            # 0-4
    "airplane", "bus", "train", "truck", "boat",                           # 5-9
    "traffic light", "fire hydrant", "N/A", "stop sign", "parking meter",  # 10-14
    "bench", "bird", "cat", "dog", "horse",                                # 15-19
    "sheep", "cow", "elephant", "bear", "zebra",                           # 20-24
    "giraffe", "N/A", "backpack", "umbrella", "N/A",                       # 25-29
    "N/A", "handbag", "tie", "suitcase", "frisbee",                        # 30-34
    "skis", "snowboard", "sports ball", "kite", "baseball bat",            # 35-39
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",  # 40-44
    "N/A", "wine glass", "cup", "fork", "knife",                           # 45-49
    "spoon", "bowl", "banana", "apple", "sandwich",                        # 50-54
    "orange", "broccoli", "carrot", "hot dog", "pizza",                    # 55-59
    "donut", "cake", "chair", "couch", "potted plant",                     # 60-64
    "bed", "N/A", "dining table", "N/A", "N/A",                            # 65-69
    "toilet", "N/A", "tv", "laptop", "mouse",                              # 70-74
    "remote", "keyboard", "cell phone", "microwave", "oven",               # 75-79
    "toaster", "sink", "refrigerator", "N/A", "book",                      # 80-84
    "clock", "vase", "scissors", "teddy bear", "hair drier",               # 85-89
    "toothbrush",                                                          # 90
]

# Define a function to perform bird detection and count birds
def detect_and_count_birds(frame, score_threshold=0.5):
    """Count the birds that Faster R-CNN detects in a single video frame.

    Args:
        frame: Image array as produced by ``cv2.VideoCapture.read`` — a
            3-channel image in OpenCV's BGR channel order.
        score_threshold: A detection is counted only when its confidence
            score is strictly greater than this value. Defaults to 0.5.

    Returns:
        int: Number of detections labelled 'bird' whose score exceeds
        ``score_threshold``.
    """
    # OpenCV hands out frames in BGR order, but the torchvision detection
    # models are trained on RGB images. Feeding BGR swaps the red and blue
    # channels and degrades detection quality, so convert first.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Convert the frame to a channel-first tensor for the model
    tensor_img = transforms.ToTensor()(rgb_frame)

    # Perform inference without tracking gradients
    with torch.no_grad():
        predictions = FasterRCNN_model([tensor_img])

    # One image went in, so there is exactly one result to read
    prediction = predictions[0]

    # Resolve the label id from the lookup table rather than hard-coding it,
    # then count matching detections with a single vectorised mask.
    bird_id = class_labels.index('bird')
    is_confident_bird = (
        (prediction['labels'] == bird_id)
        & (prediction['scores'] > score_threshold)
    )

    return int(is_confident_bird.sum())

# Open the video file
cap = cv2.VideoCapture("test video.mp4")
if not cap.isOpened():
    # Fail loudly instead of silently writing an empty output file built
    # from zero-sized frame properties.
    raise IOError('Could not open video file: test video.mp4')

# The writer is created inside the try block; keep a placeholder so the
# cleanup code can tell whether it exists.
out = None
try:
    # Get video frame properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output.avi', fourcc, fps, (frame_width, frame_height))

    # Process each frame until the stream ends or the user presses 'q'
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Perform bird detection and count birds in the frame
        bird_count = detect_and_count_birds(frame)

        # Draw count on the frame (done after detection so the overlay text
        # is never part of the image the model sees)
        cv2.putText(frame, f'Bird Count: {bird_count}', (100, 150), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3)

        # Write the annotated frame to the output video
        out.write(frame)

        # Display the frame; 'q' stops processing early
        cv2.imshow('Object Counting', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release video capture and writer objects even if detection or display
    # raised, so the input handle is freed and the output file is finalised.
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()
