# In [14]:
# Object Counting for single videos
import torch
import torchvision.transforms as transforms
from torchvision.models.detection import ssd300_vgg16
import cv2
import numpy as np

# Select the compute device: CUDA when a GPU is visible, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the pre-trained SSD300-VGG16 detector, switch it to evaluation mode
# and move it onto the selected device (eval() and to() both return the module)
SSD_model = ssd300_vgg16(pretrained=True, progress=False).eval().to(device)

# Label-id -> name lookup table (assuming COCO classes); the list position is
# the integer label emitted by the detector, 'N/A' marks unused ids
class_labels = [
    # 0-9
    '__background__', 'person', 'bicycle', 'car', 'motorcycle',
    'airplane', 'bus', 'train', 'truck', 'boat',
    # 10-19
    'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter',
    'bench', 'bird', 'cat', 'dog', 'horse',
    # 20-29
    'sheep', 'cow', 'elephant', 'bear', 'zebra',
    'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A',
    # 30-39
    'N/A', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    # 40-49
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'N/A', 'wine glass', 'cup', 'fork', 'knife',
    # 50-59
    'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    # 60-69
    'donut', 'cake', 'chair', 'couch', 'potted plant',
    'bed', 'N/A', 'dining table', 'N/A', 'N/A',
    # 70-79
    'toilet', 'N/A', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    # 80-89
    'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    # 90
    'toothbrush',
]

# Global state shared with detect_and_count: the set of previously seen
# objects and the running total of counted objects
prev_objects, count = set(), 0

# Per-frame detection plus a running count of objects that newly appear
def _box_iou(box_a, box_b):
    """Return the intersection-over-union of two (x1, y1, x2, y2) boxes."""
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b
    inter_w = max(0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0, min(ay2, by2) - max(ay1, by1))
    intersection = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - intersection
    # Degenerate (zero-area) boxes have no meaningful overlap ratio
    return intersection / union if union > 0 else 0.0


def _seen_before(obj_id, known_objects, iou_threshold):
    """Return True if obj_id matches a same-label entry of known_objects."""
    label, box = obj_id
    return any(
        label == known_label
        and (box == known_box or _box_iou(box, known_box) >= iou_threshold)
        for known_label, known_box in known_objects
    )


def detect_and_count(frame, score_threshold=0.1, iou_threshold=0.5):
    """Detect objects in a frame and update the running object count.

    A detection is counted only when no object of the same class in the
    previous frame overlaps it by at least ``iou_threshold``. Matching is
    done against the immediately preceding frame only, so an object that is
    missed for a frame and detected again is counted a second time.

    Args:
        frame: Image as an H x W x 3 uint8 array in BGR channel order, as
            returned by ``cv2.VideoCapture.read``.
        score_threshold: Detections with a confidence score not strictly
            above this value are ignored.
        iou_threshold: Minimum intersection-over-union with a same-class box
            from the previous frame for a detection to be treated as an
            already counted object.

    Returns:
        The running total of counted objects (the global ``count``).
    """
    global prev_objects, count

    # OpenCV delivers BGR frames; the detector expects RGB input
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    tensor_img = transforms.ToTensor()(rgb_frame).unsqueeze(0).to(device)

    # Perform inference
    with torch.no_grad():
        prediction = SSD_model(tensor_img)[0]

    # Move each result tensor to the host once instead of once per detection
    boxes = prediction['boxes'].cpu().numpy().astype(int)
    scores = prediction['scores'].cpu().numpy()
    labels = prediction['labels'].cpu().numpy()

    current_objects = set()
    new_objects = set()
    for box, label_idx, score in zip(boxes, labels, scores):
        if not score > score_threshold:
            continue
        obj_id = (class_labels[int(label_idx)], tuple(int(v) for v in box))
        current_objects.add(obj_id)
        if not _seen_before(obj_id, prev_objects, iou_threshold):
            new_objects.add(obj_id)

    count += len(new_objects)
    # Remember this frame's detections so the next frame can be matched
    # against them; without this every detection is counted on every frame
    prev_objects = current_objects

    return count

# Open the video file and fail loudly if it cannot be read; otherwise the
# loop below would be skipped and an empty output file left behind
video_path = "test video.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    cap.release()
    raise RuntimeError(f"Could not open video: {video_path}")

# Get video frame properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, fps, (frame_width, frame_height))

try:
    # Process each frame until the stream ends or the user presses 'q'
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Perform object detection and update the running object count
        count = detect_and_count(frame)

        # Draw count on the frame
        cv2.putText(frame, f'Count: {count}', (100, 150),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 3)

        # Write the annotated frame to the output video
        out.write(frame)

        # Display the frame
        cv2.imshow('Object Counting', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release capture, writer and windows even if processing raised
    cap.release()
    out.release()
    cv2.destroyAllWindows()