In [2]:
import csv
import os
import time

import cv2
import matplotlib.pyplot as plt
import torch
from ultralytics import YOLO

def process_video_on_gpu(video_path, output_path, csv_file, batch_size=16):
    """Count people in every frame of a video and save annotated results.

    Reads ``video_path`` frame by frame, runs YOLO inference in batches on
    the GPU, and produces three artifacts: an annotated copy of the video
    (``output_path``), a CSV of per-frame timestamps and people counts
    (``csv_file``), and a PNG line graph saved next to the CSV with the
    suffix ``_graph.png``.

    Args:
        video_path: Path of the input video.
        output_path: Path of the annotated MP4 to write.
        csv_file: Path of the CSV log to write (overwritten if present).
        batch_size: Number of frames sent to the model per inference call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A non-positive size would never trigger a flush and the whole
        # video would be buffered in memory.
        raise ValueError("batch_size must be at least 1")

    cap = cv2.VideoCapture(video_path)
    out = None
    frame_count = 0
    timestamps = []
    people_counts = []
    frames_batch = []

    try:
        if not cap.isOpened():
            # Nothing to read; avoid loading the model and writing empty outputs.
            print(f"Could not open {video_path} - skipping")
            return

        # Load YOLO model
        model = YOLO("yolo11x.pt").to("cuda")  # Ensure the model runs on GPU

        # Initialize video output
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional rate (e.g. 29.97): truncating it skews both the
        # timestamps and the playback speed of the output video.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some containers report 0/NaN; fall back instead of dividing by zero.
            fps = 30.0
        out = cv2.VideoWriter(
            output_path,
            cv2.VideoWriter_fourcc(*'mp4v'),
            fps,
            (frame_width, frame_height),
        )

        # Write CSV headers
        with open(csv_file, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["Time (s)", "People Count"])

        start_time = time.time()

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            timestamps.append(frame_count / fps)
            frames_batch.append(frame)

            # Process frames in batches
            if len(frames_batch) >= batch_size:
                process_batch(
                    frames_batch,
                    timestamps[-1],
                    model,
                    out,
                    csv_file,
                    people_counts,
                    frame_times=timestamps[-len(frames_batch):],
                )
                frames_batch = []

        # Process remaining frames
        if frames_batch:
            process_batch(
                frames_batch,
                timestamps[-1],
                model,
                out,
                csv_file,
                people_counts,
                frame_times=timestamps[-len(frames_batch):],
            )
    finally:
        # Always finalize the files, even on an error or a manual interrupt,
        # so the partially written output video remains playable.
        cap.release()
        if out is not None:
            out.release()

    # Save graph
    plt.figure(figsize=(10, 6))
    plt.plot(timestamps, people_counts, marker='o', color='b', label="People Count")
    plt.title(f"People Count Over Time - {video_path}")
    plt.xlabel("Time (s)")
    plt.ylabel("Number of People")
    plt.grid(True)
    plt.legend()
    # splitext only touches the real extension, so a CSV path without
    # ".csv" can no longer be overwritten by the PNG.
    graph_path = os.path.splitext(csv_file)[0] + '_graph.png'
    plt.savefig(graph_path)
    plt.close()

    total_time = time.time() - start_time
    print(f"Processed {video_path} - Total Frames: {frame_count}, Total Time: {total_time:.2f} seconds")


def process_batch(frames_batch, current_time, model, out, csv_file, people_counts, frame_times=None):
    """Run person detection on a batch of frames and record the results.

    For every frame the number of detections with class id 0 is appended to
    ``people_counts`` and to the CSV, the matching boxes and the count are
    drawn on the frame (in place), and the frame is written to ``out``.

    Args:
        frames_batch: List of BGR frames as returned by ``cv2.VideoCapture.read``.
        current_time: Timestamp (seconds) used for every frame when
            ``frame_times`` is not given.
        model: Loaded Ultralytics YOLO model.
        out: Open ``cv2.VideoWriter`` receiving the annotated frames.
        csv_file: Path of the CSV log; rows are appended.
        people_counts: List extended in place with one count per frame.
        frame_times: Optional per-frame timestamps, one per entry of
            ``frames_batch``. Without it all frames of the batch share
            ``current_time``.

    Raises:
        ValueError: If ``frame_times`` does not match ``frames_batch`` in length.
    """
    if not frames_batch:
        return
    if frame_times is None:
        frame_times = [current_time] * len(frames_batch)
    elif len(frame_times) != len(frames_batch):
        raise ValueError("frame_times must contain one timestamp per frame")

    input_size = 640  # Model input side length (divisible by 32)

    # OpenCV delivers BGR, but tensor inputs are expected in RGB order by
    # Ultralytics, so convert after resizing.
    rgb_frames = [
        cv2.cvtColor(cv2.resize(frame, (input_size, input_size)), cv2.COLOR_BGR2RGB)
        for frame in frames_batch
    ]

    # Build one (batch, 3, H, W) float tensor normalized to [0, 1]
    batch_tensor = (
        torch.stack([torch.from_numpy(rgb) for rgb in rgb_frames])
        .to("cuda")
        .permute(0, 3, 1, 2)
        .contiguous()
        .float()
        / 255.0
    )

    # Run YOLO inference
    results = model(batch_tensor)

    # Open the CSV once per batch instead of once per frame
    with open(csv_file, mode='a', newline='') as file:
        writer = csv.writer(file)

        for frame, frame_time, result in zip(frames_batch, frame_times, results):
            # Keep only detections of class 0 (the class counted as people)
            boxes = result.boxes
            person_boxes = boxes.xyxy[boxes.cls == 0].cpu().tolist()
            people_count = len(person_boxes)
            people_counts.append(people_count)

            # Log to CSV
            writer.writerow([frame_time, people_count])

            # Boxes are predicted in the resized 640x640 space; map them
            # back onto the original frame before drawing.
            scale_x = frame.shape[1] / input_size
            scale_y = frame.shape[0] / input_size
            for x1, y1, x2, y2 in person_boxes:
                cv2.rectangle(
                    frame,
                    (int(x1 * scale_x), int(y1 * scale_y)),
                    (int(x2 * scale_x), int(y2 * scale_y)),
                    (0, 255, 0),
                    2,
                )

            cv2.putText(frame, f"People Count: {people_count}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            out.write(frame)


def main():
    """Process the ten numbered input videos one after another.

    For each index 1-10 the input ``video (N).mp4`` is processed into
    ``output (N).mp4`` and ``output (N).csv``.
    """
    jobs = [
        {
            "input": f"video ({index}).mp4",
            "output": f"output ({index}).mp4",
            "csv": f"output ({index}).csv",
        }
        for index in range(1, 11)
    ]

    for job in jobs:
        process_video_on_gpu(job["input"], job["output"], job["csv"])


# Run the batch job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()



0: 640x640 (no detections), 51.8ms
1: 640x640 (no detections), 51.8ms
2: 640x640 (no detections), 51.8ms
3: 640x640 (no detections), 51.8ms
4: 640x640 (no detections), 51.8ms
5: 640x640 (no detections), 51.8ms
6: 640x640 (no detections), 51.8ms
7: 640x640 (no detections), 51.8ms
8: 640x640 (no detections), 51.8ms
9: 640x640 (no detections), 51.8ms
10: 640x640 (no detections), 51.8ms
11: 640x640 (no detections), 51.8ms
12: 640x640 (no detections), 51.8ms
13: 640x640 (no detections), 51.8ms
14: 640x640 (no detections), 51.8ms
15: 640x640 (no detections), 51.8ms
Speed: 0.0ms preprocess, 51.8ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 53.7ms
1: 640x640 (no detections), 53.7ms
2: 640x640 (no detections), 53.7ms
3: 640x640 (no detections), 53.7ms
4: 640x640 (no detections), 53.7ms
5: 640x640 (no detections), 53.7ms
6: 640x640 (no detections), 53.7ms
7: 640x640 (no detections), 53.7ms
8: 640x640 (no detections), 53.7ms
9: 640x640 (no detec

KeyboardInterrupt: 