In [2]:
import cv2
from ultralytics import YOLO
import pandas as pd
import json
import os
import time

duration_sec = 30  
output_video_path = "output_live.mp4"
output_csv_path = "results_live.csv"
output_json_path = "results_live.json"

model = YOLO("yolov8n.pt")
cap = cv2.VideoCapture(0)

target_size = (320, 180)
fps = cap.get(cv2.CAP_PROP_FPS) or 30 

out = cv2.VideoWriter(output_video_path,
                      cv2.VideoWriter_fourcc(*'mp4v'),
                      fps,
                      target_size)

log_data = []
frame_count = 0
start_time = time.time()

while cap.isOpened():
    ret, frame = cap.read()
    if not ret or (time.time() - start_time) > duration_sec:
        break

    frame = cv2.resize(frame, target_size)
    frame_count += 1
    timestamp = round(time.time() - start_time, 2)

    results = model.predict(source=frame, device='cpu')[0]

    car_count = 0
    bus_count = 0
    truck_count = 0
    person_count = 0

    for box in results.boxes.data.tolist():
        x1, y1, x2, y2, conf, cls = box
        cls = int(cls)
        label = None
        color = (255, 255, 255)

        if cls == 0:
            person_count += 1
            label = f"Person {conf:.2f}"
            color = (255, 255, 0)
        elif cls == 2:
            car_count += 1
            label = f"Car {conf:.2f}"
            color = (0, 255, 0)
        elif cls == 5:
            bus_count += 1
            label = f"Bus {conf:.2f}"
            color = (0, 165, 255)
        elif cls == 7:
            truck_count += 1
            label = f"Truck {conf:.2f}"
            color = (255, 0, 0)

        if label:
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 1)
            cv2.putText(frame, label, (int(x1), int(y1) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1)

    out.write(frame)

    log_data.append({
        "timestamp_sec": timestamp,
        "car_count": car_count,
        "bus_count": bus_count,
        "truck_count": truck_count,
        "person_count": person_count
    })

cap.release()
out.release()

df = pd.DataFrame(log_data)
df.to_csv(output_csv_path, index=False)

with open(output_json_path, 'w') as f:
    json.dump(log_data, f, indent=4)

print(f"Done capturing live feed!")
print(f"Video saved to {output_video_path}")
print(f"CSV: {output_csv_path}")
print(f"JSON: {output_json_path}")



0: 384x640 2 persons, 73.4ms
Speed: 6.8ms preprocess, 73.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 79.1ms
Speed: 3.0ms preprocess, 79.1ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 80.6ms
Speed: 5.2ms preprocess, 80.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 72.1ms
Speed: 2.5ms preprocess, 72.1ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 72.7ms
Speed: 1.9ms preprocess, 72.7ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 63.8ms
Speed: 2.8ms preprocess, 63.8ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 71.3ms
Speed: 3.1ms preprocess, 71.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 56.1ms
Speed: 1.6ms preprocess, 56.1ms inference, 1.1ms postprocess per image at shape (

Speed: 2.1ms preprocess, 64.9ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 63.1ms
Speed: 2.3ms preprocess, 63.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 62.0ms
Speed: 2.1ms preprocess, 62.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 76.3ms
Speed: 2.0ms preprocess, 76.3ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 75.2ms
Speed: 2.1ms preprocess, 75.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 61.6ms
Speed: 1.7ms preprocess, 61.6ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 58.2ms
Speed: 2.0ms preprocess, 58.2ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 58.8ms
Speed: 2.2ms preprocess, 58.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 


0: 384x640 2 persons, 76.7ms
Speed: 1.8ms preprocess, 76.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 72.2ms
Speed: 1.8ms preprocess, 72.2ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 76.2ms
Speed: 2.2ms preprocess, 76.2ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 92.0ms
Speed: 2.4ms preprocess, 92.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 104.6ms
Speed: 3.1ms preprocess, 104.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 93.7ms
Speed: 2.8ms preprocess, 93.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 112.8ms
Speed: 3.5ms preprocess, 112.8ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 145.4ms
Speed: 2.3ms preprocess, 145.4ms inference, 2.1ms postprocess per image at s


0: 384x640 3 persons, 83.5ms
Speed: 2.4ms preprocess, 83.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 79.4ms
Speed: 3.0ms preprocess, 79.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 97.7ms
Speed: 4.3ms preprocess, 97.7ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 donut, 93.8ms
Speed: 2.7ms preprocess, 93.8ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 84.8ms
Speed: 3.4ms preprocess, 84.8ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 152.1ms
Speed: 4.6ms preprocess, 152.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 117.2ms
Speed: 2.9ms preprocess, 117.2ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 114.1ms
Speed: 4.2ms preprocess, 114.1ms inference, 1.7ms postprocess per i


0: 384x640 2 persons, 128.5ms
Speed: 4.5ms preprocess, 128.5ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 135.1ms
Speed: 5.0ms preprocess, 135.1ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 112.4ms
Speed: 2.9ms preprocess, 112.4ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 149.2ms
Speed: 2.3ms preprocess, 149.2ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 1 cell phone, 175.3ms
Speed: 4.8ms preprocess, 175.3ms inference, 4.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 cell phone, 119.1ms
Speed: 3.8ms preprocess, 119.1ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 cell phone, 164.9ms
Speed: 4.7ms preprocess, 164.9ms inference, 5.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 tv, 1 cell phone, 121.3ms
Speed: