In [1]:
import cv2 
from ultralytics import YOLO 
from collections import defaultdict

# Load the sack-detection weights from the working directory.
weights_file = 'Sacks_detector.pt'
model = YOLO(weights_file)

In [2]:
# Class-index -> label mapping exposed by the loaded model.
class_list = model.names
# Bare expression so the notebook displays the mapping as the cell output.
class_list

{0: 'SACKS', 1: 'White sack', 2: 'green sack'}

In [3]:
# Path to the test clip. Forward slashes are used because a single backslash
# followed by 'P' is an invalid escape sequence in a normal string literal
# (it triggers a warning on recent Python versions) and is not portable.
input_path = 'Test_video/Problem Statement Scenario2.mp4'
cap = cv2.VideoCapture(input_path)

In [5]:
import cv2
from ultralytics import YOLO
from collections import defaultdict
import os

# 1. SETUP MODEL
# Load the detector weights; model.names maps class index -> label.
model = YOLO('Sacks_detector.pt')
class_list = model.names

# 2. OPEN INPUT VIDEO
# Forward slashes avoid backslash-escape problems inside the string literal.
input_path = 'Test_video/Problem Statement Scenario2.mp4'
cap = cv2.VideoCapture(input_path)
if not cap.isOpened():
    # Fail loudly: otherwise the processing loop below is skipped without any message.
    raise FileNotFoundError(f"Could not open input video: {input_path}")

# 3. SETUP OUTPUT VIDEO SAVING
# Create the output directory if it doesn't exist (no error if it already does).
os.makedirs('Output_video', exist_ok=True)

# Get video properties so the output matches the input.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Keep the frame rate as a float: int() would turn e.g. 29.97 into 29 and
# change the playback speed of the saved video.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # Covers 0, negative and NaN readings. 30.0 is an arbitrary fallback —
    # adjust if the real source frame rate is known.
    fps = 30.0

# Initialize VideoWriter. 'mp4v' is used for the MP4 container.
output_path = 'Output_video/output_sacks_detected.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# Horizontal counting line: y position and x extent, in pixels.
line_y_red = 430
line_x_start = 690
line_x_end = 1130

class_counts = defaultdict(int)  # label -> number of counted tracks
crossed_ids = set()              # track IDs already counted, so each is counted once

try:
    if not out.isOpened():
        raise RuntimeError(f"Could not open output video for writing: {output_path}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run tracking; persist=True is passed so the tracker is carried across calls.
        results = model.track(frame, persist=True)
        detections = results[0].boxes

        # Draw the counting line on every frame (not only when something is
        # detected) so it does not flicker in the saved video.
        cv2.line(frame, (line_x_start, line_y_red), (line_x_end, line_y_red), (0, 0, 255), 3)

        # Only process frames that have detections AND assigned track IDs.
        if detections.data is not None and detections.id is not None:
            boxes = detections.xyxy.cpu()
            class_indices = detections.cls.int().cpu().tolist()
            confidence = detections.conf.cpu()
            track_ids = detections.id.int().cpu().tolist()

            for box, class_idx, conf, track_id in zip(boxes, class_indices, confidence, track_ids):
                x1, y1, x2, y2 = map(int, box)
                # Centre of the bounding box, used as the object's position.
                cx = (x1 + x2) // 2
                cy = (y1 + y2) // 2

                class_name = class_list[class_idx]

                cv2.circle(frame, (cx, cy), 4, (0, 0, 255), -1)

                # Display Track ID and Class Name above the box.
                cv2.putText(frame, f"ID:{track_id} {class_name}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Count each track once, the first time its centre is below the line.
                # NOTE(review): a track whose first observed centre is already below
                # the line is counted too — confirm this is intended.
                if cy > line_y_red and track_id not in crossed_ids:
                    crossed_ids.add(track_id)
                    class_counts[class_name] += 1

        # Overlay the running per-class totals, one row per class.
        y_offset = 30
        for class_name, count in class_counts.items():
            cv2.putText(frame, f"{class_name}: {count}", (50, y_offset),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            y_offset += 30

        # 4. SAVE THE FRAME
        out.write(frame)

        cv2.imshow("YOLO Object tracking and Counting", frame)
        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture, the writer and the preview window, even if
    # the loop raised or the kernel was interrupted.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 640x384 1 White sack, 66.8ms
Speed: 28.0ms preprocess, 66.8ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 White sack, 126.2ms
Speed: 3.5ms preprocess, 126.2ms inference, 5.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 White sack, 140.3ms
Speed: 7.8ms preprocess, 140.3ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 White sack, 129.4ms
Speed: 6.2ms preprocess, 129.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 green sack, 164.6ms
Speed: 6.6ms preprocess, 164.6ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 White sacks, 1 green sack, 129.9ms
Speed: 5.5ms preprocess, 129.9ms inference, 2.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 green sacks, 156.9ms
Speed: 7.0ms preprocess, 156.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 green sack, 129.6ms
Speed: 5.8ms preprocess, 12