In [1]:
from pathlib import Path
import cv2 
from ultralytics import YOLO
import torch

In [2]:
print(f"Is CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    

Is CUDA available: False


  return torch._C._cuda_getDeviceCount() > 0


In [3]:
# yolo11n.pt  - Nano
# yolo11s.pt  - Small
# yolo11m.pt  - Medium 
# yolo11l.pt  - Large 
# yolo11x.pt  - Extra Large

device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = YOLO("yolo11x.pt")
model.to(device)

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(96, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(192, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C3k2(
        (cv1): Conv(
          (conv): Conv2d(192, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(192, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(384, eps=0.001, momentum=0.03, affine=True, track_

In [4]:
video_path = "/home/marco/Documents/smartcooler-camera-monitor/clips/cheetos_crunchy_56.7g_v1-20250908T143944Z-1-001/cheetos_crunchy_56.7g_v1/clip_cam2_cheetos_crunchy_56.7g_v1_20250829_095416.mp4"
video = Path(video_path)
cap = cv2.VideoCapture(video)


In [5]:
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
print(f"Resolution: {width}x{height}")

original_fps = cap.get(cv2.CAP_PROP_FPS)
print(f"Original fps: {original_fps}")


Resolution: 640x480
Original fps: 30.0


In [6]:
if not cap.isOpened():
    print("Could not open the video")
else:

    paused = False
    frame_count = 0
    
    while True:
        if not paused:
            ret, frame = cap.read()
            if not ret:
                # Restart video
                cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                continue
            
            frame_count += 1
            
            # Detection with YOLO
            results = model(frame, verbose=False)  
            
            # Draw detection
            annotated_frame = results[0].plot()
            
            detections = results[0].boxes
            if detections is not None and len(detections) > 0:
                detection_info = f"Frame {frame_count}: {len(detections)} objects found"
                cv2.putText(annotated_frame, detection_info, (10, 30), 
                           cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                
                for i, box in enumerate(detections):
                    class_id = int(box.cls[0])
                    confidence = float(box.conf[0])
                    class_name = model.names[class_id]
                    if confidence > 0.5:  
                        print(f"  {class_name}: {confidence:.2f}")
            
            cv2.imshow('Smart Cooler Tests - YOLO Detection', annotated_frame)
        
        key = cv2.waitKey(33) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('p'):
            paused = not paused

    cap.release()
    cv2.destroyAllWindows()
    print("Video closed")

  handbag: 0.59
  handbag: 0.75
  handbag: 0.74
  handbag: 0.75
  handbag: 0.75
  handbag: 0.75
  handbag: 0.75
  handbag: 0.75
  handbag: 0.75
  handbag: 0.75
Video closed
