In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.69-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.69-py3-none-any.whl (914 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m914.6/914.6 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.14-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.69 ultralytics-thop-2.0.14


In [2]:
import cv2
from ultralytics import YOLO

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
def process_video(input_path, output_path, model_path,
                  frame_skip=3, resize_width=640, drowsy_threshold=15,
                  verbose=True):
    """Run a YOLO detector over a video and write an annotated copy.

    Every ``frame_skip``-th frame is resized to ``resize_width`` pixels wide
    (aspect ratio preserved), passed through the model, annotated with the
    detected boxes and written to ``output_path``. When class 0 or class 2
    is detected on more than ``drowsy_threshold`` consecutive *processed*
    frames, a red "DROWSY" banner is drawn on the frame.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the annotated ``mp4v`` video to write.
        model_path: Path of the YOLO weights passed to ``YOLO(...)``.
        frame_skip: Process one frame out of this many (must be >= 1).
        resize_width: Width in pixels of the processed/output frames.
        drowsy_threshold: Number of consecutive processed frames showing a
            drowsiness cue that must be exceeded before "DROWSY" is drawn.
        verbose: Forwarded to the model call; pass ``False`` to silence the
            per-frame inference log.

    Raises:
        ValueError: If ``frame_skip`` or ``resize_width`` is smaller than 1.
        IOError: If the input video cannot be opened or reports no frame
            size, or if the output writer cannot be opened.
    """
    if frame_skip < 1:
        raise ValueError("frame_skip must be >= 1")
    if resize_width < 1:
        raise ValueError("resize_width must be >= 1")

    model = YOLO(model_path)
    cap = cv2.VideoCapture(input_path)
    out = None

    # try/finally guarantees both handles are released even when inference
    # or drawing raises part-way through the video.
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open input video: {input_path}")

        src_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        src_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        if src_width <= 0 or src_height <= 0:
            raise IOError(f"Input video reports no frame size: {input_path}")

        # Scale the height by the same factor as the width.
        height = max(1, int(src_height * resize_width / src_width))

        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps or fps <= 0:
            # Some containers report no frame rate; 30 is an assumed fallback.
            fps = 30.0

        # Only one frame in `frame_skip` is written, so the writer must run
        # at a proportionally lower rate or the output plays too fast.
        out = cv2.VideoWriter(output_path,
                              cv2.VideoWriter_fourcc(*'mp4v'),
                              fps / frame_skip, (resize_width, height))
        if not out.isOpened():
            raise IOError(f"Could not open output video for writing: {output_path}")

        eye_closed_frames = 0  # consecutive processed frames with a cue
        frame_count = 0        # frames read from the source so far

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            if frame_count % frame_skip != 0:
                continue

            frame = cv2.resize(frame, (resize_width, height))

            # The model returns one result per input image; we pass one image.
            results = model(frame,
                            conf=0.5,
                            iou=0.45,
                            max_det=3,
                            verbose=verbose)[0]

            # Class ids: 0 = eyes_closed, 2 = yawning. These indices come
            # from the trained weights -- verify against model.names.
            detected = {int(box.cls[0]) for box in results.boxes}
            eyes_closed = 0 in detected
            yawning = 2 in detected

            if eyes_closed or yawning:
                eye_closed_frames += 1
                if eye_closed_frames > drowsy_threshold:
                    cv2.putText(frame, "DROWSY", (50, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            else:
                eye_closed_frames = 0

            for box in results.boxes:
                cls = int(box.cls[0])
                label = model.names[cls]
                conf = float(box.conf[0])
                x1, y1, x2, y2 = box.xyxy[0]
                left, top = int(x1), int(y1)
                cv2.rectangle(frame, (left, top), (int(x2), int(y2)),
                              (0, 255, 0), 2)
                # Keep the caption inside the frame for boxes near the top edge.
                label_y = max(top - 10, 20)
                cv2.putText(frame, f"{label} {conf:.2f}",
                            (left, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            out.write(frame)
    finally:
        cap.release()
        if out is not None:
            out.release()

In [4]:
# Read-only Kaggle inputs: the clip to analyse and the trained weights.
input_video = '/kaggle/input/testing-video/test_video.mp4'
model_path = '/kaggle/input/yolo-model/pytorch/default/1/best.pt'

# The annotated video goes to Kaggle's writable working directory.
output_video = '/kaggle/working/output_video.mp4'

process_video(
    input_path=input_video,
    output_path=output_video,
    model_path=model_path,
)


0: 384x640 1 eyes_closed, 75.9ms
Speed: 6.2ms preprocess, 75.9ms inference, 249.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 eyes_closed, 8.6ms
Speed: 1.1ms preprocess, 8.6ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 eyes_closed, 8.6ms
Speed: 1.2ms preprocess, 8.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 eyes_closed, 8.8ms
Speed: 1.1ms preprocess, 8.8ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 eyes_closed, 7.9ms
Speed: 1.1ms preprocess, 7.9ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 eyes_closed, 9.5ms
Speed: 1.1ms preprocess, 9.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 eyes_closed, 8.3ms
Speed: 1.2ms preprocess, 8.3ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 eyes_closed, 9.0ms
Speed: 1.2ms preprocess, 9.0ms inference, 1.2ms postprocess 