## Initialize Environment


In [1]:
!python -m venv venv

Remember to switch the notebook kernel to the newly created `venv` environment before running the cells below.


In [None]:
%pip install opencv-python numpy Pillow ultralytics

## Import Dependencies


In [2]:
import cv2
from ultralytics import YOLO
from PIL import Image

In [3]:
# Target frame size as (width, height); WIDTH and HEIGHT are also exposed individually.
SIZE = (WIDTH, HEIGHT) = (640, 480)

# Weights of the classification model. Forward slashes are used so the relative
# path resolves on Windows as well as on Linux/macOS (backslashes only work on Windows).
CLS_MODEL = 'models/1014/weights/last.pt'

# Weights of the segmentation model.
SEG_MODEL = 'yolov8s-seg.pt'

# Keyword arguments forwarded to the segmentation model on every call (see apply_mask).
SEG_KWARG = {
    'classes': 0,          # keep only class index 0 (presumably "person" for these weights - confirm)
    'imgsz': WIDTH,        # inference size follows the frame width
    # NOTE(review): newer Ultralytics releases may expect `show_boxes` instead of
    # `boxes` - verify against the installed version.
    'boxes': True,
    'save': False,
    'show_labels': False,
    'show_conf': False,
    'max_det': 1,          # at most one detection per frame
    'verbose': False
}

## Define Functions


In [4]:
def init_models(cls=CLS_MODEL, seg=SEG_MODEL):
    """Load both YOLO models into module-level globals.

    Args:
        cls: Weights passed to ``YOLO`` for the classification model.
        seg: Weights passed to ``YOLO`` for the segmentation model.

    Side effects:
        Sets the globals ``img_seg_model`` and ``img_class_model``.
    """
    global img_seg_model, img_class_model
    # The segmentation model is constructed first, then the classifier.
    img_seg_model, img_class_model = YOLO(seg), YOLO(cls)

In [5]:
def pre_process(image, size=SIZE):
    """Convert a BGR frame to grayscale.

    Args:
        image: Frame to convert, or ``None``.
        size: Currently unused; the resize step is disabled.

    Returns:
        The grayscale image, or ``None`` (after printing a message) when
        ``image`` is ``None``.
    """
    if image is not None:
        # Only the colour conversion is applied; no resizing happens here.
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    print("Unable to read image")
    return None

In [6]:
def apply_mask(image, _kwarg=SEG_KWARG):
    """Run the segmentation model on an image and return the rendered result.

    Requires ``init_models()`` to have been called so that the global
    ``img_seg_model`` exists.

    Args:
        image: Array-like image; it is wrapped with ``Image.fromarray``
            before being handed to the model.
        _kwarg: Keyword arguments forwarded to the segmentation model call.

    Returns:
        The first result rendered via ``plot(conf=False, labels=False, pil=True)``.
    """
    # The model is fed a PIL image rather than the raw array.
    pil_image = Image.fromarray(image)

    results = img_seg_model(pil_image, **_kwarg)
    first_result = results[0]

    return first_result.plot(conf=False, labels=False, pil=True)

In [7]:
def predict_distraction(image):
    """Classify the distraction shown in a single frame.

    The frame goes through ``pre_process`` and ``apply_mask``; the masked
    result is then classified with the global ``img_class_model``. Requires
    ``init_models()`` to have been called and ``CLASS_NAMES`` to be defined.

    Args:
        image: Frame to analyse, as accepted by ``pre_process``.

    Returns:
        tuple: ``(img, cls)`` where ``img`` is the output of ``apply_mask``
        and ``cls`` is the ``CLASS_NAMES`` entry for the top-1 class index.

    Raises:
        ValueError: If ``pre_process`` returns ``None`` (no image supplied).
    """
    prepared = pre_process(image)
    if prepared is None:
        # Fail with a clear message instead of letting None reach Image.fromarray.
        raise ValueError("predict_distraction received no image to process")

    masked = apply_mask(prepared)

    # Index of the most probable class in the first (only) result.
    top1_index = img_class_model(masked, verbose=False)[0].probs.top1

    # Translate the class index into its name.
    return masked, CLASS_NAMES[top1_index]

## MAIN


In [None]:
# Class names, keyed by the class index returned by the classifier
CLASS_NAMES = {
    0: 'good ',
    1: 'texting ',
    2: 'talking ',
    3: 'radio ',
    4: 'drink ',
    5: 'behind ',
    6: 'passenger '
}

# How many model predictions to run per second of video
times_per_second = 1
# Frame rate assumed when the capture backend does not report a usable one
FALLBACK_FPS = 30

# Initialize the camera
cap = cv2.VideoCapture(0)

try:
    # Number of frames to wait between model predictions.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        # The backend reported 0, a negative value or NaN: use the fallback
        # so the modulo below never divides by zero.
        fps = FALLBACK_FPS
    # Clamp to 1 so a times_per_second above the frame rate also stays valid.
    wait = max(1, int(fps // times_per_second))

    # Initialize the models
    init_models()

    camera_ready = cap.isOpened()
    if not camera_ready:
        print("Unable to open camera 0")

    # Start the loop
    frames = 0
    cls = 'None'
    while camera_ready:

        # Read the frame
        ret, frame = cap.read()

        if not ret:
            print("No more frames")
            break

        frame = cv2.resize(frame, SIZE)

        # Predict the distraction only on every `wait`-th frame; on those
        # frames the processed image replaces the raw frame for display.
        if frames % wait == 0:
            frame, cls = predict_distraction(frame)

        # Put the class name (and frame counter) on the frame and show it
        cv2.putText(frame, cls + str(frames), (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow('frame', frame)

        # Not used within this cell; kept so the last displayed frame stays
        # available under this name after the loop.
        framef = frame
        frames += 1

        # Press q to early stop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a prediction raised,
    # so the device is not left locked by the running kernel.
    cap.release()
    cv2.destroyAllWindows()