# In [16]:
import cv2
import numpy as np
import onnxruntime as ort
from typing import List

class DistractionDetector:
    """Classify driver behaviour in a single video frame with an ONNX model.

    The detector owns an ONNX Runtime session and a fixed mapping from class
    index to a human-readable label (``class_dict``).  ``run_inference``
    preprocesses one frame, runs the model and returns one probability per
    model output; ``display_probabilities`` prints them next to their labels.
    """

    def __init__(self, model_path: str, input_name: str, input_width: int, input_height: int):
        """Load the model and remember the input geometry.

        Args:
            model_path: Path of the ``.onnx`` file to load.
            input_name: Name of the model input the frame tensor is fed to.
            input_width: Width, in pixels, frames are resized to.
            input_height: Height, in pixels, frames are resized to.
        """
        # Prefer the GPU, but list the CPU provider explicitly so the session
        # has a declared fallback when CUDA/cuDNN cannot be loaded.
        self.session = ort.InferenceSession(
            model_path,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        self.input_name = input_name
        self.input_width = input_width
        self.input_height = input_height

        # Class index -> label shown to the user.
        self.class_dict = {
            0: "safe driving",
            1: "texting",
            2: "talking on the phone",
            3: "drinking",
            4: "reaching behind",
        }
        # Probabilities produced by the most recent run_inference() call.
        self.outputs_softmax = []

    def softmax(self, x: np.ndarray, axis: int) -> np.ndarray:
        """Return the softmax of ``x`` along ``axis``.

        The maximum along ``axis`` is subtracted first so that ``np.exp``
        cannot overflow for large logits; this does not change the result.
        """
        shifted = x - np.max(x, axis=axis, keepdims=True)
        exp_x = np.exp(shifted)
        return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

    def run_inference(self, frame: np.ndarray) -> List[float]:
        """Preprocess one frame, run the model and return class probabilities.

        Args:
            frame: Image in OpenCV channel order (BGR, as returned by
                ``cv2.VideoCapture.read``), or an already-grayscale image of
                shape (H, W) or (H, W, 1).  Pixel values are assumed to be
                8-bit (0-255) -- TODO confirm for other callers.

        Returns:
            Softmax probabilities of the first batch element of the model's
            first output, as a plain list.  The same list is stored in
            ``self.outputs_softmax``.

        Raises:
            ValueError: If ``frame`` is ``None`` or contains no pixels.
        """
        if frame is None or frame.size == 0:
            raise ValueError("frame must be a non-empty image array")

        # Collapse to a single (H, W) luminance plane.  OpenCV frames are BGR,
        # so BGR2GRAY applies the luma weights to the correct channels
        # (RGB2GRAY on a BGR frame swaps the red and blue weights).
        if frame.ndim == 2:
            gray = frame
        elif frame.ndim == 3 and frame.shape[2] == 1:
            gray = frame[:, :, 0]
        else:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # cv2.resize takes the target size as (width, height).
        resized = cv2.resize(gray, (self.input_width, self.input_height))

        # Scale to [0, 1], then shift/scale to [-1, 1]; per the original
        # author's note this mirrors torchvision Normalize((0.5,), (0.5,)).
        normalized = resized.astype(np.float32) / 255.0
        normalized = (normalized - 0.5) / 0.5

        # Replicate the gray plane into 3 identical channels and add the batch
        # axis in one step: (H, W) -> (1, 3, H, W).
        input_tensor = np.repeat(normalized[np.newaxis, np.newaxis, :, :], 3, axis=1)

        outputs = self.session.run(None, {self.input_name: input_tensor})
        logits = outputs[0][0]  # first output, first (only) batch element
        self.outputs_softmax = self.softmax(logits, axis=0).tolist()
        return self.outputs_softmax

    def display_probabilities(self, probabilities: List[float]):
        """Print one ``label: probability`` line per entry of ``class_dict``.

        Args:
            probabilities: Sequence indexable by every key of ``class_dict``.
        """
        for class_index, class_name in self.class_dict.items():
            print(f"{class_name}: {probabilities[class_index]:.4f}")

if __name__ == "__main__":
    # Live demo: classify webcam frames and overlay the most probable label
    # until the user presses "q" or the camera stops delivering frames.
    model_path = "/home/theosiam/Repos/Autotrust/Autotrust/Driver_Distraction_Detection/Onnx_versions/onnx_version5.onnx"
    input_name = "input"
    input_width, input_height = 224, 224
    detector = DistractionDetector(model_path, input_name, input_width, input_height)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        cap.release()
        # SystemExit with a message prints it to stderr and exits with a
        # non-zero status; unlike exit(), it does not depend on the `site`
        # module having injected a helper.
        raise SystemExit("Error: Could not open webcam.")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture image.")
                break

            probabilities = detector.run_inference(frame)
            # Most probable class among the indices that have a label.
            label = max(detector.class_dict, key=lambda i: probabilities[i])
            text = f"{detector.class_dict[label]} ({probabilities[label]:.2f})"
            cv2.putText(frame, text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow("Distraction Detector", frame)

            # waitKey also services the GUI event loop; "q" quits.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even if inference or
        # drawing raised inside the loop.
        cap.release()
        cv2.destroyAllWindows()


[1;31m2025-01-31 15:07:49.429923553 [E:onnxruntime:Default, provider_bridge_ort.cc:1992 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1637 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcudnn.so.9: cannot open shared object file: No such file or directory
[m
[0;93m2025-01-31 15:07:49.429941083 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:965 CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Require cuDNN 9.* and CUDA 12.*. Please install all dependencies as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.[m
