In [2]:
# Ask the NVIDIA driver tool which GPU (if any) is attached to this session.
# The recorded output below shows the tool was not found on this runtime.
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [3]:
!pip install gdown



In [4]:
# Delete video.mp4 from the working directory if it exists.
# NOTE(review): the download cell writes "safety_detection.mp4", not
# "video.mp4" — confirm this is the file that was meant to be cleaned up.
! rm -rf video.mp4

In [5]:
import gdown

# Google Drive file ID of the sample clip; update with your file's specific ID.
file_id = "1Qi2KRxQoLrv0_ukVkTBP6xLm4uGykihz"
url = f"https://drive.google.com/uc?id={file_id}"

# Filename (relative to the working directory) the clip is saved under.
output = "safety_detection.mp4"
downloaded_path = gdown.download(url, output, quiet=False)

# Some gdown releases signal a failed download (e.g. access denied) by
# returning None instead of raising, so stop here rather than letting a later
# cell fail on a missing video file.
if downloaded_path is None:
    raise RuntimeError(f"Failed to download {url} to {output}")

# Last expression: show the saved path as the cell result.
downloaded_path

Downloading...
From: https://drive.google.com/uc?id=1Qi2KRxQoLrv0_ukVkTBP6xLm4uGykihz
To: /kaggle/working/safety_detection.mp4
100%|██████████| 192k/192k [00:00<00:00, 58.1MB/s]


'safety_detection.mp4'

In [6]:
import os
# Record the directory the notebook is running from and show it.
HOME = os.getcwd()
print(HOME)

/kaggle/working


In [7]:
# Absolute path of the clip saved by the download cell. Built from HOME (the
# working directory) and `output` (the download filename) so the notebook
# does not depend on a hard-coded Kaggle path.
SOURCE_VIDEO_PATH = os.path.join(HOME, output)

In [8]:
# Pip install method (recommended)

!pip install "ultralytics<=8.3.40"

from IPython import display
display.clear_output()

import ultralytics
ultralytics.checks()

Ultralytics 8.3.40 🚀 Python-3.11.11 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
Setup complete ✅ (4 CPUs, 31.4 GB RAM, 6362.0/8062.4 GB disk)


In [10]:
# Path to the detection weights file that is passed to YOLO(...).
MODEL = "/kaggle/input/safety-detection-model/ppe.pt"

In [11]:
from ultralytics import YOLO

# Load the weights referenced by MODEL.
model = YOLO(MODEL)
# Fuse the model's layers; the recorded output reports the fused model summary.
model.fuse()

Model summary (fused): 268 layers, 43,614,318 parameters, 0 gradients, 164.9 GFLOPs


In [12]:
!pip install supervision==0.3.0

Collecting supervision==0.3.0
  Downloading supervision-0.3.0-py3-none-any.whl.metadata (6.4 kB)
Downloading supervision-0.3.0-py3-none-any.whl (21 kB)
Installing collected packages: supervision
Successfully installed supervision-0.3.0


In [16]:
import supervision as sv
from ultralytics import YOLO
import os
import json
import cv2  # OpenCV for image saving
import numpy as np

# -------------------------------
# CONFIGURATION
# -------------------------------
TARGET_VIDEO_PATH = 'output_video.mp4'  # Annotated video written by the processing loop
FRAME_SAVE_DIR = 'frames/'  # Directory to save frames
FRAME_DATA_PATH = 'frame_data.json'  # JSON file to save frame data

# All classes in our custom model
# The list position is used as the class id by get_class_name.
# NOTE(review): assumes this order matches the model's own label order — confirm.
classNames = [
    'Hardhat', 'Mask', 'NO-Hardhat', 'NO-Mask', 'NO-Safety Vest',
    'Person', 'Safety Cone', 'Safety Vest', 'machinery', 'vehicle'
]

# Class names for a detected PPE item / a detected missing PPE item.
# NOTE(review): neither set is referenced in the visible part of the notebook.
SAFETY_CLASSES = {"Hardhat", "Mask", "Safety Vest"}
UNSAFE_CLASSES = {"NO-Hardhat", "NO-Mask", "NO-Safety Vest"}

# -------------------------------
# INITIAL SETUP
# -------------------------------
os.makedirs(FRAME_SAVE_DIR, exist_ok=True)

# Metadata of the source clip; passed to sv.VideoSink when the output video is opened.
video_info = sv.VideoInfo.from_video_path(SOURCE_VIDEO_PATH)
# NOTE(review): box_annotator is not referenced in the visible part of the notebook.
box_annotator = sv.BoxAnnotator(thickness=2, text_thickness=2, text_scale=1)

id_counter = 1  # Next sequential id to hand out
id_map = {}  # Tracker id -> sequential id
frame_data_list = []  # Per-frame records, dumped to FRAME_DATA_PATH at the end

# -------------------------------
# UTILITY FUNCTION
# -------------------------------
def get_class_name(class_id):
    """Return the label for ``class_id``.

    Args:
        class_id: Integer index into the module-level ``classNames`` list.

    Returns:
        The matching entry of ``classNames``, or the placeholder
        ``"Class <class_id>"`` when the id is outside the list.
    """
    # The lower bound matters: a negative id would otherwise index from the
    # end of the list and silently return an unrelated label.
    if 0 <= class_id < len(classNames):
        return classNames[class_id]
    return f"Class {class_id}"

# -------------------------------
# PROCESSING FRAMES
# -------------------------------
# Maps a PPE-related class name to the person_state key it affects and the
# value it implies (True = item detected, False = item reported as missing).
PPE_EVIDENCE = {
    "Hardhat": ("hardhat", True),
    "NO-Hardhat": ("hardhat", False),
    "Mask": ("mask", True),
    "NO-Mask": ("mask", False),
    "Safety Vest": ("safety_vest", True),
    "NO-Safety Vest": ("safety_vest", False),
}

# Track objects through the source clip. For every frame: decide whether each
# tracked person is "Safe" or "Unsafe", draw the verdict on the frame, save the
# frame as a JPEG, record the result for the JSON export and append the frame
# to the output video.
with sv.VideoSink(TARGET_VIDEO_PATH, video_info) as sink:
    for frame_number, result in enumerate(
        model.track(
            source=SOURCE_VIDEO_PATH,
            tracker='bytetrack.yaml',
            show=False,
            stream=True,
            agnostic_nms=True,
            persist=True
        )
    ):
        # orig_img is the frame as decoded by OpenCV, i.e. already in BGR order,
        # which is what cv2.rectangle/putText colours and cv2.imwrite expect.
        frame = result.orig_img
        people_info = []

        # boxes.id is None when no tracked boxes are reported for this frame.
        if result.boxes.id is not None:
            ids = result.boxes.id.cpu().numpy().astype(int)
            class_ids = result.boxes.cls.cpu().numpy().astype(int)
            confs = result.boxes.conf.cpu().numpy()
            boxes = result.boxes.xyxy.cpu().numpy()

            detected = []

            for tracker_id, class_id, confidence, box in zip(ids, class_ids, confs, boxes):
                # Plain Python ints keep the records free of numpy scalars.
                tracker_id = int(tracker_id)
                class_id = int(class_id)

                # Assign new sequential ID
                if tracker_id not in id_map:
                    id_map[tracker_id] = id_counter
                    id_counter += 1

                detected.append({
                    "id": id_map[tracker_id],
                    "tracker_id": tracker_id,
                    "class_id": class_id,
                    "class_name": get_class_name(class_id),
                    "confidence": float(confidence),
                    "bbox": list(map(int, box))
                })

            # Separate people and other objects
            people = [d for d in detected if d["class_name"] == "Person"]
            objects = [d for d in detected if d["class_name"] != "Person"]

            for person in people:
                new_id = person["id"]
                person_bbox = person["bbox"]
                px1, py1, px2, py2 = person_bbox

                # None = no evidence either way, True = item seen, False = item flagged missing
                person_state = {
                    "hardhat": None,
                    "mask": None,
                    "safety_vest": None
                }

                # Attribute every PPE detection whose box overlaps (or touches)
                # the person's box to that person. Later detections overwrite
                # earlier ones for the same item.
                for obj in objects:
                    evidence = PPE_EVIDENCE.get(obj["class_name"])
                    if evidence is None:
                        continue  # cones, machinery, vehicles: not PPE

                    ox1, oy1, ox2, oy2 = obj["bbox"]
                    is_near = ox2 >= px1 and ox1 <= px2 and oy2 >= py1 and oy1 <= py2

                    if is_near:
                        item, present = evidence
                        person_state[item] = present

                # Determine safety status: anything not positively detected counts as missing
                missing_items = [item for item, value in person_state.items() if value is not True]
                status = "Safe" if not missing_items else "Unsafe"
                color = (0, 255, 0) if status == "Safe" else (0, 0, 255)  # BGR: green / red

                label = f"ID {new_id} | {status}"
                # Keep the label inside the image when the box touches the top edge.
                label_y = max(py1 - 10, 20)
                cv2.rectangle(frame, (px1, py1), (px2, py2), color, 2)
                cv2.putText(frame, label, (px1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

                person_state.update({
                    "tracker_id": new_id,
                    "safety_status": status,
                    "missing_items": missing_items,
                    "bbox": person_bbox
                })

                people_info.append(person_state)

        # Everything below runs for every frame, including frames with no
        # tracked boxes, so the output video, the saved JPEGs and the JSON
        # records stay aligned with the source clip frame for frame.

        # Save frame (already BGR, so no colour conversion is needed)
        frame_path = os.path.join(FRAME_SAVE_DIR, f"frame_{frame_number:04d}.jpg")
        cv2.imwrite(frame_path, frame)

        # Save JSON info
        frame_data_list.append({
            "frame_number": frame_number,
            "people": people_info
        })

        # Write annotated frame to video
        sink.write_frame(frame)

print("Safety detection and video generation completed.")

# -------------------------------
# SAVE FRAME DATA TO JSON
# -------------------------------
# Write the per-frame records collected in frame_data_list as indented JSON.
with open(FRAME_DATA_PATH, 'w') as fh:
    fh.write(json.dumps(frame_data_list, indent=4))

print(f"Frames saved to '{FRAME_SAVE_DIR}' and data saved to '{FRAME_DATA_PATH}'.")



video 1/1 (frame 1/56) /kaggle/working/safety_detection.mp4: 384x640 1 Hardhat, 1 NO-Mask, 1 NO-Safety Vest, 1 Person, 1412.7ms
video 1/1 (frame 2/56) /kaggle/working/safety_detection.mp4: 384x640 1 Hardhat, 1 Person, 1353.7ms
video 1/1 (frame 3/56) /kaggle/working/safety_detection.mp4: 384x640 1 Hardhat, 1 Person, 1395.8ms
video 1/1 (frame 4/56) /kaggle/working/safety_detection.mp4: 384x640 1 Hardhat, 1 Person, 1379.0ms
video 1/1 (frame 5/56) /kaggle/working/safety_detection.mp4: 384x640 1 Hardhat, 1 Person, 1414.9ms
video 1/1 (frame 6/56) /kaggle/working/safety_detection.mp4: 384x640 1 Hardhat, 1 Person, 1381.0ms
video 1/1 (frame 7/56) /kaggle/working/safety_detection.mp4: 384x640 1 Hardhat, 1 Person, 1363.9ms
video 1/1 (frame 8/56) /kaggle/working/safety_detection.mp4: 384x640 1 Hardhat, 1 Person, 1374.1ms
video 1/1 (frame 9/56) /kaggle/working/safety_detection.mp4: 384x640 1 Hardhat, 1 NO-Safety Vest, 1 Person, 1369.7ms
video 1/1 (frame 10/56) /kaggle/working/safety_detection.mp4: