In [30]:
import os
import time
import subprocess
from pathlib import Path

import torch
import cv2
import urllib.parse
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO
from IPython.display import display, clear_output
from dotenv import load_dotenv


In [None]:
# Fail early with a clear message when the configuration file is missing.
if not os.path.exists(os.path.join(os.getcwd(), ".env")):
    raise FileNotFoundError("'.env' file not found at current directory.")

load_dotenv()


def _require_env(name):
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is unset or empty. Without this check a
            missing value would surface later as `int(None)` or as the literal
            text "None" inside a stream URL.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Required environment variable {name!r} is missing or empty; set it in '.env'."
        )
    return value


# Host and stream names used to build the RTSP URLs
HOST = _require_env("HOST")
INPUT_STREAM_NAME = _require_env("INPUT_STREAM_NAME")
OUT_STREAM_NAME = _require_env("OUT_STREAM_NAME")

# Number of processed frames after which the streaming loop stops
MAX_FRAMES = int(_require_env("MAX_FRAMES"))


In [None]:
# The weights are resolved against the current working directory, so the kernel's
# cwd must be the directory that contains `from_GitHub/`.
MODEL_PATH = (Path.cwd() / "from_GitHub/dmmmit_smoking_detection/models/final_model.pt").resolve()

print("Model path:", MODEL_PATH)
print("Exists:", MODEL_PATH.exists(), "Is file:", MODEL_PATH.is_file())

# Raise explicitly instead of using `assert`: assertions are stripped under
# `python -O`, and this matches how the missing '.env' file is reported.
if not MODEL_PATH.is_file():
    raise FileNotFoundError(f"Expected a file, got: {MODEL_PATH}")


Model path: /home/aleks/GitHub.Projects/AI_Vision_Smoking/from_GitHub/dmmmit_smoking_detection/models/final_model.pt
Exists: True Is file: True


In [8]:
# Load the detection weights through the ultralytics YOLO wrapper.
model = YOLO(str(MODEL_PATH))
# Label for the model backend; it is not referenced by the other cells visible here.
model_type = "ultralytics.YOLO"


In [None]:
# Summarise the loaded model: task, class names and details of the underlying network.
print("Task:", getattr(model, "task", None))
print("Class names:", getattr(model, "names", None))

core = getattr(model, "model", None)

if core is not None:
    print("Core type:", type(core))

    # These attributes are optional, so each one is printed only when the core exposes it.
    for _label, _attr in (("YAML:", "yaml"), ("Stride:", "stride"), ("Args:", "args")):
        if hasattr(core, _attr):
            print(_label, getattr(core, _attr))

    # Total number of scalar parameters across all tensors of the network
    n_params = sum(map(lambda tensor: tensor.numel(), core.parameters()))
    print("Param count:", n_params)

    print("Core module:", core)


Task: detect
Class names: {0: 'Person', 1: 'cell phone', 2: 'cigarette', 3: 'hands_with_cigarettes'}
Core type: <class 'ultralytics.nn.tasks.DetectionModel'>
YAML: {'nc': 4, 'depth_multiple': 0.33, 'width_multiple': 0.25, 'backbone': [[-1, 1, 'Conv', [64, 3, 2]], [-1, 1, 'Conv', [128, 3, 2]], [-1, 3, 'C2f', [128, True]], [-1, 1, 'Conv', [256, 3, 2]], [-1, 6, 'C2f', [256, True]], [-1, 1, 'Conv', [512, 3, 2]], [-1, 6, 'C2f', [512, True]], [-1, 1, 'Conv', [1024, 3, 2]], [-1, 3, 'C2f', [1024, True]], [-1, 1, 'SPPF', [1024, 5]]], 'head': [[-1, 1, 'nn.Upsample', ['None', 2, 'nearest']], [[-1, 6], 1, 'Concat', [1]], [-1, 3, 'C2f', [512]], [-1, 1, 'nn.Upsample', ['None', 2, 'nearest']], [[-1, 4], 1, 'Concat', [1]], [-1, 3, 'C2f', [256]], [-1, 1, 'Conv', [256, 3, 2]], [[-1, 12], 1, 'Concat', [1]], [-1, 3, 'C2f', [512]], [-1, 1, 'Conv', [512, 3, 2]], [[-1, 9], 1, 'Concat', [1]], [-1, 3, 'C2f', [1024]], [[15, 18, 21], 1, 'Detect', ['nc']]], 'ch': 3}
Stride: tensor([ 8., 16., 32.])
Args: {'task': 

In [None]:
# Input/output probe: run one prediction on an all-zero frame and print the result shapes.

DUMMY_H = DUMMY_W = 640

dummy = np.zeros(shape=(DUMMY_H, DUMMY_W, 3), dtype=np.uint8)
print("Input dummy shape:", dummy.shape, "dtype:", dummy.dtype)

results = model.predict(dummy, verbose=False)
r = results[0]

# Shapes of the box tensors, printed in a fixed order
for _label, _attr in (
    ("Output: boxes.xyxy", "xyxy"),
    ("Output: boxes.conf", "conf"),
    ("Output: boxes.cls", "cls"),
):
    print(_label, getattr(r.boxes, _attr).shape)

# Masks are reported only when the result carries them
if r.masks is not None:
    print("Output: masks", r.masks.data.shape)


Input dummy shape: (640, 640, 3) dtype: uint8
Output: boxes.xyxy torch.Size([1, 4])
Output: boxes.conf torch.Size([1])
Output: boxes.cls torch.Size([1])


In [29]:
# go2rtc serves RTSP on port 8554

# The stream name is percent-encoded so that non-ASCII names still form a valid URL.
STREAM_URL = f"rtsp://{HOST}:8554/{urllib.parse.quote(INPUT_STREAM_NAME)}"
print("STREAM_URL:", STREAM_URL)

# sometimes helps with RTSP in OpenCV/FFMPEG
# NOTE(review): `stimeout` is not accepted by every FFmpeg release (newer ones use
# `timeout`) — confirm against the FFmpeg build bundled with the installed OpenCV.
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;tcp|max_delay;500000|stimeout;5000000"


STREAM_URL: rtsp://100.70.149.57:8554/cam_11_%D1%83%D0%BB%D0%B8%D1%86%D0%B0


In [None]:
# Percent-encode the output stream name the same way the input stream name is encoded,
# so names with non-ASCII or reserved characters still form a valid RTSP URL.
OUT_STREAM_URL = f"rtsp://{HOST}:8554/{urllib.parse.quote(OUT_STREAM_NAME)}"

# Frame size; filled in after the first frame has been processed
W = 0
H = 0
FPS = 10  # 10-15 is a reasonable range for stability

# Handle of the ffmpeg process that publishes the annotated stream; started by the streaming loop
ffmpeg_proc = None


In [None]:
# Confidence threshold passed to model.predict(conf=...) in the streaming loop
CONF: float = 0.25
# Inference image size passed to model.predict(imgsz=...)
IMG_SIZE: int = 640
# Inference runs only on frames whose index is a multiple of this value (1 = every frame)
FRAME_STRIDE: int = 1


In [None]:
# Open the input RTSP stream through OpenCV's FFmpeg backend.
video_capture = cv2.VideoCapture(STREAM_URL, cv2.CAP_FFMPEG)
# Request a one-frame buffer.
# NOTE(review): presumably meant to reduce latency; not every backend honours this property — confirm.
video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)

# Stop here if the stream could not be opened.
if not video_capture.isOpened():
    raise RuntimeError(f"RTSP stream not opened: {STREAM_URL}")


In [None]:
def iou(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Each box is indexed as [x1, y1, x2, y2]. The result is 0.0 when the
    boxes do not overlap (touching edges count as no overlap); otherwise it
    is the overlap area divided by the union area, with a small epsilon in
    the denominator to avoid division by zero.
    """
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    # Clamp each side at zero so that disjoint boxes yield an empty overlap
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    overlap = overlap_w * overlap_h

    if overlap == 0:
        return 0.0

    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    union = area_a + area_b - overlap + 1e-9

    return overlap / union


In [None]:
def _overlap_fraction(inner, outer):
    """Return the share of `inner`'s area that lies inside `outer` (boxes as [x1, y1, x2, y2])."""
    inner_area = (inner[2] - inner[0]) * (inner[3] - inner[1])
    if inner_area <= 0:
        return 0.0

    overlap_w = min(inner[2], outer[2]) - max(inner[0], outer[0])
    overlap_h = min(inner[3], outer[3]) - max(inner[1], outer[1])
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    return (overlap_w * overlap_h) / inner_area


def smoking_score(results, min_overlap=0.01):
    """Heuristic smoking score for one prediction result.

    The score is the maximum of:
    - the confidence of any `hands_with_cigarettes` detection;
    - the confidence of any `cigarette` detection that overlaps a `Person` box.

    Overlap is measured as the fraction of the cigarette box covered by the
    person box, not as IoU. A cigarette box is tiny compared with a person
    box, so the IoU of a cigarette lying fully inside a person is usually
    far below 0.01 and the IoU test would reject it. Every pair that passed
    the IoU test also passes this one.

    Args:
        results: sequence whose first item exposes `.names` (class id -> name)
            and `.boxes` (iterable of boxes with `.cls`, `.conf`, `.xyxy`).
        min_overlap: minimum covered fraction of the cigarette box for it to
            count as overlapping a person.

    Returns:
        The score as a float; 0.0 when nothing relevant is detected or when
        `results` is empty.
    """
    if not results:
        return 0.0

    names = results[0].names
    boxes = results[0].boxes
    if boxes is None:
        return 0.0

    persons = []
    cigarettes = []
    hands = []

    for b in boxes:
        cls_id = int(b.cls.item())
        conf = float(b.conf.item())
        name = names.get(cls_id, str(cls_id))

        if name == "Person":
            persons.append(b.xyxy[0].cpu().numpy().tolist())

        elif name == "cigarette":
            cigarettes.append((b.xyxy[0].cpu().numpy().tolist(), conf))

        elif name == "hands_with_cigarettes":
            hands.append(conf)

    score = max(hands) if hands else 0.0

    for c_box, c_conf in cigarettes:
        # A cigarette that cannot raise the score needs no overlap check
        if c_conf <= score:
            continue
        if any(_overlap_fraction(c_box, p_box) > min_overlap for p_box in persons):
            score = c_conf

    return score


In [None]:
# Read frames from the input stream, run detection, overlay the heuristic smoking
# score and pipe the annotated frames to ffmpeg until MAX_FRAMES frames are processed.
frame_idx = 0
shown = 0

# A handle left over from an earlier run may belong to an ffmpeg process that has
# already exited; drop it so a fresh encoder is started instead of writing to a dead pipe.
if ffmpeg_proc is not None and ffmpeg_proc.poll() is not None:
    ffmpeg_proc = None

try:
    while shown < MAX_FRAMES:

        is_video_capture_read, frame = video_capture.read()

        if not is_video_capture_read:
            print("Stream ended or read failed.")
            break

        # Only every FRAME_STRIDE-th frame goes through inference
        if frame_idx % FRAME_STRIDE != 0:
            frame_idx += 1
            continue

        results = model.predict(frame, conf=CONF, imgsz=IMG_SIZE, verbose=False)
        annotated = results[0].plot()

        # "class:confidence" label for every detection, used in the log line below
        detections = []

        for b in results[0].boxes:
            cls_id = int(b.cls.item())
            conf = float(b.conf.item())
            name = results[0].names.get(cls_id, str(cls_id))
            detections.append(f"{name}:{conf:.2f}")

        smoke_prob = smoking_score(results)
        text = f"smoking_prob (heuristic): {smoke_prob:.2f}"

        # Draw the score on top of the frame
        cv2.putText(
                annotated, text, (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 255), 2, cv2.LINE_AA
                )

        # `annotated` is fed to ffmpeg as raw bgr24 frames of a fixed W x H size

        if ffmpeg_proc is None:
            # The encoder is started lazily because the frame size is known only now
            H, W = annotated.shape[:2]

            # NOTE(review): `-rtsp_flags listen` makes ffmpeg act as the RTSP server for
            # OUT_STREAM_URL; confirm this is intended rather than publishing to an
            # already running server on that address.
            cmd = [
                    "ffmpeg", "-re",
                    "-f", "rawvideo",
                    "-pix_fmt", "bgr24",
                    "-s", f"{W}x{H}",
                    "-r", str(FPS),
                    "-i", "-",
                    "-an",
                    "-c:v", "libx264",
                    "-preset", "veryfast",
                    "-tune", "zerolatency",
                    "-pix_fmt", "yuv420p",
                    "-profile:v", "baseline",
                    "-g", "30",
                    "-bf", "0",
                    "-f", "rtsp",
                    "-rtsp_flags", "listen",
                    OUT_STREAM_URL
                    ]

            ffmpeg_proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)

        elif annotated.shape[:2] != (H, W):
            # The raw pipe has no per-frame header, so a frame of another size would
            # shift every following frame; bring it back to the size ffmpeg was started with.
            annotated = cv2.resize(annotated, (W, H))

        # Send the frame to the RTSP output
        try:
            ffmpeg_proc.stdin.write(annotated.tobytes())
        except (OSError, ValueError):
            # OSError (e.g. BrokenPipeError): ffmpeg exited; ValueError: its stdin is closed
            print("ffmpeg is no longer accepting frames; stopping.")
            break

        # Optional inline preview (disabled):
        # clear_output(wait=True)
        # plt.figure(figsize=(8, 5))
        # plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
        # plt.axis("off")
        # display(plt.gcf())
        # plt.close()

        print("Detections:", ", ".join(detections) if detections else "none")
        shown += 1
        frame_idx += 1

finally:
    # Release the capture even when inference or streaming raises
    video_capture.release()

print("Done.")


In [None]:
# Shut down the ffmpeg publisher started by the streaming loop.
if ffmpeg_proc is not None:
    # Closing stdin signals end-of-input, which lets ffmpeg flush and exit on its own.
    if ffmpeg_proc.stdin is not None and not ffmpeg_proc.stdin.closed:
        try:
            ffmpeg_proc.stdin.close()
        except OSError:
            # Flushing failed because ffmpeg has already exited; nothing left to close.
            pass

    # Give ffmpeg a chance to finish, then escalate: terminate first, kill as a last resort.
    try:
        ffmpeg_proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        ffmpeg_proc.terminate()
        try:
            ffmpeg_proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            ffmpeg_proc.kill()
            ffmpeg_proc.wait()

    # Drop the handle: the streaming loop starts a new process when it is None,
    # and re-running this cell becomes a no-op.
    ffmpeg_proc = None
