In [None]:
!python --version

Python 3.12.11


In [None]:
!nvidia-smi

Wed Sep  3 14:31:44 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   34C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [1]:
!pip install rtmlib -i https://pypi.org/simple
!pip install onnxruntime-gpu



### Code

In [3]:
import os
import cv2
import numpy as np
import threading
import time
import argparse
from datetime import datetime
import pickle
import gc
from google.colab import output


# Configure TensorFlow GPU memory before any model is built.
import tensorflow as tf
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Ask TensorFlow to grow its allocation on each visible GPU on demand
        # rather than reserving memory up front. The ONNX Runtime pose model
        # created later in main() also runs on 'cuda'.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("[INFO] Memory growth has been activated.")
    except RuntimeError as e:
        # Per the TensorFlow docs, memory growth can only be set before the
        # GPUs have been initialized; report the failure and keep going.
        print("[ERROR]:", e)


from rtmlib import Body, draw_skeleton

from model.model_violence import build_violence_model

# Shared state between the reader and processor threads.
# frame_buffer: RGB frames appended by frame_reader and drained by process_buffer.
# main() rebinds frame_buffer and stop_event at the start of every run.
frame_buffer = []
# Held around the append in frame_reader and the copy-and-clear in process_buffer.
buffer_lock = threading.Lock()
# Set to make both worker loops exit.
stop_event = threading.Event()

def detect_motion(frames, sequence_length, method, threshold):
    """Score how much the image changes across a window of frames.

    ``sequence_length`` frames are sampled at evenly spaced positions from
    ``frames``. For every consecutive pair of sampled frames the mean absolute
    pixel difference is computed, and those per-pair values are reduced to a
    single score according to ``method``.

    Args:
        frames: Sequence of image arrays that all share the same shape.
        sequence_length: Number of frames to sample from ``frames``.
        method: ``'average'`` (mean of the per-pair values) or ``'maximum'``
            (largest per-pair value).
        threshold: The window counts as significant when the score is greater
            than or equal to this value.

    Returns:
        A ``(score, is_significant)`` tuple. ``(0, False)`` is returned when
        ``frames`` is empty or when fewer than two frames were sampled.

    Raises:
        ValueError: If ``method`` is not one of the supported names. Without
            this check an unknown method would silently produce a score of 0
            and every window would be reported as having no motion.
    """
    reducers = {'average': np.mean, 'maximum': np.max}
    if method not in reducers:
        raise ValueError(
            f"Unknown motion method {method!r}; expected one of {sorted(reducers)}"
        )

    if not frames:
        return 0, False

    # Evenly spaced sample positions across the whole window.
    indices = np.round(np.linspace(0, len(frames) - 1, sequence_length)).astype(int)
    sampled_frames = [frames[i] for i in indices]

    # Cast to float32 before subtracting so uint8 frames cannot wrap around.
    motion_values = [
        np.mean(np.abs(current.astype(np.float32) - previous.astype(np.float32)))
        for previous, current in zip(sampled_frames, sampled_frames[1:])
    ]

    if not motion_values:
        return 0, False

    final_score = reducers[method](motion_values)
    is_significant = final_score >= threshold
    return final_score, is_significant


def _group_gray_triplets(gray_frames, group_count):
    """Pack consecutive grayscale frames, three at a time, into 3-channel images.

    ``gray_frames`` has shape ``(N, H, W)`` with ``N >= 3 * group_count``.
    Group ``i`` holds frames ``3i``, ``3i + 1`` and ``3i + 2`` as its last-axis
    channels, giving a result of shape ``(group_count, H, W, 3)``.
    """
    height, width = gray_frames.shape[1:3]
    used = gray_frames[:group_count * 3]
    grouped = used.reshape(group_count, 3, height, width)
    # Move the "frame within the triplet" axis last; copy so the result is contiguous.
    return np.ascontiguousarray(np.transpose(grouped, (0, 2, 3, 1)))


def preprocess_streams(frames, body_estimator, sequence_length, final_frame_count, frame_w, frame_h):
    """Build the frame-difference and skeleton inputs for the two-stream model.

    ``sequence_length`` frames are sampled at evenly spaced positions from
    ``frames``. Two streams are derived from them:

    * Difference stream: sampled frames are resized to ``(frame_w, frame_h)``,
      absolute differences of consecutive frames are converted to grayscale,
      and every three consecutive difference images become one 3-channel image.
    * Skeleton stream: ``body_estimator`` is run on each full-size sampled
      frame, the result is drawn with ``draw_skeleton`` on a black canvas of
      the same size, resized, converted to grayscale, and grouped the same way.

    Args:
        frames: Non-empty sequence of image arrays with at least 3 channels.
        body_estimator: Callable returning ``(keypoints, scores)`` for a frame.
        sequence_length: Number of frames to sample; must be at least
            ``3 * final_frame_count + 1`` so enough difference images exist.
        final_frame_count: Number of 3-channel groups per stream.
        frame_w: Target width passed to ``cv2.resize``.
        frame_h: Target height passed to ``cv2.resize``.

    Returns:
        ``(diff_input, skel_input)``, each of shape
        ``(1, final_frame_count, frame_h, frame_w, 3)``.

    Raises:
        ValueError: If ``frames`` is empty, ``final_frame_count`` is smaller
            than 1, or ``sequence_length`` is too small for the requested
            number of groups.
    """
    frames_needed = final_frame_count * 3
    if final_frame_count < 1:
        raise ValueError("final_frame_count must be at least 1")
    if len(frames) == 0:
        raise ValueError("frames must not be empty")
    if sequence_length < frames_needed + 1:
        raise ValueError(
            f"sequence_length must be at least {frames_needed + 1} "
            f"to build {final_frame_count} grouped frames, got {sequence_length}"
        )

    gray_weights = [0.299, 0.587, 0.114]

    indices = np.round(np.linspace(0, len(frames) - 1, sequence_length)).astype(int)
    sampled_frames = [frames[i] for i in indices]

    # --- Difference stream ---
    resized_frames_np = np.array(
        [cv2.resize(f, (frame_w, frame_h)) for f in sampled_frames],
        dtype=np.float32,
    )
    diffs = np.abs(resized_frames_np[1:] - resized_frames_np[:-1])
    gray_diff = np.dot(diffs[..., :3], gray_weights)
    diff_input = np.expand_dims(_group_gray_triplets(gray_diff, final_frame_count), axis=0)

    # --- Skeleton stream ---
    # Only the first `frames_needed` skeleton images are ever grouped, so pose
    # estimation is skipped for sampled frames beyond that point.
    skeleton_frames = []
    for frame_rgb in sampled_frames[:frames_needed]:
        keypoints, scores = body_estimator(frame_rgb)
        blank_bg = np.zeros_like(frame_rgb)
        skeleton_image = draw_skeleton(blank_bg, keypoints, scores, kpt_thr=0.2, line_width=3, radius=3)
        resized_skel = cv2.resize(skeleton_image, (frame_w, frame_h))
        skeleton_frames.append(resized_skel.astype(np.float32))

    gray_skel = np.dot(np.array(skeleton_frames)[..., :3], gray_weights)
    skel_input = np.expand_dims(_group_gray_triplets(gray_skel, final_frame_count), axis=0)

    return diff_input, skel_input


def classify_and_get_details(model, diff_input, skel_input, classification_threshold):
    """Run the two-stream model and turn its score into a label.

    Args:
        model: Object whose ``predict([diff_input, skel_input], verbose=0)``
            returns a nested result; element ``[0][0]`` is used as the score.
        diff_input: Frame-difference stream input.
        skel_input: Skeleton stream input.
        classification_threshold: Scores strictly above this value are
            labeled ``"Violence"``; everything else is ``"Non-Violence"``.

    Returns:
        ``(label, details)`` where ``details`` is the score formatted to four
        decimal places inside parentheses.
    """
    outputs = model.predict([diff_input, skel_input], verbose=0)
    score = outputs[0][0]

    if score > classification_threshold:
        label = "Violence"
    else:
        label = "Non-Violence"

    return label, "({:.4f})".format(score)


def log_and_print_result(log_path, prediction, details, batch_count, delay_seconds):
    """Print one prediction, append it to the log file, and beep on violence.

    The time range shown for the batch is derived from its ordinal: batch ``n``
    covers ``(n - 1) * delay_seconds`` to ``n * delay_seconds``, formatted as
    ``HH:MM:SS``.

    Args:
        log_path: File the entry is appended to.
        prediction: Label text; ``"Violence"`` additionally triggers the alert.
        details: Extra text printed after the label (for example the score).
        batch_count: 1-based index of the batch being reported.
        delay_seconds: Length of one batch in seconds.

    The entry is written to the log before the audio alert is attempted, and a
    failing alert is reported as a warning instead of propagating. The alert is
    cosmetic, and an exception raised here would otherwise lose the log entry
    and terminate the calling thread.
    """
    def _clock(seconds):
        return time.strftime('%H:%M:%S', time.gmtime(seconds))

    start_second = (batch_count - 1) * delay_seconds
    end_second = batch_count * delay_seconds
    message = f"[{_clock(start_second)}-{_clock(end_second)}] Prediction: {prediction} {details}"

    print(message)
    with open(log_path, "a") as f:
        f.write(message + "\n")

    if prediction == "Violence":
        try:
            output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')
        except Exception as exc:
            # Best-effort: the record is already on disk.
            print(f"[WARN] Audio alert failed: {exc}")


def frame_reader(cap, target_fps, buffer_seconds=5):
    """Read frames from ``cap`` into the shared ``frame_buffer`` until told to stop.

    Frames are converted from BGR to RGB and appended under ``buffer_lock``.
    Reading pauses while the buffer already holds ``buffer_seconds`` worth of
    frames at ``target_fps``, and the loop ends when the capture is closed, a
    read fails, or ``stop_event`` is set.

    Args:
        cap: Capture object providing ``isOpened()`` and ``read()``.
        target_fps: Frame rate used to size the buffer. A non-positive or NaN
            value is replaced by a fallback of 30; otherwise the buffer limit
            would be 0 and no frame would ever be read.
        buffer_seconds: How many seconds of video the buffer may hold.
            Defaults to 5, the value that was previously hard-coded.
    """
    fallback_fps = 30.0
    if not (target_fps > 0):
        print(f"[WARN] Invalid FPS {target_fps!r}; assuming {fallback_fps:.0f} FPS for buffer sizing.")
        target_fps = fallback_fps

    max_buffer_size = target_fps * buffer_seconds

    while cap.isOpened() and not stop_event.is_set():
        if len(frame_buffer) >= max_buffer_size:
            # Buffer is full; wait for the processor to drain it.
            time.sleep(0.01)
            continue

        ret, frame = cap.read()
        if not ret:
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        with buffer_lock:
            frame_buffer.append(frame_rgb)



def process_buffer(args, cap, model, body_estimator):
    """Periodically drain ``frame_buffer``, classify the window, and log the result.

    Every ``args.delay`` seconds the buffered frames are taken (and the buffer
    cleared) under ``buffer_lock``. A window with fewer than
    ``args.sequence_length`` frames is padded with black frames. Windows whose
    motion score is below ``args.motion_threshold`` are logged as
    ``"Non-Violence"`` without running the model; the rest are preprocessed
    and classified. Results go to a log file named after the video and the
    start time.

    Args:
        args: Namespace providing ``video``, ``delay``, ``sequence_length``,
            ``motion_method``, ``motion_threshold``, ``final_frames``,
            ``frame_width``, ``frame_height`` and ``class_threshold``.
        cap: The capture object shared with the reader thread.
        model: Classifier handed to ``classify_and_get_details``.
        body_estimator: Pose estimator handed to ``preprocess_streams``.

    ``stop_event`` is always set when this function exits, including when an
    exception escapes the loop, so the reader thread cannot be left spinning
    on a full buffer.
    """
    batch_count = 0
    video_name = os.path.basename(args.video)
    started_at = datetime.now()
    # splitext keeps dots inside the file name ("a.b.mp4" -> "a.b").
    log_stem = os.path.splitext(video_name)[0]
    log_path = f"log_{log_stem}_{started_at.strftime('%Y%m%d_%H%M%S')}.txt"

    with open(log_path, "w") as f:
        f.write(f"Log for {video_name} - {started_at.strftime('%Y-%m-%d %H:%M:%S')}\n\n")

    try:
        next_time = time.time() + args.delay
        while not stop_event.is_set():
            # Fixed-rate schedule: sleep until the next tick, then advance it.
            sleep_time = next_time - time.time()
            if sleep_time > 0:
                time.sleep(sleep_time)
            next_time += args.delay

            with buffer_lock:
                current_frames = frame_buffer.copy()
                frame_buffer.clear()

            # Capture already released and nothing left to process: finished.
            if not cap.isOpened() and not current_frames:
                stop_event.set()
                continue

            if 0 < len(current_frames) < args.sequence_length:
                # NOTE(review): padding with black frames adds a large difference
                # between the last real frame and the first padding frame —
                # confirm this matches how the model was trained.
                padding_needed = args.sequence_length - len(current_frames)
                h, w, c = current_frames[0].shape
                black_frame = np.zeros((h, w, c), dtype=np.uint8)
                current_frames.extend([black_frame] * padding_needed)

            if not current_frames:
                # An empty buffer at a tick is treated as end of input: release
                # the capture so the next tick takes the exit branch above.
                # NOTE(review): a reader that is merely slow would trigger this too.
                cap.release()
                print("[INFO] Buffer empty")
                continue

            motion_score, is_significant = detect_motion(
                current_frames, args.sequence_length, args.motion_method, args.motion_threshold
            )

            if not is_significant:
                prediction = "Non-Violence"
                details = f"(No Motion: {motion_score:.4f})"
            else:
                diff_input, skel_input = preprocess_streams(
                    current_frames, body_estimator, args.sequence_length,
                    args.final_frames, args.frame_width, args.frame_height
                )
                prediction, details = classify_and_get_details(
                    model, diff_input, skel_input, args.class_threshold
                )

            batch_count += 1
            log_and_print_result(log_path, prediction, details, batch_count, args.delay)
    finally:
        # Make sure the reader thread stops even if processing failed.
        stop_event.set()

def main(args):
    """Run violence detection on one video file.

    Resets the shared buffer and stop flag, loads and warms up the classifier
    and the pose estimator, opens the video, and then runs ``frame_reader``
    and ``process_buffer`` in two threads until both finish or the user
    interrupts.

    Args:
        args: Namespace with ``video``, ``model_path``, ``final_frames``,
            ``frame_height``, ``frame_width`` and the settings read by
            ``process_buffer``.
    """
    global frame_buffer, stop_event
    # Fresh state so the cell can be re-run in the same kernel.
    frame_buffer = []
    stop_event = threading.Event()

    if not os.path.exists(args.video):
        print(f"[ERROR] File not found: {args.video}")
        return

    model = build_violence_model(args.model_path)
    body_estimator = Body(pose='rtmo', to_openpose=False, mode='lightweight', backend='onnxruntime', device='cuda')

    print("\n⚙️ Warming up Keras model...")
    # Same shape preprocess_streams produces, so the warm-up exercises the
    # exact input signature used during inference.
    warmup_shape = (1, args.final_frames, args.frame_height, args.frame_width, 3)
    dummy_raw = np.zeros(warmup_shape, dtype=np.float32)
    dummy_skel = np.zeros(warmup_shape, dtype=np.float32)
    _ = model.predict([dummy_raw, dummy_skel], verbose=0)
    print("✅ Keras model is ready.")

    print("⚙️ Warming up pose estimation model...")
    dummy_pose = np.zeros((96, 96, 3), dtype=np.uint8)
    _ = body_estimator(dummy_pose)
    print("✅ Pose model is ready.\n")

    cap = cv2.VideoCapture(args.video)
    if not cap.isOpened():
        print(f"[ERROR] Cannot Open Video: {args.video}")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    print(f"[INFO] Video '{os.path.basename(args.video)}' opened with {fps:.2f} FPS.")

    reader_thread = threading.Thread(target=frame_reader, args=(cap, fps))
    processor_thread = threading.Thread(target=process_buffer, args=(args, cap, model, body_estimator))

    reader_thread.start()
    processor_thread.start()

    try:
        reader_thread.join()
        processor_thread.join()
    except KeyboardInterrupt:
        print("\n[STOP] process stopped by user.")
    finally:
        # Stop and wait for both workers before touching the capture, so it is
        # never released while a thread may still be reading from it.
        stop_event.set()
        reader_thread.join()
        processor_thread.join()
        if cap.isOpened():
            cap.release()

    print("\n✅ Inference Done.")


[INFO] Memory growth has been activated.


### Run

In [5]:
from IPython.display import HTML
from base64 import b64encode

def show_video(video_path, video_width = 800):
  """Return an inline HTML video player for a local video file.

  The file is read in full, base64-encoded, and embedded as a ``data:`` URL,
  so the whole video becomes part of the cell output.

  Args:
    video_path: Path of the video file to embed.
    video_width: Width attribute of the ``<video>`` element.

  Returns:
    An ``IPython.display.HTML`` object containing the player.
  """
  # Read-only binary mode is sufficient, and the context manager closes the file.
  with open(video_path, "rb") as video_file:
    video_bytes = video_file.read()

  video_url = f"data:video/mp4;base64,{b64encode(video_bytes).decode()}"
  return HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")

show_video("videos/F_161_0_0_0_0.mp4")

In [6]:
# Inference settings read by main() and the worker functions it starts.
args = argparse.Namespace(
    # Input clip; its base name is also used in the log file name.
    video="videos/F_161_0_0_0_0.mp4",
    # Passed to build_violence_model().
    model_path="model/model_best_val_loss.keras",
    # Size each frame is resized to before the model inputs are built.
    frame_width=96,
    frame_height=96,
    # Frames sampled per window: 49 frames give 48 consecutive differences...
    sequence_length=49,
    # ...which are grouped three at a time into 16 three-channel frames.
    final_frames=16,
    # Seconds between processing passes; also the span of each logged time range.
    delay=5,
    # How per-pair motion values are reduced: "average" or "maximum".
    motion_method="average",
    # Windows scoring below this are logged as Non-Violence without running the model.
    motion_threshold=1.0138,
    # The model score must exceed this for a window to be labeled Violence.
    class_threshold=0.5713
)
main(args)

[INFO] Loading model weights from: model/model_best_val_loss.keras
[INFO] Model weights loaded successfully.
load /root/.cache/rtmlib/hub/checkpoints/rtmo-s_8xb32-600e_body7-640x640-dac2bf74_20231211.onnx with onnxruntime backend

⚙️ Warming up Keras model...
✅ Keras model is ready.
⚙️ Warming up pose estimation model...
✅ Pose model is ready.

[INFO] Video 'F_161_0_0_0_0.mp4' opened with 30.00 FPS.
[00:00:00-00:00:05] Prediction: Non-Violence (No Motion: 0.9496)
[00:00:05-00:00:10] Prediction: Non-Violence (0.0155)
[00:00:10-00:00:15] Prediction: Violence (0.7851)
[00:00:15-00:00:20] Prediction: Non-Violence (0.1691)
[INFO] Buffer empty

✅ Inference Done.
