In [None]:
# Install all necessary packages
!pip install ultralytics opencv-python mediapipe numpy torch transformers Pillow timm einops

# For CUDA support with PyTorch (recommended for GPU)
# Visit https://pytorch.org/ to get the correct command for your system

Collecting ultralytics
  Downloading ultralytics-8.3.185-py3-none-any.whl.metadata (37 kB)
Collecting mediapipe
  Downloading mediapipe-0.10.21-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.16-py3-none-any.whl.metadata (14 kB)
INFO: pip is looking at multiple versions of mediapipe to determine which version is compatible with other requirements. This could take a while.
Collecting mediapipe
  Downloading mediapipe-0.10.20-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
  Downloading mediapipe-0.10.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
  Downloading mediapipe-0.10.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
  Downloading mediapipe-0.10.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.8-cp37-abi3-many

In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.185-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.16-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.185-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m52.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.16-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.185 ultralytics-thop-2.0.16


In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.185-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.16-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.185-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.16-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.185 ultralytics-thop-2.0.16


In [None]:
import time, json, os, cv2
from huggingface_hub import hf_hub_download
from ultralytics import YOLO
from google.colab.patches import cv2_imshow
# --------------------
# CONFIG
# --------------------
# Source video to scan; point this at your own .mp4.
INPUT = "/content/gun_holding (online-video-cutter.com).mp4"

# Output locations. Leaving either as None derives the path from INPUT:
#   OUTPUT -> "<input>_annotated.mp4"
#   JSONL  -> "<input>_detections.jsonl"
OUTPUT = None
JSONL = None

# Confidence / IoU thresholds passed to the model's predict() call.
CONF_THRESHOLD = 0.35
IOU_THRESHOLD = 0.45

# When True, every annotated frame is rendered inline through google.colab's
# cv2_imshow (a Colab-only helper; it is not a local preview window).
SHOW = True

# Substrings that mark a model class name as a weapon
# (compared against the lower-cased class name).
WEAPON_KEYWORDS = {
    "gun",
    "knife",
    "pistol",
    "rifle",
    "revolver",
    "bomb",
    "time bomb",
}

# --------------------
# FUNCTIONS
# --------------------
def download_weights(
    repo_id: str = "Accurateinfosolution/Suspicious_activity_detection_Yolov11_Custom",
    filename: str = "Suspicious_Activities_nano.pt",
) -> str:
    """Fetch model weights from the Hugging Face Hub and return the local path.

    The defaults select the pretrained Suspicious Activity YOLOv11 checkpoint
    (includes Gun/Knife etc.), so calling ``download_weights()`` with no
    arguments behaves exactly as before.

    Args:
        repo_id: Hub repository that hosts the weights file.
        filename: Name of the weights file inside that repository.

    Returns:
        Whatever ``hf_hub_download`` returns for the requested file (the path
        of the downloaded/cached file on local disk).
    """
    return hf_hub_download(repo_id=repo_id, filename=filename)

def is_weapon_class(name: str, keywords=None) -> bool:
    """Return True when a class name contains any weapon keyword.

    Matching is a case-insensitive substring test. Underscores, hyphens and
    runs of whitespace are treated as a single space on both sides, so a
    multi-word keyword such as "time bomb" also matches class names written
    as "Terrorist_With_Time_Bomb" or "time-bomb".

    Args:
        name: Class name reported by the model.
        keywords: Optional iterable of keywords to look for. When omitted,
            the module-level WEAPON_KEYWORDS set is used.

    Returns:
        True if at least one keyword occurs in the normalized name.
    """
    if keywords is None:
        keywords = WEAPON_KEYWORDS

    def _normalize(text) -> str:
        # Lower-case, unify common separators, and collapse whitespace.
        unified = str(text).lower().replace("_", " ").replace("-", " ")
        return " ".join(unified.split())

    normalized_name = _normalize(name)
    return any(_normalize(keyword) in normalized_name for keyword in keywords)

# --------------------
# MAIN PIPELINE
# --------------------
def _put_outlined_text(frame, text, origin, scale):
    """Draw white text with a black outline so it stays readable on any background."""
    cv2.putText(frame, text, origin,
                cv2.FONT_HERSHEY_SIMPLEX, scale, (0, 0, 0), 3, cv2.LINE_AA)
    cv2.putText(frame, text, origin,
                cv2.FONT_HERSHEY_SIMPLEX, scale, (255, 255, 255), 1, cv2.LINE_AA)


def run_detection():
    """Run the weapon detector over INPUT and write all artefacts.

    For every frame of the input video this:
      * runs the YOLO model with CONF_THRESHOLD / IOU_THRESHOLD,
      * draws each detection (box + "name confidence" label) on the frame,
      * for detections whose class counts as a weapon, saves the frame as
        "harmful_frames/frame_<index>_<class>.png" and appends one JSON line
        to the detections file,
      * overlays a processing-FPS counter and writes the frame to the
        annotated output video (and previews it when SHOW is True).

    Output paths come from OUTPUT / JSONL, or are derived from INPUT when
    those are None.

    Raises:
        RuntimeError: if the input video or the output writer cannot be opened.
    """
    weights_path = download_weights()
    model = YOLO(weights_path)
    model.fuse()

    input_stem = os.path.splitext(INPUT)[0]
    out_path = OUTPUT or input_stem + "_annotated.mp4"
    jsonl_path = JSONL or input_stem + "_detections.jsonl"
    frames_dir = "harmful_frames"

    cap = cv2.VideoCapture(INPUT)
    writer = None
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Cannot open video: {INPUT}")

        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Fall back to 30 fps when the container reports 0.
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

        writer = cv2.VideoWriter(
            out_path,
            cv2.VideoWriter_fourcc(*"mp4v"),
            fps,
            (w, h)
        )
        if not writer.isOpened():
            raise RuntimeError(f"Cannot open VideoWriter for: {out_path}")

        class_names = model.model.names if hasattr(model.model, "names") else {}
        weapon_class_ids = {cid for cid, name in class_names.items() if is_weapon_class(name)}

        print(f"[INFO] Model classes: {class_names}")
        print(f"[INFO] Treating these as weapons: {sorted(weapon_class_ids)}")

        # Create the frame dump directory once instead of re-checking per detection.
        os.makedirs(frames_dir, exist_ok=True)

        with open(jsonl_path, "w", encoding="utf-8") as jf:
            frame_idx = 0
            t0 = time.time()
            while True:
                ok, frame = cap.read()
                if not ok:
                    break

                # Inference
                results = model.predict(
                    source=frame,
                    conf=CONF_THRESHOLD,
                    iou=IOU_THRESHOLD,
                    verbose=False
                )
                r = results[0]

                # Draw & log
                if r.boxes is not None and len(r.boxes) > 0:
                    xyxy = r.boxes.xyxy.cpu().numpy()
                    confs = r.boxes.conf.cpu().numpy()
                    clss = r.boxes.cls.cpu().numpy().astype(int)

                    for box, conf, cls_id in zip(xyxy, confs, clss):
                        cls_id = int(cls_id)
                        name = class_names.get(cls_id, str(cls_id))
                        x1, y1, x2, y2 = map(int, box)
                        is_weapon = cls_id in weapon_class_ids
                        color = (60, 220, 60) if is_weapon else (120, 120, 120)

                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                        _put_outlined_text(frame, f"{name} {conf:.2f}",
                                           (x1, max(20, y1 - 8)), 0.6)

                        if not is_weapon:
                            continue

                        # Save the frame for later VLM analysis.
                        # NOTE: the saved image already carries the boxes/labels
                        # drawn so far for this frame.
                        # Path separators in a class name would otherwise point
                        # the file into a non-existent sub-directory.
                        safe_name = str(name).replace("/", "_").replace(os.sep, "_")
                        frame_path = os.path.join(frames_dir, f"frame_{frame_idx}_{safe_name}.png")
                        if cv2.imwrite(frame_path, frame):
                            print(f"Saved harmful frame for VLM analysis: {frame_path}")
                        else:
                            print(f"[WARN] Could not save harmful frame: {frame_path}")

                        # One JSON line per weapon detection.
                        jf.write(json.dumps({
                            "frame_index": frame_idx,
                            "timestamp_sec": frame_idx / fps,
                            "bbox_xyxy": [x1, y1, x2, y2],
                            "class_id": cls_id,
                            "class_name": name,
                            "confidence": float(conf),
                            "vlm_analysis_pending": True  # Mark this entry for future VLM analysis
                        }) + "\n")

                # FPS HUD (processing speed since the loop started, not video fps)
                elapsed = time.time() - t0
                fps_now = (frame_idx + 1) / elapsed if elapsed > 0 else 0.0
                _put_outlined_text(frame, f"FPS: {fps_now:.1f}", (10, 25), 0.7)

                writer.write(frame)
                if SHOW:
                    # NOTE: in Colab, cv2.imshow won't work; cv2_imshow renders inline instead.
                    cv2_imshow(frame)
                    # ESC (key code 27) stops processing early.
                    if cv2.waitKey(1) & 0xFF == 27:
                        break

                frame_idx += 1
    finally:
        # Always release the capture/writer, even when opening or inference fails,
        # so the output file is finalized and handles are not leaked.
        cap.release()
        if writer is not None:
            writer.release()
        try:
            cv2.destroyAllWindows()
        except cv2.error:
            # OpenCV builds without a GUI backend cannot destroy windows;
            # there is nothing to clean up in that case.
            pass

    print(f"[DONE] Annotated video saved: {out_path}")
    print(f"[DONE] Detections JSONL saved: {jsonl_path}")

# --------------------
# RUN
# --------------------
# Executes the whole detection pipeline as soon as this cell runs, using the
# INPUT / OUTPUT / JSONL / threshold constants from the CONFIG section.
run_detection()


In [3]:
! pip install -U google-generativeai



In [13]:
# Directory whose .png frames are handed to the VLM analysis step.
frames_dir = "/content/Test_frames"

In [18]:
import os
import json
from PIL import Image
import google.generativeai as genai
from google.colab import userdata # Import the userdata module

def _frame_index_from_filename(image_file: str):
    """Return the frame index encoded in 'frame_<index>_<class>.png', or None if absent."""
    parts = image_file.split('_')
    if len(parts) < 2:
        return None
    try:
        return float(parts[1])
    except ValueError:
        return None


def analyze_harmful_frames_with_vlm(frames_dir: str, output_jsonl: str,
                                    fps: float = 30.0,
                                    model_name: str = 'gemini-1.5-flash'):
    """
    Analyzes frames from a directory using the Gemini VLM and saves the descriptions.

    Every '.png' file in ``frames_dir`` is sent to the model together with a
    fixed JSON-schema prompt. One JSON line per successfully analyzed frame is
    appended to ``output_jsonl`` as soon as the answer arrives, so results
    obtained before an interruption are kept.

    Args:
        frames_dir: Directory containing frames named 'frame_<index>_<class>.png'.
        output_jsonl: Path of the JSONL file to (over)write.
        fps: Frame rate used to turn the frame index from the filename into
            ``timestamp_sec``. Defaults to 30.0; pass the real frame rate of
            the source video for accurate timestamps.
        model_name: Gemini model identifier passed to ``genai.GenerativeModel``.

    Returns:
        None. Initialization problems, an unreadable directory and per-frame
        failures are reported with ``print`` instead of being raised.

    Raises:
        ValueError: if ``fps`` is not positive.
    """
    if fps <= 0:
        raise ValueError("fps must be positive")

    try:
        # Securely get the API key from Colab's user data
        api_key = userdata.get('GOOGLE_API_KEY')
        if not api_key:
            raise ValueError("API key not found in Colab Secrets. Please add 'GOOGLE_API_KEY'.")

        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name)
    except Exception as e:
        print(f"Error initializing Gemini model: {e}")
        return

    try:
        candidates = os.listdir(frames_dir)
    except OSError as e:
        print(f"Cannot read frames directory {frames_dir}: {e}")
        return

    def _chronological(image_file: str):
        # Numeric frame order (frame_125 before frame_1000); files without a
        # parseable index go last, ordered by name.
        index = _frame_index_from_filename(image_file)
        return (index is None, index if index is not None else 0.0, image_file)

    image_files = sorted(
        (f for f in candidates if f.lower().endswith('.png')),
        key=_chronological,
    )
    if not image_files:
        print(f"No harmful frames found in {frames_dir}.")
        return

    # The prompt is identical for every frame, so build it once.
    prompt = (
               """Analyze the provided image and return your response strictly in JSON format.
                  The JSON must contain the following fields:
                  {
                    'frame_id': '<unique identifier for the frame>',
                    'scene_description': '<brief description of the overall scene>',
                    'people': [
                      {
                        'id': '<person_id>',
                        'description': '<appearance and position in scene>',
                        'actions': '<actions the person is performing>',
                        'weapons': [
                          {
                            'type': '<weapon type if present, else null>',
                            'position': '<where the weapon is held or located>'
                          }
                        ]
                      }
                    ],
                    'objects': [
                      {
                        'type': '<object type>',
                        'description': '<appearance and location in the scene>'
                      }
                    ],
                    'harmful_activity': '<true/false>',
                    'activity_description': '<if harmful activity is true, describe what it is>'
                  }
                  Ensure the output is valid JSON only, with no extra commentary."""

    )

    with open(output_jsonl, 'w', encoding='utf-8') as f:
        for image_file in image_files:
            image_path = os.path.join(frames_dir, image_file)
            print(f"Analyzing {image_path} with Gemini VLM...")

            try:
                # Context manager closes the underlying file handle after the request.
                with Image.open(image_path) as image:
                    response = model.generate_content([prompt, image])
                # NOTE(review): accessing .text may raise when the response carries
                # no text (e.g. a blocked answer) - confirm against the SDK docs.
                vlm_response = response.text
            except Exception as e:
                print(f"Error processing {image_path}: {e}")
                continue

            # A filename without a frame index no longer discards the VLM answer;
            # its timestamp is simply recorded as null.
            frame_index = _frame_index_from_filename(image_file)
            result = {
                "image_filename": image_file,
                "timestamp_sec": frame_index / fps if frame_index is not None else None,
                "vlm_analysis": vlm_response
            }
            f.write(json.dumps(result) + '\n')
            f.flush()

    print(f"VLM analysis complete. Results saved to {output_jsonl}")

In [19]:
# Run the VLM stage over the selected frames and store its answers as JSONL.
analyze_harmful_frames_with_vlm(
    frames_dir=frames_dir,
    output_jsonl="/content/gun_holding.jsonl",
)

Analyzing /content/Test_frames/frame_125_Terrorist_With_Time_Bomb.png with Gemini VLM...
Analyzing /content/Test_frames/frame_127_Terrorist_With_Time_Bomb.png with Gemini VLM...
Analyzing /content/Test_frames/frame_133_Terrorist_With_Time_Bomb.png with Gemini VLM...
VLM analysis complete. Results saved to /content/gun_holding.jsonl
