In [3]:
!pip install soundfile

Collecting soundfile
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------- ----------------------------- 0.3/1.0 MB ? eta -:--:--
   -------------------- ------------------- 0.5/1.0 MB 932.9 kB/s eta 0:00:01
   -------------------- ------------------- 0.5/1.0 MB 932.9 kB/s eta 0:00:01
   -------------------- ------------------- 0.5/1.0 MB 932.9 kB/s eta 0:00:01
   ------------------------------ --------- 0.8/1.0 MB 729.2 kB/s eta 0:00:01
   ---------------------------------------- 1.0/1.0 MB 702.2 kB/s eta 0:00:00
Installing collected packages: soundfile
Successfully installed soundfile-0.13.1


In [7]:
!pip install vosk


Collecting vosk
  Downloading vosk-0.3.45-py3-none-win_amd64.whl.metadata (1.8 kB)
Collecting srt (from vosk)
  Downloading srt-3.5.3.tar.gz (28 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting websockets (from vosk)
  Downloading websockets-15.0.1-cp312-cp312-win_amd64.whl.metadata (7.0 kB)
Downloading vosk-0.3.45-py3-none-win_amd64.whl (14.0 MB)
   ---------------------------------------- 0.0/14.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/14.0 MB ? eta -:--:--
    --------------------------------------- 0.3/14.0 MB ? eta -:--:--
   - -------------------------------------- 0.5/14.0 MB 799.2 kB/s eta 0:00:17
   - -------------------------------------- 0.5/14.0 MB 799.2 kB/s eta 0:00:17
   -- ------------------------------------- 0.8/14.0 MB 684.4 kB/s eta 0:00:20
   -- ------------------------------------- 0.8/14.0 MB 684.4 kB/s eta 0:00:20
   -- ------------------------------------- 1.0/14.0 MB 

In [7]:
!pip install noisereduce librosa soundfile


Collecting noisereduce
  Downloading noisereduce-3.0.3-py3-none-any.whl.metadata (14 kB)
Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.5.0.post1-cp312-abi3-win_amd64.whl.metadata (5.6 kB)
Downloading noisereduce-3.0.3-py3-none-any.whl (22 kB)
Downloading librosa-0.11.0-py3-none-any.whl (260 kB)
Downloading audioread-3.0.1-py3-none-any.whl (23 kB)
Downloading pooch-1.8.2-py3-none-any.whl (64 kB)
Downloading soxr-0.5.0.post1-cp312-abi3-win_amd64.whl (164 kB)
Installing collected packages: soxr, audioread, pooch, noisereduce, librosa
Successfully installed audioread-3.0.1 librosa-0.11.0 noisereduce-3.0.3 pooch-1.8.2 soxr-0.5.0.post1


In [1]:
import os
import cv2
import whisper
import numpy as np
import mediapipe as mp
import pandas as pd
from moviepy.editor import VideoFileClip

# Initialize models once, at cell/module load time, so every function below shares them.
# Loading the Whisper checkpoint is the expensive step of this cell.
model = whisper.load_model("tiny")  # "tiny" is the smallest checkpoint; pick a larger one if the hardware allows
mp_pose = mp.solutions.pose  # MediaPipe pose module: provides Pose and the PoseLandmark indices

# Keywords to detect. They are matched as plain substrings of the lower-cased
# transcript, so entries must be lower case; multi-word phrases are allowed.
KEYWORDS = ['help', 'emergency', 'save me']

# Output CSV file. Relative path, so it is written to the current working
# directory; save_alerts_to_csv rewrites it on every run.
CSV_FILE = "alerts.csv"


def extract_audio(video_path, audio_path="temp_audio.wav"):
    """Extract the audio track of a video into a standalone audio file.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path of the extracted audio
            (defaults to ``temp_audio.wav`` in the working directory).

    Returns:
        ``audio_path``, so the result can be passed straight to the
        transcription step.

    Raises:
        ValueError: If the video contains no audio track.
    """
    clip = VideoFileClip(video_path)
    try:
        # MoviePy exposes a missing audio stream as ``clip.audio is None``;
        # fail with a clear message instead of an AttributeError on None.
        if clip.audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")
        clip.audio.write_audiofile(audio_path, verbose=False, logger=None)
    finally:
        # Always release the reader (file handles / ffmpeg processes), even if
        # writing fails; otherwise the video stays locked on Windows.
        clip.close()
    return audio_path


def detect_voice_commands(audio_path):
    """Transcribe an audio file and report which alert keywords it contains.

    The transcript is lower-cased and each entry of ``KEYWORDS`` is looked up
    as a plain substring of it.

    Args:
        audio_path: Path of the audio file handed to the speech model.

    Returns:
        A ``(detected_keywords, transcript)`` tuple: the matching keywords in
        ``KEYWORDS`` order and the lower-cased transcript text.
    """
    print("🎤 Detecting voice commands...")
    transcription = model.transcribe(audio_path, fp16=False)
    transcript = transcription["text"].lower()

    # Collect hits in KEYWORDS order.
    detected_keywords = []
    for phrase in KEYWORDS:
        if phrase in transcript:
            detected_keywords.append(phrase)

    if not detected_keywords:
        print("   -> No keywords detected.")
    else:
        print("   -> Detected:", ", ".join(detected_keywords))
    return detected_keywords, transcript


def detect_fall(video_path, vertical_diff_threshold=-0.1, fallback_fps=30.0):
    """Scan a video for frames in which the shoulders are below the hips.

    This is a crude posture heuristic, not a true fall detector: for every
    frame with a detected pose it compares the mean shoulder height with the
    mean hip height. MediaPipe landmark ``y`` values are normalized image
    coordinates that grow downward, so ``hip_y - shoulder_y`` is positive for
    an upright person and negative once the shoulders are lower than the hips.

    Args:
        video_path: Path of the video file to analyse.
        vertical_diff_threshold: A frame is flagged when
            ``hip_y - shoulder_y`` is below this value (default ``-0.1``).
        fallback_fps: Frame rate assumed when the container reports no valid
            FPS, so timestamps can still be computed (approximate in that case).

    Returns:
        A ``(fall_detected, timestamps)`` tuple. ``timestamps`` holds one
        entry (seconds, rounded to 2 decimals) per flagged frame, so a single
        fall that lasts several frames produces several entries.
    """
    print("🤸 Detecting falls (simulated)...")
    cap = cv2.VideoCapture(video_path)
    pose = None
    timestamps = []

    try:
        if not cap.isOpened():
            # Make the failure visible instead of silently reporting "no fall".
            print(f"   -> Could not open video: {video_path}")
            return False, timestamps

        pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)

        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:  # also catches NaN
            # Some containers report 0 FPS; avoid a ZeroDivisionError below.
            print(f"   -> Invalid FPS reported ({fps}); assuming {fallback_fps} fps")
            fps = fallback_fps

        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # MediaPipe expects RGB input, OpenCV decodes frames as BGR.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result = pose.process(frame_rgb)
            frame_count += 1

            if not result.pose_landmarks:
                continue
            lm = result.pose_landmarks.landmark

            left_shoulder = lm[mp_pose.PoseLandmark.LEFT_SHOULDER]
            right_shoulder = lm[mp_pose.PoseLandmark.RIGHT_SHOULDER]
            left_hip = lm[mp_pose.PoseLandmark.LEFT_HIP]
            right_hip = lm[mp_pose.PoseLandmark.RIGHT_HIP]

            avg_shoulder_y = (left_shoulder.y + right_shoulder.y) / 2
            avg_hip_y = (left_hip.y + right_hip.y) / 2

            # Negative when the shoulders are lower in the image than the hips.
            vertical_diff = avg_hip_y - avg_shoulder_y

            # Vertical-offset heuristic: shoulders significantly below the hips.
            if vertical_diff < vertical_diff_threshold:
                timestamp = round(frame_count / fps, 2)
                print(f"   -> Fall detected at {timestamp} seconds")
                timestamps.append(timestamp)
    finally:
        # Release the capture and the pose graph even if processing fails.
        cap.release()
        if pose is not None:
            pose.close()

    return bool(timestamps), timestamps


def save_alerts_to_csv(voice_alerts, fall_alerts, transcript):
    """Write the detection results to ``CSV_FILE`` and print a confirmation.

    The file has the columns ``Type``, ``Keyword`` and ``Time``. Voice rows
    come first (one per keyword), then fall rows (one per timestamp). When a
    category has no detections, a single placeholder row with keyword
    ``"None"`` is written for it.

    Args:
        voice_alerts: Detected keywords; an empty or falsy value means none.
        fall_alerts: Fall timestamps in seconds; an empty or falsy value means none.
        transcript: Accepted for interface compatibility; it is not written
            to the file.
    """
    if voice_alerts:
        voice_rows = [
            {"Type": "Voice Command", "Keyword": spoken, "Time": "N/A"}
            for spoken in voice_alerts
        ]
    else:
        voice_rows = [{"Type": "Voice Command", "Keyword": "None", "Time": "N/A"}]

    if fall_alerts:
        fall_rows = [
            {"Type": "Fall Detected", "Keyword": "Fall", "Time": f"{seconds:.2f} seconds"}
            for seconds in fall_alerts
        ]
    else:
        fall_rows = [{"Type": "Fall Detected", "Keyword": "None", "Time": "N/A"}]

    # Build the frame once from all records, then persist without the index column.
    pd.DataFrame(voice_rows + fall_rows).to_csv(CSV_FILE, index=False)
    print(f"✅ Detection complete. Results saved to '{CSV_FILE}'")

def main(video_path):
    """Run the full pipeline on one video and save the alerts to CSV.

    Steps: extract the audio track, look for spoken keywords, scan the frames
    for falls, then write everything to the CSV file. The temporary audio
    file is removed afterwards, including when one of the steps fails.

    Args:
        video_path: Path of the video file to process.
    """
    print("🎥 Processing video:", video_path)

    audio_path = extract_audio(video_path)
    try:
        voice_alerts, transcript = detect_voice_commands(audio_path)
        # Only the timestamps are needed; the boolean flag is implied by them.
        _, fall_times = detect_fall(video_path)
        save_alerts_to_csv(voice_alerts, fall_times, transcript)
    finally:
        # Cleanup: never leave the temporary audio file behind, even on error.
        if os.path.exists(audio_path):
            os.remove(audio_path)


# 🟢 Example usage (replace path as needed)
# Runs the whole pipeline on a single video. When executed as a notebook cell
# or as a script, __name__ is "__main__", so the pipeline runs immediately.
if __name__ == "__main__":
    # Raw string so the Windows backslashes are not treated as escape sequences.
    # NOTE(review): hard-coded, machine-specific absolute path - this cell only
    # runs on the author's machine; consider a configurable, relative data path.
    video_file = r"C:\Users\hp\Downloads\Kids Getting Hurt Vs. Adults #Shorts.mp4" # ← Replace with your video path
    main(video_file)

🎥 Processing video: C:\Users\hp\Downloads\Kids Getting Hurt Vs. Adults #Shorts.mp4
🎤 Detecting voice commands...
   -> No keywords detected.
🤸 Detecting falls (simulated)...
   -> Fall detected at 5.71 seconds
   -> Fall detected at 5.74 seconds
   -> Fall detected at 5.77 seconds
   -> Fall detected at 5.81 seconds
   -> Fall detected at 5.84 seconds
   -> Fall detected at 5.87 seconds
   -> Fall detected at 5.91 seconds
   -> Fall detected at 5.94 seconds
   -> Fall detected at 5.97 seconds
   -> Fall detected at 6.01 seconds
   -> Fall detected at 6.04 seconds
   -> Fall detected at 6.07 seconds
   -> Fall detected at 6.11 seconds
   -> Fall detected at 6.14 seconds
   -> Fall detected at 6.17 seconds
   -> Fall detected at 6.21 seconds
   -> Fall detected at 6.24 seconds
   -> Fall detected at 6.27 seconds
   -> Fall detected at 6.31 seconds
   -> Fall detected at 6.34 seconds
   -> Fall detected at 6.37 seconds
   -> Fall detected at 6.41 seconds
   -> Fall detected at 6.44 second