In [None]:
!pip install SpeechRecognition
!pip install pyaudio
!pip install pipwin
!pipwin install pyaudio
!pip install pocketsphinx
!pip install pydub
!pip install librosa
!pip install soundfile
!pip install tensorflow
!pip install numpy
!pip install pandas
!pip install matplotlib
!pip install sklearn
!pip install tensorflow-lite

Collecting pyaudio
  Using cached PyAudio-0.2.14.tar.gz (47 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: pyaudio
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for pyaudio [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for pyaudio (pyproject.toml) ... [?25l[?25herror
[31m  ERROR: Failed building wheel for pyaudio[0m[31m
[0mFailed to build pyaudio
[31mERROR: ERROR: Failed to build installable wheels for some pyproject.toml based projects (pyaudio)[0m[31m
  warn("Found a non Windows system. Package installation might not work.")
Building cache. Han

In [None]:
!unzip archive.zip
!unzip RakshaKavach_Dataset.zip


unzip:  cannot find or open archive.zip, archive.zip.zip or archive.zip.ZIP.
Archive:  RakshaKavach_Dataset.zip
   creating: RakshaKavach_Dataset/
   creating: RakshaKavach_Dataset/normal_sounds/
   creating: RakshaKavach_Dataset/threat_sounds/
  inflating: RakshaKavach_Dataset/threat_sounds/03-01-01-01-01-01-10.wav  
  inflating: RakshaKavach_Dataset/threat_sounds/03-01-01-01-01-01-24.wav  
  inflating: RakshaKavach_Dataset/threat_sounds/03-01-01-01-01-02-10.wav  
  inflating: RakshaKavach_Dataset/threat_sounds/03-01-01-01-01-02-24.wav  
  inflating: RakshaKavach_Dataset/threat_sounds/03-01-01-01-02-01-10.wav  
  inflating: RakshaKavach_Dataset/threat_sounds/03-01-01-01-02-01-24.wav  
  inflating: RakshaKavach_Dataset/threat_sounds/03-01-01-01-02-02-10.wav  
  inflating: RakshaKavach_Dataset/threat_sounds/03-01-01-01-02-02-24.wav  
  inflating: RakshaKavach_Dataset/threat_sounds/03-01-02-01-01-01-10.wav  
  inflating: RakshaKavach_Dataset/threat_sounds/03-01-02-01-01-01-24.wav  
  inf

In [None]:
# Raksha Kavach: Voice Analysis Prototype (Updated & Corrected)
# This script analyzes a given audio file for signs of distress using a multi-layered AI approach.
# It is designed as a proof-of-concept and should not be used as a real-world safety device without extensive validation.

import os
import numpy as np
import tensorflow as tf
import librosa
import soundfile as sf
import speech_recognition as sr
from pydub import AudioSegment
import csv

# --- CONFIGURATION ---
# Paths to the pre-trained models and their label maps.
# Ensure you have downloaded these and placed them in the correct location.
# SoundEventDetector raises FileNotFoundError if either YAMNet path is missing.
YAMNET_MODEL_PATH = "/content/1.tflite"
# CSV with a header row; the third column of each row is used as the class label.
YAMNET_CLASSES_PATH = "/content/yamnet_class_map.csv"

# For SER, a pre-trained TFLite model is assumed.
# EmotionDetector only checks that the model path exists; when it does not, that layer is skipped.
SER_MODEL_PATH = "/content/SER_quant.tflite"
# NOTE(review): this is a relative path, unlike the /content paths above, and EmotionDetector
# currently accepts it without reading the file - confirm the intended location.
SER_CLASSES_PATH = "models/ser_labels.txt" # e.g., a text file with "anger", "fear", "happy", "neutral" on separate lines.

# --- LAYER 1: SOUND EVENT DETECTION (YAMNet) ---

class SoundEventDetector:
    """
    Analyzes audio for general sound events using the YAMNet model.
    Focuses on detecting universal sounds of distress like screams or shouts.

    The whole clip is scanned in consecutive model-sized windows, so an event
    that occurs after the first window is still detected.
    Assumes the TFLite model takes a fixed-length waveform input (the last
    dimension of the input shape) - TODO confirm for other model exports.
    """
    def __init__(self, model_path, classes_path):
        """
        Load the TFLite model and the class-name map.

        Args:
            model_path: Path to the YAMNet .tflite file.
            classes_path: Path to the class map CSV (header row, label in the third column).

        Raises:
            FileNotFoundError: If either file does not exist.
        """
        if not os.path.exists(model_path) or not os.path.exists(classes_path):
            raise FileNotFoundError("YAMNet model or class map not found. Please download them.")

        self.interpreter = tf.lite.Interpreter(model_path)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        self.class_names = []
        # newline='' is what the csv module expects; the encoding is explicit so the
        # result does not depend on the platform default.
        with open(classes_path, 'r', newline='', encoding='utf-8') as f:
            reader = csv.reader(f)
            next(reader, None)  # Skip header (tolerates an empty file)
            for row in reader:
                if not row:
                    continue  # blank line, not a class entry
                self.class_names.append(row[2])

        # Threat sounds are keywords to search for within the detected labels.
        # The value is the score weight applied when the keyword matches.
        self.threat_sounds = {
            "scream": 8, "shout": 7, "yell": 7, "crying": 5, "sobbing": 5,
            "glass": 6, "gunshot": 10
        }

    def analyze(self, audio_data, sample_rate):
        """
        Analyzes the audio data and returns detected threat sounds.

        Args:
            audio_data: Waveform samples. A 2-D array is treated as
                (frames, channels), the layout soundfile returns, and is
                averaged down to mono.
            sample_rate: Sample rate of audio_data; anything other than
                16000 Hz is resampled to 16000 Hz.

        Returns:
            A list of evidence dicts with keys "type", "detail",
            "confidence" and "score_impact". Empty when no class above the
            0.1 score threshold matches a threat keyword.
        """
        waveform = np.asarray(audio_data)
        if waveform.ndim > 1:
            waveform = waveform.mean(axis=1)

        if sample_rate != 16000:
            waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=16000)

        input_shape = tuple(int(dim) for dim in self.input_details[0]['shape'])
        input_len = input_shape[-1]
        input_index = self.input_details[0]['index']
        output_index = self.output_details[0]['index']

        # Score every window and keep the per-class maximum: a short scream anywhere
        # in the clip must not be hidden by quieter windows. max(..., 1) guarantees
        # one (zero-padded) window even for empty audio.
        scores = None
        for start in range(0, max(waveform.shape[0], 1), input_len):
            window = waveform[start:start + input_len]
            if window.shape[0] < input_len:
                window = np.pad(window, (0, input_len - window.shape[0]))

            # Reshape so models that declare a leading batch dimension also accept the window.
            input_tensor = np.array(window, dtype=np.float32).reshape(input_shape)
            self.interpreter.set_tensor(input_index, input_tensor)
            self.interpreter.invoke()
            window_scores = np.array(self.interpreter.get_tensor(output_index)[0])
            scores = window_scores if scores is None else np.maximum(scores, window_scores)

        print("\n[Sound Event Detector Top 5 Detections]")
        top_5_indices = np.argsort(scores)[-5:][::-1]
        for i in top_5_indices:
            print(f"- {self.class_names[i]}: {scores[i]:.4f}")

        detected_threats = []
        high_confidence_indices = np.where(scores > 0.1)[0]

        for index in high_confidence_indices:
            detected_sound_label = self.class_names[index].lower()
            confidence = scores[index]

            for threat_keyword, score_weight in self.threat_sounds.items():
                if threat_keyword in detected_sound_label:
                    detected_threats.append({
                        "type": "Sound Event",
                        "detail": self.class_names[index],
                        "confidence": float(confidence),
                        # Weight scaled by confidence, truncated to an integer.
                        "score_impact": int(score_weight * confidence)
                    })
                    break  # one evidence entry per detected class

        return detected_threats

# --- LAYER 2: SPEECH EMOTION RECOGNITION (Placeholder) ---

class EmotionDetector:
    """
    Speech-emotion layer of the pipeline.
    NOTE: Placeholder only - no model is loaded and no inference is run yet;
    a real implementation needs a specific SER model.
    """
    def __init__(self, model_path, classes_path):
        """Record whether the SER model file exists and set the threat-emotion weights."""
        model_found = os.path.exists(model_path)
        self.model_available = model_found
        if not model_found:
            print("\n[INFO] Speech Emotion Recognition model not found. This analysis layer will be skipped.")
        # Emotion label -> score weight.
        self.threat_emotions = dict(fear=7, angry=5)

    def analyze(self, audio_data, sample_rate):
        """Return the emotion evidence for the audio; the placeholder always yields none."""
        # With or without a model file the placeholder reports no evidence.
        return []

# --- LAYER 3: KEYWORD SPOTTING (Online + Offline Support) ---

class KeywordSpotter:
    """
    Transcribes audio to text and searches for specific distress keywords.
    NOTE: For offline mode, you must install PocketSphinx: pip install pocketsphinx
    """
    def __init__(self, use_offline=False):
        """
        Args:
            use_offline: When True, transcribe with PocketSphinx only. When
                False, use the Google API and fall back to PocketSphinx if
                the online request fails.
        """
        self.recognizer = sr.Recognizer()
        self.use_offline = use_offline
        # Keyword -> score weight added when the keyword appears in the transcript.
        self.distress_keywords = {
            "help": 8, "stop": 7, "police": 8, "save": 8,
            "kaapaadandi": 9, "aapu": 7, "bachao": 8
        }

    def _transcribe_offline(self, audio_data, not_understood_message):
        """Run PocketSphinx once; return the lowercase transcript or None on failure."""
        try:
            return self.recognizer.recognize_sphinx(audio_data).lower()
        except sr.UnknownValueError:
            print(not_understood_message)
        except sr.RequestError as e:
            # Raised when the offline engine itself cannot run; retrying the
            # same engine would not help.
            print(f"\n[Transcription Error]: Offline engine unavailable: {e}")
        return None

    def _transcribe(self, audio_data):
        """Return the lowercase transcript, or None when no engine produced one."""
        if self.use_offline:
            print("\n[Transcription Mode: Offline - PocketSphinx]")
            return self._transcribe_offline(
                audio_data, "\n[Transcription Error]: Could not understand audio.")

        print("\n[Transcription Mode: Online - Google API]")
        try:
            return self.recognizer.recognize_google(audio_data).lower()
        except sr.UnknownValueError:
            print("\n[Transcription Error]: Could not understand audio.")
            return None
        except sr.RequestError:
            # Only a failed online request triggers the offline fallback.
            print("\n[API Fallback]: Online API failed, trying offline engine...")

        return self._transcribe_offline(
            audio_data, "\n[Transcription Error]: Offline engine could not understand audio.")

    def analyze(self, audio_file_path):
        """
        Transcribe the audio file and report every distress keyword found in it.

        Args:
            audio_file_path: Path to an audio file readable by sr.AudioFile.

        Returns:
            A list of evidence dicts with keys "type", "detail",
            "confidence" and "score_impact"; empty when nothing was
            transcribed, no keyword matched, or an error occurred.
        """
        detected_threats = []
        try:
            with sr.AudioFile(audio_file_path) as source:
                audio_data = self.recognizer.record(source)

            text = self._transcribe(audio_data)
            if text is None:
                return []

            print(f"[Transcription Result]: \"{text}\"")

            # Substring match: a keyword counts wherever it appears in the transcript.
            for keyword, score_weight in self.distress_keywords.items():
                if keyword in text:
                    detected_threats.append({
                        "type": "Keyword",
                        "detail": f"'{keyword}' detected",
                        "confidence": 1.0,
                        "score_impact": score_weight
                    })
        except Exception as e:
            # Best-effort layer: a failure here must not abort the other analysis layers.
            print(f"[Keyword Spotter Error]: An unexpected error occurred: {e}")

        return detected_threats

# --- MAIN FUSION ENGINE ---

def run_voice_analysis(audio_file_path, use_offline_transcription=False):
    """
    Main function to run the entire analysis pipeline on a given audio file.

    The file is converted to a temporary mono WAV, passed through the sound
    event, emotion and keyword layers, and the combined evidence is printed
    together with a three-tier threat assessment.

    Args:
        audio_file_path: Path to the audio file to analyze.
        use_offline_transcription: Forwarded to KeywordSpotter as use_offline.

    Returns:
        None. The report is printed.
    """
    if not os.path.exists(audio_file_path):
        print(f"Error: Audio file not found at '{audio_file_path}'")
        return

    print("\n--- Raksha Kavach: Voice Analysis Report ---")
    print(f"File: {audio_file_path}")
    print("Initializing analysis engines...")

    temp_wav_path = "/content/temp_analysis_audio.wav"
    try:
        try:
            audio = AudioSegment.from_file(audio_file_path)
            audio.set_channels(1).export(temp_wav_path, format="wav")
            waveform, sample_rate = sf.read(temp_wav_path)
        except Exception as e:
            print(f"Error loading or converting audio file: {e}")
            return

        sound_detector = SoundEventDetector(YAMNET_MODEL_PATH, YAMNET_CLASSES_PATH)
        emotion_detector = EmotionDetector(SER_MODEL_PATH, SER_CLASSES_PATH)
        keyword_spotter = KeywordSpotter(use_offline=use_offline_transcription)

        print("\n[ANALYSIS STARTED]")
        all_evidence = []
        all_evidence.extend(sound_detector.analyze(waveform, sample_rate))
        all_evidence.extend(emotion_detector.analyze(waveform, sample_rate))
        all_evidence.extend(keyword_spotter.analyze(temp_wav_path))

        print("\n[ANALYSIS COMPLETE]")
        total_threat_score = 0

        if not all_evidence:
            print("\nNo specific threats detected.")
        else:
            print("\nEVIDENCE DETECTED:")
            # dict.fromkeys drops exact duplicates while keeping first-seen order,
            # so the report reads the same on every run.
            unique_evidence = [
                dict(items)
                for items in dict.fromkeys(tuple(d.items()) for d in all_evidence)
            ]
            for evidence in unique_evidence:
                print(f"- [{evidence['type']:<12}]: {evidence['detail']} (Impact: +{evidence['score_impact']})")
                total_threat_score += evidence['score_impact']

        # --- IMPROVEMENT: Three-tiered assessment logic ---
        sos_threshold = 9
        pre_alert_threshold = 5
        print("\n" + "="*40)
        print(f"FINAL THREAT SCORE: {total_threat_score}")
        print(f"(Pre-Alert Threshold: {pre_alert_threshold}, SOS Threshold: {sos_threshold})")

        if total_threat_score >= sos_threshold:
            print("ASSESSMENT: HIGH CONFIDENCE THREAT DETECTED.")
            print("ACTION:     Initiating IMMEDIATE SOS Protocol.")
        elif total_threat_score >= pre_alert_threshold:
            print("ASSESSMENT: POTENTIAL THREAT DETECTED.")
            print("ACTION:     Entering PRE-ALERT mode. Awaiting user confirmation or timeout.")
        else:
            print("ASSESSMENT: No immediate voice threats identified.")
        print("="*40)
    finally:
        # Runs on every exit path, including an exception raised by a detector,
        # so the temporary WAV never outlives the call.
        if os.path.exists(temp_wav_path):
            os.remove(temp_wav_path)

# --- SCRIPT ENTRY POINT ---

if __name__ == '__main__':
    # Print the prototype disclaimer, then analyze the bundled sample clip online-first.
    for banner_line in (
        "DISCLAIMER: This is a prototype for demonstration purposes.",
        "It is NOT a life-saving device. Accuracy depends heavily on audio quality and model performance.\n",
    ):
        print(banner_line)

    sample_audio = "/content/woman-screaming-sfx-screaming-sound-effect-320169.mp3"

    run_voice_analysis(sample_audio, use_offline_transcription=False)


DISCLAIMER: This is a prototype for demonstration purposes.
It is NOT a life-saving device. Accuracy depends heavily on audio quality and model performance.


--- Raksha Kavach: Voice Analysis Report ---
File: /content/woman-screaming-sfx-screaming-sound-effect-320169.mp3
Initializing analysis engines...

[ANALYSIS STARTED]

[Sound Event Detector Top 5 Detections]
- Screaming: 0.8516
- Speech: 0.0430
- Inside, small room: 0.0430
- Shout: 0.0195
- Yell: 0.0156

[Transcription Mode: Online - Google API]

[Transcription Error]: Could not understand audio.

[ANALYSIS COMPLETE]

EVIDENCE DETECTED:
- [Sound Event ]: Screaming (Impact: +6)

FINAL THREAT SCORE: 6
(Pre-Alert Threshold: 5, SOS Threshold: 9)
ASSESSMENT: POTENTIAL THREAT DETECTED.
ACTION:     Entering PRE-ALERT mode. Awaiting user confirmation or timeout.


In [None]:
# Raksha Kavach: Custom AI Model Training Script
# This script trains a custom audio classification model to distinguish between
# "threat" and "normal" sounds, and exports it as a .tflite file.

import os
import numpy as np
import tensorflow as tf
import librosa
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
import shutil

# --- 1. CONFIGURATION & SETUP ---

# Define paths. You will need to create these folders and place your audio files inside.
# The two sub-folder names are also the keys load_data() maps to labels
# ("normal_sounds" -> 0, "threat_sounds" -> 1).
DATASET_PATH = "/content/RakshaKavach_Dataset"
THREAT_SOUNDS_PATH = os.path.join(DATASET_PATH, "threat_sounds")
NORMAL_SOUNDS_PATH = os.path.join(DATASET_PATH, "normal_sounds")
# Output locations for the trained Keras model and its TFLite conversion.
MODEL_SAVE_PATH = "/content/raksha_kavach_model.h5"
TFLITE_MODEL_SAVE_PATH = "/content/raksha_kavach_model.tflite"

# Audio processing parameters
# Every clip is loaded at SAMPLE_RATE and padded or truncated to DURATION * SAMPLE_RATE samples.
SAMPLE_RATE = 22050
DURATION = 3  # seconds
N_MELS = 128  # Number of Mel bands
# N_FFT and HOP_LENGTH are passed to librosa.feature.melspectrogram as n_fft and hop_length.
N_FFT = 2048
HOP_LENGTH = 512

def setup_directory_structure(clean=False):
    """
    Creates the necessary folders for the training data.

    Existing folders and the audio files inside them are kept, so running
    this after the dataset has been extracted or uploaded does not delete it.

    Args:
        clean: When True, delete DATASET_PATH and everything in it before
            recreating the empty folders. Defaults to False.
    """
    print("Setting up directory structure...")
    if clean and os.path.exists(DATASET_PATH):
        shutil.rmtree(DATASET_PATH)  # Explicitly requested wipe of previous runs
    os.makedirs(THREAT_SOUNDS_PATH, exist_ok=True)
    os.makedirs(NORMAL_SOUNDS_PATH, exist_ok=True)
    print(f"Please upload your 'threat' audio files to: {THREAT_SOUNDS_PATH}")
    print(f"Please upload your 'normal' audio files to: {NORMAL_SOUNDS_PATH}")
    print("Once uploaded, run the rest of the cells.")

# --- 2. FEATURE EXTRACTION (Audio to Image) ---

def audio_to_mel_spectrogram(file_path):
    """
    Turn one audio file into a fixed-size log-scaled Mel spectrogram.

    The clip is loaded at SAMPLE_RATE for at most DURATION seconds, then
    zero-padded or cut to exactly DURATION * sample-rate samples so every
    file produces a spectrogram of the same size for the CNN.

    Returns the spectrogram in dB relative to its own maximum, or None
    (after printing the error) when the file cannot be processed.
    """
    try:
        samples, rate = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

        # Force a fixed number of samples: pad short clips, trim long ones.
        wanted = DURATION * rate
        shortfall = wanted - len(samples)
        samples = np.pad(samples, (0, shortfall)) if shortfall > 0 else samples[:wanted]

        power = librosa.feature.melspectrogram(
            y=samples, sr=rate, n_fft=N_FFT, hop_length=HOP_LENGTH, n_mels=N_MELS
        )
        return librosa.power_to_db(power, ref=np.max)
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

# --- 3. DATA LOADING & PREPARATION ---

def load_data(data_path):
    """
    Loads all audio files from the subdirectories, extracts features, and assigns labels.

    Files are read from the "normal_sounds" (label 0) and "threat_sounds"
    (label 1) sub-folders of data_path. Only names ending in .wav, .mp3 or
    .ogg (any letter case) are used, in sorted order so the resulting arrays
    and any seeded split built from them are reproducible. A missing
    sub-folder is reported and skipped.

    Args:
        data_path: Directory containing the two class sub-folders.

    Returns:
        (X, y) as numpy arrays of spectrograms and integer labels, or
        (None, None) when no file could be loaded.
    """
    X, y = [], []
    labels_map = {"normal_sounds": 0, "threat_sounds": 1}
    audio_extensions = ('.wav', '.mp3', '.ogg')

    for folder_name, label in labels_map.items():
        folder_path = os.path.join(data_path, folder_name)
        print(f"\nLoading files from: {folder_path}")

        if not os.path.isdir(folder_path):
            print(f"WARNING: Folder not found, skipping: {folder_path}")
            continue

        file_count = 0
        # os.listdir order is arbitrary; sorting makes the sample order stable.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.lower().endswith(audio_extensions):
                continue

            spectrogram = audio_to_mel_spectrogram(os.path.join(folder_path, filename))
            if spectrogram is not None:  # None means the file could not be processed
                X.append(spectrogram)
                y.append(label)
                file_count += 1
        print(f"Loaded {file_count} files.")

    if not X:
        print("\nERROR: No audio files were loaded. Please check your dataset paths and file formats.")
        return None, None

    return np.array(X), np.array(y)

# --- 4. MODEL ARCHITECTURE (The AI Brain) ---

def build_model(input_shape):
    """
    Assemble and compile the spectrogram classifier.

    Two convolution stages (32 then 64 filters, each followed by 2x2 max
    pooling and 25% dropout) feed a 128-unit dense layer with 50% dropout
    and a 2-way softmax: normal (0) and threat (1).
    """
    def conv_stage(filters):
        # One feature-extraction stage: convolution, down-sampling, regularisation.
        return [
            Conv2D(filters, (3, 3), activation='relu'),
            MaxPooling2D((2, 2)),
            Dropout(0.25),
        ]

    layer_stack = [Input(shape=input_shape)]
    layer_stack += conv_stage(32)
    layer_stack += conv_stage(64)
    layer_stack += [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(2, activation='softmax'),
    ]

    model = Sequential(layer_stack)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# --- 5. THE COMPLETE TRAINING PIPELINE ---

def train_model():
    """
    Runs the entire training process from data loading to model saving.

    The data is split three ways: a held-out test set (20%), and from the
    remainder a validation set (20%) used only for early stopping. The
    reported test accuracy therefore comes from data that played no part in
    choosing the restored weights.

    Returns:
        None. Returns early, after printing the reason, when no data was
        loaded or only one class is present.
    """
    # Load and preprocess the data
    X, y = load_data(DATASET_PATH)
    if X is None:
        return

    # A two-class classifier cannot be trained or stratified from one class.
    if np.unique(y).size < 2:
        print("\nERROR: Both 'normal' and 'threat' samples are required for training.")
        return

    # Seed Python, NumPy and TensorFlow so weight init and shuffling are repeatable.
    tf.keras.utils.set_random_seed(42)

    # Add a channel dimension for the CNN (like a grayscale image)
    X = X[..., np.newaxis]
    y_onehot = to_categorical(y, num_classes=2)

    # Split into train+validation and test; the integer labels travel along so
    # the second split can be stratified too.
    X_fit, X_test, y_fit, y_test, labels_fit, _ = train_test_split(
        X, y_onehot, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_val, y_train, y_val = train_test_split(
        X_fit, y_fit, test_size=0.2, random_state=42, stratify=labels_fit)

    print(f"\nTraining data shape: {X_train.shape}")
    print(f"Validation data shape: {X_val.shape}")
    print(f"Test data shape: {X_test.shape}")

    # Build the model
    model = build_model(X_train.shape[1:])
    model.summary()

    # Train the model
    print("\n--- Starting Model Training ---")
    history = model.fit(X_train, y_train,
                        epochs=50,
                        batch_size=32,
                        validation_data=(X_val, y_val),
                        callbacks=[tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)])

    print("\n--- Training Complete ---")

    # Evaluate the model on data never seen during training or early stopping
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"\nFinal Test Accuracy: {accuracy*100:.2f}%")

    # Save the trained Keras model
    model.save(MODEL_SAVE_PATH)
    print(f"\nKeras model saved to: {MODEL_SAVE_PATH}")

    # Convert and save the TensorFlow Lite model
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()

    with open(TFLITE_MODEL_SAVE_PATH, 'wb') as f:
        f.write(tflite_model)
    print(f"TFLite model saved to: {TFLITE_MODEL_SAVE_PATH}")
    print("\nThis .tflite file is the 'engine' you can now use in your analysis application.")

# --- SCRIPT EXECUTION ---

if __name__ == '__main__':
    # Prepare the dataset folders and print where the audio files should go.
    setup_directory_structure()
    # Training is left disabled here; enable the call below once the audio files are in place.
    # train_model()


Setting up directory structure...
Please upload your 'threat' audio files to: /content/RakshaKavach_Dataset/threat_sounds
Please upload your 'normal' audio files to: /content/RakshaKavach_Dataset/normal_sounds
Once uploaded, run the rest of the cells.
