In [None]:
pip install opensmile

In [None]:
import glob
import os
import re

import joblib
import keras
import numpy as np
import opensmile
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

In [None]:
# initialize OpenSMILE
# Extractor for the ComParE 2016 feature set at the low-level-descriptor level.
smile_lld = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.LowLevelDescriptors
)
# Extractor for the eGeMAPS v02 feature set at the functionals level.
smile_func = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals
)

# compute_lld_stats() and process_audio_file() are each defined exactly once,
# in the two cells that follow. Keeping a second copy in this cell would only
# be shadowed by those later definitions.

In [None]:
def compute_lld_stats(X_lld):
    """Summarise a table of low-level descriptors with per-column statistics.

    Each statistic is reduced over axis 0 (the rows) and the four results are
    laid side by side in the order mean, std, max, min. For an input with
    ``n`` columns the output therefore holds all ``n`` means first, then all
    ``n`` standard deviations, and so on.

    Args:
        X_lld: pandas DataFrame (or anything ``np.asarray`` accepts) with one
            row per observation and one column per descriptor.

    Returns:
        numpy array of shape ``(1, 4 * n_columns)``.

    Raises:
        ValueError: if ``X_lld`` has no rows. Without this guard NumPy emits
            "mean of empty slice" warnings and then fails inside ``np.max``
            with a message that does not mention the real cause.
    """
    lld_values = np.asarray(X_lld)
    if len(lld_values) == 0:
        raise ValueError("Cannot compute LLD statistics: input contains no frames")

    # The reducer order defines the column layout of the result; keep it stable.
    reducers = (np.mean, np.std, np.max, np.min)
    return np.hstack([reducer(lld_values, axis=0).reshape(1, -1) for reducer in reducers])

In [None]:
def process_audio_file(filepath):
    """Extract one flat mapping of acoustic features from a single audio file.

    Runs the two module-level openSMILE extractors on ``filepath``:
    ``smile_lld`` (whose output is summarised by ``compute_lld_stats``) and
    ``smile_func``. The summary statistics and the first row of functionals
    are concatenated and paired with generated feature names.

    Args:
        filepath: Path of the audio file passed to ``process_file``.

    Returns:
        dict mapping feature name to value, LLD statistics first and
        functionals last, or ``None`` if any step raised (the error is
        printed together with the offending path).

    NOTE(review): the LLD names do not line up with the LLD values.
    ``compute_lld_stats`` lays values out statistic-major (all means, then all
    stds, ...), while the names below are generated column-major
    (``col1_mean, col1_std, col1_max, col1_min, col2_mean, ...``). Only the
    labels are wrong; positions are stable. The layout is intentionally left
    untouched here because the persisted scaler / selected indices were
    presumably fitted on exactly these names and positions -- confirm against
    the training pipeline before correcting it.
    """
    try:
        # 1. Extract Low-Level Descriptors (LLDs) and summarise them
        X_lld = smile_lld.process_file(filepath)
        lld_stats = compute_lld_stats(X_lld)

        # 2. Extract Functionals (eGeMAPS)
        X_func = smile_func.process_file(filepath)

        # 3. Combine all features into a single row
        acoustic_features = np.hstack([lld_stats, X_func.values])

        # 4. Generate feature names (see NOTE(review) in the docstring)
        lld_feature_names = [
            f"{col}_{stat}"
            for col in X_lld.columns
            for stat in ['mean', 'std', 'max', 'min']
        ]
        all_feature_names = lld_feature_names + X_func.columns.tolist()

        # 5. Map features to names, preserving positional order
        return dict(zip(all_feature_names, acoustic_features[0]))

    except Exception as e:
        # Best-effort by design: a broken file must not abort a whole batch.
        # Report which file failed and what kind of error it was.
        print(f"Error processing audio {filepath!r}: {type(e).__name__}: {e}")
        return None


In [None]:
# ========= Configuration =========
# Single place to repoint the notebook when the artifacts move: every path
# below is derived from this directory.
MODEL_DIR = "/content/drive/My Drive/BECU Capstone_Duress/Model"

SCALER_PATH = f"{MODEL_DIR}/scaler.pkl"            # loaded with joblib as `scaler`
FEATURES_PATH = f"{MODEL_DIR}/selected_idx.pkl"    # loaded with joblib as `selected_idx`
LSTM_MODEL_PATH = f"{MODEL_DIR}/lstm_model.keras"  # loaded with tf.keras.models.load_model
RF_MODEL_PATH = f"{MODEL_DIR}/rf_model.pkl"        # loaded with joblib as `best_rf_model`

In [None]:
# Recordings to score. Edit DATA_GLOB to point at the folder you want to test.
# NOTE(review): the previous pattern omitted the "My Drive" segment that every
# model path in the configuration cell uses; it is aligned here -- confirm that
# this is where the recordings actually live.
DATA_GLOB = '/content/drive/My Drive/BECU Capstone_Duress/Data/911call/*.wav'

# glob does not guarantee any ordering; sort so repeated runs list files
# (and therefore result rows) in the same order.
input_files = sorted(glob.glob(DATA_GLOB))
print(f"Found {len(input_files)} .wav file(s) matching {DATA_GLOB}")

In [None]:
# Extract one feature row per recording. Files for which process_audio_file
# returns None (or an empty mapping) are skipped, so all_features[i] always
# belongs to file_names[i].
all_features = []
file_names = []

for input_file in input_files:
    print(f"Processing: {input_file}")
    features = process_audio_file(input_file)
    if features:
        all_features.append(features)
        file_names.append(os.path.basename(input_file))

# Combine the per-file rows into a single feature table.
if not all_features:
    # Stop here instead of just printing: otherwise `df` is left undefined on
    # a fresh kernel, or still holds rows from an earlier run, and the cells
    # below either crash with a NameError or score stale data.
    raise RuntimeError("No features extracted.")
df = pd.DataFrame(all_features)

In [None]:
# ========= Data Loading =========
print("[1/5] Loading data...")
# Bind the feature table built by the extraction cell under the name the
# following steps use. This is the same DataFrame object as `df`, not a copy.
new_data = df

[1/5] Loading data...


In [None]:
# ========= Preprocessing =========
print("[2/5] Loading preprocessing objects...")
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only point these paths at artifacts you trust.
scaler = joblib.load(SCALER_PATH)
selected_idx = joblib.load(FEATURES_PATH)

print("[3/5] Preprocessing data...")
# Scale all extracted features, then keep only the columns listed in
# selected_idx. The selection is positional, so the column order of new_data
# must match the order the indices were computed on -- presumably the training
# pipeline's order; confirm.
data_scaled = scaler.transform(new_data)
data_sel = data_scaled[:, selected_idx]

# ========= Model Loading =========
print("[4/5] Loading models...")
# NOTE(review): per the Keras documentation this switches off safe-mode
# deserialization for the whole process, which permits loading serialized
# lambdas (arbitrary Python code). Only load model files you trust.
keras.config.enable_unsafe_deserialization()
# custom_objects exposes the TensorFlow module under the name 'tf' while the
# model is rebuilt -- presumably for Lambda layers that reference `tf`; confirm.
best_lstm_model = tf.keras.models.load_model(
    LSTM_MODEL_PATH,
    custom_objects={'tf': tf}
)
# NOTE(review): joblib.load unpickles the file; same trust caveat as above.
best_rf_model = joblib.load(RF_MODEL_PATH)

# ========= Prediction =========
print("[5/5] Making predictions...")
# Averaged ensemble probability a file must exceed to be flagged as duress.
# Deliberately above 0.5 to reduce false positives; tune it here.
DURESS_THRESHOLD = 0.7

# Prepare LSTM input: reshape each row to (1, n_features), giving a batch of
# shape (n_files, 1, n_features).
new_data_lstm = data_sel.reshape((-1, 1, data_sel.shape[1]))

# Get model probabilities
lstm_proba = best_lstm_model.predict(new_data_lstm).flatten()
# Column 1 of predict_proba -- presumably the "duress" class; confirm against
# best_rf_model.classes_.
rf_proba = best_rf_model.predict_proba(data_sel)[:, 1]

# Ensemble predictions
final_proba = (lstm_proba + rf_proba) / 2  # Simple average ensemble
final_pred = (final_proba > DURESS_THRESHOLD).astype(int)

# ========= Results =========
print("\nPrediction Results:")
# Human-readable label for each numeric prediction code.
label_by_code = {1: "duress", 0: "no duress"}
# One row per successfully processed file; "pro" is the averaged ensemble
# probability that the code and label were derived from.
result_df = pd.DataFrame(
    {
        "Filename": file_names,
        "Prediction_Code": final_pred,
        "Prediction_Label": pd.Series(final_pred).map(label_by_code),
        "pro": final_proba,
    }
)

print(result_df)

[2/5] Loading preprocessing objects...
[3/5] Preprocessing data...
[4/5] Loading models...
[5/5] Making predictions...
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 122ms/step

Prediction Results:
          Filename  Prediction_Code Prediction_Label       pro
0   call_111_0.wav                0        no duress  0.589801
1   call_107_0.wav                0        no duress  0.357011
2   call_108_0.wav                0        no duress  0.545051
3   call_109_0.wav                0        no duress  0.310220
4   call_115_0.wav                0        no duress  0.606012
..             ...              ...              ...       ...
95   call_12_0.wav                0        no duress  0.545815
96    call_2_0.wav                0        no duress  0.674830
97   call_10_0.wav                0        no duress  0.586833
98    call_9_0.wav                0        no duress  0.676746
99   call_13_0.wav                0        no duress  0.441214

[100 rows x 4 columns]
