In [None]:
# Model artifact renames (new file name - original file name):
# 2sec.h5 - audio_deepfake_detector.h5
# 2sec_scaler.pkl - audio_deepfake_scaler.pkl
# 10sec.pkl - deepfake_audio_detector.pkl
# 10sec_scaler.pkl - feature_scaler.pkl
# mfcc.h5 - audio_classification_model.h5
# mfcc_scaler.pkl - scaler.pkl

import os
import numpy as np
import joblib
import librosa
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

# ---------- MODEL 1 SETUP ----------
# Relative paths read with joblib.load in load_models_and_scalers()
# (model 1 and its feature scaler).
MODEL1_PATH = "../models/10sec.pkl"
SCALER1_PATH = "../models/10sec_scaler.pkl"
# Sample rate passed to librosa.load in extract_features_model1().
SAMPLE_RATE_1 = 22050

def extract_features_model1(file_path):
    """Build the summary-statistics feature row used by model 1.

    The audio is loaded at SAMPLE_RATE_1, then the mean and standard
    deviation are taken over the spectral centroid, spectral bandwidth,
    spectral rolloff, zero-crossing rate, harmonic component, percussive
    component and RMS energy. The RMS max-min spread is appended last.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A one-row pandas DataFrame with 15 named columns, or None when
        loading or feature extraction raises (the error is printed).
    """
    try:
        signal, rate = librosa.load(file_path, sr=SAMPLE_RATE_1)

        # (column prefix, raw values) pairs; the order here fixes the
        # column order of the resulting DataFrame.
        tracks = [
            ('spectral_centroid', librosa.feature.spectral_centroid(y=signal, sr=rate)),
            ('spectral_bandwidth', librosa.feature.spectral_bandwidth(y=signal, sr=rate)),
            ('spectral_rolloff', librosa.feature.spectral_rolloff(y=signal, sr=rate)),
            ('zcr', librosa.feature.zero_crossing_rate(signal)),
            ('harmonic', librosa.effects.harmonic(signal)),
            ('percussive', librosa.effects.percussive(signal)),
            ('rms', librosa.feature.rms(y=signal)),
        ]

        row = {}
        for prefix, values in tracks:
            row[f'{prefix}_mean'] = np.mean(values)
            row[f'{prefix}_std'] = np.std(values)

        # RMS is the last track; its spread is the final column.
        rms_values = tracks[-1][1]
        row['rms_dynamic_range'] = np.max(rms_values) - np.min(rms_values)

        return pd.DataFrame([row])
    except Exception as e:
        print(f"[Model 1] Error processing: {e}")
        return None

def predict_model1(file_path, model, scaler):
    """Classify one audio file with model 1.

    Args:
        file_path: Path of the audio file to classify.
        model: Object whose ``predict`` returns an indexable of labels.
        scaler: Object whose ``transform`` accepts the feature DataFrame.

    Returns:
        'fake' when the first predicted label equals 1, 'real' otherwise,
        or None when feature extraction failed.
    """
    feature_row = extract_features_model1(file_path)
    if feature_row is None:
        return None

    labels = model.predict(scaler.transform(feature_row))
    if labels[0] == 1:
        return 'fake'
    return 'real'

# ---------- MODEL 2 SETUP ----------
# MODEL2_PATH is read with tf.keras.models.load_model and SCALER2_PATH with
# joblib.load, both in load_models_and_scalers().
MODEL2_PATH = "../models/2sec.h5"
SCALER2_PATH = "../models/2sec_scaler.pkl"
# Sample rate passed to librosa.load in extract_features_model2().
SAMPLE_RATE_2 = 16000

def extract_features_model2(file_path):
    """Build the flat feature vector used by model 2.

    The audio is loaded at SAMPLE_RATE_2. Each frame-level feature matrix
    (40 MFCCs, chroma, spectral centroid, zero-crossing rate, RMS) is
    averaged over its frames, and the per-feature means are concatenated
    in that order.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A 1-D numpy array of time-averaged features, or None when loading
        or feature extraction raises (the error is printed).
    """
    try:
        signal, rate = librosa.load(file_path, sr=SAMPLE_RATE_2)

        # Order matters: it defines the layout of the output vector.
        frame_features = (
            librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40),
            librosa.feature.chroma_stft(y=signal, sr=rate),
            librosa.feature.spectral_centroid(y=signal, sr=rate),
            librosa.feature.zero_crossing_rate(signal),
            librosa.feature.rms(y=signal),
        )

        # Transpose so axis 0 runs over frames, then average them out.
        per_feature_means = [np.mean(matrix.T, axis=0) for matrix in frame_features]
        return np.concatenate(per_feature_means)
    except Exception as e:
        print(f"[Model 2] Error processing: {e}")
        return None

def predict_model2(file_path, model, scaler):
    """Classify one audio file with model 2.

    Args:
        file_path: Path of the audio file to classify.
        model: Object whose ``predict`` output is indexed as ``[0][0]``
            to obtain a single score.
        scaler: Object whose ``transform`` accepts a one-sample batch.

    Returns:
        'fake' when the score is strictly greater than 0.5, 'real'
        otherwise, or None when feature extraction failed.
    """
    feature_vector = extract_features_model2(file_path)
    if feature_vector is None:
        return None

    # Wrap the vector in a list to form a batch of one sample.
    batch = scaler.transform([feature_vector])
    score = model.predict(batch)[0][0]
    if score > 0.5:
        return 'fake'
    return 'real'

# ---------- MODEL 3 SETUP ----------
MODEL3_PATH = "../models/mfcc.h5"
SCALER3_PATH = "../models/mfcc_scaler.pkl"
# Sample rate passed to librosa.load in extract_features_model3(); named here
# so model 3 is configured the same way as models 1 and 2.
SAMPLE_RATE_3 = 16000

def extract_features_model3(file_path):
    """Build the MFCC feature vector used by model 3.

    The audio is loaded at SAMPLE_RATE_3, 40 MFCCs are computed per frame,
    and each coefficient is averaged over all frames.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A 1-D numpy array with one mean value per MFCC coefficient, or
        None when loading or feature extraction raises (the error is
        printed).
    """
    try:
        y, sr = librosa.load(file_path, sr=SAMPLE_RATE_3)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
        # Transpose so axis 0 runs over frames, then average them out.
        return np.mean(mfcc.T, axis=0)
    except Exception as e:
        print(f"[Model 3] Error processing: {e}")
        return None

def predict_model3(file_path, model, scaler):
    """Classify one audio file with model 3.

    Args:
        file_path: Path of the audio file to classify.
        model: Object whose ``predict`` returns class scores.
        scaler: Object whose ``transform`` accepts a one-sample batch.

    Returns:
        'fake' when the arg-max of the prediction output is index 1,
        'real' otherwise, or None when feature extraction failed.
    """
    feature_vector = extract_features_model3(file_path)
    if feature_vector is None:
        return None

    # Wrap the vector in a list to form a batch of one sample.
    class_scores = model.predict(scaler.transform([feature_vector]))
    # np.argmax without an axis works on the flattened output.
    if np.argmax(class_scores) == 1:
        return 'fake'
    return 'real'

# ---------- ENSEMBLE PREDICTOR ----------
def load_models_and_scalers():
    """Load the three model/scaler pairs from their configured paths.

    Model 1 and all three scalers are read with ``joblib.load``; models 2
    and 3 are read with ``tf.keras.models.load_model``. Files are loaded
    in the order they appear in the returned tuple.

    NOTE(review): ``joblib.load`` unpickles its input, so these paths must
    only ever point at trusted files.

    Returns:
        tuple: (model1, scaler1, model2, scaler2, model3, scaler3)
    """
    load_keras = tf.keras.models.load_model
    sources = (
        (joblib.load, MODEL1_PATH),
        (joblib.load, SCALER1_PATH),
        (load_keras, MODEL2_PATH),
        (joblib.load, SCALER2_PATH),
        (load_keras, MODEL3_PATH),
        (joblib.load, SCALER3_PATH),
    )
    return tuple(loader(path) for loader, path in sources)

def ensemble_predict(file_path, model1, scaler1, model2, scaler2, model3, scaler3):
    """Run all three detectors on one audio file and combine their votes.

    The verdict is 'FAKE' as soon as any single model answers 'fake';
    otherwise it is 'REAL'. A model whose feature extraction failed
    contributes None and is ignored by that rule.

    Args:
        file_path: Path of the audio file to classify.
        model1, scaler1: Model/scaler pair passed to predict_model1.
        model2, scaler2: Model/scaler pair passed to predict_model2.
        model3, scaler3: Model/scaler pair passed to predict_model3.

    Returns:
        'FAKE' or 'REAL', or None when no model produced a prediction
        (for example when the file could not be read). Previously that
        case was reported as 'REAL'.
    """
    print(f"\n🔍 Predicting for file: {file_path}")

    predictions = [
        predict_model1(file_path, model1, scaler1),
        predict_model2(file_path, model2, scaler2),
        predict_model3(file_path, model3, scaler3),
    ]
    print(f"Model Predictions: {predictions}")

    failed = predictions.count(None)
    if failed == len(predictions):
        # Nothing was actually classified, so calling the file REAL would
        # be misleading; mirror the predict_model* failure convention.
        print("\n⚠️ No model could process the file; no verdict available.")
        return None
    if failed:
        print(f"⚠️ {failed} of {len(predictions)} models failed; "
              "verdict is based on the remaining ones.")

    if "fake" in predictions:
        final_verdict = 'FAKE'
        print(f"\n🟥 FINAL VERDICT: {final_verdict}")
    else:
        final_verdict = 'REAL'
        print(f"\n🟩 FINAL VERDICT: {final_verdict}")
    return final_verdict


In [None]:
# First, load all models and scalers (do this once)
# The six returned objects are unpacked in the order produced by
# load_models_and_scalers().
model1, scaler1, model2, scaler2, model3, scaler3 = load_models_and_scalers()

# Then call the ensemble_predict function with any audio file
# The path is relative; `result` keeps the returned verdict for later cells.
file_path = "../audio_files/fake.wav"
result = ensemble_predict(file_path, model1, scaler1, model2, scaler2, model3, scaler3)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations



🔍 Predicting for file: ./audio_files/fake.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Model Predictions: ['fake', 'fake', 'real']

🟥 FINAL VERDICT: FAKE
