<a href="https://colab.research.google.com/github/Shivan5h/Audio-Detection/blob/main/LFCC_GMM_ASVspoof_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🔍 LFCC-GMM Based Audio Deepfake Detection
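Using the ASVspoof 2019 LA dataset: one Gaussian mixture model is trained per class (bona fide and spoofed) on cepstral features, and the two models are compared by log-likelihood ratio to report an equal error rate (EER).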


In [None]:
# 📦 Install Required Libraries
!pip install librosa numpy scikit-learn joblib soundfile


In [None]:
# 📁 Dataset & Paths Setup
import os
from glob import glob
import librosa
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.metrics import roc_curve, auc
import joblib

# Directories holding the ASVspoof 2019 LA audio, one directory per class.
# Both values are placeholders: point them at the real folders containing
# the .flac recordings before running the cells below.
spoofed_path = 'path_to/spoofed/'
bonafide_path = 'path_to/bonafide/'


In [None]:
# 🔄 LFCC Feature Extraction
def extract_lfcc(y, sr, n_mfcc=20, n_fft=2048, hop_length=512, n_filters=128):
    """Compute linear-frequency cepstral coefficients (LFCC) with deltas.

    The previous implementation called ``librosa.feature.mfcc``, which warps
    the spectrum onto the mel scale and therefore produced MFCCs, not LFCCs.
    This version keeps the same pipeline (power spectrogram -> triangular
    filterbank -> dB -> DCT-II) but spaces the filters evenly in Hz.

    Parameters
    ----------
    y : np.ndarray
        Mono audio signal.
    sr : int
        Sampling rate of ``y`` in Hz.
    n_mfcc : int, default 20
        Number of cepstral coefficients to keep. The name is retained for
        backward compatibility with existing callers.
    n_fft : int, default 2048
        FFT window size passed to ``librosa.stft``.
    hop_length : int, default 512
        Hop between successive frames, in samples.
    n_filters : int, default 128
        Number of linearly spaced triangular filters.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_frames, 3 * n_mfcc)`` holding the static
        coefficients followed by their first- and second-order deltas.

    Raises
    ------
    ValueError
        If ``n_mfcc`` exceeds ``n_filters`` (the DCT cannot yield more
        coefficients than there are filterbank channels).
    """
    # Local import keeps this cell self-contained; SciPy ships with librosa.
    from scipy.fft import dct

    if n_mfcc > n_filters:
        raise ValueError(
            f"n_mfcc ({n_mfcc}) cannot exceed n_filters ({n_filters})"
        )

    # Power spectrogram, one column per frame.
    power_spec = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length)) ** 2

    # Triangular filters whose edges are evenly spaced between 0 Hz and Nyquist.
    edges = np.linspace(0.0, sr / 2.0, n_filters + 2)
    bin_freqs = np.fft.rfftfreq(n_fft, d=1.0 / sr)
    rising = (bin_freqs[None, :] - edges[:-2, None]) / (edges[1:-1, None] - edges[:-2, None])
    falling = (edges[2:, None] - bin_freqs[None, :]) / (edges[2:, None] - edges[1:-1, None])
    filterbank = np.maximum(0.0, np.minimum(rising, falling))

    # Log-compress the band energies, then decorrelate them with a DCT-II.
    log_energy = librosa.power_to_db(filterbank @ power_spec)
    lfcc = dct(log_energy, type=2, axis=0, norm='ortho')[:n_mfcc]

    delta = librosa.feature.delta(lfcc)
    delta2 = librosa.feature.delta(lfcc, order=2)
    return np.vstack([lfcc, delta, delta2]).T

def load_data(path, label):
    """Load every ``.flac`` file in ``path`` and return per-file features.

    Each file is resampled to 16 kHz, passed through ``extract_lfcc`` and
    summarised by the mean of its frame-level features, so every recording
    contributes exactly one row.

    Parameters
    ----------
    path : str
        Directory that directly contains the ``.flac`` files.
    label : int
        Class label assigned to every file found in ``path``.

    Returns
    -------
    tuple of np.ndarray
        ``(X, y)`` where ``X`` has one feature row per file and ``y`` holds
        ``label`` repeated once per file.

    Raises
    ------
    FileNotFoundError
        If ``path`` contains no ``.flac`` files. Without this check a wrong
        or placeholder path silently yields empty arrays that only fail
        later, during concatenation or model fitting.
    """
    # Sort so the row order is stable across runs and file systems.
    files = sorted(glob(os.path.join(path, '*.flac')))
    if not files:
        raise FileNotFoundError(f"No .flac files found in {path!r}")

    X, y = [], []
    for file in files:
        audio, sr = librosa.load(file, sr=16000)
        lfcc = extract_lfcc(audio, sr)
        # Collapse the time axis: one mean feature vector per utterance.
        X.append(np.mean(lfcc, axis=0))
        y.append(label)
    return np.array(X), np.array(y)

# Build per-utterance features for each class (label 0 = bonafide,
# label 1 = spoofed), then stack the two classes into a single set.
X_bona, y_bona = load_data(bonafide_path, 0)
X_spoof, y_spoof = load_data(spoofed_path, 1)
X = np.concatenate((X_bona, X_spoof))
y = np.concatenate((y_bona, y_spoof))


In [None]:
# 🤖 Train GMM Classifier
# One diagonal-covariance GMM per class. random_state pins the otherwise random
# initialisation so that Restart & Run All reproduces the same fitted models.
gmm_bonafide = GaussianMixture(n_components=8, covariance_type='diag', max_iter=200, random_state=42)
gmm_spoofed = GaussianMixture(n_components=8, covariance_type='diag', max_iter=200, random_state=42)

# Each model only ever sees the utterances of its own class.
gmm_bonafide.fit(X_bona)
gmm_spoofed.fit(X_spoof)

# Save models
joblib.dump(gmm_bonafide, 'gmm_bonafide.pkl')
joblib.dump(gmm_spoofed, 'gmm_spoofed.pkl')


In [None]:
# 📊 Evaluate Model
# Log-likelihood ratio per utterance; positive values favour the bonafide model.
# score_samples accepts the whole feature matrix, so no per-sample loop is needed.
scores = gmm_bonafide.score_samples(X) - gmm_spoofed.score_samples(X)

# A higher score means "bonafide", which is label 0, so 0 must be declared the
# positive class. With roc_curve's default (label 1 positive) the curve is
# mirrored and the value printed below would be 1 - EER instead of the EER.
fpr, tpr, _ = roc_curve(y, scores, pos_label=0)

# EER is the operating point where the false-negative rate equals the
# false-positive rate; take the threshold where the two are closest.
fnr = 1 - tpr
eer = fpr[np.nanargmin(np.absolute(fnr - fpr))]

# NOTE: X is the same data the GMMs were fitted on, so this is a training-set
# figure; score a held-out split to estimate generalisation.
print(f'Equal Error Rate (EER): {eer * 100:.2f}%')