In [None]:
import numpy as np
import librosa
import os
from sklearn.svm import OneClassSVM
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm

# Root folders of the three dataset partitions, in the order
# (training, development, evaluation). Adjust them to match where the
# dataset lives on the local machine.
train_dir, dev_dir, eval_dir = (
    "./ASVspoof2019_LA_train/",
    "./ASVspoof2019_LA_dev/",
    "./ASVspoof2019_LA_eval/",
)

# Extract features from audio files
def extract_features(file_path, n_mfcc=20):
    """Summarise one audio file as its time-averaged MFCC vector.

    Args:
        file_path: Path of the audio file to read.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        The mean of the MFCC matrix taken across frames, i.e. one value
        per coefficient.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    signal, sample_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose so frames run along axis 0, then average the frames away.
    frames_first = coefficients.T
    return np.mean(frames_first, axis=0)

# Load data and extract features
def load_and_preprocess_data(dir_path, label=None):
    """Extract features for every ``.flac`` file found under a directory.

    The tree rooted at ``dir_path`` is walked recursively and
    ``extract_features`` is called on each file whose name ends in
    ``.flac``. Directories and files are visited in sorted order so the
    row order of the result is reproducible between runs.

    Args:
        dir_path: Root directory to search. A missing or empty directory
            yields empty arrays rather than an error.
        label: Optional label attached to every file that is found. When
            ``None`` no labels are collected.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. ``features`` holds
        one entry per audio file. ``labels`` holds one entry per file when
        ``label`` is given and is an empty array otherwise.
    """
    features = []
    labels = []
    for root, dirs, files in os.walk(dir_path):
        # Sorting ``dirs`` in place fixes the order in which os.walk
        # descends into sub-directories.
        dirs.sort()
        # Filter before handing the list to tqdm so the progress total
        # counts audio files only.
        flac_files = sorted(name for name in files if name.endswith(".flac"))
        for name in tqdm(flac_files, desc=f"Processing {dir_path}", unit="file"):
            features.append(extract_features(os.path.join(root, name)))
            if label is not None:
                labels.append(label)
    # The second element is always the label array. The previous
    # ``a, b if labels else a`` expression returned the feature matrix in
    # the labels slot whenever no labels had been collected.
    return np.array(features), np.array(labels)

# Load and preprocess the dataset
print("Loading training data...")
# NOTE(review): the training directory is loaded without any filtering, so
# the one-class model is fit on every file found there. If that directory
# also holds spoofed audio, restrict training to bona fide files -- confirm.
train_features, _ = load_and_preprocess_data(train_dir)
print("Loading development data...")
# NOTE(review): a single label, picked from the directory *name*, is given
# to every development file. With the default dev_dir (no "bonafide" in the
# path) all files are labelled "spoof". Per-file ground truth presumably has
# to come from the dataset's protocol/metadata files -- TODO confirm.
dev_features, dev_labels = load_and_preprocess_data(dev_dir, label="bona fide" if "bonafide" in dev_dir else "spoof")

# Fail early with a readable message instead of an opaque shape error from
# the scaler when a directory is missing or contains no audio.
if train_features.size == 0:
    raise ValueError(f"No .flac files found under {train_dir!r}")
if dev_features.size == 0:
    raise ValueError(f"No .flac files found under {dev_dir!r}")

# Scale features (statistics are fit on the training set only and then
# reused for the development set)
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
dev_features = scaler.transform(dev_features)

# Train One-Class SVM
print("Training One-Class SVM...")
ocsvm = OneClassSVM(kernel='rbf', gamma='scale', nu=0.5)
ocsvm.fit(train_features)

# Predict on development data
print("Evaluating model...")
dev_scores = ocsvm.decision_function(dev_features)
dev_predictions = ocsvm.predict(dev_features)

# Map predictions to binary labels
# predicted_labels is not used further in this cell; it is kept available
# for interactive inspection.
predicted_labels = (dev_predictions == 1).astype(int)  # 1 for inliers (bona fide), 0 for outliers (spoof)
actual_labels = (np.array(dev_labels) == "bona fide").astype(int)

# Compute ROC AUC and plot the ROC curve.
# ROC analysis is undefined when the ground truth holds only one class, so
# that case is reported and skipped; the model and scaler are still saved.
if np.unique(actual_labels).size < 2:
    roc_auc = None
    print("Skipping ROC AUC and ROC curve: development labels contain only one class.")
else:
    roc_auc = roc_auc_score(actual_labels, dev_scores)
    print(f"ROC AUC Score: {roc_auc:.4f}")

    # Plot ROC Curve
    fpr, tpr, thresholds = roc_curve(actual_labels, dev_scores)
    plt.figure()
    plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {roc_auc:.4f})")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curve for One-Class SVM")
    plt.legend()
    plt.grid()
    plt.show()

# Save the model and scaler
with open("one_class_svm_model.pkl", "wb") as model_file:
    pickle.dump(ocsvm, model_file)

with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

print("Model and scaler saved successfully.")