In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from keras.preprocessing.sequence import pad_sequences
import pickle
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, classification_report, accuracy_score, roc_curve
from scipy.fftpack import dct
import gc

In [None]:
X = np.load('../SavedFeatures/X_mfcc.npy')
y = np.load('../SavedFeatures/y_mfcc.npy')
X_dl = np.expand_dims(X, axis=-1)  

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = X_train.reshape(X_train.shape[0], -1)  # to 2D
X_val = X_val.reshape(X_val.shape[0], -1)  # to 2D

In [None]:
del X, y
gc.collect()

In [16]:
rf_model = RandomForestClassifier(n_estimators=150, random_state=42)
rf_model.fit(X_train, y_train)
rf_val_predictions = rf_model.predict(X_val)

In [None]:
accuracy = accuracy_score(y_val, rf_val_predictions)
f1_class_0 = f1_score(y_val, rf_val_predictions, pos_label=0)
f1_class_1 = f1_score(y_val, rf_val_predictions, pos_label=1) 
f1_score = (f1_class_0 + f1_class_1)/2

# Tính EER

y_val_probabilities1 = rf_model.predict_proba(X_val)[:, 1]
fpr1, tpr1, thresholds1 = roc_curve(y_val, y_val_probabilities1)
frr1 = 1 - tpr1
eer_index1 = np.nanargmin(np.abs(fpr1 - frr1))
eer1 = fpr1[eer_index1] 
eer_threshold1 = thresholds1[eer_index1]


y_val_probabilities = rf_model.predict_proba(X_val)[:, 0]
fpr, tpr, thresholds = roc_curve(y_val, y_val_probabilities)
frr = 1 - tpr
eer_index = np.nanargmin(np.abs(fpr - frr))
eer = fpr[eer_index]  
eer_threshold = thresholds[eer_index]




print("===================RF - MFCC===================")
print(f"Accuracy: {accuracy:.5f}")
print(f"F1-Score  {f1_score:.5f}")
print("\nClassification Report:\n", classification_report(y_val, rf_val_predictions))
print("EER of Bonafine: ---------")
print(f"Equal Error Rate (EER): {eer1:.5f}")
print(f"EER Threshold: {eer_threshold1:.5f}")
print("EER of Spoof: ---------")
print(f"Equal Error Rate (EER): {eer:.5f}")
print(f"EER Threshold: {eer_threshold:.5f}")


Accuracy: 0.89730
F1-Score  0.88827

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.99      0.96      1904
           1       0.98      0.90      0.94      1280

    accuracy                           0.95      3184
   macro avg       0.96      0.94      0.95      3184
weighted avg       0.95      0.95      0.95      3184

EER of Bonafine: ---------
Equal Error Rate (EER): 0.05725
EER Threshold: 0.32667
EER of Spoof: ---------
Equal Error Rate (EER): 0.94275
EER Threshold: 0.68000


In [17]:
with open("../SavedModels/MFCC_rf_dfd.pkl", 'wb') as file:
    pickle.dump(rf_model, file)

In [None]:
del rf_model, rf_val_predictions, X_train, X_val, y_train, y_val
gc.collect()