In [1]:
import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import pickle
import gc

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import (
    Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout,
    BatchNormalization, GlobalAveragePooling2D
)
from tensorflow.keras.applications import ResNet50, MobileNet, VGG16
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, roc_curve
from sklearn.tree import DecisionTreeClassifier


2025-03-09 14:34:24.894260: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-09 14:34:24.902448: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741505664.911361  576585 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741505664.914021  576585 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-09 14:34:24.924606: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [None]:
# Input geometry and class count handed to every model builder in this notebook.
# NOTE(review): the name N_MELS suggests mel bins while the files are named
# "mfcc3" -- confirm the first axis of X really has 128 entries.
N_MELS = 128
num_classes = 2
input_shape = (N_MELS, 109, 1)

# Feature array and label array stored on disk as .npy files.
X = np.load('../SavedFeatures/X_mfcc3.npy')
y = np.load('../SavedFeatures/y_mfcc3.npy')

# 80/20 hold-out split; the fixed random_state keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# One-hot encoded copies of the labels for the Keras models. The original
# label arrays are kept as well because the scikit-learn steps use them.
y_train_keras, y_test_keras = (
    to_categorical(split_labels, num_classes)
    for split_labels in (y_train, y_test)
)

# The unsplit arrays are not referenced again below, so drop them.
del X, y
gc.collect()

In [5]:
def model_vgg16(input_shape, num_classes):
    """Build a VGG16-based classifier with randomly initialised weights.

    The VGG16 convolutional stack (no top, ``weights=None``) is followed by
    global average pooling and three Dense/Dropout stages; only the first
    stage has batch normalisation. The output is a softmax layer.

    Args:
        input_shape: Shape tuple forwarded to ``VGG16(input_shape=...)``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``Model`` mapping the backbone input to class probabilities.
    """
    backbone = VGG16(weights=None, include_top=False, input_shape=input_shape)

    # (dense units, dropout rate, insert BatchNormalization before dropout)
    head_spec = (
        (512, 0.5, True),
        (256, 0.4, False),
        (128, 0.3, False),
    )

    features = GlobalAveragePooling2D()(backbone.output)
    for units, drop_rate, with_batch_norm in head_spec:
        features = Dense(units, activation='relu')(features)
        if with_batch_norm:
            features = BatchNormalization()(features)
        features = Dropout(drop_rate)(features)

    class_probs = Dense(num_classes, activation='softmax')(features)
    return Model(inputs=backbone.input, outputs=class_probs)

def model_resnet(input_shape, num_classes):
    """Build a ResNet50-based classifier with randomly initialised weights.

    The ResNet50 feature extractor (no top, ``weights=None``) is flattened and
    passed through four Dense/Dropout stages of decreasing width, then a
    softmax output layer.

    Args:
        input_shape: Shape tuple forwarded to ``ResNet50(input_shape=...)``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``Model`` mapping the backbone input to class probabilities.
    """
    backbone = ResNet50(include_top=False, weights=None, input_shape=input_shape)

    # Dense width paired with the dropout rate applied right after it.
    dense_stages = ((512, 0.5), (256, 0.4), (128, 0.3), (64, 0.2))

    hidden = Flatten()(backbone.output)
    for width, drop_rate in dense_stages:
        hidden = Dense(width, activation='relu')(hidden)
        hidden = Dropout(drop_rate)(hidden)

    class_probs = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=backbone.input, outputs=class_probs)


def model_mobilenet(input_shape, num_classes):
    """Build a MobileNet-based classifier with randomly initialised weights.

    The MobileNet feature extractor (no top, ``weights=None``) is followed by
    global average pooling and four Dense/Dropout stages; the first two
    stages also apply batch normalisation. The output is a softmax layer.

    Args:
        input_shape: Shape tuple forwarded to ``MobileNet(input_shape=...)``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled ``Model`` mapping the backbone input to class probabilities.
    """
    backbone = MobileNet(input_shape=input_shape, weights=None, include_top=False)

    # Each stage: Dense(units, relu) -> optional BatchNormalization -> Dropout(rate)
    stages = [
        {"units": 512, "rate": 0.5, "batch_norm": True},
        {"units": 256, "rate": 0.4, "batch_norm": True},
        {"units": 128, "rate": 0.3, "batch_norm": False},
        {"units": 64, "rate": 0.2, "batch_norm": False},
    ]

    tensor = GlobalAveragePooling2D()(backbone.output)
    for stage in stages:
        tensor = Dense(stage["units"], activation='relu')(tensor)
        if stage["batch_norm"]:
            tensor = BatchNormalization()(tensor)
        tensor = Dropout(stage["rate"])(tensor)

    class_probs = Dense(num_classes, activation='softmax')(tensor)

    return Model(inputs=backbone.input, outputs=class_probs)

In [None]:
# Reset Keras' global state before the training cell starts building networks.
tf.keras.backend.clear_session()

In [None]:
# ---------------------------------------------------------------------------
# Level-0 of the stacking ensemble.
#
# For every base network this cell produces one column of hard class
# predictions:
#   * meta_train -- out-of-fold predictions on X_train (each sample is
#     predicted by a network that never saw it during training);
#   * meta_test  -- predictions on X_test from a network trained on all of
#     X_train.
# ---------------------------------------------------------------------------
cv_folds = 5
skf = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=42)

# Training hyper-parameters shared by every base network.
BASE_EPOCHS = 10
BASE_BATCH_SIZE = 16
BASE_LEARNING_RATE = 0.0001

# Builder functions rather than model instances: each network is created
# right before it is trained and is the only one alive at that time, so the
# clean-up in the helper below can actually release it. The dict is also no
# longer called `models`, which used to shadow the `tensorflow.keras.models`
# import at the top of the notebook.
base_builders = {
    "vgg": model_vgg16,
    "resnet": model_resnet,
    "mobilenet": model_mobilenet,
}

# One column per base network, in the insertion order of `base_builders`.
meta_train = np.zeros((X_train.shape[0], len(base_builders)))
meta_test = np.zeros((X_test.shape[0], len(base_builders)))


def _fit_and_predict_labels(build_fn, X_fit, y_fit_onehot, X_eval):
    """Train one freshly built base network and return its predicted labels.

    Args:
        build_fn: Callable ``(input_shape, num_classes) -> Model``; called
            with the notebook-level ``input_shape`` and ``num_classes``.
        X_fit: Training inputs.
        y_fit_onehot: One-hot encoded training targets.
        X_eval: Inputs to predict after training.

    Returns:
        1-D array with the arg-max class index for every row of ``X_eval``.
    """
    net = build_fn(input_shape, num_classes)
    net.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=BASE_LEARNING_RATE),
                loss='binary_crossentropy', metrics=['accuracy'])
    net.fit(X_fit, y_fit_onehot, epochs=BASE_EPOCHS,
            batch_size=BASE_BATCH_SIZE, verbose=0)
    predicted_labels = np.argmax(net.predict(X_eval), axis=1)

    # Drop the only reference to the network before resetting Keras state so
    # its memory can be reclaimed before the next network is built.
    del net
    tf.keras.backend.clear_session()
    gc.collect()
    return predicted_labels


# Out-of-fold meta-features for the training set.
for train_idx, val_idx in skf.split(X_train, y_train):
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr = y_train_keras[train_idx]
    for column, build_fn in enumerate(base_builders.values()):
        meta_train[val_idx, column] = _fit_and_predict_labels(
            build_fn, X_tr, y_tr, X_val
        )

# Meta-features for the test set, from networks trained on the full training set.
for column, build_fn in enumerate(base_builders.values()):
    meta_test[:, column] = _fit_and_predict_labels(
        build_fn, X_train, y_train_keras, X_test
    )

I0000 00:00:1741505789.331914  576585 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 11439 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Ti, pci bus id: 0000:01:00.0, compute capability: 8.9
I0000 00:00:1741505792.404164  577328 service.cc:148] XLA service 0x410ff0d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1741505792.404184  577328 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Ti, Compute Capability 8.9
2025-03-09 14:36:32.446964: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1741505792.746847  577328 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1741505799.281538  577328 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 50ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 45ms/step






[1m73/80[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 3ms/step







[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 51ms/step
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 48ms/step
[1m72/80[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 3ms/step







[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step


In [None]:
# Level-1 (meta) learner: a decision tree fitted on the base networks'
# out-of-fold predictions. random_state is fixed like the other seeded steps
# in this notebook so that repeated runs on the same meta-features build the
# same tree.
meta_model = DecisionTreeClassifier(max_depth=20, random_state=42)
meta_model.fit(meta_train, y_train)

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open("../SavedModels/MFCC_ADD_DL_Stacking_model.pkl", "wb") as model_file:
    pickle.dump(meta_model, model_file)

# The training meta-features are not needed after fitting.
del meta_train
gc.collect()

0

In [None]:
# Probability assigned to the second class in meta_model.classes_
# (label 1 when the labels are 0/1), thresholded at 0.5 for hard predictions.
class_probabilities = meta_model.predict_proba(meta_test)
meta_prob = class_probabilities[:, 1]
above_threshold = meta_prob > 0.5
meta_predictions = above_threshold.astype(int)

accuracy = accuracy_score(y_test, meta_predictions)
f1 = f1_score(y_test, meta_predictions)

# Compute the EER: locate the ROC point where the false-positive rate and the
# false-negative rate are closest, and report the false-positive rate there.
fpr, tpr, thresholds = roc_curve(y_test, meta_prob)
fnr = 1 - tpr
rate_gap = np.abs(fpr - fnr)
eer_index = np.nanargmin(rate_gap)
eer_threshold = thresholds[eer_index]
eer = fpr[eer_index]

print(f"Accuracy: {accuracy:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"EER: {eer:.4f}")


Accuracy: 0.9786
F1-Score: 0.9741
EER: 0.0352


0

In [None]:

# Drop the remaining arrays and the fitted meta-model, then force a garbage
# collection pass. After this cell the deleted names no longer exist, so any
# cell that reads them fails until the cells that define them are re-run.
del meta_test, meta_model, X_train, X_test, y_train, y_test  
gc.collect()
