In [1]:
import os
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import gc
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.applications import MobileNet, MobileNetV3Small, MobileNetV3Large, VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, accuracy_score, roc_curve

2025-03-11 21:44:32.909710: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-11 21:44:33.026816: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741704273.072210    7055 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741704273.084555    7055 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-11 21:44:33.190226: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

# Feature Extraction

In [None]:
NUM_CLASSES = 2
# Side length of the square, single-channel "image" fed to the network.
# Must agree with the `input_shape` the model is built with.
IMG_SIDE = 109


def _as_square_image(features, side=IMG_SIDE):
    """Expand per-sample feature vectors into single-channel images.

    Every value of `features` is repeated `side` times along a new trailing
    axis, then an explicit channel axis of size 1 is appended.

    Args:
        features: array of per-sample vectors.
            Assumed to be (n_samples, n_features) — TODO confirm against the
            script that wrote the .npy files.
        side: number of times each value is repeated along the new axis.

    Returns:
        Array of shape (n_samples, n_features, side, 1).
    """
    tiled = np.tile(np.expand_dims(features, axis=-1), (1, 1, side))
    # Explicit channel axis: do not rely on Keras silently appending it when
    # the data rank is one less than the model's input rank.
    return np.expand_dims(tiled, axis=-1)


X = np.load('../SavedFeatures/X_f0.npy')
y = np.load('../SavedFeatures/y_f0.npy')

y_encoded = to_categorical(y, NUM_CLASSES)

# Sequential 80/20 split: first 80% of the rows train, the remainder validate.
# NOTE(review): no shuffling/stratification happens here — this presumably
# relies on the saved arrays already being in a suitable order; confirm.
split_index = int(0.8 * len(X))
X_train, X_val = X[:split_index], X[split_index:]
y_train, y_val = y_encoded[:split_index], y_encoded[split_index:]

# Reshape data for the VGG16 model (square single-channel input).
X_train = _as_square_image(X_train)
X_val = _as_square_image(X_val)

expected_shape = (IMG_SIDE, IMG_SIDE, 1)
assert X_train.shape[1:] == expected_shape, X_train.shape
assert X_val.shape[1:] == expected_shape, X_val.shape


In [6]:
def model_vgg16(input_shape=(109, 109, 1), num_classes=2):
    """Build a VGG16-based classifier with a small dense head.

    The VGG16 convolutional base is created without pretrained weights
    (`weights=None`) and without its original classification top. Its output
    is flattened and passed through three fully connected stages
    (512 -> 256 -> 128 units, ReLU) before a softmax output layer.

    Args:
        input_shape: shape of one input sample, passed to VGG16.
        num_classes: number of units in the final softmax layer.

    Returns:
        An uncompiled Keras `Model` mapping the VGG16 input to class
        probabilities.
    """
    backbone = VGG16(weights=None, include_top=False, input_shape=input_shape)
    head = Flatten()(backbone.output)

    # First dense stage is the only one followed by batch normalisation.
    head = Dense(512, activation='relu')(head)
    head = BatchNormalization()(head)
    head = Dropout(0.3)(head)

    # Two further dense stages, each with the same 0.2 dropout rate.
    for units in (256, 128):
        head = Dense(units, activation='relu')(head)
        head = Dropout(0.2)(head)

    probabilities = Dense(num_classes, activation='softmax')(head)
    return Model(inputs=backbone.input, outputs=probabilities)

In [7]:
# Reset Keras' global state before the next model is built.
keras.backend.clear_session()

In [8]:
# Training hyper-parameters for the VGG16 / F0 run.
LEARNING_RATE = 1e-4
BATCH_SIZE = 16
EPOCHS = 10

mvgg16 = model_vgg16(input_shape=(109, 109, 1), num_classes=2)

# The model ends in a softmax and the targets are one-hot encoded, so
# categorical cross-entropy is the matching loss. With exactly two classes it
# yields the same value as binary cross-entropy averaged over both outputs,
# but unlike that pairing it stays correct if the class count grows.
mvgg16.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# `hist` keeps the per-epoch training/validation metrics returned by fit().
hist = mvgg16.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, y_val),
)

I0000 00:00:1741704283.277137    7055 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14282 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Ti, pci bus id: 0000:01:00.0, compute capability: 8.9


Epoch 1/10


I0000 00:00:1741704286.090867    7131 service.cc:148] XLA service 0x7d340c0036f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1741704286.090989    7131 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Ti, Compute Capability 8.9
2025-03-11 21:44:46.139407: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1741704286.420240    7131 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  5/796[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m27s[0m 35ms/step - accuracy: 0.5023 - loss: 0.7592

I0000 00:00:1741704291.044641    7131 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m796/796[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 44ms/step - accuracy: 0.6916 - loss: 0.6277 - val_accuracy: 0.7365 - val_loss: 0.5932
Epoch 2/10
[1m796/796[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 37ms/step - accuracy: 0.7544 - loss: 0.5480 - val_accuracy: 0.7503 - val_loss: 0.5507
Epoch 3/10
[1m796/796[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 37ms/step - accuracy: 0.7467 - loss: 0.5469 - val_accuracy: 0.7013 - val_loss: 0.7877
Epoch 4/10
[1m796/796[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 37ms/step - accuracy: 0.7621 - loss: 0.5171 - val_accuracy: 0.7739 - val_loss: 0.5409
Epoch 5/10
[1m796/796[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 37ms/step - accuracy: 0.7715 - loss: 0.5126 - val_accuracy: 0.7641 - val_loss: 0.5101
Epoch 6/10
[1m796/796[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 37ms/step - accuracy: 0.7736 - loss: 0.5000 - val_accuracy: 0.7742 - val_loss: 0.5629
Epoch 7/10
[1m796/796[0m 

#### Evaluation Metrics

In [None]:
# Score the trained model on the held-out split (X_val / y_val).
test_loss, test_accuracy = mvgg16.evaluate(X_val, y_val, verbose=0)
y_pred = mvgg16.predict(X_val)

# Collapse predicted probabilities and one-hot targets to class indices.
y_pred_classes = y_pred.argmax(axis=1)
y_true_classes = y_val.argmax(axis=1)
f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')

print("===================VGG16 - F0===================")
print(f"TAccuracy: {test_accuracy:.5f}")
print(f"F1-Score: {f1:.5f}")

eers = []

# Compute the equal error rate (EER) for each class, treating that class's
# output column as the positive score (one-vs-rest).
for i in range(y_pred.shape[1]):
    y_true_binary = y_val[:, i]
    y_pred_prob = y_pred[:, i]

    fpr, tpr, thresholds = roc_curve(y_true_binary, y_pred_prob)
    fnr = 1 - tpr

    # ROC point where FPR and FNR are closest. Located once and reused for
    # both the threshold and the reported rate.
    eer_index = np.nanargmin(np.abs(fpr - fnr))
    eer_threshold = thresholds[eer_index]
    eer = fpr[eer_index]
    eers.append(eer)
    print(f"Class {i}: EER = {eer:.5f} at threshold {eer_threshold:.5f}")

# Report the average of the per-class EERs.
mean_eer = np.mean(eers)
print(f"EER: {mean_eer:.5f}")

[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step
TAccuracy: 0.79020
F1-Score: 0.77902
Class 0: EER = 0.22623 at threshold 0.89415
Class 1: EER = 0.22553 at threshold 0.10612
EER: 0.22588


In [None]:

# Persist only the trained weights (not the architecture) for the F0 model.
mvgg16.save_weights(filepath="F0_mvgg16.weights.h5")

In [None]:

# Drop the model and the train/validation arrays, then force a collection
# pass so their memory can be reclaimed before the next experiment.
# NOTE(review): `X`, `y`, `y_encoded` and `hist` are not deleted here —
# confirm whether they should be released as well.
del mvgg16
del X_train, X_val
del y_train, y_val
gc.collect()
