In [1]:
import os
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import MobileNet, ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, accuracy_score, roc_curve
import gc

2025-03-08 22:43:12.351412: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-08 22:43:12.361520: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741448592.372599  438340 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741448592.375893  438340 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-08 22:43:12.389091: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [None]:
# Load the precomputed CQCC feature array and its matching label array.
X = np.load('../SavedFeatures/X_cqcc.npy')
y = np.load('../SavedFeatures/y_cqcc.npy')

# Zero-pad axis 1 with three leading entries; axes 0 and 2 get no padding.
pad_height = (3, 0)
pad_width = (0, 0)
per_axis_padding = ((0, 0), pad_height, pad_width)
X = np.pad(X, per_axis_padding, mode='constant', constant_values=0)

# Show the padded shape as the cell output.
X.shape

(15918, 32, 109)

In [None]:
# Hold out 20% of the samples for validation, using a fixed random_state.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# One-hot encode both label vectors into two columns.
y_train, y_val = (
    to_categorical(labels, num_classes=2) for labels in (y_train, y_val)
)

# The unsplit arrays are not used again below: drop the names, then collect.
del X, y
gc.collect()

In [7]:
def model_resnet(input_shape, num_classes):
    """Build a ResNet50-backed classifier with a dense softmax head.

    The backbone is created without its top layers and with ``weights=None``.
    Its output is global-average-pooled and batch-normalised, then passed
    through four Dense(relu) -> BatchNormalization -> Dropout stages
    (512/0.4, 256/0.4, 128/0.2, 64/0.2) before the final softmax layer.

    Args:
        input_shape: Forwarded to ``ResNet50`` as ``input_shape``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A ``Model`` mapping the backbone's input to the softmax output.
    """
    backbone = ResNet50(include_top=False, weights=None, input_shape=input_shape)

    features = GlobalAveragePooling2D()(backbone.output)
    features = BatchNormalization()(features)

    # (units, dropout rate) for each stage of the head, widest first.
    head_stages = ((512, 0.4), (256, 0.4), (128, 0.2), (64, 0.2))
    for units, drop_rate in head_stages:
        features = Dense(units, activation='relu')(features)
        features = BatchNormalization()(features)
        features = Dropout(drop_rate)(features)

    predictions = Dense(num_classes, activation='softmax')(features)
    return Model(inputs=backbone.input, outputs=predictions)

In [8]:
# Reset Keras' global state before the model is built in the next cell.
keras.backend.clear_session()

In [9]:
# Instantiate the network for 32x109 single-channel inputs and two output classes.
mrsnt = model_resnet(input_shape=(32, 109, 1), num_classes=2)

# The head is a softmax and the targets are one-hot encoded with
# to_categorical, so the matching loss is categorical cross-entropy.
# 'binary_crossentropy' treats every output unit as an independent sigmoid
# target; it only coincides with the categorical loss in the two-class case
# and would silently compute something else if num_classes were raised.
mrsnt.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


I0000 00:00:1741448603.232084  438340 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10125 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Ti, pci bus id: 0000:01:00.0, compute capability: 8.9


In [10]:
# Train for 10 epochs with mini-batches of 16, scoring the held-out split
# after every epoch; the returned History object is kept in `hist`.
hist = mrsnt.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=10,
    validation_data=(X_val, y_val),
)

Epoch 1/10


I0000 00:00:1741448619.381364  438408 service.cc:148] XLA service 0x7ef7d0002320 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1741448619.381381  438408 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Ti, Compute Capability 8.9
2025-03-08 22:43:39.786504: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1741448621.469692  438408 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  4/637[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 17ms/step - accuracy: 0.6445 - loss: 0.7769   


I0000 00:00:1741448629.812866  438408 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m637/637[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 39ms/step - accuracy: 0.7527 - loss: 0.5849 - val_accuracy: 0.7511 - val_loss: 0.7944
Epoch 2/10
[1m637/637[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 20ms/step - accuracy: 0.8498 - loss: 0.3859 - val_accuracy: 0.8861 - val_loss: 0.3469
Epoch 3/10
[1m637/637[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.8692 - loss: 0.3483 - val_accuracy: 0.8991 - val_loss: 0.2646
Epoch 4/10
[1m637/637[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - accuracy: 0.8794 - loss: 0.3010 - val_accuracy: 0.9105 - val_loss: 0.2515
Epoch 5/10
[1m637/637[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 20ms/step - accuracy: 0.8936 - loss: 0.2699 - val_accuracy: 0.9183 - val_loss: 0.2199
Epoch 6/10
[1m637/637[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - accuracy: 0.9060 - loss: 0.2479 - val_accuracy: 0.9293 - val_loss: 0.2039
Epoch 7/10
[1m637/637[0m 

In [11]:
# Loss and accuracy on the held-out split, computed without a progress bar.
test_loss, test_accuracy = mrsnt.evaluate(X_val, y_val, verbose=0)

# Model outputs per sample, then the argmax index along axis 1 for both the
# predictions and the one-hot targets.
y_pred = mrsnt.predict(X_val)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_val, axis=1)


[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step


In [None]:
# Weighted F1 over the hard class predictions.
f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')

# The features in this notebook are loaded from X_cqcc.npy, so the banner
# names CQCC (it previously said MFCC).
print("===================ResNet50 - CQCC===================")
print(f"TAccuracy: {test_accuracy:.5f}")
print(f"F1-Score: {f1:.5f}")

# Compute a one-vs-rest EER for every output column of the model.
eers = []
for i in range(y_pred.shape[1]):
    # Binary ground truth and predicted score for the current class.
    y_true_binary = y_val[:, i]
    y_pred_prob = y_pred[:, i]

    fpr, tpr, thresholds = roc_curve(y_true_binary, y_pred_prob)
    fnr = 1 - tpr

    # ROC point where FPR and FNR are closest; located once and reused for
    # both the threshold and the reported rate.
    eer_index = np.nanargmin(np.abs(fpr - fnr))
    eer_threshold = thresholds[eer_index]
    eer = fpr[eer_index]
    eers.append(eer)
    print(f"Class {i}: EER = {eer:.5f} at threshold {eer_threshold:.5f}")

# Report the average of the per-class EERs.
mean_eer = np.mean(eers)
print(f"EER: {mean_eer:.5f}")


TAccuracy: 0.92422
F1-Score: 0.92465
Class 0: EER = 0.07436 at threshold 0.29933
Class 1: EER = 0.07344 at threshold 0.70246
EER: 0.07390


In [13]:
# Persist only the model's weights (not the architecture) to an HDF5 file.
mrsnt.save_weights(filepath="CQCC_mrsnet.weights.h5")

In [None]:
# Drop the metric scratch variables created by the evaluation cell.
del f1, y_true_binary, y_pred_prob
del fpr, tpr, thresholds, fnr, eer_threshold, eer
del eers, mean_eer

# Release the model and the data splits. The split cell creates X_val / y_val
# (there is no X_test / y_test in this notebook), so those are the names to
# delete; the old names raise NameError on a fresh kernel.
del mrsnt, X_train, X_val, y_train, y_val
gc.collect()


1782