In [None]:
# Import các thư viện
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, GlobalAveragePooling2D, 
                                     Conv2D, BatchNormalization, Activation, Add, MaxPooling2D)
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.utils import to_categorical

In [None]:
# Hàm trích xuất đặc trưng MFCC
def extract_mfcc(audio, sr=16000, n_mfcc=40):
    """Compute MFCC features for a single audio signal.

    The analysis uses a 25 ms window advanced in 10 ms steps; both durations
    are converted to a whole number of samples at the given sampling rate.

    Args:
        audio: Audio samples, forwarded to ``librosa.feature.mfcc`` as ``y``.
        sr: Sampling rate in Hz.
        n_mfcc: Number of MFCC coefficients requested from librosa.

    Returns:
        The value returned by ``librosa.feature.mfcc`` for these settings.
    """
    framing = {
        'hop_length': int(0.01 * sr),   # 10 ms step between frames
        'win_length': int(0.025 * sr),  # 25 ms analysis window
    }
    return librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc, **framing)

# Hàm load dữ liệu
def load_data(data_dir, sr=16000, n_mfcc=40, n_frames=None):
    """Load every WAV file under ``data_dir`` as a fixed-size MFCC tensor.

    ``data_dir`` must contain one sub-directory per class; the sub-directory
    name is used as the label of every ``.wav`` file inside it. Entries are
    visited in sorted order so the result does not depend on the arbitrary
    order returned by ``os.listdir``.

    Clips of different duration yield MFCC matrices with different numbers of
    frames, which cannot be stacked into one array. Each matrix is therefore
    zero-padded (or truncated) along the time axis to a common length.

    Args:
        data_dir: Root directory with one sub-directory per class.
        sr: Sampling rate the audio is resampled to when loaded.
        n_mfcc: Number of MFCC coefficients per frame.
        n_frames: Number of time frames every sample is padded/truncated to.
            ``None`` (default) uses the longest clip found in ``data_dir``.
            Pass the training set's frame count when loading other splits so
            all splits share the same shape.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape ``(n_files, n_coeffs, n_frames, 1)``
        and ``y`` is an array of label strings. When no WAV file is found,
        two empty arrays are returned.
    """
    features, labels = [], []
    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            continue
        for file_name in sorted(os.listdir(label_dir)):
            # Case-insensitive match so '.WAV' files are not silently skipped.
            if not file_name.lower().endswith('.wav'):
                continue
            audio, _ = librosa.load(os.path.join(label_dir, file_name), sr=sr)
            features.append(extract_mfcc(audio, sr, n_mfcc))
            labels.append(label)

    if not features:
        return np.array(features), np.array(labels)

    if n_frames is None:
        n_frames = max(mfcc.shape[1] for mfcc in features)

    n_coeffs = features[0].shape[0]
    # Trailing axis of size 1 is the channel dimension expected by Conv2D.
    batch = np.zeros((len(features), n_coeffs, n_frames, 1), dtype=features[0].dtype)
    for index, mfcc in enumerate(features):
        keep = min(mfcc.shape[1], n_frames)
        batch[index, :, :keep, 0] = mfcc[:, :keep]
    return batch, np.array(labels)

In [None]:
# Hàm xây dựng khối Inception-ResNet
def _conv_bn(tensor, filters, kernel_size, activate=True):
    """Apply Conv2D ('same' padding) -> BatchNormalization -> optional ReLU."""
    tensor = Conv2D(filters, kernel_size, padding='same')(tensor)
    tensor = BatchNormalization()(tensor)
    if activate:
        tensor = Activation('relu')(tensor)
    return tensor


def inception_resnet_block(input_tensor, filters, block_type='A'):
    """Build one simplified Inception-ResNet block on top of ``input_tensor``.

    Two parallel branches (a 1x1 convolution, and a 1x1 followed by a 3x3
    convolution) are summed, mixed by a linear 1x1 convolution and added back
    to the block input through a residual connection, followed by ReLU.

    Args:
        input_tensor: 4-D Keras tensor with channels on the last axis.
        filters: Number of channels produced by every convolution in the
            block, and therefore by the block itself.
        block_type: Block variant; only ``'A'`` is implemented.

    Returns:
        The output tensor of the block, with ``filters`` channels.

    Raises:
        ValueError: If ``block_type`` is not ``'A'``.
    """
    if block_type != 'A':
        raise ValueError(
            f"Unsupported block_type {block_type!r}; only 'A' is implemented."
        )

    branch1 = _conv_bn(input_tensor, filters, (1, 1))

    branch2 = _conv_bn(input_tensor, filters, (1, 1))
    branch2 = _conv_bn(branch2, filters, (3, 3))

    branches = Add()([branch1, branch2])
    # Linear (no activation) 1x1 convolution before the residual sum.
    branches = _conv_bn(branches, filters, (1, 1), activate=False)

    # The residual Add needs identical channel counts on both inputs. When the
    # block changes the width, project the shortcut with a 1x1 convolution.
    shortcut = input_tensor
    if input_tensor.shape[-1] != filters:
        shortcut = _conv_bn(input_tensor, filters, (1, 1), activate=False)

    output_tensor = Add()([shortcut, branches])  # Residual connection
    output_tensor = Activation('relu')(output_tensor)
    return output_tensor

In [None]:
# Hàm xây dựng mô hình Inception-ResNet
def build_inception_resnet(input_shape, num_classes):
    """Assemble and compile the Inception-ResNet style classifier.

    Layout: a strided 3x3 convolution stem with batch normalisation, ReLU and
    max pooling; three type-'A' Inception-ResNet blocks of width 32, 64 and
    128; global average pooling; a 128-unit ReLU dense layer; and a softmax
    output layer with ``num_classes`` units.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    inputs = Input(shape=input_shape)

    # Stem: downsample twice (strided convolution, then strided pooling).
    stem = Conv2D(32, (3, 3), strides=(2, 2), padding='same')(inputs)
    stem = BatchNormalization()(stem)
    stem = Activation('relu')(stem)
    features = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(stem)

    # Stack of Inception-ResNet blocks with increasing width.
    for width in (32, 64, 128):
        features = inception_resnet_block(features, filters=width, block_type='A')

    # Classification head.
    pooled = GlobalAveragePooling2D()(features)
    hidden = Dense(128, activation='relu')(pooled)
    outputs = Dense(num_classes, activation='softmax')(hidden)

    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Load dữ liệu
# Dataset location. The default is the original machine-specific folder; set
# the AUDIO_EVENT_DATA_DIR environment variable to use another copy of the
# dataset (it must contain 'Train' and 'Test' sub-folders).
data_root = os.environ.get(
    'AUDIO_EVENT_DATA_DIR',
    r'C:\Users\USER\Downloads\SV_NCKH_audio_event',
)
train_dir = os.path.join(data_root, 'Train')
test_dir = os.path.join(data_root, 'Test')

# Fail early with an actionable message instead of a bare listdir error.
for split_dir in (train_dir, test_dir):
    if not os.path.isdir(split_dir):
        raise FileNotFoundError(
            f"Dataset folder not found: {split_dir!r}. "
            "Set AUDIO_EVENT_DATA_DIR to the dataset root directory."
        )

print("Loading training data...")
X_train, y_train = load_data(train_dir)
print("Loading testing data...")
X_test, y_test = load_data(test_dir)

In [None]:
# Chuẩn hóa dữ liệu
def _standardize(batch, scaler, fit=False):
    """Standardise each MFCC coefficient of a (clips, coeffs, frames, 1) batch.

    Every time frame is treated as one sample and every coefficient as one
    feature, so the scaler holds one mean/variance per coefficient.
    Returns an array of shape (clips, coeffs, frames), without channel axis.
    """
    n_clips, n_coeffs, n_frames = batch.shape[:3]
    frames = batch[..., 0].transpose(0, 2, 1).reshape(-1, n_coeffs)
    scaled = scaler.fit_transform(frames) if fit else scaler.transform(frames)
    return scaled.reshape(n_clips, n_frames, n_coeffs).transpose(0, 2, 1)


# Normalise the data. The statistics are fitted ONCE on all training frames
# and reused for the test set. Re-fitting the scaler for every training clip
# would leave the test set scaled by the last training clip's statistics.
# Assumes X_train / X_test are (clips, coeffs, frames, 1) arrays from load_data.
scaler = StandardScaler()
X_train = _standardize(X_train, scaler, fit=True)
X_test = _standardize(X_test, scaler)

# Restore the trailing channel axis expected by the convolutional model.
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# Encode the string labels as integers (classes are learned from the training set).
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# Convert the integer labels to one-hot vectors.
num_classes = len(le.classes_)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

In [None]:
# Xây dựng và huấn luyện mô hình
# Training configuration.
SEED = 42
EPOCHS = 30
BATCH_SIZE = 32

# Seed NumPy and TensorFlow before the model is created so weight
# initialisation and batch shuffling are repeatable between runs.
np.random.seed(SEED)
tf.random.set_seed(SEED)

input_shape = X_train.shape[1:]  # Shape of one sample (batch axis dropped)
model = build_inception_resnet(input_shape, num_classes)

print("Training Inception-ResNet model...")
# NOTE(review): the test set is also used as validation data here, so the
# accuracy reported below is not an independent estimate if these curves are
# used to tune the model. Consider a separate validation split.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

# Evaluate the trained model on the test set.
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

# Print the model architecture.
model.summary()