In [None]:
# Import thư viện
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Reshape
from tensorflow.keras.applications import MobileNet
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# MFCC feature extraction
def extract_mfcc(audio, sr=16000, n_mfcc=40):
    """Compute MFCCs for one audio signal using a 25 ms window and a 10 ms hop.

    Args:
        audio: Audio samples; forwarded to librosa as ``y``.
        sr: Sampling rate in Hz. Also used to convert the frame timings
            from seconds to samples.
        n_mfcc: Number of MFCC coefficients to request.

    Returns:
        Whatever ``librosa.feature.mfcc`` returns (presumably an array of
        shape ``(n_mfcc, frames)`` -- confirm against the librosa docs).
    """
    framing = {
        'hop_length': int(0.01 * sr),   # 10 ms step between frames, in samples
        'win_length': int(0.025 * sr),  # 25 ms analysis window, in samples
    }
    return librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc, **framing)

# Load a labelled dataset from a directory tree
def load_data(data_dir, sr=16000, n_mfcc=40, max_frames=None):
    """Load every ``.wav`` file under ``data_dir/<label>/`` as an MFCC "image".

    Each immediate sub-directory of ``data_dir`` is treated as one class and
    its name is used as the label. Files directly inside ``data_dir`` and
    files without a ``.wav`` extension (matched case-insensitively) are
    ignored.

    Args:
        data_dir: Root directory containing one sub-directory per label.
        sr: Sampling rate the audio is resampled to while loading.
        n_mfcc: Number of MFCC coefficients per frame.
        max_frames: Number of frames (time axis) every sample is padded or
            cropped to. ``None`` (default) uses the longest clip found in
            ``data_dir``. Pass the same value for every split that feeds the
            same model so their shapes agree.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(samples, n_mfcc, frames, 1)`` and
        ``y`` is an array of label strings in the same order. If no audio
        file is found, two empty arrays are returned.
    """
    features, targets = [], []

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any later shuffling/splitting) reproducible.
    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            continue
        for file_name in sorted(os.listdir(label_dir)):
            # Accept '.wav' and '.WAV' alike.
            if not file_name.lower().endswith('.wav'):
                continue
            audio, _ = librosa.load(os.path.join(label_dir, file_name), sr=sr)
            features.append(extract_mfcc(audio, sr, n_mfcc))
            targets.append(label)

    if not features:
        return np.array(features), np.array(targets)

    # Clips of different duration give a different number of frames, which
    # cannot be stacked into one array. Zero-pad short clips and crop long
    # ones on the time axis so every sample has the same shape.
    if max_frames is None:
        max_frames = max(mfcc.shape[1] for mfcc in features)
    n_coeffs = features[0].shape[0]

    # Trailing axis of size 1 is the channel axis expected by MobileNet.
    X = np.zeros((len(features), n_coeffs, max_frames, 1), dtype=features[0].dtype)
    for index, mfcc in enumerate(features):
        kept = min(mfcc.shape[1], max_frames)
        X[index, :, :kept, 0] = mfcc[:, :kept]

    return X, np.array(targets)

In [None]:
# Load data
# The dataset root can be overridden with the AUDIO_EVENT_DATA_DIR environment
# variable; it defaults to the original local path.
data_root = os.environ.get('AUDIO_EVENT_DATA_DIR', r'C:\Users\USER\Downloads\SV_NCKH_audio_event')
train_dir = os.path.join(data_root, 'Train')
test_dir = os.path.join(data_root, 'Test')

print("Loading training data...")
X_train, y_train = load_data(train_dir)
print("Loading testing data...")
X_test, y_test = load_data(test_dir)

# Fail early with an actionable message instead of an obscure indexing error
# further down (empty directory, or clips with differing numbers of frames).
for split_name, split in (("training", X_train), ("testing", X_test)):
    if split.ndim != 4:
        raise ValueError(
            f"{split_name} features have shape {split.shape}; expected "
            "(samples, n_mfcc, frames, 1). Check that the directory contains "
            ".wav files and that every clip yields the same number of frames."
        )
if X_train.shape[1:] != X_test.shape[1:]:
    raise ValueError(
        f"Train samples have shape {X_train.shape[1:]} but test samples have "
        f"shape {X_test.shape[1:]}; both splits must share one input shape."
    )


def _frames_as_rows(features):
    """Flatten (samples, n_mfcc, frames, 1) into (samples * frames, n_mfcc)."""
    return features[..., 0].transpose(0, 2, 1).reshape(-1, features.shape[1])


def _standardize(features, fitted_scaler):
    """Scale every MFCC coefficient with ``fitted_scaler``; returns (samples, n_mfcc, frames)."""
    n_samples, n_coeffs, n_frames = features.shape[:3]
    scaled = fitted_scaler.transform(_frames_as_rows(features))
    return scaled.reshape(n_samples, n_frames, n_coeffs).transpose(0, 2, 1)


# Normalize data
# Fit the scaler ONCE, on all training frames, so that each MFCC coefficient
# gets a single mean/std. Refitting per clip would leave the scaler holding
# only the last training clip's statistics when it is applied to the test set.
scaler = StandardScaler()
scaler.fit(_frames_as_rows(X_train))
X_train = _standardize(X_train, scaler)
X_test = _standardize(X_test, scaler)

# Reshape back to 4D for MobileNet
X_train = np.expand_dims(X_train, axis=-1)  # (samples, height, width, channels)
X_test = np.expand_dims(X_test, axis=-1)

# Encode labels as integers (fitted on the training labels only)
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# Convert labels to one-hot vectors
num_classes = len(np.unique(y_train))
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)


In [None]:
# Build the MobileNet-based classifier
def create_mobilenet_model(input_shape, num_classes):
    """Assemble and compile a MobileNet classifier with randomly initialised weights.

    The MobileNet backbone (no top, ``weights=None``) is followed by global
    average pooling, a 128-unit ReLU layer and a softmax output layer.

    Args:
        input_shape: Shape of one input sample, passed to ``MobileNet``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The model, compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    backbone = MobileNet(input_shape=input_shape, include_top=False, weights=None)

    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(128, activation='relu')(pooled)  # custom hidden layer
    probabilities = Dense(num_classes, activation='softmax')(hidden)  # output layer

    classifier = Model(inputs=backbone.input, outputs=probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return classifier

# Initialize the model
input_shape = X_train.shape[1:]  # Sample shape (height, width, channels)
model = create_mobilenet_model(input_shape, num_classes)

# Hold out part of the TRAINING data for validation. Monitoring training on
# the test set would make the final test accuracy an optimistic estimate,
# because any decision taken from the validation curve (epochs, architecture)
# would already have seen the test data.
# The split is shuffled (samples are loaded label by label) and seeded so the
# same clips land in the validation set on every run.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.1, shuffle=True, random_state=42
)

# Train the model
print("Training MobileNet model...")
history = model.fit(X_fit, y_fit, validation_data=(X_val, y_val), epochs=30, batch_size=32)

# Evaluate once on the untouched test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

# Show the model architecture
model.summary()