### n_states:3, n_mixed:3, n_mfcc:12
- Accuracy for fold 1: 0.7981  fold 2: 0.7837  fold 3: 0.6587  fold 4: 0.8269  fold 5: 0.5721
- Avg. accuracy: 0.7279

In [18]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
import hmmlearn.hmm as hmm
from preprocessing import get_mfcc
from tqdm import tqdm

# Danh sách các lớp và số lượng trạng thái tương ứng cho từng lớp
class_names = [
    'A', 'Ba', 'Bài', 'Bảy', 'Bốn', 'Chậm', 'Chạy', 'Chín', 'Chủ', 'Có',
    'Cuối', 'Đầu', 'Đọc', 'Dừng', 'Đúng', 'E', 'Giảm', 'Giờ', 'Giúp', 'Hai',
    'Hủy', 'I', 'Kế', 'Không', 'Lại', 'Lặp', 'Lui', 'Lưu', 'Một', 'Mục',
    'Năm', 'Ngày', 'Ngừng', 'Nhanh', 'Nhỏ', 'O', 'Sai', 'Sáu', 'Tải', 'Tám',
    'Tăng', 'Thổ địa', 'Tiếp', 'To', 'Tới', 'Trước', 'Tuổi', 'U', 'Vào', 'Về',
    'Xóa', 'Xong'
]

states = [3] * len(class_names)
n_mix = 3

k_fold = 5

# Đường dẫn đến thư mục chứa dữ liệu và mô hình
dataset_path = './datasets'
model_path = './models_train'

# Hàm tải dữ liệu và chia dữ liệu thành k-fold
def load_data_kfold(dataset_path, class_names, k):
    all_data = {cname: [] for cname in class_names}
    
    # Duyệt qua từng lớp và tải các tệp âm thanh
    for cname in class_names:
        file_paths = [os.path.join(dataset_path, cname, i) 
                      for i in os.listdir(os.path.join(dataset_path, cname)) 
                      if i.endswith('.wav')]
        # Trích xuất đặc trưng MFCC từ tệp âm thanh
        all_data[cname] = [get_mfcc(file_path) for file_path in file_paths]

    X_folds, y_folds = [], []
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    
    # Chia dữ liệu thành k-fold theo từng lớp
    for fold_idx in range(k):
        X_fold = {'train': {}, 'test': {}}
        y_fold = {'train': {}, 'test': {}}
        
        for cname in class_names:
            data = all_data[cname]
            splits = list(kf.split(data))
            train_idx, test_idx = splits[fold_idx]
            
            # Lưu dữ liệu huấn luyện và kiểm thử
            X_fold['train'][cname] = [data[i] for i in train_idx]
            X_fold['test'][cname] = [data[i] for i in test_idx]
            y_fold['train'][cname] = [class_names.index(cname)] * len(train_idx)
            y_fold['test'][cname] = [class_names.index(cname)] * len(test_idx)
        
        X_folds.append(X_fold)
        y_folds.append(y_fold)

    return X_folds, y_folds

# Hàm huấn luyện mô hình HMM-GMM cho từng lớp
def train(X, class_names, states, n_mix):
    models = {}
    for idx, cname in enumerate(class_names):
        # Khởi tạo xác suất bắt đầu và ma trận chuyển trạng thái
        start_prob = np.zeros(states[idx])
        start_prob[0] = 1.0
        
        dominantMatrix = np.full((states[idx], states[idx]), 0.5)
        np.fill_diagonal(dominantMatrix[:, 1:], 0.5)
        dominantMatrix[-1, -1] = 1.0
        
        # Khởi tạo và huấn luyện mô hình HMM-GMM
        model = hmm.GMMHMM(
            n_components=states[idx], n_mix=n_mix, n_iter=300,
            startprob_prior=start_prob, transmat_prior=dominantMatrix,
            random_state=42, params='stmc', init_params='mc')
        
        # Huấn luyện mô hình với dữ liệu
        model.fit(np.vstack(X['train'][cname]), 
                  lengths=[x.shape[0] for x in X['train'][cname]])
        models[cname] = model
    return models

# Hàm đánh giá mô hình trên dữ liệu kiểm thử
def evaluate(models, X, y, class_names):
    y_true, y_pred = [], []
    for cname in class_names:
        for mfcc, target in zip(X['test'][cname], y['test'][cname]):
            # Tính điểm số cho từng lớp và chọn lớp có điểm cao nhất
            scores = [models[c].score(mfcc) for c in class_names]
            y_pred.append(np.argmax(scores))
            y_true.append(target)
    return y_true, y_pred

# Huấn luyện và đánh giá mô hình
X_folds, y_folds = load_data_kfold(dataset_path, class_names, k_fold)
all_y_true, all_y_pred = [], []
overall_acc_avg = 0.0

# Lặp qua từng fold để huấn luyện và kiểm thử
for i in tqdm(range(k_fold), desc='Training'):
    models = train(X_folds[i], class_names, states, n_mix)
    y_true, y_pred = evaluate(models, X_folds[i], y_folds[i], class_names)
    all_y_true.extend(y_true)
    all_y_pred.extend(y_pred)
    
    # Tính độ chính xác cho từng fold
    fold_acc = np.mean(np.array(y_true) == np.array(y_pred))
    print(f"Accuracy for fold {i+1}: {fold_acc:.4f}")
    overall_acc_avg += fold_acc

# Tính độ chính xác trung bình của toàn bộ mô hình
overall_acc_avg /= k_fold
print(f'Avg accuracy: {overall_acc_avg:.4f}')


Training:  20%|██        | 1/5 [01:05<04:21, 65.27s/it]

Accuracy for fold 1: 0.7981


Training:  40%|████      | 2/5 [02:07<03:11, 63.76s/it]

Accuracy for fold 2: 0.7837


Model is not converging.  Current: -244576.93872448424 is not greater than -244576.9372419739. Delta is -0.001482510328060016
Training:  60%|██████    | 3/5 [03:11<02:06, 63.48s/it]

Accuracy for fold 3: 0.6587


Model is not converging.  Current: -245986.2262411186 is not greater than -245980.3248222824. Delta is -5.901418836205266
Training:  80%|████████  | 4/5 [04:14<01:03, 63.25s/it]

Accuracy for fold 4: 0.8269


Training: 100%|██████████| 5/5 [05:19<00:00, 63.93s/it]

Accuracy for fold 5: 0.5721
Avg accuracy: 0.7279





### n_states:7, n_mixes:4, n_mfcc=11
- Accuracy for fold 1: 0.82  fold 2: 0.79  fold 3: 0.66  fold 4: 0.88  fold 5: 0.6
- Avg. accuracy: 0.75

In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
import hmmlearn.hmm as hmm
from preprocessing import get_mfcc
from tqdm import tqdm
import warnings

# Tắt cảnh báo
warnings.filterwarnings('ignore', category=UserWarning, message=".*degenerate mixture covariance.*")
warnings.filterwarnings('ignore', category=UserWarning, message=".*Model is not converging.*")
warnings.filterwarnings('ignore', category=UserWarning, message=".*Degenerate mixture covariance.*")

# Danh sách các lớp và trạng thái
class_names = [
    'A', 'Ba', 'Bài', 'Bảy', 'Bốn', 'Chậm', 'Chạy', 'Chín', 'Chủ', 'Có',
    'Cuối', 'Đầu', 'Đọc', 'Dừng', 'Đúng', 'E', 'Giảm', 'Giờ', 'Giúp', 'Hai',
    'Hủy', 'I', 'Kế', 'Không', 'Lại', 'Lặp', 'Lui', 'Lưu', 'Một', 'Mục',
    'Năm', 'Ngày', 'Ngừng', 'Nhanh', 'Nhỏ', 'O', 'Sai', 'Sáu', 'Tải', 'Tám',
    'Tăng', 'Thổ địa', 'Tiếp', 'To', 'Tới', 'Trước', 'Tuổi', 'U', 'Vào', 'Về',
    'Xóa', 'Xong'
]

states = [
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7
]
n_mix = 4
k_fold = 5

# Đường dẫn dữ liệu
dataset_path = './datasets'
model_path = './models_train'

def load_data_kfold(dataset_path, class_names, k):
    all_data = {cname: [] for cname in class_names}
    for cname in class_names:
        file_paths = [os.path.join(dataset_path, cname, i) 
                      for i in os.listdir(os.path.join(dataset_path, cname)) 
                      if i.endswith('.wav')]
        all_data[cname] = [get_mfcc(file_path) for file_path in file_paths]
    
    X_folds, y_folds = [], []
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    
    for fold_idx in range(k):
        X_fold = {'train': {}, 'test': {}}
        y_fold = {'train': {}, 'test': {}}
        
        for cname in class_names:
            data = all_data[cname]
            splits = list(kf.split(data))
            train_idx, test_idx = splits[fold_idx]
            X_fold['train'][cname] = [data[i] for i in train_idx]
            X_fold['test'][cname] = [data[i] for i in test_idx]
            y_fold['train'][cname] = [class_names.index(cname)] * len(train_idx)
            y_fold['test'][cname] = [class_names.index(cname)] * len(test_idx)
        
        X_folds.append(X_fold)
        y_folds.append(y_fold)

    return X_folds, y_folds

def train(X, class_names, states, n_mix):
    models = {}
    for idx, cname in enumerate(class_names):
        start_prob = np.zeros(states[idx])
        start_prob[0] = 1.0
        dominantMatrix = np.full((states[idx], states[idx]), 0.5)
        np.fill_diagonal(dominantMatrix[:, 1:], 0.5)
        dominantMatrix[-1, -1] = 1.0
        
        model = hmm.GMMHMM(
            n_components=states[idx], n_mix=n_mix, n_iter=300,
            startprob_prior=start_prob, transmat_prior=dominantMatrix,
            random_state=42, params='stmc', init_params='mc')
        model.fit(np.vstack(X['train'][cname]), 
                  lengths=[x.shape[0] for x in X['train'][cname]])
        models[cname] = model
    return models

def evaluate(models, X, y, class_names):
    y_true, y_pred = [], []
    for cname in class_names:
        for mfcc, target in zip(X['test'][cname], y['test'][cname]):
            scores = [models[c].score(mfcc) for c in class_names]
            y_pred.append(np.argmax(scores))
            y_true.append(target)
    return y_true, y_pred

# Training and evaluation
X_folds, y_folds = load_data_kfold(dataset_path, class_names, k_fold)
all_y_true, all_y_pred = [], []
overall_acc_avg = 0.0

for i in tqdm(range(k_fold), desc='Training'):
    models = train(X_folds[i], class_names, states, n_mix)
    y_true, y_pred = evaluate(models, X_folds[i], y_folds[i], class_names)
    all_y_true.extend(y_true)
    all_y_pred.extend(y_pred)
    
    # Accuracy for current fold
    fold_acc = np.mean(np.array(y_true) == np.array(y_pred))
    print(f"Accuracy for fold {i+1}: {fold_acc:.4f}")
    
    overall_acc_avg += fold_acc

# Average accuracy across all folds
overall_acc_avg /= k_fold
print(f'Avg accuracy: {overall_acc_avg:.4f}')


Training:   0%|          | 0/5 [00:00<?, ?it/s]Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture cova

Accuracy for fold 1: 0.82


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

Accuracy for fold 2: 0.79


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

Accuracy for fold 3: 0.66


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

Accuracy for fold 4: 0.88


Training: 100%|██████████| 5/5 [31:14<00:00, 374.86s/it]

Accuracy for fold 5: 0.60
Avg accuracy: 0.75





### n_states:7, n_mixed:3, n_mfcc:13
- Accuracy for fold 1: 0.80  fold 2: 0.77  fold 3: 0.67  fold 4: 0.88  fold 5: 0.60
- Avg. accuracy: 0.74

In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
import hmmlearn.hmm as hmm
from preprocessing import get_mfcc
from tqdm import tqdm
import warnings

# Tắt cảnh báo
warnings.filterwarnings('ignore', category=UserWarning, message=".*degenerate mixture covariance.*")
warnings.filterwarnings('ignore', category=UserWarning, message=".*Model is not converging.*")


# Danh sách các lớp và trạng thái
class_names = [
    'A', 'Ba', 'Bài', 'Bảy', 'Bốn', 'Chậm', 'Chạy', 'Chín', 'Chủ', 'Có',
    'Cuối', 'Đầu', 'Đọc', 'Dừng', 'Đúng', 'E', 'Giảm', 'Giờ', 'Giúp', 'Hai',
    'Hủy', 'I', 'Kế', 'Không', 'Lại', 'Lặp', 'Lui', 'Lưu', 'Một', 'Mục',
    'Năm', 'Ngày', 'Ngừng', 'Nhanh', 'Nhỏ', 'O', 'Sai', 'Sáu', 'Tải', 'Tám',
    'Tăng', 'Thổ địa', 'Tiếp', 'To', 'Tới', 'Trước', 'Tuổi', 'U', 'Vào', 'Về',
    'Xóa', 'Xong'
]

states = [
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7
]
n_mix = 3
k_fold = 5

# Đường dẫn dữ liệu
dataset_path = './datasets'
model_path = './models_train'

def load_data_kfold(dataset_path, class_names, k):
    all_data = {cname: [] for cname in class_names}
    for cname in class_names:
        file_paths = [os.path.join(dataset_path, cname, i) 
                      for i in os.listdir(os.path.join(dataset_path, cname)) 
                      if i.endswith('.wav')]
        all_data[cname] = [get_mfcc(file_path) for file_path in file_paths]
    
    X_folds, y_folds = [], []
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    
    for fold_idx in range(k):
        X_fold = {'train': {}, 'test': {}}
        y_fold = {'train': {}, 'test': {}}
        
        for cname in class_names:
            data = all_data[cname]
            splits = list(kf.split(data))
            train_idx, test_idx = splits[fold_idx]
            X_fold['train'][cname] = [data[i] for i in train_idx]
            X_fold['test'][cname] = [data[i] for i in test_idx]
            y_fold['train'][cname] = [class_names.index(cname)] * len(train_idx)
            y_fold['test'][cname] = [class_names.index(cname)] * len(test_idx)
        
        X_folds.append(X_fold)
        y_folds.append(y_fold)

    return X_folds, y_folds

def train(X, class_names, states, n_mix):
    models = {}
    for idx, cname in enumerate(class_names):
        start_prob = np.zeros(states[idx])
        start_prob[0] = 1.0
        dominantMatrix = np.full((states[idx], states[idx]), 0.5)
        np.fill_diagonal(dominantMatrix[:, 1:], 0.5)
        dominantMatrix[-1, -1] = 1.0
        
        model = hmm.GMMHMM(
            n_components=states[idx], n_mix=n_mix, n_iter=300,
            startprob_prior=start_prob, transmat_prior=dominantMatrix,
            random_state=42, params='stmc', init_params='mc')
        model.fit(np.vstack(X['train'][cname]), 
                  lengths=[x.shape[0] for x in X['train'][cname]])
        models[cname] = model
    return models

def evaluate(models, X, y, class_names):
    y_true, y_pred = [], []
    for cname in class_names:
        for mfcc, target in zip(X['test'][cname], y['test'][cname]):
            scores = [models[c].score(mfcc) for c in class_names]
            y_pred.append(np.argmax(scores))
            y_true.append(target)
    return y_true, y_pred

# Training and evaluation
X_folds, y_folds = load_data_kfold(dataset_path, class_names, k_fold)
all_y_true, all_y_pred = [], []
overall_acc_avg = 0.0

for i in tqdm(range(k_fold), desc='Training'):
    models = train(X_folds[i], class_names, states, n_mix)
    y_true, y_pred = evaluate(models, X_folds[i], y_folds[i], class_names)
    all_y_true.extend(y_true)
    all_y_pred.extend(y_pred)
    
    # Accuracy for current fold
    fold_acc = np.mean(np.array(y_true) == np.array(y_pred))
    print(f"Accuracy for fold {i+1}: {fold_acc:.4f}")
    
    overall_acc_avg += fold_acc

# Average accuracy across all folds
overall_acc_avg /= k_fold
print(f'Overall accuracy: {overall_acc_avg:.4f}')


Training:   0%|          | 0/5 [00:00<?, ?it/s]Model is not converging.  Current: -214092.44841912197 is not greater than -214089.8761637393. Delta is -2.5722553826635703
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture 

Accuracy for fold 1: 0.80


Training:  40%|████      | 2/5 [08:26<12:29, 249.89s/it]

Accuracy for fold 2: 0.77


Model is not converging.  Current: -262199.33504036395 is not greater than -262199.33068264776. Delta is -0.004357716185040772
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Deg

Accuracy for fold 3: 0.67


Model is not converging.  Current: -239959.49224099758 is not greater than -239959.41834237892. Delta is -0.07389861866249703
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Dege

Accuracy for fold 4: 0.88


Training: 100%|██████████| 5/5 [21:11<00:00, 254.35s/it]

Accuracy for fold 5: 0.60
Overall accuracy: 0.74





### n_states:7, n_mixed:2, n_mfcc:12
- Accuracy for fold 1: 0.76  fold 2: 0.76  fold 3: 0.69  fold 4: 0.86  fold 5: 0.57
- Avg. accuracy: 0.7288

In [17]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
import hmmlearn.hmm as hmm
from preprocessing import get_mfcc
from tqdm import tqdm
import warnings

# Tắt cảnh báo
warnings.filterwarnings('ignore', category=UserWarning, message=".*degenerate mixture covariance.*")
warnings.filterwarnings('ignore', category=UserWarning, message=".*Model is not converging.*")
warnings.filterwarnings('ignore', category=UserWarning, message=".*Degenerate mixture covariance.*")

# Danh sách các lớp và trạng thái
class_names = [
    'A', 'Ba', 'Bài', 'Bảy', 'Bốn', 'Chậm', 'Chạy', 'Chín', 'Chủ', 'Có',
    'Cuối', 'Đầu', 'Đọc', 'Dừng', 'Đúng', 'E', 'Giảm', 'Giờ', 'Giúp', 'Hai',
    'Hủy', 'I', 'Kế', 'Không', 'Lại', 'Lặp', 'Lui', 'Lưu', 'Một', 'Mục',
    'Năm', 'Ngày', 'Ngừng', 'Nhanh', 'Nhỏ', 'O', 'Sai', 'Sáu', 'Tải', 'Tám',
    'Tăng', 'Thổ địa', 'Tiếp', 'To', 'Tới', 'Trước', 'Tuổi', 'U', 'Vào', 'Về',
    'Xóa', 'Xong'
]

states = [
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7
]
n_mix = 2
k_fold = 5

# Đường dẫn dữ liệu
dataset_path = './datasets'
model_path = './models_train'

def load_data_kfold(dataset_path, class_names, k):
    all_data = {cname: [] for cname in class_names}
    for cname in class_names:
        file_paths = [os.path.join(dataset_path, cname, i) 
                      for i in os.listdir(os.path.join(dataset_path, cname)) 
                      if i.endswith('.wav')]
        all_data[cname] = [get_mfcc(file_path) for file_path in file_paths]
    
    X_folds, y_folds = [], []
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    
    for fold_idx in range(k):
        X_fold = {'train': {}, 'test': {}}
        y_fold = {'train': {}, 'test': {}}
        
        for cname in class_names:
            data = all_data[cname]
            splits = list(kf.split(data))
            train_idx, test_idx = splits[fold_idx]
            X_fold['train'][cname] = [data[i] for i in train_idx]
            X_fold['test'][cname] = [data[i] for i in test_idx]
            y_fold['train'][cname] = [class_names.index(cname)] * len(train_idx)
            y_fold['test'][cname] = [class_names.index(cname)] * len(test_idx)
        
        X_folds.append(X_fold)
        y_folds.append(y_fold)

    return X_folds, y_folds

def train(X, class_names, states, n_mix):
    models = {}
    for idx, cname in enumerate(class_names):
        start_prob = np.zeros(states[idx])
        start_prob[0] = 1.0
        dominantMatrix = np.full((states[idx], states[idx]), 0.5)
        np.fill_diagonal(dominantMatrix[:, 1:], 0.5)
        dominantMatrix[-1, -1] = 1.0
        
        model = hmm.GMMHMM(
            n_components=states[idx], n_mix=n_mix, n_iter=300,
            startprob_prior=start_prob, transmat_prior=dominantMatrix,
            random_state=42, params='stmc', init_params='mc')
        model.fit(np.vstack(X['train'][cname]), 
                  lengths=[x.shape[0] for x in X['train'][cname]])
        models[cname] = model
    return models

def evaluate(models, X, y, class_names):
    y_true, y_pred = [], []
    for cname in class_names:
        for mfcc, target in zip(X['test'][cname], y['test'][cname]):
            scores = [models[c].score(mfcc) for c in class_names]
            y_pred.append(np.argmax(scores))
            y_true.append(target)
    return y_true, y_pred

# Training and evaluation
X_folds, y_folds = load_data_kfold(dataset_path, class_names, k_fold)
all_y_true, all_y_pred = [], []
overall_acc_avg = 0.0

for i in tqdm(range(k_fold), desc='Training'):
    models = train(X_folds[i], class_names, states, n_mix)
    y_true, y_pred = evaluate(models, X_folds[i], y_folds[i], class_names)
    all_y_true.extend(y_true)
    all_y_pred.extend(y_pred)
    
    # Accuracy for current fold
    fold_acc = np.mean(np.array(y_true) == np.array(y_pred))
    print(f"Accuracy for fold {i+1}: {fold_acc:.4f}")
    
    overall_acc_avg += fold_acc

# Average accuracy across all folds
overall_acc_avg /= k_fold
print(f'Avg accuracy: {overall_acc_avg:.4f}')


Training:   0%|          | 0/5 [00:00<?, ?it/s]Model is not converging.  Current: -241425.64511193984 is not greater than -241425.54339050746. Delta is -0.10172143238014542
Model is not converging.  Current: -245211.6798056045 is not greater than -245211.67862767438. Delta is -0.0011779301275964826
Training:  20%|██        | 1/5 [02:45<11:03, 165.79s/it]

Accuracy for fold 1: 0.76


Model is not converging.  Current: -237403.8788806541 is not greater than -237403.8769346242. Delta is -0.0019460299226921052
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Dege

Accuracy for fold 2: 0.76


Model is not converging.  Current: -226775.92581152302 is not greater than -226775.91020958996. Delta is -0.015601933060679585
Model is not converging.  Current: -232800.00154407063 is not greater than -232797.65395983614. Delta is -2.3475842344923876
Model is not converging.  Current: -248220.98212488182 is not greater than -248220.94588875127. Delta is -0.036236130545148626
Model is not converging.  Current: -216467.4982501662 is not greater than -216467.02320602883. Delta is -0.47504413736169226
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixtu

Accuracy for fold 3: 0.69


Training:  80%|████████  | 4/5 [11:23<02:52, 172.69s/it]

Accuracy for fold 4: 0.86


Training: 100%|██████████| 5/5 [13:55<00:00, 167.09s/it]

Accuracy for fold 5: 0.57
Avg accuracy: 0.7288



