In [None]:
import os
import csv
import librosa
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [None]:
# Speaker folders to index (kept in alphabetical order); each folder holds one
# speaker's recordings and is assigned one numeric class label.
folders = [
    'cc',
    'lijunjie',
    'wusiyuan',
    'xuzhaoqi',
]
# Labels are 1-based and follow folder order: 'cc' -> 1, ..., 'xuzhaoqi' -> 4.
labels = list(range(1, len(folders) + 1))

In [None]:
# Build the dataset index: one CSV row per .wav file, paired with its speaker label.
# UTF-8 is requested explicitly so the file round-trips through pandas.read_csv
# (which assumes UTF-8) even when file names contain non-ASCII characters and the
# platform's default encoding is something else.
with open('dataset.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['audio_path', 'label'])

    # Walk each speaker folder and tag its audio files with that folder's label.
    for folder, label in zip(folders, labels):
        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order, and therefore the seeded train/test split, reproducible.
        for filename in sorted(os.listdir(folder)):
            # Compare the extension case-insensitively so "*.WAV" files are kept too.
            if filename.lower().endswith('.wav'):
                audio_path = os.path.join(folder, filename)
                writer.writerow([audio_path, label])

In [None]:
# Extract MFCC features from an audio file.
def extract_features(audio_path, max_frames=None):
    """Load an audio file and return its MFCC matrix flattened to one dimension.

    Parameters
    ----------
    audio_path : str or path-like
        Audio file handed to ``librosa.load`` (librosa's default sample rate
        and mono conversion are used).
    max_frames : int, optional
        When given, the MFCC matrix is zero-padded or truncated along its
        last (time) axis to exactly this many frames before flattening, so
        every clip produces a vector of the same length. ``None`` (the
        default) keeps every frame, in which case the output length varies
        with the clip's duration.

    Returns
    -------
    numpy.ndarray
        1-D array holding the flattened MFCC matrix.
    """
    y, sr = librosa.load(audio_path)
    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    if max_frames is not None:
        # Fix the time axis *before* flattening: padding the already-flattened
        # vector would shift which coefficient each position refers to.
        mfccs = librosa.util.fix_length(mfccs, size=max_frames, axis=-1)
    return mfccs.flatten()  # collapse (coefficients, frames) into a single vector

In [None]:
# Load the (audio_path, label) index from the dataset CSV.
df = pd.read_csv('dataset.csv')
if df.empty:
    # Fail here with a clear message instead of later inside the train/test split.
    raise ValueError(
        "dataset.csv lists no audio files; check that the speaker folders "
        "exist and contain .wav files."
    )
df['mfcc'] = df['audio_path'].apply(extract_features)  # one flattened MFCC vector per file

# The vectors can only be stacked column-by-column when they all have the same
# length. With unequal lengths pandas would pad the shorter rows with NaN (and the
# columns would no longer mean the same thing across rows), which the classifier
# fitted later cannot handle, so stop early with an actionable error.
feature_lengths = df['mfcc'].map(len)
if feature_lengths.nunique() > 1:
    raise ValueError(
        "MFCC feature vectors have different lengths "
        f"(min={feature_lengths.min()}, max={feature_lengths.max()}); "
        "pad or truncate the MFCC matrices to a common number of frames "
        "before flattening."
    )

# Prepare the classifier input: one row per file, one column per feature value,
# plus the label column.
features_df = pd.DataFrame(df['mfcc'].tolist())
features_df['label'] = df['label']

In [None]:
# Split into training and test sets: 20% of the samples are held out for
# testing, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features_df.drop(columns='label'),  # feature matrix: everything except the label
    features_df['label'],               # target vector
    test_size=0.2,
    random_state=42,
)

In [None]:
# Create a Gaussian naive Bayes classifier with its default settings ...
gnb = GaussianNB()
# ... and fit it on the training split.
gnb.fit(X=X_train, y=y_train)

In [None]:
# Predict speaker labels for the held-out test samples.
y_pred = gnb.predict(X=X_test)
# Accuracy = fraction of test samples whose predicted label matches the true one.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# Report the result.
print(f"朴素贝叶斯分类器的准确率为: {accuracy}")