In [5]:
import pandas as pd
import ast
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# 读取CSV文件
df = pd.read_csv('features_and_labels.csv')

# 将特征向量中的字符串列表转换为实际列表
df['feature'] = df['feature'].apply(ast.literal_eval)

# 展平特征向量中的列表
df['feature'] = df['feature'].apply(lambda x: np.array(x).flatten())

# 提取特征向量和标签
X = np.stack(df['feature'].values)  # 转换为NumPy数组
y = df['label'].tolist()

# 拆分数据集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 标准化特征向量
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 创建SVM模型
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train_scaled, y_train)

# 预测并评估模型性能
y_pred = svm_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"模型准确率：{accuracy * 100:.2f}%")

ValueError: all input arrays must have the same shape

In [6]:
import pandas as pd
import ast
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# 读取CSV文件
df = pd.read_csv('features_and_labels.csv')

# 将特征向量中的字符串列表转换为实际列表
df['feature'] = df['feature'].apply(ast.literal_eval)

# 确保特征向量中的所有元素都具有相同的形状
max_length = max(len(x) for x in df['feature'])
df['feature'] = df['feature'].apply(lambda x: np.pad(x, (0, max_length - len(x))))

# 提取特征向量和标签
X = np.stack(df['feature'].values)  # 转换为NumPy数组
y = df['label'].tolist()

# 拆分数据集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 标准化特征向量
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 创建SVM模型
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train_scaled, y_train)

# 预测并评估模型性能
y_pred = svm_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"模型准确率：{accuracy * 100:.2f}%")

模型准确率：100.00%


In [14]:
from sklearn.naive_bayes import GaussianNB

# 创建朴素贝叶斯模型
nb_classifier = GaussianNB()
nb_classifier.fit(X_train_scaled, y_train)

# 预测并评估模型性能
y_pred_nb = nb_classifier.predict(X_test_scaled)
accuracy_nb = accuracy_score(y_test, y_pred_nb)
print(f"朴素贝叶斯模型准确率：{accuracy_nb * 100:.2f}%")

朴素贝叶斯模型准确率：91.67%


In [18]:
import joblib

# 保存模型到文件
model_filename = 'nb_model.pkl'
joblib.dump(nb_classifier, model_filename)
# 保存特征向量化器
scaler_filename = 'nb_scaler.pkl'
joblib.dump(scaler, scaler_filename)

print(f"模型已保存到文件：{model_filename}")

模型已保存到文件：nb_model.pkl


In [19]:
import os
import pandas as pd
import librosa

wav_filepath = 'std.wav'

x, sr = librosa.load(wav_filepath)
mfccs = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=13)  # 选择13个MFCC系数
energy = sum(x**2) / len(x)  # 计算能量
feature_vector = mfccs.flatten().tolist() + [energy]

# print(feature_vector)

In [20]:
import joblib

# 加载模型
loaded_model = joblib.load('nb_model.pkl')
scaler = joblib.load('nb_scaler.pkl')

# 准备测试数据（使用你计算得到的 feature_vector）
test_data = scaler.transform([feature_vector])

# 进行预测
predicted_class = loaded_model.predict(test_data)

print(f"预测结果：{predicted_class}")

ValueError: X has 3875 features, but StandardScaler is expecting 4044 features as input.

In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# 读取CSV文件
df = pd.read_csv('energy.csv')

# 提取特征向量和标签
X = df['feature'].apply(lambda x: float(x.strip('[]'))).values.reshape(-1, 1)  # 转换为二维数组
y = df['label'].tolist()

# 将标签列表转换为一维数组
unique_labels = sorted(set(y))
label_to_index = {label: i for i, label in enumerate(unique_labels)}
y_train = [label_to_index[label] for label in y]

# 拆分数据集
X_train, X_test, y_train, y_test = train_test_split(X, y_train, test_size=0.2, random_state=42)

# 创建朴素贝叶斯模型
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# 预测并评估模型性能
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"朴素贝叶斯模型准确率：{accuracy * 100:.2f}%")

朴素贝叶斯模型准确率：100.00%


In [26]:
# 假设你有一个新的能量值
new_energy = 1.5e-06

# 将新的能量值转换为二维数组
new_X = np.array([[new_energy]])

# 使用已训练的模型进行预测
predicted_label_index = nb_classifier.predict(new_X)[0]

# 将预测的标签索引转换回原始标签
predicted_label = unique_labels[predicted_label_index]

print(f"预测结果：{predicted_label}")

预测结果：lijunjie


In [31]:
import librosa

# 加载音频文件
audio_path = 'data/xuzhaoqi/1.wav'
x, sr = librosa.load(audio_path)

# 计算音频信号的能量
energy = sum(x**2) / len(x)

print(f"音频能量：{energy}")

# 将新的能量值转换为二维数组
new_X = np.array([[energy]])

# 使用已训练的模型进行预测
predicted_label_index = nb_classifier.predict(new_X)[0]

# 将预测的标签索引转换回原始标签
predicted_label = unique_labels[predicted_label_index]

print(f"预测结果：{predicted_label}")

音频能量：5.657377177998379e-05
预测结果：xuzhaoqi
