## 模型性能测试

### LightGBM

In [7]:
import pandas as pd
from joblib import load
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

# 加载测试数据集
# test_data = pd.read_csv('dif_distance_featrues/p4_vectors.csv')
# test_data = pd.read_csv('p4_test_vectors.csv')
test_data = pd.read_csv('wusiyuan_test_vectors.csv')

X_test = test_data.iloc[:, :-1]  # 排除最后一列(label)

# 使用joblib加载已训练的模型
model = load('models/lgb_later_model.pkl')

# 使用模型进行预测
y_pred = model.predict(X_test)

# 对y_true进行编码转换
labels, uniques = pd.factorize(test_data['label'])
y_true = labels

# 假设这是factorize之后与原标签的对应关系
labels_to_names = {0: 'lijunjie', 1: 'xuzhaoqi', 2: 'wusiyuan', 3: 'chence'}

# 将y_true的整数标签转换为字符串标签
y_true_named = np.vectorize(labels_to_names.get)(y_true)

# 现在y_true_named和y_pred都是字符串形式的标签，可以计算性能指标
precision = precision_score(y_true_named, y_pred, labels=list(labels_to_names.values()), average='weighted')
recall = recall_score(y_true_named, y_pred, labels=list(labels_to_names.values()), average='weighted')
f1 = f1_score(y_true_named, y_pred, labels=list(labels_to_names.values()), average='weighted')

print(f"精确度: {precision:.4f}")
print(f"召回率: {recall:.4f}")
print(f"F1 分数: {f1:.4f}")

# 输出预测错误的实例
# 创建一个错误记录的DataFrame
errors = pd.DataFrame({
    'True Label': y_true_named,
    'Predicted Label': y_pred,
    'Correct': y_true_named == y_pred
})

# 筛选出预测错误的实例
incorrect_predictions = errors[errors['Correct'] == False]

# 打印错误的实例
print("\n预测错误的实例:")
print(incorrect_predictions)


精确度: 0.0703
召回率: 0.2598
F1 分数: 0.1106

预测错误的实例:
    True Label Predicted Label  Correct
0     lijunjie        xuzhaoqi    False
1     lijunjie        xuzhaoqi    False
2     lijunjie          chence    False
3     lijunjie          chence    False
4     lijunjie          chence    False
..         ...             ...      ...
122     chence        xuzhaoqi    False
123     chence        xuzhaoqi    False
124     chence        xuzhaoqi    False
125     chence        xuzhaoqi    False
126     chence        xuzhaoqi    False

[94 rows x 3 columns]


  _warn_prf(average, modifier, msg_start, len(result))


### SVC

In [8]:
import pandas as pd
from joblib import load
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

# 加载测试数据集
# test_data = pd.read_csv('dif_distance_featrues/p4_vectors.csv')
# test_data = pd.read_csv('p4_test_vectors.csv')
test_data = pd.read_csv('wusiyuan_test_vectors.csv')

X_test = test_data.iloc[:, :-1]  # 排除最后一列(label)

# 使用joblib加载已训练的模型
model = load('models/svm_model.pkl')

# 使用模型进行预测
y_pred = model.predict(X_test)

# 对y_true进行编码转换
labels, uniques = pd.factorize(test_data['label'])
y_true = labels

# 假设这是factorize之后与原标签的对应关系
labels_to_names = {0: 'lijunjie', 1: 'xuzhaoqi', 2: 'wusiyuan', 3: 'chence'}

# 将y_true的整数标签转换为字符串标签
y_true_named = np.vectorize(labels_to_names.get)(y_true)

# 现在y_true_named和y_pred都是字符串形式的标签，可以计算性能指标
precision = precision_score(y_true_named, y_pred, labels=list(labels_to_names.values()), average='weighted')
recall = recall_score(y_true_named, y_pred, labels=list(labels_to_names.values()), average='weighted')
f1 = f1_score(y_true_named, y_pred, labels=list(labels_to_names.values()), average='weighted')

print(f"精确度: {precision:.4f}")
print(f"召回率: {recall:.4f}")
print(f"F1 分数: {f1:.4f}")

# 输出预测错误的实例
# 创建一个错误记录的DataFrame
errors = pd.DataFrame({
    'True Label': y_true_named,
    'Predicted Label': y_pred,
    'Correct': y_true_named == y_pred
})

# 筛选出预测错误的实例
incorrect_predictions = errors[errors['Correct'] == False]

# 打印错误的实例
print("\n预测错误的实例:")
print(incorrect_predictions)


精确度: 0.0703
召回率: 0.2598
F1 分数: 0.1106

预测错误的实例:
    True Label Predicted Label  Correct
0     lijunjie        xuzhaoqi    False
1     lijunjie        xuzhaoqi    False
2     lijunjie          chence    False
3     lijunjie          chence    False
4     lijunjie          chence    False
..         ...             ...      ...
122     chence        xuzhaoqi    False
123     chence        xuzhaoqi    False
124     chence        xuzhaoqi    False
125     chence        xuzhaoqi    False
126     chence        xuzhaoqi    False

[94 rows x 3 columns]


  _warn_prf(average, modifier, msg_start, len(result))
