In [None]:
# 导入必要的库
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    recall_score,
    precision_score,
    confusion_matrix
)
import matplotlib.pyplot as plt
import seaborn as sns

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides scikit-learn diagnostics (e.g. undefined-metric
# warnings) — consider narrowing the filter once the notebook is stable.
import warnings
warnings.simplefilter("ignore")



In [None]:
# Configure matplotlib so CJK text renders (SimHei font) and the minus sign
# is drawn with an ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# 加载数据
def load_arff_data(file_path):
    """Read an ARFF file and return its contents as a pandas DataFrame.

    Columns that pandas stores with ``object`` dtype are decoded from
    UTF-8 bytes into Python strings; all other columns are left as loaded.

    Args:
        file_path: Path (or file-like object) accepted by
            ``scipy.io.arff.loadarff``.

    Returns:
        pandas.DataFrame with one column per ARFF attribute.
    """
    from scipy.io import arff

    records, _meta = arff.loadarff(file_path)
    frame = pd.DataFrame(records)

    # Decode each object-dtype column in turn.
    object_columns = frame.select_dtypes(include=['object']).columns
    for column_name in object_columns:
        frame[column_name] = frame[column_name].str.decode('utf-8')

    return frame

# Load the training and test sets (replace the placeholder paths with real ones).
train_data = load_arff_data('path_to_train_file.arff')
test_data = load_arff_data('path_to_test_file.arff')

# Every column except the last is a feature; the last column is the label.
X_train, y_train = train_data.iloc[:, :-1], train_data.iloc[:, -1]
X_test, y_test = test_data.iloc[:, :-1], test_data.iloc[:, -1]



In [None]:
# SVM分类器
def svm_classifier(X_train, y_train, kernel='linear', **svc_params):
    """Fit a support vector classifier on the training data.

    Args:
        X_train: Training features, passed to ``SVC.fit``.
        y_train: Training labels, passed to ``SVC.fit``.
        kernel: Kernel name handed to ``SVC``. Defaults to ``'linear'``,
            which was previously the hard-coded choice.
        **svc_params: Additional keyword arguments forwarded unchanged to
            the ``SVC`` constructor (for example ``C`` or ``gamma``).

    Returns:
        The fitted ``SVC`` instance.
    """
    model = SVC(kernel=kernel, **svc_params)
    model.fit(X_train, y_train)
    return model

# 模型评估函数
def evaluate_model(model, X_test, y_test, model_name="SVM"):
    """Evaluate a fitted classifier and plot its confusion matrix.

    Prints accuracy, weighted recall, weighted precision and the
    classification report, then shows the confusion matrix as a heatmap.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Features passed to ``model.predict``.
        y_test: True labels corresponding to ``X_test``.
        model_name: Display name used in the printed lines and plot title.

    Returns:
        None. Results are printed and plotted.
    """
    y_pred = model.predict(X_test)

    # Compute the evaluation metrics.
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted')
    report = classification_report(y_test, y_pred)

    # Print the evaluation results.
    print(f"{model_name} 分类准确率: {accuracy:.4f}")
    print(f"{model_name} 召回率: {recall:.4f}")
    print(f"{model_name} 精确率: {precision:.4f}")
    print(f"{model_name} 分类报告:\n{report}")

    # The matrix rows/columns must line up with the tick labels. Derive one
    # sorted label set from BOTH the true and predicted labels and use it for
    # the matrix and the axes; using only y_test would mislabel the heatmap
    # whenever a class is predicted but absent from y_test.
    labels = np.union1d(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred, labels=labels)

    # Draw the confusion matrix on an explicit Axes.
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=labels,
        yticklabels=labels,
        ax=ax,
    )
    ax.set_title(f"{model_name} 混淆矩阵")
    ax.set_xlabel("预测标签")
    ax.set_ylabel("真实标签")
    plt.show()

# Fit the SVM on the training split, then report its performance on the test split.
svm_model = svm_classifier(X_train, y_train)
evaluate_model(svm_model, X_test, y_test, model_name="SVM")
