In [10]:
import numpy as np
import optuna
from sklearn import metrics
import warnings
import pickle
warnings.simplefilter(action='ignore', category=FutureWarning)
import joblib
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, r2_score, roc_curve
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [11]:
# Load the data
data = np.load('DataSet/Classified_Data.npz')

Features = data['features']
Labels = data['labels']

print(Features.shape, Labels.shape)

(16871, 9) (16871,)


In [12]:
scaler = StandardScaler()
Features_scaled = scaler.fit_transform(Features)

# 分割数据集
X_train, X_test, y_train, y_test = train_test_split(Features_scaled, Labels, test_size=0.2, random_state=42)

In [13]:
# 设置SVM参数网格
param_grid = {
    'C': [0.1, 1, 10, 100],  # 正则化强度的逆，必须是正数。
    'gamma': ['scale', 'auto', 0.001, 0.0001],  # 核函数的系数
    'kernel': ['linear', 'rbf', 'poly'],  # 指定算法中使用的核类型
    'class_weight': ['balanced', None]  # 设置模式识别中各类别权重
}

# 创建SVM模型
svm = SVC(probability=True)


In [14]:
svm_model = SVC(C=0.1, class_weight='balanced', gamma='auto', kernel='poly', probability=True)

# 训练SVM模型
svm_model.fit(X_train, y_train)

# 模型评估
y_pred = svm_model.predict(X_test)
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

              precision    recall  f1-score   support

           0       0.98      0.67      0.80      1872
           1       0.71      0.98      0.82      1503

    accuracy                           0.81      3375
   macro avg       0.84      0.83      0.81      3375
weighted avg       0.86      0.81      0.81      3375

Accuracy: 0.8106666666666666


In [9]:
# 使用GridSearchCV寻找最优参数
grid_search = GridSearchCV(svm, param_grid, cv=1, scoring='accuracy', verbose=2)
grid_search.fit(X_train, y_train)

# 查看最优参数和得分
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_}")

InvalidParameterError: The 'cv' parameter of GridSearchCV must be an int in the range [2, inf), an object implementing 'split' and 'get_n_splits', an iterable or None. Got 1 instead.

In [None]:
# 训练SVM模型
best_svm = grid_search.best_estimator_
best_svm.fit(X_train, y_train)

# 模型评估
y_pred = best_svm.predict(X_test)
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")