# sklearn

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# 获取鸢尾花的数据
iris = datasets.load_iris()

iris_X_train, iris_X_test, iris_y_train, iris_y_test = train_test_split(iris.data, iris.target, test_size=0.2)

# 实现支持向量机，并训练模型参数
svm = SVC()
svm.fit(iris_X_train, iris_y_train)

# 预测测试数据
iris_y_pred = svm.predict(iris_X_test)

# 打印输出：实际分类、预测分类、准确率（保留两位有效位）
accuracy = accuracy_score(iris_y_test, iris_y_pred)
print("实际分类：", iris_y_test)
print("预测分类：", iris_y_pred)
print("准确率：{:.2f}".format(accuracy))

实际分类： [1 0 2 0 2 0 1 0 0 1 2 2 0 1 2 0 2 1 0 1 2 1 2 0 0 2 2 0 2 1]
预测分类： [1 0 2 0 1 0 1 0 0 1 2 2 0 1 2 0 2 1 0 1 2 1 2 0 0 2 2 0 1 1]
准确率：0.93


In [8]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# 获取鸢尾花的数据
iris = datasets.load_iris()
# 切割80%训练和20%的测试数据
iris_X_train, iris_X_test, iris_y_train, iris_y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

# 实现支持向量机，并训练模型参数
svm = SVC(kernel='rbf')

# 定义超参数搜索范围
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.1, 1, 10]}

# 使用网格搜索来调优超参数
grid_search = GridSearchCV(svm, param_grid, cv=5)
grid_search.fit(iris_X_train, iris_y_train)

# 输出最佳参数组合
print("最佳参数组合：", grid_search.best_params_)

# 预测测试数据
iris_y_pred = grid_search.predict(iris_X_test)

# 打印输出：实际分类、预测分类、准确率（保留两位有效位）
accuracy = accuracy_score(iris_y_test, iris_y_pred)
print("实际分类：", iris_y_test)
print("预测分类：", iris_y_pred)
print("准确率：{:.2f}".format(accuracy))

最佳参数组合： {'C': 1, 'gamma': 1}
实际分类： [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
预测分类： [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
准确率：1.00


# numpy

In [10]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 获取鸢尾花的数据
iris = datasets.load_iris()
X, y = iris.data, iris.target

# 切割80%训练和20%的测试数据
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 实现支持向量机，并训练模型参数
class SVM:
    def __init__(self, learning_rate=0.01, num_iterations=1000, C=1.0):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.C = C

    def fit(self, X, y):
        m, n = X.shape
        y = np.where(y == 0, -1, 1)  # 将类别0转换为-1
        self.w = np.zeros(n) # 初始化
        self.b = 0

        for _ in range(self.num_iterations):
            for i in range(m):
                condition = y[i] * (np.dot(X[i], self.w) - self.b) >= 1
                self.w -= self.learning_rate * (2 * self.C * self.w * condition - np.dot(X[i], y[i]))
                self.b -= self.learning_rate * (-y[i] * condition)

    def predict(self, X):
        return np.sign(np.dot(X, self.w) - self.b)

# 调参
def grid_search(X_train, y_train, X_test, y_test, learning_rates, C_values):
    best_accuracy = 0
    best_lr = None
    best_C = None

    for lr in learning_rates:
        for C in C_values:
            svm = SVM(learning_rate=lr, C=C)
            svm.fit(X_train, y_train)
            y_pred = svm.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_lr = lr
                best_C = C

    return best_lr, best_C, best_accuracy

# 定义学习率和惩罚参数的搜索范围
learning_rates = [0.001, 0.01, 0.1]
C_values = [0.1, 1, 10]

# 执行网格搜索
best_lr, best_C, best_accuracy = grid_search(X_train, y_train, X_test, y_test, learning_rates, C_values)

# 输出最佳结果
print("最佳学习率：", best_lr)
print("最佳惩罚参数：", best_C)
print("最佳准确率：{:.2f}".format(best_accuracy))

最佳学习率： 0.001
最佳惩罚参数： 0.1
最佳准确率：0.30


In [14]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

# 获取鸢尾花的数据
iris = datasets.load_iris()
# 切割80%训练和20%的测试数据
iris_X_train, iris_X_test, iris_y_train, iris_y_test = train_test_split(iris.data, iris.target, test_size=0.2)

# 实现支持向量机，并训练模型参数
class SVM:
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations

    def fit(self, X, y):
        num_samples, num_features = X.shape
        num_classes = len(np.unique(y))

        # 初始化参数
        self.W = np.zeros((num_features, num_classes))
        self.b = np.zeros((1, num_classes))

        # One-vs-Rest训练
        for i in range(num_classes):
            y_binary = np.where(y == i, 1, -1)
            weights = np.zeros(num_features)
            bias = 0

            # 训练
            for _ in range(self.num_iterations):
                for j in range(num_samples):
                    if y_binary[j] * (np.dot(X[j], weights) + bias) >= 1:
                        weights -= self.learning_rate * (2 * 0 * weights)
                        bias -= self.learning_rate * (2 * 0)
                    else:
                        weights -= self.learning_rate * (2 * 0 * weights - y_binary[j] * X[j])
                        bias -= self.learning_rate * (2 * 0 - y_binary[j])

            # 保存参数
            self.W[:, i] = weights
            self.b[:, i] = bias

    def predict(self, X):
        scores = np.dot(X, self.W) + self.b
        predicted_class = np.argmax(scores, axis=1)
        return predicted_class

# 创建并训练SVM模型
svm_model = SVM()
svm_model.fit(iris_X_train, iris_y_train)

# 预测测试数据
predictions = svm_model.predict(iris_X_test)

# 计算准确率
accuracy = np.mean(predictions == iris_y_test)

# 打印输出：实际分类、预测分类、准确率（保留两位有效位）
print("实际分类：", iris_y_test)
print("预测分类：", predictions)
print("准确率：", format(accuracy, ".2f"))

实际分类： [1 0 1 2 0 2 1 1 2 2 2 2 2 0 1 2 2 1 2 1 2 0 2 0 1 1 0 2 2 0]
预测分类： [1 0 1 2 0 2 1 1 2 2 2 2 2 0 1 2 2 1 2 0 2 0 2 0 1 1 0 2 2 0]
准确率： 0.97


# 探求最佳参数

In [13]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# 获取鸢尾花的数据
iris = datasets.load_iris()
# 切割80%训练和20%的测试数据
iris_X_train, iris_X_test, iris_y_train, iris_y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

# 实现支持向量机，并训练模型参数
svm = SVC(kernel='rbf')

# 定义超参数搜索范围
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.1, 1, 10]}

# 使用网格搜索来调优超参数
grid_search = GridSearchCV(svm, param_grid, cv=5)
grid_search.fit(iris_X_train, iris_y_train)

# 输出所有学习率和惩罚系数组合的准确率
results = grid_search.cv_results_
for i in range(len(results['params'])):
    params = results['params'][i]
    accuracy = results['mean_test_score'][i]
    print("学习率: {}, 惩罚系数: {}, 准确率: {:.2f}".format(params['gamma'], params['C'], accuracy))

# 输出最佳参数组合
print("最佳参数组合：", grid_search.best_params_)

# 获取最佳模型
best_model = grid_search.best_estimator_

# 使用最佳模型进行预测
best_model_predictions = best_model.predict(iris_X_test)

# 打印输出：实际分类、预测分类、准确率（保留两位有效位）
accuracy = accuracy_score(iris_y_test, best_model_predictions)
print("实际分类：", iris_y_test)
print("预测分类：", best_model_predictions)
print("准确率：{:.2f}".format(accuracy))

学习率: 0.1, 惩罚系数: 0.1, 准确率: 0.90
学习率: 1, 惩罚系数: 0.1, 准确率: 0.94
学习率: 10, 惩罚系数: 0.1, 准确率: 0.47
学习率: 0.1, 惩罚系数: 1, 准确率: 0.95
学习率: 1, 惩罚系数: 1, 准确率: 0.96
学习率: 10, 惩罚系数: 1, 准确率: 0.94
学习率: 0.1, 惩罚系数: 10, 准确率: 0.95
学习率: 1, 惩罚系数: 10, 准确率: 0.94
学习率: 10, 惩罚系数: 10, 准确率: 0.93
学习率: 0.1, 惩罚系数: 100, 准确率: 0.95
学习率: 1, 惩罚系数: 100, 准确率: 0.93
学习率: 10, 惩罚系数: 100, 准确率: 0.93
最佳参数组合： {'C': 1, 'gamma': 1}
实际分类： [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
预测分类： [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
准确率：1.00
