In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
import time

## 1. 加载数据并打好标签

In [3]:
# Download the Iris dataset from the UCI repository. The file has no header
# row, so the column names are supplied explicitly.
# HTTPS is used so the training data cannot be altered in transit.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
data = pd.read_csv(url, header=None, names=column_names)

# Show the loaded frame as the cell output.
data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [5]:
# Encode the string class labels as integer ids.
class_to_id = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

# Only encode while the column still holds the raw string labels. Mapping an
# already-encoded column would turn every label into NaN, so without this
# guard re-running the cell silently corrupts the targets.
if not pd.api.types.is_numeric_dtype(data['class']):
    encoded = data['class'].map(class_to_id)
    if encoded.isna().any():
        # Fail before touching `data` so the raw labels stay inspectable.
        unknown = sorted(data.loc[encoded.isna(), 'class'].astype(str).unique())
        raise ValueError(f"Unmapped class labels: {unknown}")
    data['class'] = encoded

# Split into the feature matrix (every column but the last) and the target
# vector (the last column).
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Hold out 30% of the rows as the test set; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training split only, then apply the same transform to
# the test split so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## 2. 定义和训练模型

### 2.1 SVM模型

In [6]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import time

# Support vector classifier with a linear kernel.
svm_model = SVC(kernel='linear')

# Fit on the training split and time the fit. perf_counter() is a monotonic,
# high-resolution clock meant for measuring short durations; time.time() is a
# wall clock that can be adjusted and may be coarse on some platforms.
start_time = time.perf_counter()
svm_model.fit(X_train, y_train)
svm_train_time = time.perf_counter() - start_time

# Predict labels for the held-out test split.
svm_pred = svm_model.predict(X_test)

# Fraction of test samples classified correctly.
svm_accuracy = accuracy_score(y_test, svm_pred)

### 2.2 随机森林模型

In [7]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 100 trees; the fixed seed makes the fitted forest
# reproducible.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit on the training split and time the fit. perf_counter() is a monotonic,
# high-resolution clock meant for measuring short durations; time.time() is a
# wall clock that can be adjusted and may be coarse on some platforms.
start_time = time.perf_counter()
rf_model.fit(X_train, y_train)
rf_train_time = time.perf_counter() - start_time

# Predict labels for the held-out test split.
rf_pred = rf_model.predict(X_test)

# Fraction of test samples classified correctly.
rf_accuracy = accuracy_score(y_test, rf_pred)

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim

# Wrap the NumPy splits as PyTorch tensors.
# Features become float32 tensors.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
# Labels become int64 ("long") tensors holding the class indices.
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

# 定义神经网络模型
class NeuralNetwork(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    Args:
        in_features: Number of input features per sample.
        hidden_size: Width of each of the two hidden layers.
        num_classes: Number of output logits (one per class).

    The defaults build a 4 -> 10 -> 10 -> 3 network, so ``NeuralNetwork()``
    can be called with no arguments.
    """

    def __init__(self, in_features: int = 4, hidden_size: int = 10, num_classes: int = 3):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_size)   # input -> hidden
        self.fc2 = nn.Linear(hidden_size, hidden_size)   # hidden -> hidden
        self.fc3 = nn.Linear(hidden_size, num_classes)   # hidden -> output

    def forward(self, x):
        """Return raw class logits for ``x``; no softmax is applied.

        The last dimension of ``x`` must equal ``in_features``; the last
        dimension of the result has size ``num_classes``.
        """
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Seed PyTorch's RNG before building the model so the random weight
# initialisation, and therefore the reported accuracy, is reproducible
# across kernel restarts.
torch.manual_seed(42)

# Model, loss function and optimiser.
model = NeuralNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Full-batch training: every epoch is one gradient step on the whole
# training set. perf_counter() is used because it is a monotonic,
# high-resolution clock meant for timing short durations.
model.train()
start_time = time.perf_counter()
epochs = 100
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
nn_train_time = time.perf_counter() - start_time

# Evaluate on the held-out test split without tracking gradients.
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    # Predicted class = index of the largest logit. argmax() is used instead
    # of `_, idx = torch.max(...)` so IPython's `_` (last cell output) is not
    # overwritten.
    nn_pred = outputs.argmax(dim=1)
    nn_accuracy = accuracy_score(y_test_tensor.numpy(), nn_pred.numpy())

## 3. 结果比较

In [9]:
# Report test accuracy and training time for each model, one line per model.
print(
    f"SVM Accuracy: {svm_accuracy:.4f}, Training Time: {svm_train_time:.4f} seconds",
    f"Random Forest Accuracy: {rf_accuracy:.4f}, Training Time: {rf_train_time:.4f} seconds",
    f"Neural Network Accuracy: {nn_accuracy:.4f}, Training Time: {nn_train_time:.4f} seconds",
    sep="\n",
)

SVM Accuracy: 0.9778, Training Time: 0.0369 seconds
Random Forest Accuracy: 1.0000, Training Time: 0.0718 seconds
Neural Network Accuracy: 1.0000, Training Time: 0.4557 seconds


## 4. 结论

由于该分类任务较简单，三种模型在测试集上都取得了很高的准确率（均在 97% 以上）。

具体而言：

准确率：随机森林 = 神经网络 > SVM（测试集只有 45 个样本，0.9778 即 44/45，与 1.0000 仅相差 1 个样本，这一差距不足以说明模型优劣）

训练时间：神经网络 > 随机森林 > SVM（单次运行的计时结果，具体数值会随机器和运行环境波动）

