In [8]:
# Tensors
import torch
import numpy as np

# 1. Creating tensors
# 1.0 From existing data: a nested Python list and a NumPy array built from it.
data = [[1, 2], [3, 4]]
np_arr = np.array(data)

x_data = torch.tensor(data)
x_np = torch.from_numpy(np_arr)

# 1.1 Tensors of a given shape: random values, all ones, all zeros.
shape = (2, 3)
rand_tensor, ones_tensor, zeros_tensor = (
    factory(shape) for factory in (torch.rand, torch.ones, torch.zeros)
)

# 1.2 Move the tensor to the GPU when CUDA is available.
if torch.cuda.is_available():
    x_data = x_data.to(torch.device("cuda"))



In [43]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split

# Load the CSV file.
df = pd.read_csv('./data/cyber.csv')

# Label-encode the categorical columns. Each column gets its own encoder so
# that every fitted mapping stays available (a single shared encoder would
# only remember the column it was fitted on last).
label_column = 'class'
categorical_features = [
    label_column,
]
encoders = {}
for feature in categorical_features:
    encoders[feature] = LabelEncoder()
    df[feature] = encoders[feature].fit_transform(df[feature])
le = encoders[label_column]  # encoder of the target column, kept under its original name

# Full table as a NumPy array (kept for backward compatibility with cells
# that read `data`).
data = df.values

# Separate features and label by column NAME rather than by position, so the
# encoded target can never end up among the features if the CSV column order
# differs. The explicit float cast fails loudly here if a non-numeric feature
# column is still present.
X = df.drop(columns=[label_column]).to_numpy(dtype='float64')
y = df[label_column].to_numpy()

# Tensors over the whole (unsplit) dataset.
X_tensor = torch.tensor(X, dtype=torch.float)
y_tensor = torch.tensor(y, dtype=torch.long)

# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# X_train / y_train are used for training, X_test / y_test for evaluation.
# Convert the NumPy arrays to PyTorch tensors for use with a DataLoader.
X_train_tensor = torch.tensor(X_train, dtype=torch.float)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)


In [42]:
# 归一化
from sklearn.preprocessing import MinMaxScaler

# Fit the min-max scaler on the training features only, then apply the same
# fitted scaler to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# X_train_scaled and X_test_scaled now hold the normalized data used to train
# and evaluate the neural network.

In [40]:
# Train and evaluate a PyTorch classifier on the scaled train/test split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score

# Tensors prepared by the earlier cells:
#   X_train_tensor: training features (shape: [num_train_samples, num_features])
#   y_train_tensor: training labels   (shape: [num_train_samples])
#   X_test_tensor:  test features     (shape: [num_test_samples, num_features])
#   y_test_tensor:  test labels       (shape: [num_test_samples])

# Replace the feature tensors with ones built from the normalized arrays.
X_train_tensor, X_test_tensor = (
    torch.tensor(scaled, dtype=torch.float32)
    for scaled in (X_train_scaled, X_test_scaled)
)

# 创建神经网络模型
class SimpleNN(nn.Module):
    """Feed-forward classifier with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the hidden layer.
        num_classes: number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output of the second linear layer for input ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Seed PyTorch's global RNG so weight initialization and the shuffling done by
# the training DataLoader are repeatable (same value as the random_state used
# for the train/test split).
SEED = 42
torch.manual_seed(SEED)

# Hyperparameters
input_size = X_train_tensor.shape[1]
hidden_size = 64
# Derive the number of classes from the labels instead of hard-coding it:
# CrossEntropyLoss needs every target to be < num_classes, and the labels are
# integer class indices, so the largest index + 1 is the required output size.
num_classes = int(torch.cat((y_train_tensor, y_test_tensor)).max().item()) + 1
learning_rate = 0.001
epochs = 10
batch_size = 32

# Instantiate the model
model = SimpleNN(input_size, hidden_size, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Data loaders: training batches are reshuffled on every pass, test batches
# keep their order.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Train the model, reporting the per-sample average loss after every epoch.
num_train_samples = len(train_loader.dataset)

for epoch in range(epochs):
    model.train()  # switch to training mode
    running_loss = 0.0

    for features, targets in train_loader:
        optimizer.zero_grad()  # drop gradients left over from the previous step
        batch_loss = criterion(model(features), targets)  # forward pass + loss
        batch_loss.backward()  # back-propagation
        optimizer.step()  # weight update
        # Weight the batch loss by the batch size so the epoch figure is
        # accumulated per sample, not per batch.
        running_loss += batch_loss.item() * features.size(0)

    train_loss = running_loss / num_train_samples
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {train_loss:.4f}')

# Evaluate the trained model on the test loader.
model.eval()  # switch to evaluation mode
y_pred, y_true = [], []

with torch.no_grad():
    for features, targets in test_loader:
        # Predicted class = index of the largest output along dim 1.
        predicted = torch.max(model(features), dim=1).indices
        y_pred += predicted.tolist()
        y_true += targets.tolist()

test_accuracy = accuracy_score(y_true, y_pred)
print(f'Test Accuracy: {test_accuracy:.4f}')

Epoch [1/10], Loss: 0.5894
Epoch [2/10], Loss: 0.2897
Epoch [3/10], Loss: 0.2385
Epoch [4/10], Loss: 0.2074
Epoch [5/10], Loss: 0.1845
Epoch [6/10], Loss: 0.1674
Epoch [7/10], Loss: 0.1546
Epoch [8/10], Loss: 0.1446
Epoch [9/10], Loss: 0.1365
Epoch [10/10], Loss: 0.1295
Test Accuracy: 0.9597
