In [3]:
import os
import warnings

import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report
from torch.utils.data import DataLoader, TensorDataset

warnings.filterwarnings("ignore")

# Neural-network model definition
class BinaryClassifier(nn.Module):
    """Small feed-forward network that returns one raw logit per class.

    Layout: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear.
    The layers are stored in ``self.model`` (an ``nn.Sequential``), so the
    state-dict keys are ``model.0.*``, ``model.3.*`` and ``model.6.*``.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the first hidden layer; the second hidden
            layer uses ``hidden_size // 2`` units.
        num_classes: Number of output logits. Defaults to 2.
        dropout: Dropout probability applied after each hidden activation.
            Defaults to 0.2.
    """

    def __init__(self, input_size, hidden_size=64, num_classes=2, dropout=0.2):
        super().__init__()
        narrow_size = hidden_size // 2
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, narrow_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(narrow_size, num_classes),  # one logit per class
        )

    def forward(self, x):
        """Return the un-normalised logits produced by the layer stack for ``x``."""
        return self.model(x)

# Read the train / test splits
dataTrain = pd.read_csv("allAtt_onehot_large_train_new8.csv")
dataTest = pd.read_csv("allAtt_onehot_large_test_new8.csv")

# Prepare the arrays: column positions 4..37 are used as features,
# positions 38 onward as the label columns.
feature_cols = slice(4, 38)
label_cols = slice(38, None)

x_train = dataTrain.iloc[:, feature_cols].to_numpy()
y_train = dataTrain.iloc[:, label_cols].to_numpy()
x_test = dataTest.iloc[:, feature_cols].to_numpy()
y_test = dataTest.iloc[:, label_cols].to_numpy()

# If y is one-hot encoded, collapse each row to an integer class label
# (the position of the largest entry in that row).
y_train_int = y_train.argmax(axis=1)
y_test_int = y_test.argmax(axis=1)

# Convert to PyTorch tensors: float32 features, int64 class labels
X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_int, dtype=torch.long)
X_test_tensor = torch.tensor(x_test, dtype=torch.float32)

# Build the data loader; training batches are reshuffled every epoch
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialise the model; the input width is the number of feature columns
input_size = x_train.shape[1]
model = BinaryClassifier(input_size)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 50
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Reset gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass
        loss = criterion(model(batch_x), batch_y)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Only report on every 10th epoch
    if (epoch + 1) % 10 != 0:
        continue

    # Reported value is the mean of the per-batch losses for this epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

# Evaluate the model on the test split
model.eval()
with torch.no_grad():
    X_test_tensor = X_test_tensor.to(device)
    outputs = model(X_test_tensor)
    # Predicted class = index of the largest logit in each row
    predicted = torch.max(outputs, dim=1).indices.cpu().numpy()

# Accuracy and per-class metrics
acc = accuracy_score(y_test_int, predicted)
print(f"✅ Accuracy: {acc:.4f}")
print("📊 Classification Report:")
report = classification_report(y_test_int, predicted)
print(report)

# Save the model weights (state_dict only). The parent directory is created
# first because torch.save does not create missing directories.
model_path = 'models/catboost_pytorch.pth'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(model.state_dict(), model_path)

# Example: rebuild the network and restore saved weights
def load_model(model_path, input_size, device="cpu"):
    """Rebuild a ``BinaryClassifier`` and load weights from ``model_path``.

    Args:
        model_path: Path to a file holding a state dict written by
            ``torch.save(model.state_dict(), ...)``.
        input_size: Number of input features the network was built with.
        device: Device the weights are mapped to and the model is moved to.
            Defaults to ``"cpu"``, so a checkpoint saved from a CUDA model
            still loads on a machine without a GPU.

    Returns:
        The model in evaluation mode, placed on ``device``.
    """
    model = BinaryClassifier(input_size)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    # map_location remaps tensors that were saved on another device.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()
    return model

# Example: run predictions with a reloaded model (same path the weights were saved to)
# loaded_model = load_model('models/catboost_pytorch.pth', input_size)

Epoch [10/50], Loss: 0.5953
Epoch [20/50], Loss: 0.5757
Epoch [30/50], Loss: 0.5562
Epoch [40/50], Loss: 0.5407
Epoch [50/50], Loss: 0.5302
✅ Accuracy: 0.6921
📊 Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.65      0.66       175
           1       0.71      0.73      0.72       205

    accuracy                           0.69       380
   macro avg       0.69      0.69      0.69       380
weighted avg       0.69      0.69      0.69       380

