In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import joblib

# Seed every RNG in play so weight initialisation, dropout masks and the
# shuffled batch order are repeatable across "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load the train / test splits.
train_path = "../data/kmeans_new_train.csv"
test_path = "../data/kmeans_new_test.csv"
df_train = pd.read_csv(train_path)
df_test = pd.read_csv(test_path)
# Where the trained weights and the fitted scaler are written.
MODEL_PATH = "./nfs_classfication_model.pt"
SCALER_PATH = "./scaler.pkl"

# Every column except "label" is an input feature; the test frame is indexed
# with the training column list so both matrices share the same column order.
feature_cols = [col for col in df_train.columns if col != "label"]
X_train = df_train[feature_cols].values
y_train = df_train["label"].values
X_test = df_test[feature_cols].values
y_test = df_test["label"].values

# Standardise: statistics are fitted on the training split only and then
# re-used for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Pick the compute device and convert the arrays to tensors on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("使用设备:", device)

# Features become float32; labels become int64 class indices, which is the
# target format used with nn.CrossEntropyLoss.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

# Build the data loaders: training batches are reshuffled every epoch, test
# batches keep their original order.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# 网络模型
class NFSNet(nn.Module):
    """Feed-forward classifier: three hidden blocks followed by a linear head.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout, with widths
    128, 64 and 32 and dropout rates 0.3, 0.3 and 0.2. The head emits one raw
    logit per class (no softmax is applied here).

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits. Defaults to 21, the value that
            was previously hard-coded, so existing callers and saved
            state_dicts keep working unchanged.
    """

    def __init__(self, input_dim, num_classes=21):
        super().__init__()
        # Layer order (and therefore the state_dict keys "model.<idx>.*") is
        # kept exactly as before so existing checkpoints remain loadable.
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(32, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch ``x`` of feature vectors."""
        return self.model(x)



# Build the network with one input unit per feature column, then move it to
# the selected device (nn.Module.to relocates the parameters in place).
model = NFSNet(input_dim=len(feature_cols))
model.to(device)
# Cross-entropy over the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# 保存模型函数
def save_model(model, path):
    """Write ``model``'s state_dict to ``path`` and print where it was saved.

    Only the parameters/buffers are serialised, not the module object itself.
    """
    weights = model.state_dict()
    torch.save(weights, path)
    print(f"模型已保存至: {path}")

# 训练函数
def train_model(num_epochs=15):
    """Train the global ``model`` on ``train_loader`` and persist the results.

    Each epoch iterates once over ``train_loader``, minimising ``criterion``
    with ``optimizer``. After the final epoch the weights are written to
    ``MODEL_PATH`` through ``save_model`` and the fitted ``scaler`` is dumped
    to ``SCALER_PATH`` with joblib.

    Args:
        num_epochs: Number of passes over the training data. Defaults to 15,
            the count that was previously hard-coded in the loop.

    Returns:
        list[float]: For each epoch, the average of the per-batch losses.
    """
    losses = []
    for epoch in range(1, num_epochs + 1):
        # Training mode: dropout is active and batch-norm statistics update.
        model.train()
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        avg_loss = running_loss / len(train_loader)
        losses.append(avg_loss)
        # Show the real epoch total; the message used to say "/5" regardless
        # of how many epochs were actually run.
        print(f"Epoch {epoch}/{num_epochs}, Loss: {avg_loss:.4f}")
    save_model(model, MODEL_PATH)
    joblib.dump(scaler, SCALER_PATH)
    print(f"标准化器已保存至: {SCALER_PATH}")
    return losses

# 模型评估
def evaluate_model():
    """Print the accuracy of the global ``model`` on ``test_loader``.

    The model is switched to eval mode and gradients are disabled. The
    predicted class of each sample is the index of its largest logit.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for features, targets in test_loader:
            logits = model(features)
            batch_pred = torch.max(logits, 1).indices
            # Move to the CPU before handing the values to scikit-learn.
            predicted += batch_pred.cpu().tolist()
            expected += targets.cpu().tolist()
    acc = accuracy_score(expected, predicted)
    print(f"测试集准确率: {acc:.4f}")



In [None]:
# Run training; train_model() returns the list of per-epoch average losses,
# which is kept in `losses` for the plot further down.
losses = train_model()

In [None]:
# Score the trained model on the test loader; the accuracy is printed by
# evaluate_model() itself.
evaluate_model()

In [None]:
# Plot the training-loss curve. Epochs are numbered from 1 so the x axis
# matches the epoch numbers printed during training (plotting `losses` alone
# would place the first epoch at x = 0).
epochs = range(1, len(losses) + 1)
fig, ax = plt.subplots()
ax.plot(epochs, losses)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training Loss")
ax.grid(True)
plt.show()