In [93]:
import torch 
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, TensorDataset
import pandas as pd
from tqdm import *
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
# Prefer the first CUDA GPU, but fall back to the CPU so the notebook also
# runs on machines without one (a bare "cuda:0" string fails there as soon
# as a tensor or module is moved to it).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [94]:
# Read the three pre-split CSV files in full.
train_df = pd.read_csv('../data/train.csv')
valid_df = pd.read_csv('../data/valid.csv')
test_df = pd.read_csv('../data/test.csv')

# Fail early if the splits do not share a column count: the slicing below
# assumes the same layout (label first, features after) in every file.
assert train_df.shape[1] == valid_df.shape[1] == test_df.shape[1], \
    "train/valid/test CSV files must have the same number of columns"

# Convert each frame to a tensor; the dtype is whatever pandas/NumPy infer.
train_data = torch.tensor(train_df.to_numpy())
valid_data = torch.tensor(valid_df.to_numpy())
test_data = torch.tensor(test_df.to_numpy())

# Labels: column 0 of every split.
train_y = train_data[:, 0]
valid_y = valid_data[:, 0]
test_y = test_data[:, 0]

# Features: every remaining column.
train_x = train_data[:, 1:]
valid_x = valid_data[:, 1:]
test_x = test_data[:, 1:]

In [95]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm

class RF_MLP(nn.Module):
    """Ensemble of independently trained MLP binary classifiers.

    The module holds ``num_mlp`` identical multilayer perceptrons, each ending
    in a sigmoid, and predicts the arithmetic mean of their outputs.

    Note: every member is trained on the same ``train_loader`` (no bootstrap
    or feature sub-sampling is done here), so members differ only through
    their random re-initialisation, dropout, and whatever batch order the
    loader produces.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the last hidden layer; the first two hidden
            layers are ``2 * hidden_dim`` wide.
        num_mlp: Number of MLPs in the ensemble.
    """

    def __init__(self, input_dim, hidden_dim, num_mlp):
        super(RF_MLP, self).__init__()
        self.num_mlp = num_mlp
        self.mlp_list = nn.ModuleList()
        for _ in range(self.num_mlp):
            self.mlp_list.append(nn.Sequential(
                nn.Linear(input_dim, 2*hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.5),
                nn.Linear(2*hidden_dim, 2*hidden_dim),
                nn.ReLU(),
                nn.Linear(2*hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.5),
                nn.Linear(hidden_dim, 1),
                nn.Sigmoid()
            ))

    def forward(self, x):
        """Return the mean of all members' sigmoid outputs.

        Args:
            x: Input batch whose first dimension is the batch size and whose
                last dimension matches ``input_dim``.

        Returns:
            Tensor of shape ``(x.shape[0], 1)`` holding the averaged outputs.
        """
        # Allocate the accumulator directly on x's device instead of creating
        # it on the CPU and copying it over.
        y_pred = torch.zeros((x.shape[0], 1), device=x.device)
        for mlp in self.mlp_list:
            # Out-of-place add so the sum follows normal dtype promotion
            # (an in-place add into a float32 buffer rejects wider dtypes).
            y_pred = y_pred + mlp(x)
        return y_pred / self.num_mlp

    def fit(self, train_loader, n_epochs, batch_size, lr, device=None):
        """Train every member MLP from scratch, one after another.

        Each member is re-initialised with ``weights_init``, moved to
        ``device`` and optimised with Adam on binary cross-entropy.

        Args:
            train_loader: Iterable yielding ``(features, labels)`` batches.
            n_epochs: Number of passes over ``train_loader`` per member.
            batch_size: Unused; batching is determined by ``train_loader``.
                Kept so existing calls remain valid.
            lr: Adam learning rate.
            device: Device to train on. When ``None``, the first CUDA device
                is used if available, otherwise the CPU.
        """
        if device is None:
            # Resolve the device locally rather than reading a notebook-level
            # global that may not exist (or be stale) when fit() is called.
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        criterion = nn.BCELoss()

        for mlp in self.mlp_list:
            # Fresh random weights for each member.
            mlp.apply(weights_init)
            mlp.to(device)
            # Ensure dropout is active even if the model was put in eval mode
            # before this call.
            mlp.train()

            optimizer = optim.Adam(mlp.parameters(), lr=lr)

            for epoch in tqdm(range(n_epochs)):
                for x, y in train_loader:
                    x = x.to(device)
                    y = y.to(device)

                    optimizer.zero_grad()
                    # The network runs in float32; the loss is computed in
                    # float64.
                    y_pred = mlp(x.float()).double()
                    # BCELoss requires a floating target of the same dtype as
                    # the prediction; casting also covers integer label
                    # tensors.
                    loss = criterion(y_pred, y.double().unsqueeze(1))
                    loss.backward()
                    optimizer.step()

def weights_init(m):
    """Re-initialise a module in place if it is an ``nn.Linear`` layer.

    Intended for use with ``Module.apply``. Linear weights receive Xavier
    (Glorot) uniform initialisation and the bias, when the layer has one, is
    set to zero. Any other module type is left untouched.

    Args:
        m: The module visited by ``apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    # Layers built with bias=False have m.bias set to None; skip them instead
    # of crashing inside the initialiser.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0.0)

# Hyperparameters
input_dim = train_x.shape[1]  # number of input features
hidden_dim = 256  # width of the last hidden layer
num_mlp = 10  # number of base models in the ensemble
n_epochs = 40  # training epochs per base model
batch_size = 128  # mini-batch size
lr = 1e-4  # learning rate
seed = 42  # random seed for weight initialisation, shuffling and dropout

# Seed PyTorch so that re-running the cell gives repeatable initial weights
# and batch order. Some GPU kernels may still be non-deterministic.
torch.manual_seed(seed)

# Train on the GPU when one is available, otherwise on the CPU. Chosen before
# the model is built so everything below sees the same device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build the RF_MLP ensemble
rf_mlp = RF_MLP(input_dim=input_dim, hidden_dim=hidden_dim, num_mlp=num_mlp)

# Wrap the feature/label tensors as PyTorch datasets
train_data = TensorDataset(train_x, train_y)
test_data = TensorDataset(test_x, test_y)
valid_data = TensorDataset(valid_x, valid_y)

# Wrap the datasets in data loaders; only the training loader shuffles
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last=False)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size, drop_last=False)
valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch_size, drop_last=False)

# Train the RF_MLP ensemble
rf_mlp.fit(train_loader, n_epochs=n_epochs, batch_size=batch_size, lr=lr)


from sklearn.metrics import balanced_accuracy_score, recall_score, precision_score, f1_score, roc_auc_score

def eval(dataloader, model, device):
    """Evaluate a binary classifier on a data loader and print its metrics.

    Runs ``model`` over every batch without gradients, thresholds the
    predicted probabilities at 0.5, and prints balanced accuracy, macro
    recall, macro precision, macro F1 and ROC AUC.

    Note: this function shadows the built-in ``eval``; the name is kept so
    existing calls continue to work.

    Args:
        dataloader: Iterable of batches where ``batch[0]`` holds the features
            and ``batch[1]`` the labels.
        model: Module returning one probability per sample. It is not moved
            by this function, so it must already live on ``device``.
        device: Device the batches are moved to.

    Returns:
        Long tensor of 0/1 predictions on ``device``, in the order the loader
        produced the samples (a shuffling loader therefore yields shuffled
        predictions).

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    was_training = model.training
    model.eval()
    prob_chunks = []
    label_chunks = []
    try:
        with torch.no_grad():
            # Iterating the loader directly lets tqdm show a total when the
            # loader has a length.
            for data in tqdm(dataloader):
                x = data[0].to(device).float()
                label_chunks.append(data[1].to(device))
                # reshape(-1) keeps a 1-D result even for a batch of one
                # sample; a bare squeeze() would give a 0-d tensor there,
                # which torch.cat cannot concatenate.
                prob_chunks.append(model(x).double().reshape(-1))

        if not prob_chunks:
            raise ValueError("dataloader yielded no batches to evaluate")

        # Concatenate once instead of growing the tensors on every batch.
        probs = torch.cat(prob_chunks, dim=0)
        true_labels = torch.cat(label_chunks, dim=0)

        # Threshold the probabilities to obtain hard binary predictions.
        predicted_labels = (probs > 0.5).long()

        # Convert to NumPy once for all sklearn metrics.
        y_true = true_labels.cpu().numpy()
        y_pred = predicted_labels.cpu().numpy()
        y_score = probs.cpu().numpy()

        accuracy = balanced_accuracy_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred, average='macro')
        precision = precision_score(y_true, y_pred, average='macro')
        f1 = f1_score(y_true, y_pred, average='macro')
        auc = roc_auc_score(y_true, y_score, average='macro')

        print("模型精度为：{:.4f}".format(accuracy))
        print("模型召回率为：{:.4f}".format(recall))
        print("模型精确率为：{:.4f}".format(precision))
        print("模型F1分数为：{:.4f}".format(f1))
        print("模型AUC值为：{:.4f}".format(auc))
    finally:
        # Restore the mode the model was in before evaluation, even if a
        # metric computation raised.
        model.train(was_training)

    return predicted_labels

# Report metrics on each split. The notebook-level `device` is passed instead
# of a hard-coded "cuda:0" so evaluation also works on CPU-only machines.
# train_loader shuffles, so `a` follows that shuffled sample order.
print("训练集合指标：\n")
a = eval(train_loader, rf_mlp, device)
print("验证集合指标：\n")
b = eval(valid_loader, rf_mlp, device)
print("测试集合指标：\n")
c = eval(test_loader, rf_mlp, device)
print('\n')

100%|██████████| 40/40 [01:21<00:00,  2.03s/it]
100%|██████████| 40/40 [01:20<00:00,  2.02s/it]
100%|██████████| 40/40 [01:20<00:00,  2.02s/it]
100%|██████████| 40/40 [01:21<00:00,  2.04s/it]
100%|██████████| 40/40 [01:20<00:00,  2.01s/it]
100%|██████████| 40/40 [01:20<00:00,  2.02s/it]
100%|██████████| 40/40 [01:21<00:00,  2.03s/it]
100%|██████████| 40/40 [01:20<00:00,  2.02s/it]
100%|██████████| 40/40 [01:20<00:00,  2.00s/it]
100%|██████████| 40/40 [01:21<00:00,  2.04s/it]


训练集合指标：



442it [00:01, 435.79it/s]


模型精度为：0.7582
模型召回率为：0.7582
模型精确率为：0.7594
模型F1分数为：0.7579
模型AUC值为：0.8357
验证集合指标：



56it [00:00, 326.63it/s]


模型精度为：0.7498
模型召回率为：0.7498
模型精确率为：0.7509
模型F1分数为：0.7498
模型AUC值为：0.8279
测试集合指标：



56it [00:00, 387.36it/s]

模型精度为：0.7522
模型召回率为：0.7522
模型精确率为：0.7535
模型F1分数为：0.7518
模型AUC值为：0.8292





