In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from sklearn.model_selection import cross_validate
from sklearn.metrics import roc_auc_score, roc_curve
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [2]:
# Keep the first five entries of the 'feature_names' column; these names select
# the model's input columns (presumably the file is sorted by importance — TODO confirm)
importance = pd.read_csv('Data/importance.csv')['feature_names'].head(5)

In [3]:
# Read the training and test tables from disk
train_data = pd.read_csv('Data/protrain.csv')
test_data = pd.read_csv('Data/protest.csv')

# Inputs: the columns listed in `importance`.
# Targets: the last column of each table.
X_train = train_data[importance].values
y_train = train_data.iloc[:, -1].values
X_test = test_data[importance].values
y_test = test_data.iloc[:, -1].values

In [4]:
# Convert features and labels to float32 tensors.
# np.array(...) accepts both NumPy arrays and CPU tensors, so this cell can be
# re-run after the names already hold tensors (calling `.astype` on a tensor
# raises AttributeError). Like `.astype`, np.array makes a fresh copy.
X_train = torch.from_numpy(np.array(X_train, dtype=np.float32))
y_train = torch.from_numpy(np.array(y_train, dtype=np.float32))
X_test = torch.from_numpy(np.array(X_test, dtype=np.float32))
y_test = torch.from_numpy(np.array(y_test, dtype=np.float32))


# Mini-batch loader over the training set (reshuffled every epoch)
train_data = data.TensorDataset(X_train, y_train)
train_loader = data.DataLoader(train_data, batch_size=64, shuffle=True)

X_train.shape, y_train.shape

(torch.Size([29811, 5]), torch.Size([29811]))

In [5]:
class MLP(nn.Module):
    """Single-hidden-layer perceptron: Linear -> ReLU -> Linear -> Sigmoid.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are registered in forward-pass order.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map ``x`` of shape (..., input_size) to sigmoid outputs of shape (..., output_size)."""
        hidden = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(hidden))

In [6]:
def train(model, train_loader, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Args:
        model: Module whose output has a trailing singleton dimension
            (e.g. shape ``(batch, 1)``) that is squeezed to match the labels.
        train_loader: Iterable of ``(inputs, labels)`` batches; its
            ``dataset`` attribute must support ``len()``.
        criterion: Loss called as ``criterion(predictions, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        List with one sample-weighted average training loss per epoch.
    """
    model.train()
    num_samples = len(train_loader.dataset)
    train_loss_history = []
    for epoch in range(num_epochs):
        train_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            # Squeeze only the trailing dimension: a bare squeeze() would also
            # drop the batch dimension of a size-1 batch, producing a 0-d
            # tensor whose shape no longer matches `labels`.
            loss = criterion(outputs.squeeze(-1), labels)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by batch size so the epoch figure is a
            # per-sample average even when the last batch is smaller.
            train_loss += loss.item() * inputs.size(0)

        train_loss = train_loss / num_samples
        train_loss_history.append(train_loss)
        print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {train_loss:.4f}')

    return train_loss_history

In [7]:
def evaluate(model, inputs, labels):
    """Score ``model`` on ``inputs`` against binary ``labels``.

    Switches the model to eval mode, runs one forward pass without gradient
    tracking, and thresholds the outputs at 0.5 to obtain class predictions.

    Args:
        model: Module returning one probability per input row.
        inputs: Tensor of input features.
        labels: Tensor of ground-truth labels.

    Returns:
        Tuple ``(accuracy, auc_score)``: the fraction of thresholded
        predictions equal to the labels, and the ROC AUC of the raw outputs.
    """
    model.eval()
    with torch.no_grad():
        probs = model(inputs).squeeze().numpy()

    targets = labels.numpy()
    # Strictly greater than 0.5 counts as the positive class
    hard_preds = (probs > 0.5).astype(int)
    accuracy = np.mean(hard_preds == targets)
    auc_score = roc_auc_score(targets, probs)

    return accuracy, auc_score

In [8]:
# Define the hyperparameter grid
hidden_sizes = [32, 64, 128]
learning_rates = [0.001, 0.01, 0.1]
num_epochs = 20
seed = 42  # fixed seed so the whole search is repeatable from a fresh kernel

results = {'hidden_size': [], 'learning_rate': [], 'accuracy': [], 'auc_score': []}

# NOTE(review): configurations are scored on the test set, so the best score is
# an optimistic estimate; a held-out validation split would avoid that.
for hidden_size in hidden_sizes:
    for learning_rate in learning_rates:
        # Re-seed PyTorch's global RNG per configuration so weight
        # initialisation and batch shuffling do not depend on run order.
        torch.manual_seed(seed)
        model = MLP(input_size=X_train.shape[1], hidden_size=hidden_size, output_size=1)
        criterion = nn.BCELoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        train_loss_history = train(model, train_loader, criterion, optimizer, num_epochs)

        accuracy, auc_score = evaluate(model, X_test, y_test)

        # One row per (hidden_size, learning_rate) combination
        results['hidden_size'].append(hidden_size)
        results['learning_rate'].append(learning_rate)
        results['accuracy'].append(accuracy)
        results['auc_score'].append(auc_score)

Epoch 1/20, Training Loss: 0.4755
Epoch 2/20, Training Loss: 0.3695
Epoch 3/20, Training Loss: 0.3665
Epoch 4/20, Training Loss: 0.3658
Epoch 5/20, Training Loss: 0.3651
Epoch 6/20, Training Loss: 0.3647
Epoch 7/20, Training Loss: 0.3643
Epoch 8/20, Training Loss: 0.3638
Epoch 9/20, Training Loss: 0.3637
Epoch 10/20, Training Loss: 0.3635
Epoch 11/20, Training Loss: 0.3633
Epoch 12/20, Training Loss: 0.3630
Epoch 13/20, Training Loss: 0.3629
Epoch 14/20, Training Loss: 0.3627
Epoch 15/20, Training Loss: 0.3625
Epoch 16/20, Training Loss: 0.3623
Epoch 17/20, Training Loss: 0.3622
Epoch 18/20, Training Loss: 0.3623
Epoch 19/20, Training Loss: 0.3619
Epoch 20/20, Training Loss: 0.3617
Epoch 1/20, Training Loss: 0.3763
Epoch 2/20, Training Loss: 0.3656
Epoch 3/20, Training Loss: 0.3665
Epoch 4/20, Training Loss: 0.3653
Epoch 5/20, Training Loss: 0.3636
Epoch 6/20, Training Loss: 0.3597
Epoch 7/20, Training Loss: 0.3590
Epoch 8/20, Training Loss: 0.3619
Epoch 9/20, Training Loss: 0.3579
Epo

In [9]:
# Convert the grid-search results to a DataFrame (one row per configuration)
results_df = pd.DataFrame(results)

# Plot accuracy against learning rate, one line per hidden size.
# Every (hidden_size, learning_rate) pair has exactly one row, so a separate
# 'lines' trace per pair would hold a single point and render nothing.
fig = go.Figure()

for hidden_size in hidden_sizes:
    subset = results_df[results_df['hidden_size'] == hidden_size].sort_values('learning_rate')
    accuracy_trace = go.Scatter(
        x = subset['learning_rate'],
        y = subset['accuracy'],
        mode = 'lines+markers',
        name = f'Hidden Size: {hidden_size}',
    )
    fig.add_trace(accuracy_trace)

fig.update_layout(
    title='Parameter Tuning - Accuracy',
    xaxis_title='Learning Rate',
    xaxis_type='log',  # learning rates are spaced by factors of ten
    yaxis_title='Accuracy',
    legend_title='Hidden Size',
    font=dict(family='Times New Roman', size=18)
)

fig.write_html('Image/4_10_1.html')
fig.show()

results_df.head()

Unnamed: 0,hidden_size,learning_rate,accuracy,auc_score
0,32,0.001,0.833848,0.879492
1,32,0.01,0.835326,0.880518
2,32,0.1,0.831901,0.875141
3,64,0.001,0.832572,0.880238
4,64,0.01,0.833647,0.880529


In [10]:
# Plot the ROC curve for the configuration with the highest AUC
# idxmax() returns an index label, so look the row up with label-based .loc
best_result = results_df.loc[results_df['auc_score'].idxmax()]
print(X_train.shape[1], best_result['hidden_size'])
# The row is a float Series, so cast the hidden size back to int
best_model = MLP(input_size=X_train.shape[1], hidden_size=int(best_result['hidden_size']), output_size=1)
criterion = nn.BCELoss()
optimizer = optim.Adam(best_model.parameters(), lr=float(best_result['learning_rate']))
train(best_model, train_loader, criterion, optimizer, num_epochs)

_accuracy, auc_score = evaluate(best_model, X_test, y_test)

# Inference only: no gradient tracking, and flatten (N, 1) -> (N,) for sklearn
best_model.eval()
with torch.no_grad():
    predicted_probs = best_model(X_test).squeeze(-1).numpy()
fpr, tpr, _thresholds = roc_curve(y_test.numpy(), predicted_probs)

layout = go.Layout(
    title='Receiver Operating Characteristic',
    xaxis=dict(title='False Positive Rate'),
    yaxis=dict(title='True Positive Rate'),
    hovermode='closest',
    width=900, height=600,
    font=dict(family='Times New Roman', size=18)
)

auc_trace = go.Scatter(
    x = fpr,
    y = tpr,
    mode = 'lines',
    line=dict(color='blue', width=2),
    name = f'AUC: {auc_score:.4f}',
)

# Diagonal reference line (a random classifier)
diagonal_trace = go.Scatter(
    x=[0, 1],
    y=[0, 1],
    mode='lines',
    line=dict(color='red', width=2, dash='dash'),
    name='Random'
)
# Collect the traces. The list is deliberately NOT called `data`: that name is
# the `torch.utils.data` module alias, and rebinding it would break any later
# use of `data.TensorDataset` / `data.DataLoader`.
roc_traces = [auc_trace, diagonal_trace]

# Build, save, and display the figure
fig = go.Figure(data=roc_traces, layout=layout)
fig.write_html('Image/4_10_2.html')
fig.show()

5 128.0
Epoch 1/20, Training Loss: 0.4113
Epoch 2/20, Training Loss: 0.3655
Epoch 3/20, Training Loss: 0.3646
Epoch 4/20, Training Loss: 0.3641
Epoch 5/20, Training Loss: 0.3636
Epoch 6/20, Training Loss: 0.3630
Epoch 7/20, Training Loss: 0.3623
Epoch 8/20, Training Loss: 0.3617
Epoch 9/20, Training Loss: 0.3613
Epoch 10/20, Training Loss: 0.3610
Epoch 11/20, Training Loss: 0.3607
Epoch 12/20, Training Loss: 0.3607
Epoch 13/20, Training Loss: 0.3628
Epoch 14/20, Training Loss: 0.3625
Epoch 15/20, Training Loss: 0.3626
Epoch 16/20, Training Loss: 0.3620
Epoch 17/20, Training Loss: 0.3618
Epoch 18/20, Training Loss: 0.3619
Epoch 19/20, Training Loss: 0.3614
Epoch 20/20, Training Loss: 0.3613


In [14]:
import plotly.graph_objects as go
import numpy as np

# Instantiate an MLP; its layer sizes determine the diagram
mlp_model = MLP(input_size=5, hidden_size=64, output_size=1)

# Figure layout: transparent background, no grid, no tick labels, no legend
layout = go.Layout(
    title='MLP Network Structure',
    showlegend=False,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    font=dict(family='Times New Roman', size=18),
    width=1000, height=900
)

# Layer widths, read from the model's linear layers
input_layer_size = mlp_model.fc1.in_features
hidden_layer_size = mlp_model.fc1.out_features
output_layer_size = mlp_model.fc2.out_features

# Horizontal position of each layer (left to right)
input_layer_x = 0
hidden_layer_x = 0.5
output_layer_x = 1

# Vertical positions: nodes evenly spaced within each layer's range
input_layer_y = np.linspace(1, 3, input_layer_size)
hidden_layer_y = np.linspace(0, 4, hidden_layer_size)
output_layer_y = np.linspace(2, 4, output_layer_size)

# One marker trace per node, in input -> hidden -> output order
nodes = []
node_names = []
for layer_label, layer_x, layer_ys in (
    ('Input', input_layer_x, input_layer_y),
    ('Hidden', hidden_layer_x, hidden_layer_y),
    ('Output', output_layer_x, output_layer_y),
):
    for position, node_y in enumerate(layer_ys, start=1):
        node_label = f'{layer_label} {position}'
        nodes.append(go.Scatter(x=[layer_x], y=[node_y], mode='markers', marker=dict(size=25), name=node_label))
        node_names.append(node_label)

# One thin line trace per connection between adjacent layers
connections = [
    go.Scatter(x=[input_layer_x, hidden_layer_x], y=[src_y, dst_y], mode='lines', line=dict(width=0.2, color='blue'))
    for src_y in input_layer_y
    for dst_y in hidden_layer_y
]
connections += [
    go.Scatter(x=[hidden_layer_x, output_layer_x], y=[src_y, dst_y], mode='lines', line=dict(width=0.2, color='blue'))
    for src_y in hidden_layer_y
    for dst_y in output_layer_y
]

# Build, save, and display the network-structure figure
fig = go.Figure(data=nodes + connections, layout=layout)
fig.write_html('Image/4_10_3.html')
fig.show()