In [1]:
# Install Optuna and its integration package into the environment of the running kernel.
# The explicit %pip magic is used instead of relying on IPython automagic.
%pip install optuna optuna-integration

Collecting optuna-integration
  Downloading optuna_integration-3.6.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna_integration-3.6.0-py3-none-any.whl (93 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.4/93.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: optuna-integration
Successfully installed optuna-integration-3.6.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import optuna
import matplotlib.pyplot as plt
import math

# Define the SpinalVGG model
Half_width = 128   # width of each slice of flattened conv features fed to a spinal layer
layer_width = 128  # number of output units of every spinal layer


class SpinalVGG(nn.Module):
    """VGG-style convolutional network with a "spinal" fully connected head.

    The feature extractor is four conv stages (``l1``..``l4``), each made of
    Conv3x3 -> BatchNorm -> ReLU groups followed by a 2x2 max pool. The first
    conv expects a single input channel. For the 28x28 inputs used in this
    notebook the flattened feature vector has 256 = 2 * Half_width entries.

    The head consists of four spinal layers. Each one receives one
    ``Half_width``-wide slice of the flattened features (alternating between
    the first and the second slice) concatenated with the output of the
    previous spinal layer. The outputs of all four layers are concatenated
    and mapped to ``num_classes`` scores.

    Args:
        num_classes: Number of output classes.
    """

    @staticmethod
    def _conv_pool(in_channels, *widths):
        """Build one conv stage: a Conv-BN-ReLU group per width, then a 2x2 max pool.

        All layers are constructed first and initialised afterwards, so the
        order in which random numbers are drawn matches a hand-written
        ``nn.Sequential`` followed by an init loop.
        """
        layers = []
        previous = in_channels
        for width in widths:
            layers.extend([
                nn.Conv2d(previous, width, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(width),
                nn.ReLU(inplace=True),
            ])
            previous = width
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        s = nn.Sequential(*layers)

        for m in s.children():
            if isinstance(m, nn.Conv2d):
                # Normal init with std = sqrt(2 / (k_h * k_w * out_channels))
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, 0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
        return s

    @staticmethod
    def _spinal_layer(in_features):
        """Build one spinal layer: Dropout -> Linear -> BatchNorm1d -> ReLU."""
        return nn.Sequential(
            nn.Dropout(p=0.5), nn.Linear(in_features, layer_width),
            nn.BatchNorm1d(layer_width), nn.ReLU(inplace=True))

    def two_conv_pool(self, in_channels, f1, f2):
        """Return a stage with two Conv-BN-ReLU groups (widths f1, f2) and a max pool."""
        return self._conv_pool(in_channels, f1, f2)

    def three_conv_pool(self, in_channels, f1, f2, f3):
        """Return a stage with three Conv-BN-ReLU groups (widths f1, f2, f3) and a max pool."""
        return self._conv_pool(in_channels, f1, f2, f3)

    def __init__(self, num_classes=10):
        super().__init__()
        self.l1 = self.two_conv_pool(1, 64, 64)
        self.l2 = self.two_conv_pool(64, 128, 128)
        self.l3 = self.three_conv_pool(128, 256, 256, 256)
        self.l4 = self.three_conv_pool(256, 256, 256, 256)

        # The first spinal layer only sees a feature slice; the others also
        # receive the previous spinal layer's output.
        self.fc_spinal_layer1 = self._spinal_layer(Half_width)
        self.fc_spinal_layer2 = self._spinal_layer(Half_width + layer_width)
        self.fc_spinal_layer3 = self._spinal_layer(Half_width + layer_width)
        self.fc_spinal_layer4 = self._spinal_layer(Half_width + layer_width)
        self.fc_out = nn.Sequential(
            nn.Dropout(p=0.5), nn.Linear(layer_width * 4, num_classes))

    def forward(self, x):
        """Return log-probabilities of shape (batch, num_classes) for a batch of images."""
        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        x = self.l4(x)
        # flatten() also copes with non-contiguous tensors, unlike view()
        x = torch.flatten(x, 1)

        first_slice = x[:, 0:Half_width]
        second_slice = x[:, Half_width:2 * Half_width]

        x1 = self.fc_spinal_layer1(first_slice)
        x2 = self.fc_spinal_layer2(torch.cat([second_slice, x1], dim=1))
        x3 = self.fc_spinal_layer3(torch.cat([first_slice, x2], dim=1))
        x4 = self.fc_spinal_layer4(torch.cat([second_slice, x3], dim=1))

        x = torch.cat([x1, x2, x3, x4], dim=1)

        x = self.fc_out(x)
        return F.log_softmax(x, dim=1)

# Load and preprocess the data
train_url = "https://www.dropbox.com/scl/fi/6tgxge3y0jot8075d5lng/train_imageclass.csv?rlkey=b4cj2ifgbzjlrcmals3t98eu2&st=zmwpkgke&dl=0&raw=1"
df_train = pd.read_csv(train_url, index_col='ID')

target_col = 'label'
num_classes = 10

# Every non-label column is pixel data: reshape each row to a 28x28x1 image
# and divide by 255.
X_train = (
    df_train.drop(columns='label')
    .values
    .reshape((-1, 28, 28, 1))
    .astype('float32')
    / 255.0
)

# One-hot encode the labels: row k of the identity matrix encodes class k
y_train = np.eye(num_classes)[df_train['label'].values.reshape(-1)]

# Hold out 20% of the samples for validation (fixed random_state)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Convert to float32 PyTorch tensors; images go from NHWC to NCHW layout
X_train, X_val = (
    torch.tensor(images).permute(0, 3, 1, 2).to(torch.float32)
    for images in (X_train, X_val)
)
y_train, y_val = (
    torch.tensor(targets).to(torch.float32)
    for targets in (y_train, y_val)
)


def _make_loader(features, targets, shuffle):
    """Wrap a pair of tensors in a DataLoader that yields batches of 64."""
    dataset = torch.utils.data.TensorDataset(features, targets)
    return torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=shuffle)


# Create the data loaders (only the training data is shuffled)
train_loader = _make_loader(X_train, y_train, shuffle=True)
val_loader = _make_loader(X_val, y_val, shuffle=False)

# Use the GPU when one is available
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

def objective(trial):
    """Optuna objective: train a fresh SpinalVGG and return its validation accuracy.

    Samples an SGD learning rate (log scale, 1e-5 to 1e-1) and a momentum
    (0.5 to 0.9), then trains for 20 epochs on ``train_loader`` with a StepLR
    schedule (learning rate multiplied by 0.1 every 10 epochs). After every
    epoch the accuracy on ``val_loader`` is reported to the trial so that the
    study's pruner can stop the trial early.

    Args:
        trial: The Optuna trial used to suggest hyperparameters and to
            report intermediate values.

    Returns:
        Validation accuracy (``correct / total``) after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: If the trial reports that it should
            be pruned after an epoch.
    """
    # Define the hyperparameter search space
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    momentum = trial.suggest_float('momentum', 0.5, 0.9)

    # Initialise the model and the optimiser
    network = SpinalVGG(num_classes=num_classes).to(device)
    optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # Loss function. SpinalVGG.forward already returns log-probabilities;
    # CrossEntropyLoss applies log_softmax again, which leaves
    # log-probabilities unchanged.
    criterion = nn.CrossEntropyLoss()

    # Train the model
    num_epochs = 20
    for epoch in range(num_epochs):
        network.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = network(inputs)
            # Labels are one-hot rows; the loss is given class indices
            loss = criterion(outputs, labels.argmax(dim=1))
            loss.backward()
            optimizer.step()

        scheduler.step()

        # Validate the model
        network.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = network(inputs)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels.argmax(dim=1)).sum().item()
        accuracy = correct / total
        trial.report(accuracy, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

# Run the Optuna hyperparameter search.
# Seed PyTorch (weight init, dropout, DataLoader shuffling) and the sampler so
# that a rerun explores the same hyperparameters. TPESampler is Optuna's
# default single-objective sampler, so only the seed changes here.
# NOTE: some GPU kernels are non-deterministic, so results may still differ slightly.
SEED = 42
torch.manual_seed(SEED)
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=50)

# Retrieve the best hyperparameters
best_params = study.best_params
print(f"Best parameters: {best_params}")

# Retrain a fresh model with the best hyperparameters
learning_rate, momentum = best_params['learning_rate'], best_params['momentum']
network = SpinalVGG(num_classes=num_classes).to(device)
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
criterion = nn.CrossEntropyLoss()


def _train_one_epoch(model, loader, opt, loss_fn):
    """Run one optimisation pass over ``loader`` and return the summed batch losses."""
    model.train()
    loss_sum = 0.0
    for batch_inputs, batch_labels in loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)
        opt.zero_grad()
        batch_loss = loss_fn(model(batch_inputs), batch_labels.argmax(dim=1))
        batch_loss.backward()
        opt.step()
        loss_sum += batch_loss.item()
    return loss_sum


# Train the model: 35 epochs here (the search trials in objective() use 20)
num_epochs = 35
for epoch in range(num_epochs):
    running_loss = _train_one_epoch(network, train_loader, optimizer, criterion)
    scheduler.step()
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

# Evaluate the retrained model on the validation split.
# NOTE: this is the same split that was used to pick the hyperparameters.
network.eval()
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = network(inputs)
        # Predicted class = index of the largest output per row
        predicted = outputs.max(dim=1).indices
        correct += (predicted == labels.argmax(dim=1)).sum().item()
        total += labels.size(0)
print(f'Validation Accuracy: {100 * correct / total}%')

# Predict on the test set
test_url = "https://www.dropbox.com/scl/fi/r8tjg2dea59q5timei3dg/test_imageclass.csv?rlkey=1dzvhckz1x9x3e05vgp00lm6f&st=qnmdgala&dl=0&raw=1"
df_test = pd.read_csv(test_url, index_col='ID')
X_test = df_test.values.reshape((-1, 28, 28, 1)).astype('float32') / 255.0
X_test = torch.tensor(X_test).permute(0, 3, 1, 2).to(torch.float32)

# Run inference in mini-batches: pushing the whole test set through the
# network in a single forward pass can exhaust device memory. Only the
# current batch is moved to the device.
test_loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_test), batch_size=256, shuffle=False)

network.eval()
batch_predictions = []
with torch.no_grad():
    for (inputs,) in test_loader:
        outputs = network(inputs.to(device))
        batch_predictions.append(outputs.argmax(dim=1).cpu())
y_pred = torch.cat(batch_predictions).numpy()

# Write the predictions to a CSV file, indexed like the test set
submission = pd.DataFrame(y_pred, index=df_test.index, columns=[target_col])
submission.to_csv('submission.csv')

print(submission)


[I 2024-07-10 08:58:29,963] A new study created in memory with name: no-name-ffd0cd9f-4613-4450-818c-75dab5a29c9f
[I 2024-07-10 09:01:56,601] Trial 0 finished with value: 0.6518333333333334 and parameters: {'learning_rate': 1.8031154903906834e-05, 'momentum': 0.5840082878404524}. Best is trial 0 with value: 0.6518333333333334.
[I 2024-07-10 09:05:21,344] Trial 1 finished with value: 0.9379166666666666 and parameters: {'learning_rate': 0.08451647465228938, 'momentum': 0.6566039081648753}. Best is trial 1 with value: 0.9379166666666666.
[I 2024-07-10 09:08:45,605] Trial 2 finished with value: 0.8534166666666667 and parameters: {'learning_rate': 0.0002622692155832004, 'momentum': 0.5432511360462362}. Best is trial 1 with value: 0.9379166666666666.
[I 2024-07-10 09:12:11,507] Trial 3 finished with value: 0.9365833333333333 and parameters: {'learning_rate': 0.0946087480443934, 'momentum': 0.5970137316230999}. Best is trial 1 with value: 0.9379166666666666.
[I 2024-07-10 09:15:37,859] Trial 

Best parameters: {'learning_rate': 0.04332021300219847, 'momentum': 0.713331254778012}
Epoch 1, Loss: 0.5087731792330742
Epoch 2, Loss: 0.29984266999860604
Epoch 3, Loss: 0.2399582008620103
Epoch 4, Loss: 0.20744726993888615
Epoch 5, Loss: 0.1736084934224685
Epoch 6, Loss: 0.15172170426448187
Epoch 7, Loss: 0.1312965278290212
Epoch 8, Loss: 0.11319389846175909
Epoch 9, Loss: 0.09444059476070106
Epoch 10, Loss: 0.0823770273051535
Epoch 11, Loss: 0.03542361423559487
Epoch 12, Loss: 0.01964785945112817
Epoch 13, Loss: 0.013744457902774836
Epoch 14, Loss: 0.009740920906808849
Epoch 15, Loss: 0.007270199733204208
Epoch 16, Loss: 0.005357571788481437
Epoch 17, Loss: 0.004074703407318641
Epoch 18, Loss: 0.0034983431485209925
Epoch 19, Loss: 0.0031050509937340393
Epoch 20, Loss: 0.0022876731814855398
Epoch 21, Loss: 0.002217207046788341
Epoch 22, Loss: 0.0021340811376576313
Epoch 23, Loss: 0.0020340104892966338
Epoch 24, Loss: 0.0021526384179596787
Epoch 25, Loss: 0.0020113590456506546
Epoch 2