# AlexNet on MNIST

In [1]:
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
# import pytorch_lightning as pl
from torch import nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt

In [2]:
# 检查是否有可用的GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 数据预处理：将28x28的灰度图转换为224x224的RGB图
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 调整大小
    transforms.Grayscale(num_output_channels=3),  # 灰度图转RGB
    transforms.ToTensor(),
])

# 加载训练和测试数据集
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [3]:
# 定义AlexNet
class AlexNet(nn.Module):
    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [4]:
# 初始化模型
model = AlexNet().to(device)

In [5]:
print(model)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, 

In [6]:
from torchsummary import summary

In [7]:
summary(model, input_size=(3, 244, 244))  # pl_model是你的LightningModule实例

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 60, 60]          23,296
              ReLU-2           [-1, 64, 60, 60]               0
         MaxPool2d-3           [-1, 64, 29, 29]               0
            Conv2d-4          [-1, 192, 29, 29]         307,392
              ReLU-5          [-1, 192, 29, 29]               0
         MaxPool2d-6          [-1, 192, 14, 14]               0
            Conv2d-7          [-1, 384, 14, 14]         663,936
              ReLU-8          [-1, 384, 14, 14]               0
            Conv2d-9          [-1, 256, 14, 14]         884,992
             ReLU-10          [-1, 256, 14, 14]               0
           Conv2d-11          [-1, 256, 14, 14]         590,080
             ReLU-12          [-1, 256, 14, 14]               0
        MaxPool2d-13            [-1, 256, 6, 6]               0
          Dropout-14                 [-

In [8]:
# 获取一个批次的数据
data_iter = iter(test_loader)
images, labels = next(data_iter)

# 前向传播
outputs = model(images)
criterion = nn.CrossEntropyLoss()
loss = criterion(outputs, labels)
print(loss)

# 清除之前的梯度
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer.zero_grad()

# 反向传播
loss.backward()
print("Backward pass successful.")

tensor(2.3004, grad_fn=<NllLossBackward0>)
Backward pass successful.


In [9]:
import torch.optim as optim

# 损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
num_epochs = 1
train_losses = []
val_losses = []
val_accuracies = []

def train_epoch():
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # 将输入和标签移到GPU
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    return running_loss / len(train_dataset)

def validate_epoch():
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # 将输入和标签移到GPU
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return running_loss / len(test_dataset), correct / total

for epoch in range(num_epochs):
    train_loss = train_epoch()
    val_loss, val_acc = validate_epoch()
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

KeyboardInterrupt: 

In [None]:
# 可视化训练过程中的loss和accuracy
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.title('Loss per epoch')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(val_accuracies, label='Val Accuracy', color='green')
plt.title('Accuracy per epoch')
plt.legend()
plt.show()

In [None]:
# 确保我们使用的是基于nn.Module定义的AlexNet模型，而不是LitAlexNet
first_conv_layer = model.features[0]  # 直接从model.features获取第一个卷积层

def visualize_conv_weights(conv_layer, layer_name):
    weights = conv_layer.weight.detach().cpu()
    # 可视化前将权重调整为适合显示的形式
    grid = utils.make_grid(weights, nrow=8, normalize=True, scale_each=True)
    plt.figure(figsize=(16, 8))
    plt.imshow(utils.make_grid(grid, nrow=8).permute(1, 2, 0).numpy())
    plt.axis('off')
    plt.title(f'{layer_name} Weights')
    plt.show()

visualize_conv_weights(first_conv_layer, 'First Conv Layer')