# AlexNet训练ImageNet数据集

本notebook实现了AlexNet在ImageNet数据集上的训练。AlexNet是2012年ImageNet比赛的冠军模型,标志着深度学习在计算机视觉领域的突破。

## 网络特点
- 8层网络(5层卷积 + 3层全连接)
- 使用ReLU激活函数
- 使用Dropout防止过拟合
- 使用数据增强
- 原始输入: 224×224×3的RGB图像

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, transforms
import time
import os
from pathlib import Path

## 1. 定义AlexNet网络结构

这是参照原始论文实现的单GPU版AlexNet架构(未包含原论文中的局部响应归一化LRN层),适用于ImageNet的224×224输入图像。

In [None]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutions followed by three linear layers.

    The sub-modules are exposed as ``features`` (convolutional stack),
    ``avgpool`` (adaptive 6x6 average pooling) and ``classifier``
    (dropout + fully connected head).

    Args:
        num_classes: Number of logits produced by the last linear layer.
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        # Convolutional feature extractor. Shape notes assume a 3x224x224
        # input image.
        conv_stack = [
            # conv1: 3x224x224 -> 96x55x55, max-pool -> 96x27x27
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # conv2: 96x27x27 -> 256x27x27, max-pool -> 256x13x13
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # conv3: 256x13x13 -> 384x13x13
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # conv4: 384x13x13 -> 384x13x13
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # conv5: 384x13x13 -> 256x13x13, max-pool -> 256x6x6
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Adaptive pooling pins the spatial size to 6x6, so the flattened
        # width below is always 256 * 6 * 6 whatever the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Fully connected classification head.
        flat_width = 256 * 6 * 6
        hidden_width = 4096
        head = [
            nn.Dropout(p=0.5),
            nn.Linear(flat_width, hidden_width),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_width, hidden_width),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_width, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        pooled = self.avgpool(self.features(x))
        # Keep the batch dimension, flatten everything else.
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

## 2. 验证网络结构

In [None]:
# Smoke test: build the network and push one random 224x224 RGB image
# through it to confirm the output shape.
model = AlexNet(num_classes=1000)
test_input = torch.randn(1, 3, 224, 224)
output = model(test_input)

# Total number of parameter elements, expressed in millions.
param_millions = sum(p.numel() for p in model.parameters()) / 1e6

print(f"输入形状: {test_input.shape}")
print(f"输出形状: {output.shape}")
print(f"\n模型参数量: {param_millions:.2f}M")

## 3. 数据预处理和加载

ImageNet数据集说明:
- 训练集: 约128万张图像
- 验证集: 5万张图像
- 1000个类别

注意: 你需要下载ImageNet数据集并设置正确的路径。如果没有ImageNet,可以使用ImageNet的子集如tiny-imagenet或其他数据集。

In [None]:
# Root directory of the ImageNet dataset (edit to match your setup).
# Without ImageNet, the notebook falls back to CIFAR-10 as a demo.
data_dir = "/path/to/imagenet"  # change to your ImageNet path

# Use ImageNet only when both split folders read by the loading cell exist.
# A bare existence check on data_dir would also accept a regular file or a
# directory without train/ and val/, and the dataset construction would then
# fail later with a less obvious error.
use_imagenet = all(
    os.path.isdir(os.path.join(data_dir, split)) for split in ('train', 'val')
)

if not use_imagenet:
    print("未找到ImageNet数据集,将使用CIFAR-10作为演示")
    print("如需使用ImageNet,请下载数据集并设置正确的data_dir路径")

In [None]:
# Per-channel normalisation shared by both pipelines, using the commonly
# quoted ImageNet RGB mean and standard deviation.
imagenet_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: random augmentation followed by tensor conversion.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),   # random crop rescaled to 224x224
    transforms.RandomHorizontalFlip(),   # random left-right flip
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    transforms.ToTensor(),
    imagenet_normalize,
])

# Validation pipeline: deterministic resize and centre crop, no augmentation.
val_transform = transforms.Compose([
    transforms.Resize(256),       # shorter side resized to 256
    transforms.CenterCrop(224),   # central 224x224 patch
    transforms.ToTensor(),
    imagenet_normalize,
])

In [None]:
# Build the train/validation datasets and their loaders.
batch_size = 128  # tune to fit GPU memory
num_workers = 4   # number of DataLoader worker processes

if use_imagenet:
    # ImageNet laid out as <data_dir>/{train,val}/<class>/<image>.
    train_dataset = datasets.ImageFolder(
        root=os.path.join(data_dir, 'train'),
        transform=train_transform
    )
    val_dataset = datasets.ImageFolder(
        root=os.path.join(data_dir, 'val'),
        transform=val_transform
    )
else:
    # CIFAR-10 fallback for demonstration (downloaded on first use).
    train_dataset = datasets.CIFAR10(
        root='./data',
        train=True,
        download=True,
        transform=train_transform
    )
    val_dataset = datasets.CIFAR10(
        root='./data',
        train=False,
        download=True,
        transform=val_transform
    )

# Label indices are assigned per dataset, so differing class lists would make
# the validation labels disagree with the training labels.
if val_dataset.classes != train_dataset.classes:
    raise ValueError("train and val datasets expose different class lists")

# Take the class count from the data instead of hard-coding 1000, so ImageNet
# subsets (fewer class folders) produce a correctly sized classifier.
num_classes = len(train_dataset.classes)

# Pinned host memory only helps host-to-GPU copies; requesting it without
# CUDA just triggers a warning.
loader_kwargs = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=torch.cuda.is_available(),
)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

print(f"训练集大小: {len(train_dataset)}")
print(f"验证集大小: {len(val_dataset)}")
print(f"类别数: {num_classes}")
print(f"批次大小: {batch_size}")

## 4. 设置训练参数和优化器

In [None]:
# Total number of training epochs.
num_epochs = 90

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"使用设备: {device}")

# Fresh network sized for the selected dataset, moved to the target device.
model = AlexNet(num_classes=num_classes)
model = model.to(device)

# Cross-entropy over the class logits.
criterion = nn.CrossEntropyLoss()

# SGD with momentum and L2 weight decay.
sgd_settings = {
    'lr': 0.01,            # initial learning rate
    'momentum': 0.9,
    'weight_decay': 5e-4,  # L2 regularisation strength
}
optimizer = optim.SGD(model.parameters(), **sgd_settings)

# Multiply the learning rate by 0.1 every 30 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

## 5. 定义训练和验证函数

In [None]:
def train_epoch(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, labels)``. The epoch
            loss is sample-weighted, which assumes the criterion returns the
            per-batch *mean* loss (the ``nn.CrossEntropyLoss`` default).
        optimizer: Optimiser stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: mean loss per sample and top-1 accuracy
        in percent. Both are ``0.0`` when the loader yields no samples.
    """
    model.train()
    loss_sum = 0.0  # sum of per-sample losses seen so far
    correct = 0
    total = 0
    num_batches = len(train_loader)

    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so that a short final
        # batch does not count as much as a full one.
        batch_count = labels.size(0)
        loss_sum += loss.item() * batch_count
        total += batch_count
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()

        # Progress report every 100 batches
        if (i + 1) % 100 == 0:
            print(f'Batch [{i+1}/{num_batches}], '
                  f'Loss: {loss_sum/total:.4f}, '
                  f'Acc: {100.*correct/total:.2f}%')

    # An empty loader would otherwise divide by zero.
    if total == 0:
        return 0.0, 0.0

    epoch_loss = loss_sum / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc


def validate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, labels)``. The loss is
            sample-weighted, which assumes the criterion returns the
            per-batch *mean* loss (the ``nn.CrossEntropyLoss`` default).
        device: Device the batches are moved to.

    Returns:
        ``(val_loss, val_acc, val_acc_top5)``: mean loss per sample, top-1
        accuracy and top-5 accuracy (both in percent). When the model has
        fewer than five outputs, the "top-5" figure uses all of them. All
        three values are ``0.0`` when the loader yields no samples.
    """
    model.eval()
    loss_sum = 0.0  # sum of per-sample losses
    correct = 0
    correct_top5 = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Weight the batch-mean loss by the batch size so a short final
            # batch is not over-represented.
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            total += batch_count

            # Top-1 accuracy
            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()

            # Top-k accuracy; clamp k because topk raises when k exceeds the
            # number of classes.
            k = min(5, outputs.size(1))
            _, pred_topk = outputs.topk(k, 1, True, True)
            # Each row holds k distinct class indices, so at most one of
            # them can equal the label.
            correct_top5 += pred_topk.eq(labels.view(-1, 1)).sum().item()

    # An empty loader would otherwise divide by zero.
    if total == 0:
        return 0.0, 0.0, 0.0

    val_loss = loss_sum / total
    val_acc = 100. * correct / total
    val_acc_top5 = 100. * correct_top5 / total

    return val_loss, val_acc, val_acc_top5

## 6. 训练模型

In [None]:
# Best top-1 validation accuracy so far, and where its checkpoint is stored.
best_acc = 0.0
checkpoint_dir = './checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

# Per-epoch metrics, later used for plotting.
history = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': [],
    'val_acc_top5': []
}

print("开始训练...\n")
start_time = time.time()

for epoch in range(num_epochs):
    epoch_start = time.time()

    # Read the learning rate before the scheduler steps, so the summary
    # reports the rate this epoch actually trained with rather than the
    # next epoch's.
    current_lr = optimizer.param_groups[0]['lr']

    # Train
    print(f'Epoch [{epoch+1}/{num_epochs}]')
    print('-' * 60)
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)

    # Validate
    val_loss, val_acc, val_acc_top5 = validate(model, val_loader, criterion, device)

    # Advance the learning-rate schedule for the next epoch
    scheduler.step()

    # Record history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    history['val_acc_top5'].append(val_acc_top5)

    epoch_time = time.time() - epoch_start

    # Epoch summary
    print(f'\nEpoch {epoch+1} Summary:')
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
    print(f'Val Loss: {val_loss:.4f}, Val Acc (Top-1): {val_acc:.2f}%, Val Acc (Top-5): {val_acc_top5:.2f}%')
    print(f'Learning Rate: {current_lr:.6f}')
    print(f'Epoch Time: {epoch_time:.2f}s')
    print('=' * 60)

    # Keep only the checkpoint with the best top-1 validation accuracy.
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save({
            'epoch': epoch,  # zero-based index of the epoch just finished
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Stored so a resumed run can continue the same LR schedule.
            'scheduler_state_dict': scheduler.state_dict(),
            'best_acc': best_acc,
        }, os.path.join(checkpoint_dir, 'alexnet_best.pth'))
        print(f'保存最佳模型,验证准确率: {best_acc:.2f}%\n')

total_time = time.time() - start_time
print(f'\n训练完成!总用时: {total_time/3600:.2f}小时')
print(f'最佳验证准确率: {best_acc:.2f}%')

## 7. 可视化训练过程

In [None]:
import matplotlib.pyplot as plt

# One entry per panel: (title, y-axis label, [(history key, legend label)]).
panels = [
    ('训练和验证损失', 'Loss',
     [('train_loss', 'Train Loss'), ('val_loss', 'Val Loss')]),
    ('训练和验证准确率 (Top-1)', 'Accuracy (%)',
     [('train_acc', 'Train Acc'), ('val_acc', 'Val Acc (Top-1)')]),
    ('验证准确率对比', 'Accuracy (%)',
     [('val_acc', 'Top-1 Acc'), ('val_acc_top5', 'Top-5 Acc')]),
]

# Draw the three panels side by side in a single figure.
plt.figure(figsize=(15, 5))
for position, (title, y_label, curves) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    for key, legend_label in curves:
        plt.plot(history[key], label=legend_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.grid(True)

# Save the figure to disk, then display it.
plt.tight_layout()
plt.savefig('alexnet_training_history.png', dpi=300, bbox_inches='tight')
plt.show()

## 8. 加载最佳模型并进行测试

In [None]:
# Load the best checkpoint written during training.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can still be loaded on a CPU-only machine.
checkpoint = torch.load(
    os.path.join(checkpoint_dir, 'alexnet_best.pth'),
    map_location=device,
)
model.load_state_dict(checkpoint['model_state_dict'])
# The stored epoch index is zero-based; add 1 to match the 1-based epoch
# numbers printed in the training log.
print(f"已加载最佳模型(Epoch {checkpoint['epoch'] + 1}, 准确率: {checkpoint['best_acc']:.2f}%)")

# Re-evaluate the restored weights on the validation set.
val_loss, val_acc, val_acc_top5 = validate(model, val_loader, criterion, device)
print(f"\n最终测试结果:")
print(f"验证损失: {val_loss:.4f}")
print(f"Top-1准确率: {val_acc:.2f}%")
print(f"Top-5准确率: {val_acc_top5:.2f}%")

## 9. 使用预训练模型(可选)

PyTorch提供了在ImageNet上预训练的AlexNet模型,可以直接加载使用。

In [None]:
# Load torchvision's AlexNet with ImageNet-1K (V1) pretrained weights.
from torchvision.models import alexnet, AlexNet_Weights

pretrained_weights = AlexNet_Weights.IMAGENET1K_V1
pretrained_model = alexnet(weights=pretrained_weights)
pretrained_model = pretrained_model.to(device)

# Depending on the torchvision release, the reference accuracies live either
# directly in ``meta`` or nested under meta['_metrics']['ImageNet-1K'].
# Support both layouts so this cell does not raise KeyError.
pretrained_meta = pretrained_weights.meta
pretrained_metrics = pretrained_meta.get('_metrics', {}).get('ImageNet-1K', pretrained_meta)

print("已加载预训练的AlexNet模型")
print(f"\n预训练模型在ImageNet上的性能:")
print(f"Top-1准确率: {pretrained_metrics['acc@1']:.2f}%")
print(f"Top-5准确率: {pretrained_metrics['acc@5']:.2f}%")

## 10. 推理示例

使用训练好的模型对单张图像进行预测。

In [None]:
def predict_image(model, image_path, transform, device, class_names=None):
    """Classify a single image file and plot its top predictions.

    Args:
        model: Network producing class logits; switched to evaluation mode.
        image_path: Path of the image file to classify.
        transform: Callable turning an RGB PIL image into a tensor.
        device: Device used for inference.
        class_names: Optional sequence mapping class index to a display
            name. When omitted or empty, ``'Class <idx>'`` labels are used.

    Returns:
        ``(top5_idx, top5_prob)``: NumPy arrays with the indices and softmax
        probabilities of the most likely classes, most likely first. They
        hold five entries, or fewer when the model has fewer than five
        outputs.
    """
    # Imported locally so the function works even if the plotting cell that
    # imports pyplot at notebook level has not been run.
    import matplotlib.pyplot as plt
    from PIL import Image

    # Load and preprocess the image. The context manager closes the file
    # handle; convert() returns an independent in-memory copy.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    image_tensor = transform(image).unsqueeze(0).to(device)

    # Inference
    model.eval()
    with torch.no_grad():
        outputs = model(image_tensor)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)

        # topk raises when k exceeds the number of classes, so clamp it.
        k = min(5, probabilities.size(1))
        top5_prob, top5_idx = probabilities.topk(k, 1, True, True)

    top5_prob = top5_prob.cpu().numpy()[0]
    top5_idx = top5_idx.cpu().numpy()[0]

    # Left panel: the input image.
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.axis('off')
    plt.title('输入图像')

    # Right panel: horizontal bar chart of the top probabilities.
    plt.subplot(1, 2, 2)
    if class_names:
        labels = [class_names[idx] for idx in top5_idx]
    else:
        labels = [f'Class {idx}' for idx in top5_idx]

    plt.barh(range(k), top5_prob)
    plt.yticks(range(k), labels)
    plt.xlabel('概率')
    plt.title('Top-5预测')
    plt.gca().invert_yaxis()  # most likely class at the top

    plt.tight_layout()
    plt.show()

    return top5_idx, top5_prob

# 使用示例(需要提供图像路径)
# top5_classes, top5_probs = predict_image(model, 'path/to/image.jpg', val_transform, device)

## 总结

本notebook实现了完整的AlexNet训练流程:

1. **网络架构**: 8层深度网络(5层卷积 + 3层全连接)
2. **关键技术**:
   - ReLU激活函数
   - Dropout正则化
   - 数据增强
   - 学习率衰减
3. **训练策略**: SGD + Momentum优化器
4. **评估指标**: Top-1和Top-5准确率

### AlexNet历史意义
- 2012年ImageNet竞赛冠军
- Top-5错误率为15.3%,大幅领先第二名的26.2%
- 标志着深度学习在计算机视觉的突破
- 率先证明了利用GPU训练大规模CNN的可行性与有效性

### 实际应用建议
- 对于新任务,建议使用预训练模型进行迁移学习
- 现代架构(如ResNet、EfficientNet)性能更优
- AlexNet适合作为学习深度学习的入门模型