# ResNet-18

In [None]:
import os

# Point both the lower-case and upper-case proxy variables at the local proxy,
# and exempt loopback addresses from proxying.
os.environ.update({
    'http_proxy': 'http://127.0.0.1:7893',
    'https_proxy': 'http://127.0.0.1:7893',
    'HTTP_PROXY': 'http://127.0.0.1:7893',
    'HTTPS_PROXY': 'http://127.0.0.1:7893',
    'no_proxy': '127.0.0.1,localhost',
    'NO_PROXY': '127.0.0.1,localhost',
})

# Echo the effective HTTP proxy as a quick sanity check.
print(f"HTTP代理: {os.environ.get('http_proxy')}")

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.nn import functional as F
from torchvision import datasets, transforms
import time

In [None]:
class Residual(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a skip connection.

    Args:
        input_channels: Number of channels of the incoming tensor.
        num_channels: Number of channels produced by both 3x3 convolutions.
        use_1x1conv: When True, the skip path goes through a 1x1 convolution
            (with stride ``strides``) so it matches the main path's channel
            count and spatial size. When False, the input is added unchanged,
            so the caller must ensure its shape already matches the main
            path's output.
        strides: Stride of the first 3x3 convolution (and of the 1x1
            convolution, if present).
    """

    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1) -> None:
        super().__init__()
        # Construction order is kept stable: it fixes both the parameter
        # order and the order in which random initial weights are drawn.
        self.conv1 = nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(in_channels=input_channels, out_channels=num_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        """Return ``relu(main_path(X) + skip(X))``."""
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Explicit None check: the optional projection is either a module or
        # None, and the decision should not hinge on module truthiness.
        if self.conv3 is not None:
            X = self.conv3(X)
        Y += X  # in-place add of the skip connection
        return F.relu(Y)

In [None]:
# Stem: 7x7 stride-2 convolution on a single-channel input, batch norm, ReLU,
# then a 3x3 stride-2 max pool.
b1 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(num_features=64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

In [None]:
def resnet_block(input_channels, num_channels, num_residuals, first_block=False):
    """Build the list of ``Residual`` units that make up one stage.

    Args:
        input_channels: Channel count fed into the first unit of the stage.
        num_channels: Channel count produced by every unit of the stage.
        num_residuals: Number of ``Residual`` units to create.
        first_block: When False, the first unit uses a stride-2 convolution
            and a 1x1 projection on the skip path; when True, every unit is a
            plain stride-1 ``Residual``.

    Returns:
        A list of ``Residual`` modules, in order.
    """
    units = []
    in_ch = input_channels
    for idx in range(num_residuals):
        downsample = idx == 0 and not first_block
        if downsample:
            unit = Residual(in_ch, num_channels, use_1x1conv=True, strides=2)
        else:
            unit = Residual(in_ch, num_channels)
        units.append(unit)
        # Every unit after the first one receives num_channels channels.
        in_ch = num_channels
    return units

In [None]:
# Four residual stages of two units each. Only the first stage is built with
# first_block=True (no stride-2 / 1x1 projection in its first unit).
b2 = nn.Sequential(*resnet_block(input_channels=64, num_channels=64, num_residuals=2, first_block=True))
b3 = nn.Sequential(*resnet_block(input_channels=64, num_channels=128, num_residuals=2))
b4 = nn.Sequential(*resnet_block(input_channels=128, num_channels=256, num_residuals=2))
b5 = nn.Sequential(*resnet_block(input_channels=256, num_channels=512, num_residuals=2))

In [None]:
# Full model: stem, four residual stages, global average pooling to 1x1,
# flatten, then a 512 -> 10 linear classifier.
net = nn.Sequential(
    b1,
    b2,
    b3,
    b4,
    b5,
    nn.AdaptiveAvgPool2d(output_size=(1, 1)),
    nn.Flatten(),
    nn.Linear(in_features=512, out_features=10),
)

In [None]:
# Push one random sample through the network and print the output shape after
# each top-level layer. The leading (1, 1) dimensions are the batch size and
# the number of input channels (e.g. 1 for a grayscale image).
X = torch.rand((1, 1, 224, 224))

for layer in net:
    X = layer(X)
    print(type(layer).__name__, 'output shape:\t', X.shape)

In [None]:
# Data preprocessing and loading with plain PyTorch / torchvision.
batch_size = 128

# Preprocessing: Resize(96) (not 224), convert to a tensor, then normalize
# the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(96),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# Fashion-MNIST train and test splits, stored under ./data and downloaded on
# first use. The train split is created first, then the test split.
train_dataset, test_dataset = (
    datasets.FashionMNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batch loaders: only the training loader shuffles.
train_iter = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_iter = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

print(f'训练集大小: {len(train_dataset)}, 测试集大小: {len(test_dataset)}')

In [None]:
# Training loop in plain PyTorch.
def train(net, train_iter, test_iter, num_epochs, lr, device):
    """Train ``net`` with SGD and cross-entropy loss, printing per-epoch stats.

    Weights of every ``nn.Linear`` / ``nn.Conv2d`` layer are re-initialised
    with Xavier-uniform before training, so calling this discards any
    previously learned weights in those layers.

    Args:
        net: Model to train; moved to ``device``.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches passed to
            ``evaluate_accuracy`` after every epoch.
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate for ``torch.optim.SGD``.
        device: Device on which the model and batches are placed.

    Returns:
        None. Progress is reported through ``print``.
    """
    def _xavier_init(module):
        # Exact-type match: subclasses of Linear/Conv2d are left untouched.
        if type(module) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(module.weight)

    net.apply(_xavier_init)
    print(f'training on {device}')
    net.to(device)

    # Optimizer and loss.
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        # evaluate_accuracy() leaves the model in eval mode, so switch back
        # to training mode at the start of every epoch.
        net.train()
        loss_total = 0.0
        correct = 0.0
        seen = 0
        num_batches = 0
        epoch_start = time.time()

        for inputs, targets in train_iter:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # Forward pass.
            logits = net(inputs)
            batch_loss = criterion(logits, targets)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # Running statistics for this epoch.
            loss_total += batch_loss.item()
            correct += (logits.argmax(dim=1) == targets).sum().item()
            seen += targets.shape[0]
            num_batches += 1

        # Accuracy on the held-out data (included in the reported time).
        test_acc = evaluate_accuracy(net, test_iter, device)

        mean_loss = loss_total / num_batches
        train_acc = correct / seen
        elapsed = time.time() - epoch_start
        print(f'epoch {epoch + 1}, loss {mean_loss:.4f}, '
              f'train acc {train_acc:.3f}, test acc {test_acc:.3f}, '
              f'time {elapsed:.1f} sec')

def evaluate_accuracy(net, data_iter, device):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    The model is switched to eval mode and is left in eval mode on return.
    Gradients are disabled while predicting.

    Args:
        net: Model whose output is scored by arg-max over dimension 1.
        data_iter: Iterable of ``(X, y)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Number of correct predictions divided by the number of samples seen.
    """
    net.eval()
    correct = 0.0
    total = 0

    with torch.no_grad():
        for features, labels in data_iter:
            features = features.to(device)
            labels = labels.to(device)
            predicted = net(features).argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.shape[0]

    return correct / total

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters.
lr = 0.1
num_epochs = 10

# Kick off training.
train(net, train_iter, test_iter, num_epochs=num_epochs, lr=lr, device=device)