In [1]:
import torch
from torch import nn, optim
import time
import matplotlib.pyplot as plt
from MODULE import DataloadFasionMNIST as dl

In [15]:
# AlexNet-style network for single-channel input and 10 output classes.
# The first fully connected layer expects 6400 = 256 * 5 * 5 flattened
# features, which is what the convolutional stack yields for 224x224 input.
net = nn.Sequential(
    # A large 11x11 window is used here to capture objects, and a stride of 4
    # reduces the output height and width. The number of output channels is
    # also much larger than in LeNet.
    nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    # Smaller convolution window; padding of 2 keeps the output height and
    # width equal to the input's, while the channel count grows.
    nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    # Three consecutive convolutions with small windows. The channel count
    # increases further except in the last one, and no pooling layer follows
    # the first two of them.
    nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Flatten(),
    # The fully connected layers are several times wider than LeNet's;
    # dropout is used to mitigate overfitting.
    nn.Linear(in_features=6400, out_features=4096),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=4096, out_features=4096),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    # Output layer: Fashion-MNIST has 10 classes rather than the 1000 used in
    # the original paper.
    nn.Linear(in_features=4096, out_features=10),
)

In [21]:
# Mini-batch size passed to the project-local loader below.
batch_size = 128
# Build the train/test iterators. resize=224 is requested because `net`'s first
# Linear layer expects 6400 = 256*5*5 features, i.e. 224x224 inputs.
# NOTE(review): presumably `load_dataset` resizes every image to 224x224 -- the
# helper is not visible here, confirm. This cell must run BEFORE the training
# cell: the RuntimeError recorded at the end of the notebook (a 256x2x2 map
# reaching a 3x3 pool) is consistent with un-resized 28x28 inputs.
train_iter, test_iter = dl.load_dataset(batch_size, resize=224)

In [9]:
# Hyperparameters handed to train_AlexNet further down.
lr = 0.01     # SGD learning rate
epochs = 10   # number of passes over the training iterator

In [19]:
def _plot_training_history(train_losses, train_accuracies, test_accuracies):
    """Draw per-epoch train loss, train accuracy and test accuracy on one axes."""
    epoch_axis = range(1, len(train_losses) + 1)
    _, ax = plt.subplots(figsize=(10, 6))

    ax.plot(epoch_axis, train_losses, 'b-', label='Train Loss')
    ax.plot(epoch_axis, train_accuracies, 'g-', label='Train Accuracy')
    ax.plot(epoch_axis, test_accuracies, 'r-', label='Test Accuracy')

    ax.set_xlabel('Epoch')
    ax.set_ylabel('Value')
    ax.set_title('Training Loss and Accuracy')
    ax.legend()
    ax.grid(True)
    plt.show()


def train_AlexNet(net, train_iter, test_iter, num_epochs, lr, device='cpu'):
    """Train ``net`` with plain SGD and cross-entropy loss, then plot the history.

    Every ``Linear`` and ``Conv2d`` layer is re-initialised with Xavier-uniform
    weights first, so each call restarts training from fresh weights. After
    every epoch the test accuracy is measured with ``evaluate_accuracy_gpu``
    and a progress line is printed; the reported time covers both the training
    pass and that evaluation.

    Args:
        net: The ``torch.nn.Module`` to train. It is modified in place
            (re-initialised, moved to ``device``, optimised).
        train_iter: Re-iterable yielding ``(X, y)`` mini-batches for training.
        test_iter: Iterable yielding ``(X, y)`` mini-batches for evaluation.
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate for ``torch.optim.SGD``.
        device: Device the network and every batch are moved to.

    Returns:
        None. When at least one epoch ran, a figure with the three curves is
        shown and a final summary line is printed. With ``num_epochs <= 0``
        the network is only initialised and moved, and a short notice is
        printed instead of plotting.
    """

    def init_weights(m):
        # Exact type match on purpose: subclasses keep their own initialisation.
        if type(m) in (torch.nn.Linear, torch.nn.Conv2d):
            torch.nn.init.xavier_uniform_(m.weight)

    net.apply(init_weights)

    print('Training on', device)
    net.to(device)

    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_fn = torch.nn.CrossEntropyLoss()

    # Per-epoch history used for the plot and the final summary.
    train_losses, train_accuracies, test_accuracies = [], [], []

    for epoch in range(num_epochs):
        loss_sum, num_correct, num_examples = 0.0, 0, 0
        net.train()  # evaluation below switches to eval mode, so reset each epoch

        start_time = time.time()

        for X, y in train_iter:
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss_fn(y_hat, y)
            l.backward()
            optimizer.step()

            with torch.no_grad():
                batch_len = X.shape[0]
                # The loss is a batch mean; weight it by the batch size so the
                # epoch average is correct even when the last batch is smaller.
                loss_sum += l.item() * batch_len
                num_correct += (y_hat.argmax(dim=1) == y).sum().item()
                num_examples += batch_len

        # An empty train_iter has no defined average; report NaN instead of
        # failing with ZeroDivisionError.
        if num_examples:
            train_loss = loss_sum / num_examples
            train_acc = num_correct / num_examples
        else:
            train_loss = train_acc = float('nan')

        test_acc = evaluate_accuracy_gpu(net, test_iter, device)

        train_losses.append(train_loss)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)

        epoch_time = time.time() - start_time
        print(f'Epoch {epoch + 1}/{num_epochs}, '
              f'train loss {train_loss:.3f}, train acc {train_acc:.3f}, '
              f'test acc {test_acc:.3f}, '
              f'time {epoch_time:.2f} sec')

    if not train_losses:
        # No epoch ran, so there is no history to plot and no final metrics to
        # report (the summary line would otherwise read unbound variables).
        print('No epochs were run; nothing to plot.')
        return

    _plot_training_history(train_losses, train_accuracies, test_accuracies)

    print(f'Final train acc: {train_accuracies[-1]:.3f}, '
          f'test acc: {test_accuracies[-1]:.3f}')

def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    The network is switched to evaluation mode (and left in that mode on
    return). Every batch is run without gradient tracking; a prediction is the
    arg-max over dim 1 of the network output and counts as correct when it
    equals the label.

    Args:
        net: The ``torch.nn.Module`` to evaluate.
        data_iter: Iterable yielding ``(X, y)`` mini-batches.
        device: Device each batch is moved to before the forward pass. When
            ``None``, the device of the network's first parameter is used; a
            network without parameters leaves the batches where they are.

    Returns:
        float: ``correct / total`` over all batches, or NaN when ``data_iter``
        yields no examples (accuracy is undefined in that case).
    """
    net.eval()
    if device is None:
        first_param = next(net.parameters(), None)
        if first_param is not None:
            device = first_param.device

    correct, total = 0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if device is not None:
                X, y = X.to(device), y.to(device)
            y_hat = net(X)
            correct += (y_hat.argmax(dim=1) == y).sum().item()
            total += y.size(0)

    # Guard the empty case instead of raising ZeroDivisionError.
    return correct / total if total else float('nan')

In [20]:
# Kick off training on the function's default device ('cpu'); the
# hyperparameters are passed by keyword so the call is self-describing.
train_AlexNet(net, train_iter, test_iter, num_epochs=epochs, lr=lr)

Training on cpu


RuntimeError: Given input size: (256x2x2). Calculated output size: (256x0x0). Output size is too small