# AlexNet

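训练已经全部跑完，但中途网络断开了，所以后面的部分输出没有打印出来…… (Note: training ran to completion, but the network connection dropped partway through, so part of the later output below was never printed.)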

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Compute device: the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Training hyper-parameters
num_classes = 10
batch_size = 100
learning_rate = 0.001
num_epochs = 80

# Preprocessing pipelines. Both resize with Resize(224) and convert to tensors;
# only the training pipeline adds a random horizontal flip as augmentation.
transform_train = transforms.Compose(
    [transforms.RandomHorizontalFlip(), transforms.Resize(224), transforms.ToTensor()]
)
transform_test = transforms.Compose(
    [transforms.Resize(224), transforms.ToTensor()]
)

# CIFAR-10 splits stored under ./data/. Only the training split requests a
# download; no download flag is passed for the test split.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data/', train=True, transform=transform_train, download=True)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data/', train=False, transform=transform_test)

# Mini-batch loaders: training batches are reshuffled, test batches keep order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutions followed by a three-layer classifier.

    The first fully connected layer expects 5*5*256 = 6400 flattened features,
    which is what the convolutional stack produces for 3x224x224 inputs.

    Args:
        num_classes: Width of the final linear layer (logits per sample).
    """

    def __init__(self, num_classes=num_classes):
        super().__init__()

        # Layers are created in the same order as they are applied, so the
        # sub-module indices (conv.0 ... conv.12, fc.0 ... fc.6) stay stable.
        feature_layers = [
            # Stage 1: large-kernel, stride-4 convolution followed by pooling.
            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            # Stage 2: 5x5 convolution (size-preserving padding) and pooling.
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            # Stage 3: three size-preserving 3x3 convolutions, then pooling.
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Linear(5 * 5 * 256, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        ]
        self.fc = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Run the conv stack, flatten all but the batch dimension, and classify."""
        features = self.conv(x)
        flat = torch.flatten(features, start_dim=1)
        return self.fc(flat)
        
# Build the network, then move its parameters to the selected device.
model = AlexNet(num_classes=num_classes)
model = model.to(device)

# Cross-entropy objective, optimised with Adam at the configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

def update_lr(optimizer, lr):
    """Overwrite the 'lr' entry of every parameter group of ``optimizer``.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts.
        lr: Learning rate to store in each group.
    """
    for group in optimizer.param_groups:
        group.update(lr=lr)

# Train the model
total_step = len(train_loader)  # number of mini-batches per epoch
curr_lr = learning_rate
for epoch in range(num_epochs):
    # Explicitly select training mode so Dropout is active regardless of the
    # mode the module was left in (e.g. after an earlier model.eval() call).
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss every 100 steps.
        if (i+1) % 100 == 0:
            print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate: divide by 3 after every 20th epoch.
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

Files already downloaded and verified
Epoch [1/80], Step [100/500] Loss: 2.2834
Epoch [1/80], Step [200/500] Loss: 2.3054
Epoch [1/80], Step [300/500] Loss: 2.0415
Epoch [1/80], Step [400/500] Loss: 2.0790
Epoch [1/80], Step [500/500] Loss: 1.9260
Epoch [2/80], Step [100/500] Loss: 1.7179
Epoch [2/80], Step [200/500] Loss: 1.7784
Epoch [2/80], Step [300/500] Loss: 1.6494
Epoch [2/80], Step [400/500] Loss: 1.5744
Epoch [2/80], Step [500/500] Loss: 1.6141
Epoch [3/80], Step [100/500] Loss: 1.4282
Epoch [3/80], Step [200/500] Loss: 1.5248
Epoch [3/80], Step [300/500] Loss: 1.2281
Epoch [3/80], Step [400/500] Loss: 1.4265
Epoch [3/80], Step [500/500] Loss: 1.4360
Epoch [4/80], Step [100/500] Loss: 1.3595
Epoch [4/80], Step [200/500] Loss: 1.2796
Epoch [4/80], Step [300/500] Loss: 1.4515
Epoch [4/80], Step [400/500] Loss: 1.4469
Epoch [4/80], Step [500/500] Loss: 1.3263
Epoch [5/80], Step [100/500] Loss: 1.2847
Epoch [5/80], Step [200/500] Loss: 1.2507
Epoch [5/80], Step [300/500] Loss: 1.3

Epoch [39/80], Step [200/500] Loss: 0.3213
Epoch [39/80], Step [300/500] Loss: 0.2458
Epoch [39/80], Step [400/500] Loss: 0.3476
Epoch [39/80], Step [500/500] Loss: 0.2328
Epoch [40/80], Step [100/500] Loss: 0.3255
Epoch [40/80], Step [200/500] Loss: 0.3055
Epoch [40/80], Step [300/500] Loss: 0.3314
Epoch [40/80], Step [400/500] Loss: 0.4261
Epoch [40/80], Step [500/500] Loss: 0.2912
Epoch [41/80], Step [100/500] Loss: 0.2311
Epoch [41/80], Step [200/500] Loss: 0.2494
Epoch [41/80], Step [300/500] Loss: 0.2666
Epoch [41/80], Step [400/500] Loss: 0.2982
Epoch [41/80], Step [500/500] Loss: 0.3200
Epoch [42/80], Step [100/500] Loss: 0.2157
Epoch [42/80], Step [200/500] Loss: 0.1979
Epoch [42/80], Step [300/500] Loss: 0.2438
Epoch [42/80], Step [400/500] Loss: 0.1825
Epoch [42/80], Step [500/500] Loss: 0.1777
Epoch [43/80], Step [100/500] Loss: 0.2010
Epoch [43/80], Step [200/500] Loss: 0.3428
Epoch [43/80], Step [300/500] Loss: 0.1535
Epoch [43/80], Step [400/500] Loss: 0.2465
Epoch [43/8

Epoch [77/80], Step [300/500] Loss: 0.0169
Epoch [77/80], Step [400/500] Loss: 0.0527
Epoch [77/80], Step [500/500] Loss: 0.0465
Epoch [78/80], Step [100/500] Loss: 0.0714
Epoch [78/80], Step [200/500] Loss: 0.0487
Epoch [78/80], Step [300/500] Loss: 0.0433
Epoch [78/80], Step [400/500] Loss: 0.0303
Epoch [78/80], Step [500/500] Loss: 0.0504
Epoch [79/80], Step [100/500] Loss: 0.0508
Epoch [79/80], Step [200/500] Loss: 0.0172


In [8]:
# Evaluate the trained model on the test split
model.eval()  # evaluation mode: the Dropout layers in the classifier are disabled
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed while scoring
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the highest-scoring class for each sample in the batch.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Accuracy recorded for the saved run: 73.76 %
print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

Test Accuracy of the model on the 10000 test images: 73.76 %


In [9]:
# Print the module tree of the hand-written AlexNet (each layer with its settings).
print(model)

AlexNet(
  (conv): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=6400, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5)
    (6): Linear(in_features=4096, out_features=10, bias=True)
  )
)

# TORCHVISION.MODELS.ALEXNET

source code: https://pytorch.org/docs/stable/_modules/torchvision/models/alexnet.html

In [11]:
# Instantiate torchvision's reference AlexNet without pretrained weights and
# move it to `device`. The classifier width comes from the shared `num_classes`
# hyper-parameter instead of a second hard-coded 10, so both models in this
# file always agree on the number of output classes.
alexNet = torchvision.models.alexnet(pretrained=False, num_classes=num_classes).to(device)

In [14]:
# Cross-entropy objective and an Adam optimiser over the torchvision model's parameters
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=alexNet.parameters(), lr=learning_rate)

def update_lr(optimizer, lr):
    """Overwrite the 'lr' entry of every parameter group of ``optimizer``.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts.
        lr: Learning rate to store in each group.
    """
    for group in optimizer.param_groups:
        group.update(lr=lr)

# Train the model
total_step = len(train_loader)  # number of mini-batches per epoch
curr_lr = learning_rate
for epoch in range(num_epochs):
    # Explicitly select training mode. The evaluation code that follows this
    # loop calls alexNet.eval(), so running the loop again without this call
    # would keep training with Dropout disabled.
    alexNet.train()
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = alexNet(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss every 100 steps.
        if (i+1) % 100 == 0:
            print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate: divide by 3 after every 20th epoch.
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)
        
# Evaluate the trained torchvision model on the test split
alexNet.eval()  # evaluation mode: the Dropout layers in the classifier are disabled
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed while scoring
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = alexNet(images)
        # Index of the highest-scoring class for each sample in the batch.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Accuracy recorded for the saved run: 81.58 %
print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

Epoch [1/80], Step [100/500] Loss: 2.0849
Epoch [1/80], Step [200/500] Loss: 1.9574
Epoch [1/80], Step [300/500] Loss: 1.8432
Epoch [1/80], Step [400/500] Loss: 1.7765
Epoch [1/80], Step [500/500] Loss: 1.5289
Epoch [2/80], Step [100/500] Loss: 1.6538
Epoch [2/80], Step [200/500] Loss: 1.3661
Epoch [2/80], Step [300/500] Loss: 1.6833
Epoch [2/80], Step [400/500] Loss: 1.4120
Epoch [2/80], Step [500/500] Loss: 1.6024
Epoch [3/80], Step [100/500] Loss: 1.1505
Epoch [3/80], Step [200/500] Loss: 1.1965
Epoch [3/80], Step [300/500] Loss: 1.3950
Epoch [3/80], Step [400/500] Loss: 1.2313
Epoch [3/80], Step [500/500] Loss: 1.2610
Epoch [4/80], Step [100/500] Loss: 1.1245
Epoch [4/80], Step [200/500] Loss: 1.5226
Epoch [4/80], Step [300/500] Loss: 1.2910
Epoch [4/80], Step [400/500] Loss: 1.1755
Epoch [4/80], Step [500/500] Loss: 0.9071
Epoch [5/80], Step [100/500] Loss: 1.1490
Epoch [5/80], Step [200/500] Loss: 1.3728
Epoch [5/80], Step [300/500] Loss: 1.2280
Epoch [5/80], Step [400/500] Loss:

Epoch [39/80], Step [300/500] Loss: 0.4033
Epoch [39/80], Step [400/500] Loss: 0.3143
Epoch [39/80], Step [500/500] Loss: 0.2293
Epoch [40/80], Step [100/500] Loss: 0.4661
Epoch [40/80], Step [200/500] Loss: 0.2600
Epoch [40/80], Step [300/500] Loss: 0.3503
Epoch [40/80], Step [400/500] Loss: 0.3859
Epoch [40/80], Step [500/500] Loss: 0.2126
Epoch [41/80], Step [100/500] Loss: 0.4034
Epoch [41/80], Step [200/500] Loss: 0.3677
Epoch [41/80], Step [300/500] Loss: 0.3055
Epoch [41/80], Step [400/500] Loss: 0.2489
Epoch [41/80], Step [500/500] Loss: 0.3356
Epoch [42/80], Step [100/500] Loss: 0.1502
Epoch [42/80], Step [200/500] Loss: 0.2968
Epoch [42/80], Step [300/500] Loss: 0.3112
Epoch [42/80], Step [400/500] Loss: 0.2443
Epoch [42/80], Step [500/500] Loss: 0.3144
Epoch [43/80], Step [100/500] Loss: 0.3622
Epoch [43/80], Step [200/500] Loss: 0.2949
Epoch [43/80], Step [300/500] Loss: 0.2092
Epoch [43/80], Step [400/500] Loss: 0.2859
Epoch [43/80], Step [500/500] Loss: 0.2093
Epoch [44/8

Epoch [77/80], Step [400/500] Loss: 0.0450
Epoch [77/80], Step [500/500] Loss: 0.1572
Epoch [78/80], Step [100/500] Loss: 0.1444
Epoch [78/80], Step [200/500] Loss: 0.0717
Epoch [78/80], Step [300/500] Loss: 0.1902
Epoch [78/80], Step [400/500] Loss: 0.1320
Epoch [78/80], Step [500/500] Loss: 0.1117
Epoch [79/80], Step [100/500] Loss: 0.1209
Epoch [79/80], Step [200/500] Loss: 0.1278
Epoch [79/80], Step [300/500] Loss: 0.0504
Epoch [79/80], Step [400/500] Loss: 0.1602
Epoch [79/80], Step [500/500] Loss: 0.1745
Epoch [80/80], Step [100/500] Loss: 0.1670
Epoch [80/80], Step [200/500] Loss: 0.1094
Epoch [80/80], Step [300/500] Loss: 0.1731
Epoch [80/80], Step [400/500] Loss: 0.1287
Epoch [80/80], Step [500/500] Loss: 0.1936
Test Accuracy of the model on the 10000 test images: 81.58 %


In [15]:
# Print the module tree of the torchvision AlexNet (each layer with its settings).
print(alexNet)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096, out_feature