In [6]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
import torch.nn.functional as F
import time
import numpy as np

## 读取数据

## 定义模型

In [7]:
class AlexNetModel(nn.Module):
    def __init__(self):
        super(AlexNetModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4)
        self.subSampling1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2)
        self.subSampling2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1)
        self.subSampling3 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.linear1 = nn.Linear(256*6*6, 4096)
        self.linear2 = nn.Linear(4096, 4096)
        self.linear3 = nn.Linear(4096, 10)
    def forward(self, x):
        x1 = F.relu(self.conv1(x))
        x2 = self.subSampling1(x1)
        x3 = F.relu(self.conv2(x2))
        x4 = self.subSampling2(x3)
        x5 = F.relu(self.conv3(x4))
        x6 = F.relu(self.conv4(x5))
        x7 = F.relu(self.conv5(x6))
        x8 = self.subSampling3(x7)
        x9 = F.dropout(F.relu(self.linear1(x8.view(x8.shape[0], -1))), p=0.5)
        x10 = F.dropout(F.relu(self.linear2(x9)), p=0.5)
        y = self.linear3(x10)
        return y

In [11]:
def training_loop(net, optimizer, loss, n_epochs, train_set, test_set, batch_size=64):
    train_iter = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
    test_iter = torch.utils.data.DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)
    net = net.to('cuda')
    for epoch in range(n_epochs):
        train_loss_sum , batch_count, start = 0, 0, time.time()
        for X, y in train_iter:
            X = X.to('cuda')
            y = y.to('cuda')
            y_pre = net(X)
            l = loss(y_pre, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_loss_sum += l.item()
            batch_count += 1
        print("epochs: %d, loss: %2f, time: %2f sec" % (epoch+1, train_loss_sum / batch_count, 
                                                       (time.time() - start)))
        

In [15]:
if __name__ == "__main__":
    savePath = '../Chapter5/Fashion-MNIST-AlexNet/'
    trans = []
    trans.append(torchvision.transforms.Resize(size=227))
    trans.append(torchvision.transforms.ToTensor())

    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=savePath, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=savePath, train=False, download=True, transform=transform)
    
    lr = 0.001
    batch_size = 8
    n_epochs = 51      
    net = AlexNetModel()
    print(net)
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    loss = torch.nn.CrossEntropyLoss()
    print(net)
    #summary(net.to('cuda'), (1, 28, 28))
    training_loop(net=net, optimizer=optimizer, loss=loss, n_epochs=n_epochs, train_set=mnist_train,
                  test_set=mnist_test, batch_size=batch_size)
    

AlexNetModel(
  (conv1): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
  (subSampling1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (subSampling2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (subSampling3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear1): Linear(in_features=9216, out_features=4096, bias=True)
  (linear2): Linear(in_features=4096, out_features=4096, bias=True)
  (linear3): Linear(in_features=4096, out_features=10, bias=True)
)
AlexNetModel(
  (conv1): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
  (subSampling1): MaxPool2d(kernel_size=2, stride=

RuntimeError: CUDA out of memory. Tried to allocate 144.00 MiB (GPU 0; 1.96 GiB total capacity; 784.91 MiB already allocated; 92.62 MiB free; 79.09 MiB cached)