In [1]:
# Load in relevant libraries, and alias where appropriate
import torch
import torch.nn as nn
import torch.nn.functional  as F

import torchvision
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler

from torchinfo import summary

import numpy as np
import matplotlib.pyplot as plt

# Hyperparameters shared by the data loaders, the model and the training loop
batch_size, num_classes = 64, 10
learning_rate, num_epochs = 0.005, 30

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print("using device: %s" % device)

using device: cuda


In [2]:
def get_train_valid_loader(data_dir,
                           batch_size,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True):
    """Build training and validation loaders from the CIFAR-10 training split.

    The ``train=True`` split is loaded twice (once per loader) with the same
    preprocessing: resize to 224x224, conversion to a tensor, and per-channel
    normalization. Its indices are then partitioned into a validation subset
    (the first ``valid_size`` fraction) and a training subset (the rest), and
    each loader draws from its subset through a ``SubsetRandomSampler``.

    Args:
        data_dir: Directory where the dataset is stored / downloaded to.
        batch_size: Number of samples per batch for both loaders.
        random_seed: Seed passed to ``np.random.seed`` before the indices are
            shuffled. Only used when ``shuffle`` is true. Note that this
            reseeds NumPy's global random state.
        valid_size: Fraction of the training split held out for validation.
            Must lie in ``[0, 1]``.
        shuffle: Whether to shuffle the indices before splitting them.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.

    Raises:
        ValueError: If ``valid_size`` is outside ``[0, 1]``.
    """
    # Reject bad fractions up front: out-of-range values would otherwise
    # silently yield an empty or wrongly sized split.
    if not 0 <= valid_size <= 1:
        raise ValueError(
            "valid_size should be in the range [0, 1], got {}".format(valid_size))

    # Training and validation currently use the same deterministic pipeline.
    # Give the training dataset its own Compose if augmentation is added.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])

    # load the dataset (both objects wrap the same train=True split)
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=transform,
    )

    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=transform,
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    # The first `split` indices form the validation subset, the rest training.
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    # CIFAR-10 dataset summary: dataset descriptions, the shape of one
    # transformed sample, and the class names.
    class_names = train_dataset.classes
    print(train_dataset)
    print(valid_dataset)

    print(train_dataset[0][0].shape)
    print(class_names)

    return (train_loader, valid_loader)


def get_test_loader(data_dir,
                    batch_size,
                    shuffle=True):
    """Return a DataLoader over the CIFAR-10 test split (``train=False``).

    Every image is resized to 224x224, converted to a tensor and normalized
    per channel before batching.

    Args:
        data_dir: Directory where the dataset is stored / downloaded to.
        batch_size: Number of samples per batch.
        shuffle: Forwarded to the ``DataLoader`` as its ``shuffle`` argument.

    Returns:
        The test ``DataLoader``.
    """
    # Preprocessing pipeline applied to each test image
    preprocessing = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])

    test_set = datasets.CIFAR10(
        root=data_dir,
        train=False,
        download=True,
        transform=preprocessing,
    )

    return torch.utils.data.DataLoader(
        test_set,
        batch_size=batch_size,
        shuffle=shuffle,
    )

# Build the CIFAR10 loaders: train/validation first, then the test loader
train_loader, valid_loader = get_train_valid_loader(
    data_dir='./data',
    batch_size=batch_size,
    random_seed=1,
)

test_loader = get_test_loader(
    data_dir='./data',
    batch_size=batch_size,
)

Files already downloaded and verified
Files already downloaded and verified
Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=PIL.Image.BILINEAR)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )
Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=PIL.Image.BILINEAR)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )
torch.Size([3, 224, 224])
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
Files already downloaded and verified


In [3]:
# Define the deep convolutional neural network AlexNet
class AlexNet(nn.Module):
    """AlexNet-style convolutional image classifier.

    A stack of five convolutional layers (with three max-pooling stages) is
    followed by an adaptive average pool to a fixed 6x6 map and a three-layer
    fully connected classifier with dropout.

    Args:
        num_classes: Number of output logits produced by the last linear layer.
        dropout: Drop probability used by both ``nn.Dropout`` layers.
    """

    def __init__(self, num_classes: int = 10, dropout: float = 0.5) -> None:
        super().__init__()
        self.features = nn.Sequential(
            # Use a larger 11x11 window to capture objects.
            # A stride of 4 reduces the output height and width.
            # The number of output channels is also much larger than in LeNet.
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Shrink the convolution window, pad by 2 so the input and output
            # height/width match, and increase the number of output channels.
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Three consecutive convolutional layers with a smaller window.
            # Except for the last one, the number of output channels grows further.
            # No pooling layer follows the first two of these convolutions.
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        # Pool to a fixed 6x6 map so the first Linear layer always receives
        # 256 * 6 * 6 features, whatever the input resolution. For 224x224
        # inputs the feature map is already 6x6, so this is an identity there.
        # The layer has no parameters, so state-dict keys are unaffected.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            # The fully connected layers are several times wider than in LeNet;
            # dropout layers are used to mitigate overfitting.
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            # Finally, the output layer
            nn.Linear(4096, num_classes)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


In [4]:
# Instantiate AlexNet with its default arguments and tabulate its layers
# for a single 3x224x224 input.
model = AlexNet()
summary(
    model=model,
    input_size=(1, 3, 224, 224),
    col_names=['input_size', 'output_size', 'num_params', 'trainable'],
    col_width=20,
    row_settings=['var_names'],
    verbose=0,
)

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
AlexNet (AlexNet)                        [1, 3, 224, 224]     [1, 10]              --                   True
├─Sequential (features)                  [1, 3, 224, 224]     [1, 256, 6, 6]       --                   True
│    └─Conv2d (0)                        [1, 3, 224, 224]     [1, 96, 55, 55]      34,944               True
│    └─ReLU (1)                          [1, 96, 55, 55]      [1, 96, 55, 55]      --                   --
│    └─MaxPool2d (2)                     [1, 96, 55, 55]      [1, 96, 27, 27]      --                   --
│    └─Conv2d (3)                        [1, 96, 27, 27]      [1, 256, 27, 27]     614,656              True
│    └─ReLU (4)                          [1, 256, 27, 27]     [1, 256, 27, 27]     --                   --
│    └─MaxPool2d (5)                     [1, 256, 27, 27]     [1, 256, 13, 13]     --                   --
│    └─Conv2d (6)     

In [5]:
model = AlexNet(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.005, momentum=0.9)

# Train the model
total_step = len(train_loader)

for epoch in range(num_epochs):
    model.train()   # sets the module in training mode
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # NOTE: the loss reported here is that of the epoch's last mini-batch only,
    # not an average over the epoch, so it fluctuates from epoch to epoch.
    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
           .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Validation
    model.eval()    # sets the module in evaluation mode
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Index of the largest logit is the predicted class. Gradients are
            # already disabled by no_grad, so the legacy `.data` is not needed.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

        # Report the number of samples actually evaluated rather than a
        # hard-coded count, so the message stays right if the split changes.
        print('Accuracy of the network on the {} validation images: {} %'
              .format(total, 100 * correct / total))

Epoch [1/30], Step [704/704], Loss: 1.7298
Accuracy of the network on the 5000 validation images: 35.24 %
Epoch [2/30], Step [704/704], Loss: 0.8920
Accuracy of the network on the 5000 validation images: 51.44 %
Epoch [3/30], Step [704/704], Loss: 1.3447
Accuracy of the network on the 5000 validation images: 62.3 %
Epoch [4/30], Step [704/704], Loss: 1.9061
Accuracy of the network on the 5000 validation images: 61.86 %
Epoch [5/30], Step [704/704], Loss: 0.4215
Accuracy of the network on the 5000 validation images: 67.58 %
Epoch [6/30], Step [704/704], Loss: 0.1987
Accuracy of the network on the 5000 validation images: 71.48 %
Epoch [7/30], Step [704/704], Loss: 0.5428
Accuracy of the network on the 5000 validation images: 73.1 %
Epoch [8/30], Step [704/704], Loss: 0.8499
Accuracy of the network on the 5000 validation images: 74.0 %
Epoch [9/30], Step [704/704], Loss: 1.0260
Accuracy of the network on the 5000 validation images: 77.2 %
Epoch [10/30], Step [704/704], Loss: 0.9407
Accura

In [6]:
# Evaluate the trained model on the test loader
model.eval()    # sets the module in evaluation mode
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest logit is the predicted class. Gradients are
        # already disabled by no_grad, so the legacy `.data` is not needed.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    # Report the number of samples actually evaluated rather than a
    # hard-coded count.
    print('Accuracy of the network on the {} test images: {} %'
          .format(total, 100 * correct / total))

Accuracy of the network on the 10000 test images: 82.88 %


In [7]:
# Export the PyTorch model to ONNX
# Move the model to the CPU and switch it to evaluation mode before exporting.
model = model.to('cpu')
model.eval()

# Random example input: a single 3x224x224 float32 image
x = torch.randn(size=(1, 3, 224, 224), dtype=torch.float32, requires_grad=True)

torch.onnx.export(
    model,                       # the model to export
    x,                           # example input (a tuple would be used for multiple inputs)
    "AlexNet.onnx",              # destination file (a file-like object is also accepted)
    export_params=True,          # embed the trained weights in the exported file
    opset_version=11,            # ONNX opset version to target
    do_constant_folding=True,    # apply constant folding as an optimization
    input_names=['input'],       # name given to the graph input
    output_names=['output'],     # name given to the graph output
    dynamic_axes={               # axis 0 of both tensors is variable-length
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'},
    },
)