In [18]:
# Load in relevant libraries, and alias where appropriate
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter

# Define relevant variables for the ML task
batch_size = 64
num_classes = 10
learning_rate = 0.001
num_epochs = 10

# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
#Loading the dataset and preprocessing
train_dataset = torchvision.datasets.MNIST(root = './data',
                                           train = True,
                                           transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1307,), std = (0.3081,))]),
                                           download = True)


test_dataset = torchvision.datasets.MNIST(root = './data',
                                          train = False,
                                          transform = transforms.Compose([
                                                  transforms.Resize((32,32)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(mean = (0.1325,), std = (0.3105,))]),
                                          download=True)


train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True, num_workers=6)


test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                           batch_size = batch_size,
                                           shuffle = True, num_workers=6)

In [5]:
#Defining the convolutional neural network
class LeNet5(nn.Module):
    def __init__(self, num_classes):
        super(LeNet5, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.fc = nn.Linear(400, 120)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(120, 84)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(84, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.relu(out)
        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        return out

In [15]:
class AlexNet2(nn.Module):
    def __init__(self, num_classes, dropout):
        super(AlexNet2, self).__init__()
        self.net = nn.Sequential(
            nn.Upsample(scale_factor=8, mode='bilinear'),
            nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Flatten(),
            nn.Linear(in_features=9216, out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(in_features=4096, out_features=num_classes)
        )

    def forward(self, x):
        return self.net(x)

In [16]:
model = AlexNet2(num_classes, 0.5).to(device)
for params in model.parameters():
    print(params.size())


torch.Size([96, 1, 11, 11])
torch.Size([96])
torch.Size([256, 96, 5, 5])
torch.Size([256])
torch.Size([384, 256, 3, 3])
torch.Size([384])
torch.Size([384, 384, 3, 3])
torch.Size([384])
torch.Size([256, 384, 3, 3])
torch.Size([256])
torch.Size([4096, 9216])
torch.Size([4096])
torch.Size([4096, 4096])
torch.Size([4096])
torch.Size([10, 4096])
torch.Size([10])


In [17]:
summary(model, input_size=(1, 32, 32), batch_size=-1)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
          Upsample-1          [-1, 1, 256, 256]               0
            Conv2d-2           [-1, 96, 62, 62]          11,712
              ReLU-3           [-1, 96, 62, 62]               0
         MaxPool2d-4           [-1, 96, 30, 30]               0
            Conv2d-5          [-1, 256, 30, 30]         614,656
              ReLU-6          [-1, 256, 30, 30]               0
         MaxPool2d-7          [-1, 256, 14, 14]               0
            Conv2d-8          [-1, 384, 14, 14]         885,120
              ReLU-9          [-1, 384, 14, 14]               0
           Conv2d-10          [-1, 384, 14, 14]       1,327,488
             ReLU-11          [-1, 384, 14, 14]               0
           Conv2d-12          [-1, 256, 14, 14]         884,992
             ReLU-13          [-1, 256, 14, 14]               0
        MaxPool2d-14            [-1, 25

In [6]:
#Setting the loss function
cost = nn.CrossEntropyLoss()

#Setting the optimizer with the model parameters and learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#this is defined to print how many steps are remaining when training
total_step = len(train_loader)

In [7]:
train_losses = []

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        #Forward pass
        outputs = model(images)
        loss = cost(outputs, labels)
        train_losses.append(loss.item())

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 400 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
        		           .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/10], Step [400/938], Loss: 0.1266
Epoch [1/10], Step [800/938], Loss: 0.1225
Epoch [2/10], Step [400/938], Loss: 0.0892
Epoch [2/10], Step [800/938], Loss: 0.0213
Epoch [3/10], Step [400/938], Loss: 0.0086
Epoch [3/10], Step [800/938], Loss: 0.0068
Epoch [4/10], Step [400/938], Loss: 0.0137
Epoch [4/10], Step [800/938], Loss: 0.0017
Epoch [5/10], Step [400/938], Loss: 0.0160
Epoch [5/10], Step [800/938], Loss: 0.0171
Epoch [6/10], Step [400/938], Loss: 0.0034
Epoch [6/10], Step [800/938], Loss: 0.0093
Epoch [7/10], Step [400/938], Loss: 0.0083
Epoch [7/10], Step [800/938], Loss: 0.0414
Epoch [8/10], Step [400/938], Loss: 0.0004
Epoch [8/10], Step [800/938], Loss: 0.0024
Epoch [9/10], Step [400/938], Loss: 0.0005
Epoch [9/10], Step [800/938], Loss: 0.0001
Epoch [10/10], Step [400/938], Loss: 0.0001
Epoch [10/10], Step [800/938], Loss: 0.0009


In [8]:
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)

with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 99.04 %


In [None]:
plt.figure()
plt.plot(train_losses)
plt.title('train losses')
plt.xlabel('step')
plt.ylabel('percent')

In [7]:
print(train_losses[:10])

[2.3060343265533447, 2.2772130966186523, 2.2504353523254395, 2.2140092849731445, 2.163278579711914, 2.130688190460205, 2.088505983352661, 2.036684036254883, 2.0306613445281982, 1.9066760540008545]


In [None]:
#定义超参数，采用SGD作为优化器
learning_rate = 0.001
batch_size = 256
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
loss_fn = nn.CrossEntropyLoss()
model.to(device)
loss_list = []
acc_list = []
epoch_num = []

def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)

#定义训练循环和测试循环
def train_loop(dataloader, model, loss_fn, optimizer, epoch):
    size = len(dataloader.dataset)
    for t in range(epoch):
        print(f"Epoch {t+1}\n-------------------------------")
        running_loss = 0
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)
            pred = model(X)
            loss = loss_fn(pred, y)
            running_loss += loss
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch % 50 == 49:
                writer.add_scalar('training loss',
                                running_loss / 50,
                                epoch * len(dataloader)+batch+1)

                loss, current = loss.item(), (batch+1) * len(X)
                loss_list.append(loss), epoch_num.append(t+current/size)
                print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
                running_loss = 0

        test_loop(test_iter, model, loss_fn)

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    acc_list.append(correct)
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f}")

In [None]:
#开始训练(epoch=30)
model.apply(init_weights)
writer = SummaryWriter()
train_loop(train_iter, model, loss_fn, optimizer, 30)