# Pytorch Tutorial

Pytorch is a popular deep learning framework and it's easy to get started.

In [1]:
import torch
import torch.nn as nn
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm
import time
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(torch.__version__)
print(torchvision.__version__)
print(device)

BATCH_SIZE = 128
NUM_EPOCHS = 10
LEARNING_RATE = 0.001

1.6.0
0.7.0
cpu


First, we read the mnist data, preprocess them and encapsulate them into dataloader form.

In [2]:
# preprocessing
normalize = transforms.Normalize(mean=[.5], std=[.5])
transform = transforms.Compose([transforms.ToTensor(), normalize])

# download and load the data
train_dataset = torchvision.datasets.MNIST(root='./mnist/', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./mnist/', train=False, transform=transform, download=False)

# encapsulate them into dataloader form
train_loader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
test_loader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=True) 

print(len(train_dataset), len(test_dataset))

60000 10000


Then, we define the model, object function and optimizer that we use to classify.

In [3]:
class SimpleNet(nn.Module):
# TODO:define model

    def __init__(self):
        super(SimpleNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2), # kernel_size, stride
            nn.Dropout(0.15),    # drop_prob
            nn.Conv2d(6, 16, 5),
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.15)
        )
        self.fc = nn.Sequential(
            nn.Linear(256, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10)
        )

    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output
    
model = SimpleNet()

# TODO:define loss function and optimiter
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)

# initialize parameters
for param in model.parameters():
    nn.init.normal_(param, mean=0, std=0.01) 

print(model)

SimpleNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.15, inplace=False)
    (4): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (5): Sigmoid()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Dropout(p=0.15, inplace=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


Next, we can start to train and evaluate!

In [4]:
# train and evaluate
for epoch in range(NUM_EPOCHS):
    
    train_l_sum,  batch_count, start = 0.0, 0, time.time()
    
    model.train() # 训练模式
 
    for images, labels in tqdm(train_loader):
        
        images = images.to(device)
        labels = labels.to(device)
        labels_hat = model(images)
        l = criterion(labels_hat, labels)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        train_l_sum += l.cpu().item()
        batch_count += 1

    train_acc_sum, train_n, test_acc_sum, test_n = 0.0, 0, 0.0, 0

    model.eval() # 评估模式, 这会关闭dropout
    
    for images, labels in tqdm(train_loader):
        train_acc_sum += (model(images.to(device)).argmax(dim=1) == labels.to(device)).float().sum().cpu().item()
        train_n += labels.shape[0]
        
    for images, labels in tqdm(test_loader):
        test_acc_sum += (model(images.to(device)).argmax(dim=1) == labels.to(device)).float().sum().cpu().item()
        test_n += labels.shape[0]
    
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch + 1, train_l_sum / batch_count, train_acc_sum / train_n, test_acc_sum / test_n, time.time() - start))   

100%|██████████| 468/468 [00:12<00:00, 38.76it/s]
100%|██████████| 468/468 [00:07<00:00, 58.80it/s]
100%|██████████| 78/78 [00:01<00:00, 59.49it/s]
  1%|          | 4/468 [00:00<00:12, 36.50it/s]

epoch 1, loss 2.2233, train acc 0.263, test acc 0.264, time 21.3 sec


100%|██████████| 468/468 [00:12<00:00, 38.14it/s]
100%|██████████| 468/468 [00:07<00:00, 59.08it/s]
100%|██████████| 78/78 [00:01<00:00, 59.49it/s]
  1%|          | 4/468 [00:00<00:12, 37.01it/s]

epoch 2, loss 1.6163, train acc 0.627, test acc 0.623, time 21.5 sec


100%|██████████| 468/468 [00:12<00:00, 37.99it/s]
100%|██████████| 468/468 [00:08<00:00, 58.44it/s]
100%|██████████| 78/78 [00:01<00:00, 56.23it/s]
  1%|          | 4/468 [00:00<00:12, 37.35it/s]

epoch 3, loss 0.7848, train acc 0.904, test acc 0.906, time 21.7 sec


100%|██████████| 468/468 [00:12<00:00, 37.19it/s]
100%|██████████| 468/468 [00:08<00:00, 57.62it/s]
100%|██████████| 78/78 [00:01<00:00, 59.88it/s]
  1%|          | 5/468 [00:00<00:11, 41.46it/s]

epoch 4, loss 0.2831, train acc 0.955, test acc 0.957, time 22.0 sec


100%|██████████| 468/468 [00:12<00:00, 37.64it/s]
100%|██████████| 468/468 [00:08<00:00, 58.41it/s]
100%|██████████| 78/78 [00:01<00:00, 58.97it/s]
  1%|          | 4/468 [00:00<00:12, 37.70it/s]

epoch 5, loss 0.1859, train acc 0.963, test acc 0.964, time 21.8 sec


100%|██████████| 468/468 [00:12<00:00, 37.69it/s]
100%|██████████| 468/468 [00:08<00:00, 58.18it/s]
100%|██████████| 78/78 [00:01<00:00, 58.16it/s]
  1%|          | 4/468 [00:00<00:11, 39.97it/s]

epoch 6, loss 0.1513, train acc 0.969, test acc 0.971, time 21.8 sec


100%|██████████| 468/468 [00:12<00:00, 37.94it/s]
100%|██████████| 468/468 [00:07<00:00, 58.65it/s]
100%|██████████| 78/78 [00:01<00:00, 59.17it/s]
  1%|          | 4/468 [00:00<00:11, 39.57it/s]

epoch 7, loss 0.1314, train acc 0.975, test acc 0.975, time 21.6 sec


100%|██████████| 468/468 [00:12<00:00, 38.35it/s]
100%|██████████| 468/468 [00:07<00:00, 58.61it/s]
100%|██████████| 78/78 [00:01<00:00, 58.97it/s]
  1%|          | 5/468 [00:00<00:11, 39.49it/s]

epoch 8, loss 0.1189, train acc 0.977, test acc 0.977, time 21.5 sec


100%|██████████| 468/468 [00:12<00:00, 37.89it/s]
100%|██████████| 468/468 [00:08<00:00, 58.35it/s]
100%|██████████| 78/78 [00:01<00:00, 57.81it/s]
  1%|          | 4/468 [00:00<00:11, 38.80it/s]

epoch 9, loss 0.1087, train acc 0.980, test acc 0.979, time 21.7 sec


100%|██████████| 468/468 [00:12<00:00, 38.12it/s]
100%|██████████| 468/468 [00:08<00:00, 58.42it/s]
100%|██████████| 78/78 [00:01<00:00, 59.02it/s]

epoch 10, loss 0.1020, train acc 0.981, test acc 0.979, time 21.6 sec





#### Q5:
Please print the training and testing accuracy.

In [6]:
train_accuracy=train_acc_sum / train_n
test_accuracy=test_acc_sum / test_n
print('Training accuracy: %0.2f%%' % (train_accuracy*100))
print('Testing accuracy: %0.2f%%' % (test_accuracy*100))

Training accuracy: 98.13%
Testing accuracy: 97.92%


### Results

|     | Training accuracy(%) | Testing accuracy(%) |
|:---:|:--------------------:|:-------------------:|
|  Q5 |         98.13        |        97.92        |