In [14]:
import torch
import torchvision.transforms  as T
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

device = torch.device('cuda')
device

device(type='cuda')

In [2]:
set = datasets.MNIST(root='./data', train=True, download=True, transform=T.ToTensor())
loader = DataLoader(set, batch_size = 64, shuffle=False)

In [3]:
def get_mean_std(loader):
    channels_sum, channels_squares_sum = 0, 0
    num_batches = len(loader)

    for data, _ in loader:
        channels_sum += torch.mean(data)
        channels_squares_sum += torch.mean(data**2)

    mean = channels_sum/num_batches
    std  = np.sqrt((channels_squares_sum/num_batches - mean**2))

    return mean, std

mean, std = get_mean_std(loader)
print(mean, std)

tensor(0.1307) tensor(0.3081)


In [4]:
trans = T.Compose([T.ToTensor(), T.Normalize(mean.item(), std.item())])

trainset = datasets.MNIST(root='./data', train=True, download=True, transform=trans)
train_loader = DataLoader(trainset, batch_size = 64, shuffle=False)

testset = datasets.MNIST(root='./data', train=False, download=True, transform=trans)
test_loader = DataLoader(testset, batch_size = 64, shuffle=False)

In [5]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.Dropout(0.2),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 1024),
            nn.Dropout(0.2),
            nn.LeakyReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.Dropout(0.2),
            nn.LeakyReLU(inplace=True),
            nn.Linear(512, 256),
            nn.Dropout(0.2),
            nn.LeakyReLU(inplace=True),
            nn.Linear(256, 128),
            nn.Dropout(0.2),
            nn.LeakyReLU(inplace=True),
            nn.Linear(128, 64),
            nn.Dropout(0.2),
            nn.LeakyReLU(inplace=True),
            nn.Linear(64, 32),
            nn.Dropout(0.2),
            nn.LeakyReLU(inplace=True),
            nn.Linear(32, 10)
            )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        
        return x   

In [8]:
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.000001)

In [9]:
model.train()

for epoch in range(25):
    running_loss = 0.0

    for i, (img, label) in enumerate(train_loader):
        img, label = img.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(img)
        
        loss = criterion(output, label)
        loss.backward()

        optimizer.step()

        running_loss += loss.item()
        
    print(f'Epoch {epoch+1}: loss = {running_loss / len(train_loader):.3f}')

print('Training finished!')        

Epoch 1: loss = 0.682
Epoch 2: loss = 0.158
Epoch 3: loss = 0.104
Epoch 4: loss = 0.080
Epoch 5: loss = 0.066
Epoch 6: loss = 0.058
Epoch 7: loss = 0.051
Epoch 8: loss = 0.045
Epoch 9: loss = 0.041
Epoch 10: loss = 0.037
Epoch 11: loss = 0.035
Epoch 12: loss = 0.032
Epoch 13: loss = 0.030
Epoch 14: loss = 0.026
Epoch 15: loss = 0.028
Epoch 16: loss = 0.024
Epoch 17: loss = 0.023
Epoch 18: loss = 0.023
Epoch 19: loss = 0.021
Epoch 20: loss = 0.019
Epoch 21: loss = 0.019
Epoch 22: loss = 0.017
Epoch 23: loss = 0.016
Epoch 24: loss = 0.018
Epoch 25: loss = 0.016
Training finished!


In [10]:
correct = 0
total = 0
with torch.no_grad():
    for img, label in test_loader:
        images, labels = img.to(device), label.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {len(test_loader)} test images: {100 * correct // total}%')

Accuracy of the network on the 157 test images: 99%
