# PyTorch: MNIST dataset using GPU

In [1]:
import torch
import numpy as np
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader

In [2]:
# Fetch MNIST into ./dataset (downloaded if not already there); ToTensor()
# makes every image come back as a tensor.
dataset = MNIST(root='dataset/', download=True, transform=ToTensor())

In [3]:
def split_indices(n, val_pct):
    """Randomly partition the indices 0..n-1 into training and validation sets.

    Args:
        n: Total number of samples.
        val_pct: Fraction of the samples to reserve for validation; the
            resulting count is truncated to an integer.

    Returns:
        A ``(train_indices, val_indices)`` pair of NumPy arrays. The first
        ``int(val_pct * n)`` entries of a random permutation form the
        validation set and the remaining entries form the training set.
    """
    holdout = int(val_pct * n)
    shuffled = np.random.permutation(n)
    val_part, train_part = shuffled[:holdout], shuffled[holdout:]
    return train_part, val_part

In [4]:
# Hold out a random 20% of the sample indices for validation.
train_idxs, val_idxs = split_indices(len(dataset), val_pct=0.2)

# Notebook display: how many indices landed in each set.
len(train_idxs), len(val_idxs)

(48000, 12000)

In [5]:
batch_size = 128

# Both loaders read from the same dataset object; the sampler passed to each
# one limits it to its own index subset.
train_sampler = SubsetRandomSampler(train_idxs)
train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)

val_sampler = SubsetRandomSampler(val_idxs)
val_loader = DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

In [6]:
class MnistModel(nn.Module):
    """Feed-forward classifier with two hidden layers and ReLU activations.

    Args:
        in_size: Number of features per sample after flattening.
        hidden_size: Sequence whose first two entries are the widths of the
            first and second hidden layers.
        out_size: Number of output scores produced per sample.
    """

    def __init__(self, in_size, hidden_size, out_size):
        super().__init__()
        # NOTE: the attribute names (including the "connecton" spelling) are
        # the state_dict keys, so they are kept as-is for checkpoint
        # compatibility.
        self.full_connecton_1 = nn.Linear(in_size, hidden_size[0])
        self.full_connecton_2 = nn.Linear(hidden_size[0], hidden_size[1])
        self.full_connecton_3 = nn.Linear(hidden_size[1], out_size)

    def forward(self, xb):
        """Return the raw (un-normalised) output scores for a batch.

        Args:
            xb: Batch tensor whose first dimension is the batch size; all
                remaining dimensions are flattened into one feature axis.

        Returns:
            Tensor of shape ``(batch, out_size)``.
        """
        # reshape() rather than view(): view() raises on non-contiguous
        # input (e.g. a transposed or permuted batch), while reshape() falls
        # back to a copy only when one is needed.
        xb = xb.reshape(xb.size(0), -1)
        out = F.relu(self.full_connecton_1(xb))
        out = F.relu(self.full_connecton_2(out))
        # No activation on the last layer: the result is the raw score.
        out = self.full_connecton_3(out)
        return out

In [7]:
input_size = 28 * 28  # values per image once a 28x28 image is flattened
hidden_size = [256, 32]  # widths of the first and second hidden layers
out_size = 10  # number of output scores per image

In [8]:
# Build the network from the layer sizes chosen above.
model = MnistModel(input_size, hidden_size, out_size)

In [9]:
# Print the shape of every learnable tensor in the model.
for t in model.parameters():
    print(t.shape)

torch.Size([256, 784])
torch.Size([256])
torch.Size([32, 256])
torch.Size([32])
torch.Size([10, 32])
torch.Size([10])


In [10]:
# Sanity check: push a single training batch through the model and compute
# its cross-entropy loss.
for images, labels in train_loader:
    print('images.shape:', images.shape)
    outputs = model(images)
    loss = F.cross_entropy(outputs, labels)
    print('loss', loss.item())
    break  # only the first batch is needed

print('outputs.shape', outputs.shape)
# Show the model outputs for the first two images of that batch.
print('Sample outputs:\n', outputs[:2].data)

images.shape: torch.Size([128, 1, 28, 28])
loss 2.310922384262085
outputs.shape torch.Size([128, 10])
Sample outputs:
 tensor([[ 0.0578,  0.1278, -0.1149,  0.2035, -0.0403, -0.0136, -0.0651,  0.0116,
          0.1817,  0.0012],
        [ 0.0448,  0.1191, -0.0909,  0.1975, -0.0002, -0.0284, -0.0627, -0.0075,
          0.2119, -0.0202]])


### Using GPU

In [11]:
# Notebook display: whether PyTorch reports CUDA as available.
torch.cuda.is_available()

True

In [12]:
def get_default_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

In [13]:
# Choose the device once and keep it in a module-level name.
device = get_default_device()
device  # notebook display of the chosen device

device(type='cuda')

In [14]:
def to_device(data, device):
    """Move a tensor, or a nested list/tuple of tensors, onto ``device``.

    Args:
        data: An object with a ``.to()`` method, or a list/tuple (possibly
            nested) of such objects.
        device: Target device handed to ``.to()``.

    Returns:
        The moved object. A list or tuple input always comes back as a
        ``list``, with each element moved recursively.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

In [15]:
# Move the images of one training batch to the chosen device and print where
# they ended up.
for images, labels in train_loader:
    print(images.shape)
    images = to_device(images, device)
    print(images.device)
    break  # only the first batch is needed

torch.Size([128, 1, 28, 28])
cuda:0


In [16]:
class DeviceDataLoader:
    """Wrap an iterable of batches so each batch is moved to a device.

    Args:
        dl: The underlying loader; it is iterated and passed to ``len()``.
        device: Device every yielded batch is moved to.
    """

    def __init__(self, dl, device):
        self.device = device
        self.dl = dl

    def __len__(self):
        """Return the length reported by the wrapped loader."""
        wrapped = self.dl
        return len(wrapped)

    def __iter__(self):
        """Yield the wrapped loader's batches, each moved via ``to_device``."""
        for batch in self.dl:
            moved_batch = to_device(batch, self.device)
            yield moved_batch

In [17]:
# Rebind both loader names to wrappers that move every batch to `device`;
# from here on the names no longer refer to the plain DataLoader objects.
train_loader = DeviceDataLoader(train_loader, device)
val_loader = DeviceDataLoader(val_loader, device)

In [18]:
# Check the wrapped loader: print the device of one batch's inputs and the
# batch's labels.
for xb, yb in val_loader:
    print(xb.device)
    print(yb)
    break  # only the first batch is needed

cuda:0
tensor([4, 8, 9, 9, 1, 1, 5, 5, 2, 2, 5, 4, 2, 7, 1, 4, 2, 8, 6, 2, 7, 4, 5, 1,
        5, 7, 9, 9, 9, 3, 2, 2, 1, 4, 8, 3, 2, 2, 9, 1, 0, 7, 8, 0, 1, 3, 1, 9,
        7, 5, 0, 2, 7, 8, 9, 5, 7, 3, 8, 6, 3, 6, 3, 9, 9, 7, 4, 6, 1, 3, 8, 7,
        9, 5, 3, 1, 7, 5, 1, 7, 7, 4, 2, 3, 3, 7, 2, 5, 7, 5, 6, 6, 9, 0, 1, 1,
        1, 2, 2, 1, 0, 2, 2, 7, 6, 4, 7, 9, 5, 5, 1, 3, 9, 9, 4, 5, 3, 7, 8, 0,
        5, 9, 9, 5, 5, 8, 1, 3], device='cuda:0')


In [19]:
def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    """Compute the loss for one batch and optionally take an optimizer step.

    Args:
        model: Callable applied to ``xb`` to produce predictions.
        loss_func: Callable ``(predictions, yb)`` returning a loss tensor.
        xb: Batch of inputs.
        yb: Batch of targets.
        opt: Optional optimizer. When given, the loss is back-propagated,
            one step is taken, and the gradients are then cleared.
        metric: Optional callable ``(predictions, yb)``; its result is
            returned unchanged.

    Returns:
        A tuple ``(loss_value, batch_size, metric_result)`` where
        ``loss_value`` is ``loss.item()``, ``batch_size`` is ``len(xb)`` and
        ``metric_result`` is ``None`` when no metric was supplied.
    """
    predictions = model(xb)
    batch_loss = loss_func(predictions, yb)

    update_weights = opt is not None
    if update_weights:
        batch_loss.backward()
        opt.step()
        # Clear the gradients so they do not accumulate into the next batch.
        opt.zero_grad()

    # The metric is evaluated on the predictions computed before the step.
    score = metric(predictions, yb) if metric is not None else None

    return batch_loss.item(), len(xb), score

In [20]:
def fit(epochs, lr, model, loss_fn, train_dl, valid_dl, opt_fn=None, metric=None):
    """Train ``model`` for a number of epochs, validating after each one.

    Args:
        epochs: Number of passes over ``train_dl``.
        lr: Learning rate handed to the optimizer constructor.
        model: Module whose ``parameters()`` are optimised.
        loss_fn: Loss callable forwarded to ``loss_batch`` and ``evaluate``.
        train_dl: Iterable of ``(xb, yb)`` training batches.
        valid_dl: Validation data forwarded to ``evaluate``.
        opt_fn: Optimizer constructor called as ``opt_fn(params, lr=lr)``;
            defaults to ``torch.optim.SGD``.
        metric: Optional metric callable forwarded to ``evaluate``.

    Returns:
        ``(losses, metrics)``: two lists holding, per epoch, the first and
        third values returned by ``evaluate``.
    """
    losses, metrics = [], []

    if opt_fn is None:
        opt_fn = torch.optim.SGD
    opt = opt_fn(model.parameters(), lr=lr)

    # Resolve the label for the metric once, up front. functools.partial
    # objects and callable instances have no __name__, which would otherwise
    # raise only after the first epoch of training had already been spent.
    metric_name = None
    if metric is not None:
        metric_name = getattr(metric, '__name__', type(metric).__name__)

    for epoch in range(epochs):
        for xb, yb in train_dl:
            loss_batch(model, loss_fn, xb, yb, opt)

        # `evaluate` is defined elsewhere in the notebook; its result is
        # unpacked as three values, of which the middle one is unused here.
        val_loss, _, val_metric = evaluate(model, loss_fn, valid_dl, metric)

        losses.append(val_loss)
        metrics.append(val_metric)

        if metric is None:
            print('Epoch [{}/{}], Loss: {:.5f}'.format(epoch + 1, epochs, val_loss))
        else:
            print('Epoch [{}/{}], Loss: {:.5f}, {}: {:.5f}'.format(
                epoch + 1, epochs, val_loss, metric_name, val_metric))

    return losses, metrics