In [1]:
# import libraries
from mlxtend.data import loadlocal_mnist
import torch
import torchvision
from torchvision.datasets import MNIST
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
X, y = loadlocal_mnist(
        images_path='data\\train-images.idx3-ubyte', 
        labels_path='data\\train-labels.idx1-ubyte')
print(X)
print(y)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[5 0 4 ... 5 6 8]


In [3]:
X.shape

(60000, 784)

In [4]:
y.shape

(60000,)

In [5]:
type(X)

numpy.ndarray

In [6]:
type(y)

numpy.ndarray

In [7]:
inputs = torch.from_numpy(X)
outputs = torch.from_numpy(y)

print(inputs)
print(outputs)

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
tensor([5, 0, 4,  ..., 5, 6, 8], dtype=torch.uint8)


In [8]:
for image in inputs:
    print(image)
    #print(label)
    break

tensor([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,
         18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 1

In [9]:
train = TensorDataset(inputs, outputs)

In [10]:
for image, label in train:
    print(image)
    print(label)
    break

tensor([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,
         18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 1

In [11]:
train_ds, val_ds = random_split(train, [50000, 10000])

In [12]:
batch_size = 100
train_loader = DataLoader(train_ds, batch_size, shuffle = True)
val_loader = DataLoader(val_ds, batch_size)

In [13]:
input_size = 28 * 28
classes = 10
model = nn.Linear(input_size, classes)

In [14]:
model.weight.shape

torch.Size([10, 784])

In [15]:
model.bias.shape

torch.Size([10])

In [16]:
for image, label in train_loader:
    out = model(image.float())
    break

print(out.shape)
print(out[:2].data)

torch.Size([100, 10])
tensor([[  56.7804,   -9.8473,   42.9829,   -2.2877,   10.5297,  -24.9129,
          -30.0376,  -69.2167,   57.1823,   10.1394],
        [ -11.3448,   53.6051,  -54.0029,    0.9911,  -10.9169,  -69.3392,
          -56.5569,   61.4682,   77.3953, -108.1011]])


In [17]:
prob = F.softmax(out, dim = 1)
print(prob[:2].data)
print(torch.sum(prob[0]).item())

tensor([[4.0087e-01, 4.6448e-30, 4.0817e-07, 8.9130e-27, 3.2852e-21, 1.3306e-36,
         7.9140e-39, 0.0000e+00, 5.9913e-01, 2.2236e-21],
        [2.8885e-39, 4.6564e-11, 0.0000e+00, 6.5777e-34, 4.4311e-39, 0.0000e+00,
         0.0000e+00, 1.2104e-07, 1.0000e+00, 0.0000e+00]])
0.9999999403953552


In [18]:
max_prob, lab = torch.max(prob, dim = 1)
print(max_prob)
print(lab)

tensor([0.5991, 1.0000, 0.9908, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.8999,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9584, 0.9987,
        0.7573, 1.0000, 0.9433, 1.0000, 0.9001, 1.0000, 1.0000, 0.8726, 1.0000,
        0.9257, 1.0000, 1.0000, 0.8733, 1.0000, 1.0000, 1.0000, 1.0000, 0.8639,
        1.0000, 1.0000, 0.9689, 1.0000, 1.0000, 0.9980, 0.5962, 0.9932, 1.0000,
        1.0000, 1.0000, 0.9999, 1.0000, 0.9826, 0.9672, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 0.9075, 1.0000, 1.0000, 1.0000, 1.0000, 0.8444,
        1.0000, 1.0000, 1.0000, 0.7693, 1.0000, 1.0000, 0.9998, 1.0000, 0.9953,
        1.0000, 0.8593, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9999, 1.0000,
        1.0000, 1.0000, 1.0000, 0.9999, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000], grad_fn=<MaxBackward0>)
tensor([8, 8, 1, 4, 4, 8, 0, 0, 0, 0, 2, 7, 4, 4, 4, 8, 8, 0, 8, 4, 4, 8, 8, 4,

In [19]:
label

tensor([6, 0, 0, 4, 7, 7, 8, 2, 6, 8, 1, 7, 9, 9, 4, 3, 6, 2, 7, 9, 9, 8, 6, 7,
        0, 0, 9, 2, 6, 0, 4, 5, 7, 3, 3, 7, 1, 7, 7, 1, 7, 8, 7, 6, 8, 9, 8, 1,
        0, 8, 3, 3, 5, 7, 3, 3, 1, 0, 3, 0, 5, 3, 3, 8, 1, 3, 7, 2, 7, 0, 1, 6,
        1, 3, 1, 0, 2, 2, 0, 6, 6, 8, 7, 5, 4, 6, 8, 2, 5, 8, 3, 0, 1, 9, 0, 9,
        0, 7, 2, 1], dtype=torch.uint8)

In [20]:
def accuracy(outputs, labels):
    probs, preds = torch.max(outputs, dim = 1)
    return torch.tensor(torch.sum(preds == labels).item()/len(preds))

In [21]:
acc = accuracy(out, label.long())
print(acc)

tensor(0.1100)


In [22]:
loss_fn = F.cross_entropy

In [23]:
loss = loss_fn(out, label.long())
print(loss)

tensor(78.0122, grad_fn=<NllLossBackward>)


In [24]:
class MNISTModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, classes)
    
    def training_step(self, batch):
        images, labels = batch
        pred = self.linear(images.float())
        loss = F.cross_entropy(out, labels.long())
        return loss
    
    def validation_step(self, batch):
        images, labels = batch
        pred = self.linear(images.float())
        loss = F.cross_entropy(out, labels.long())
        acc = accuracy(out, labels.long())
        return {"loss" : loss, "accuracy" : acc}
    
    def validation_epoch_end(self, outputs):
        losses = [out["loss"] for out in outputs]
        loss = torch.stack(losses).mean()
        accuracies = [out["accuracy"] for out in outputs]
        accuracy = torch.stack(accuracies).mean()
        return {"loss" : loss.item(), "accuracy" : accuracy.item()}
    
    def epoch_end(self, epoch, result):
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['loss'], result['accuracy']))
        

In [25]:
def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase 
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward(retain_graph = True)
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [26]:
model = MNISTModel()
result0 = evaluate(model, val_loader)
result0

{'loss': 86.4020004272461, 'accuracy': 0.10250000655651093}

In [27]:
history1 = fit(5, 0.001, model, train_loader, val_loader)

Epoch [0], val_loss: 86.4020, val_acc: 0.1025
Epoch [1], val_loss: 86.4020, val_acc: 0.1025
Epoch [2], val_loss: 86.4020, val_acc: 0.1025
Epoch [3], val_loss: 86.4020, val_acc: 0.1025
Epoch [4], val_loss: 86.4020, val_acc: 0.1025


In [28]:
history2 = fit(5, 0.0001, model, train_loader, val_loader)

Epoch [0], val_loss: 86.4020, val_acc: 0.1025
Epoch [1], val_loss: 86.4020, val_acc: 0.1025
Epoch [2], val_loss: 86.4020, val_acc: 0.1025
Epoch [3], val_loss: 86.4020, val_acc: 0.1025
Epoch [4], val_loss: 86.4020, val_acc: 0.1025
