In [1]:
import numpy as np # linear algebra
import struct
from array import array
from os.path  import join

#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath
    
    def read_images_labels(self, images_filepath, labels_filepath):        
        labels = []
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())        
        
        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())        
        images = []
        for i in range(size):
            images.append([0] * rows * cols)
        for i in range(size):
            img = np.array(image_data[i * rows * cols:(i + 1) * rows * cols])
            img = img.reshape(28, 28)
            images[i][:] = img            
        
        return images, labels
            
    def load_data(self):
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train),(x_test, y_test)  
(x_train_raw, y_train_raw),(x_test_raw, y_test_raw) = MnistDataloader(
    "data/mnist/train-images.idx3-ubyte",
    "data/mnist/train-labels.idx1-ubyte",
    "data/mnist/t10k-images.idx3-ubyte",
    "data/mnist/t10k-labels.idx1-ubyte"
    ).load_data()


In [2]:
import torch.nn.functional as F

In [3]:
import torch
from torch import nn
from torch.autograd import Variable
from torch.distributions import Categorical

In [4]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# model = nn.Sequential(
#     nn.Conv2d(1, 2, 3),
#     nn.MaxPool2d(2, 2),
#     nn.Flatten(1),
#     nn.ReLU(),
#     nn.Linear(338, 10),
#     nn.ReLU(),
#     nn.Linear(10, 10),
#     nn.Softmax(),
# ).to(device)

model = nn.Sequential(
    nn.Conv2d(1, 6, 3),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(6, 16, 3),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Flatten(1),
    nn.Linear(400, 10),
    nn.Softmax(),
).to(device)

# model = nn.Sequential(
#     nn.Conv2d(1, 2, 5),
#     nn.ReLU(),
#     nn.MaxPool2d(2, 2),
#     nn.Conv2d(2, 10, 5),
#     nn.ReLU(),
#     nn.MaxPool2d(2, 2),
#     nn.Flatten(1),
#     nn.Linear(160, 40),
#     nn.ReLU(),
#     nn.Linear(40, 10),
#     nn.Softmax(),
# ).to(device)

# er_f = nn.HuberLoss()
er_f = nn.CrossEntropyLoss()
optim = torch.optim.SGD(model.parameters(), lr=0.01, momentum=.9)

In [6]:
train_data = torch.tensor(x_train_raw, dtype=torch.float, device=device, requires_grad=True).unsqueeze(1)

  train_data = torch.tensor(x_train_raw, dtype=torch.float, device=device, requires_grad=True).unsqueeze(1)


In [7]:
train_labels = torch.zeros(train_data.shape[0], 10, requires_grad=False)
for i, y in zip(range(train_data.shape[0]), y_train_raw):
    train_labels[i, int(y)] = 1
train_labels = train_labels.to(device)
# train_labels = torch.tensor(y_train_raw, dtype=torch.float, device=device, requires_grad=False)

In [8]:
total_epochs = 0

In [14]:
optim = torch.optim.SGD(model.parameters(), lr=0.01, momentum=.9)

In [18]:
for epoch in range(500):
    optim.zero_grad()
    outs = model(train_data)
    # outs = outs.max(1).indices
    # predicted = Categorical(outs).sample(1).item()
    loss = er_f(outs, train_labels)
    print("outs", outs.shape, "labels", train_labels.shape)
    loss.backward()
    optim.step()
    total_epochs += 1
    writer.add_scalar("loss", loss, total_epochs)

  return self._call_impl(*args, **kwargs)


outs torch.Size([60000, 10]) labels torch.Size([60000, 10])
outs torch.Size([60000, 10]) labels torch.Size([60000, 10])


KeyboardInterrupt: 

In [12]:
test_data = torch.tensor(x_test_raw, dtype=torch.float, device=device, requires_grad=False).unsqueeze(1)
test_labels = torch.tensor(y_test_raw, dtype=torch.int, device=device, requires_grad=False)

In [55]:
predicted = model(test_data).max(1).indices

In [56]:
(predicted == test_labels).sum()

tensor(8425, device='cuda:0')

In [57]:
torch.save(model, "model90.pt")