In [1]:
from urllib import request
import gzip
import pickle
import numpy as np

# Table of [dict key, gzip archive name] pairs.
# The two image archives come first and the two label archives last;
# save_mnist() slices the table on that assumption.
filename = [
    ["training_images", "train-images-idx3-ubyte.gz"],
    ["test_images", "t10k-images-idx3-ubyte.gz"],
    ["training_labels", "train-labels-idx1-ubyte.gz"],
    ["test_labels", "t10k-labels-idx1-ubyte.gz"],
]

def download_mnist():
    """Fetch every archive listed in ``filename`` from the MNIST base URL.

    Each archive is saved under its own name (a relative path, so it lands
    in the current working directory) and a progress line is printed per
    file, followed by a completion message.
    """
    base_url = "http://yann.lecun.com/exdb/mnist/"
    for entry in filename:
        archive = entry[1]
        print("Downloading "+archive+"...")
        request.urlretrieve(base_url+archive, archive)
    print("Download complete.")

def save_mnist():
    """Decode the downloaded archives into uint8 arrays and pickle them.

    The first two entries of ``filename`` are treated as image archives:
    16 leading bytes are skipped and the rest is reshaped into rows of
    28*28 values. The last two entries are treated as label archives:
    8 leading bytes are skipped and the rest stays a flat array. All four
    arrays are stored in one dict (keyed by each entry's first element)
    that is written to ``mnist.pkl``.
    """
    def _read_payload(archive, header_bytes):
        # Decompress the whole archive and view everything after the header as uint8.
        with gzip.open(archive, 'rb') as fh:
            return np.frombuffer(fh.read(), np.uint8, offset=header_bytes)

    mnist = {}
    for key, archive in filename[:2]:
        mnist[key] = _read_payload(archive, 16).reshape(-1, 28*28)
    for key, archive in filename[-2:]:
        mnist[key] = _read_payload(archive, 8)

    with open("mnist.pkl", 'wb') as out:
        pickle.dump(mnist, out)
    print("Save complete.")

def init():
    """Download the MNIST archives, then convert them into ``mnist.pkl``."""
    # Order matters: the conversion step reads the files the download step writes.
    for step in (download_mnist, save_mnist):
        step()

def load():
    """Read ``mnist.pkl`` and return its four arrays.

    Returns:
        A tuple ``(training_images, training_labels, test_images, test_labels)``
        taken from the pickled dict by those keys.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only load a mnist.pkl that you produced yourself.
    with open("mnist.pkl", 'rb') as fh:
        stored = pickle.load(fh)
    wanted = ("training_images", "training_labels", "test_images", "test_labels")
    return tuple(stored[key] for key in wanted)

# Run the download + conversion when this code is executed directly
# (as a script, or as a notebook cell where __name__ is also '__main__');
# importing it as a module skips this step.
if __name__ == '__main__':
    init()


Downloading train-images-idx3-ubyte.gz...
Downloading t10k-images-idx3-ubyte.gz...
Downloading train-labels-idx1-ubyte.gz...
Downloading t10k-labels-idx1-ubyte.gz...
Download complete.
Save complete.


In [2]:
import math
import torch
import numpy as np  
from download_mnist import load
import operator  
import time
device = torch.device('cpu')
x_train, y_train, x_test, y_test = load()

# Keep one flattened image per row, i.e. shape (n_samples, 784).
# torch.nn.Linear(784, ...) expects the feature axis last, so the previous
# (n, 784, 1) layout could not be fed to the model. Sample counts are taken
# from the data rather than hard-coded as 60000 / 10000.
# float32 matches the default dtype of torch.nn.Linear parameters; plain
# `float` is float64 and triggers a dtype-mismatch error in the forward pass.
x_train = x_train.reshape(len(x_train), -1).astype(np.float32)
x_test = x_test.reshape(len(x_test), -1).astype(np.float32)


In [3]:
# Network dimensions.
N = 128      # presumably a mini-batch size; not referenced by the other cells shown here
D_in = 784   # input features per image (28*28 pixels)
h1 = 200     # width of the first hidden layer
h2 = 50      # width of the second hidden layer
D_out = 10   # number of network outputs

In [4]:
# Stand-alone weight matrices drawn from a standard normal distribution,
# one per layer transition (input->h1, h1->h2, h2->output).
# NOTE(review): the torch.nn.Sequential model defined below owns its own
# parameters, so these tensors are not used by the cells shown here.
W1, W2, W3 = (
    torch.randn(n_in, n_out, device=device)
    for n_in, n_out in ((D_in, h1), (h1, h2), (h2, D_out))
)

In [5]:
# Convert the training data to tensors with explicit dtypes.
# - float32 for the images: torch.tensor() would keep NumPy's float64, which
#   does not match the float32 weights of the torch.nn.Linear layers.
# - int64 for the labels: the dtype PyTorch expects for class indices
#   (e.g. for one-hot encoding or classification losses).
# torch.as_tensor also makes the cell safe to re-run: calling torch.tensor()
# on a value that is already a tensor emits a copy-construct warning.
x_train = torch.as_tensor(x_train, dtype=torch.float32, device=device)
y_train = torch.as_tensor(y_train, dtype=torch.long, device=device)

In [6]:
# Step size applied to every parameter in the manual gradient-descent update.
learning_rate = 1e-2

In [7]:
# Fully connected classifier: D_in -> h1 -> h2 -> D_out with ReLU between
# the linear layers and a softmax over the output scores.
model = torch.nn.Sequential(
            torch.nn.Linear(D_in,h1),
            torch.nn.ReLU(),
            torch.nn.Linear(h1,h2),
            torch.nn.ReLU(),
            torch.nn.Linear(h2,D_out),
            # Normalise over the last axis (the D_out scores of each sample).
            # Softmax() without `dim` relies on deprecated implicit dimension
            # selection and emits a warning on every forward pass.
            torch.nn.Softmax(dim=-1)).to(device)

In [8]:
# Squared-error loss, summed (not averaged) over every element of the batch.
_mse_options = {"reduction": 'sum'}
loss_fn = torch.nn.MSELoss(**_mse_options)

In [None]:
# Prepare the full-batch inputs and targets once, outside the loop.
# - Inputs: one flattened float32 row per image, so the layout and dtype match
#   what the torch.nn.Linear layers of `model` accept.
# - Targets: MSELoss compares tensors element-wise, so the integer class
#   labels are one-hot encoded to the same (n_samples, D_out) shape as the
#   model output.
inputs = torch.as_tensor(x_train, dtype=torch.float32, device=device)
inputs = inputs.reshape(inputs.shape[0], -1)
targets = torch.nn.functional.one_hot(
    torch.as_tensor(y_train, device=device).long(), num_classes=D_out
).to(inputs.dtype)

# Ten steps of plain full-batch gradient descent.
for t in range(10):
    y_pred = model(inputs)
    loss = loss_fn(y_pred, targets)
    print(t, loss.item())
    # Clear gradients left over from the previous step before back-propagating.
    model.zero_grad()
    loss.backward()
    # Update the weights in place without recording the update in autograd.
    with torch.no_grad():
        for param in model.parameters():  # was misspelled `partameters` (AttributeError)
            param -= learning_rate * param.grad