In [1]:
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn as nn

In [2]:
# Both splits must go through the *same* preprocessing: the network is fitted
# to inputs scaled with these statistics, so the test split reuses them rather
# than being normalised with its own, separately chosen mean/std.
MNIST_MEAN = [0.131]
MNIST_STD = [0.308]
mnist_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(MNIST_MEAN, MNIST_STD)])

# download=True fetches the files only when they are missing under `root`,
# so the cell also works on a machine that has never run the notebook.
train_data = datasets.MNIST(root = './data/', train = True,
                            download = True,
                            transform = mnist_transform)
test_data = datasets.MNIST(root = './data/', train = False,
                           download = True,
                           transform = mnist_transform)

In [3]:
# Number of samples per mini-batch, shared by both loaders.
batch_size = 64

# Training batches are drawn in shuffled order.
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True
)
# The test loader keeps the DataLoader default ordering (no shuffle requested).
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

In [4]:
class Net(nn.Module):
    """Three-layer fully connected network with ReLU on the two hidden layers.

    Args:
        D_in: number of input features per sample.
        H_1: width of the first hidden layer.
        H_2: width of the second hidden layer.
        D_out: number of output units.
    """

    def __init__(self, D_in, H_1, H_2, D_out):
        super().__init__()
        # Attribute names are kept as-is: they become the state_dict keys.
        self.Linear1 = nn.Linear(D_in, H_1)
        self.Linear2 = nn.Linear(H_1, H_2)
        self.Linear3 = nn.Linear(H_2, D_out)

    def forward(self, x):
        """Return the output-layer activations for ``x``.

        ``x`` must have ``D_in`` features in its last dimension. The final
        layer is linear (no activation is applied to its output).
        """
        activations = x
        for hidden_layer in (self.Linear1, self.Linear2):
            activations = F.relu(hidden_layer(activations))
        return self.Linear3(activations)

In [5]:
# 784 input features -> 196 -> 49 hidden units -> 10 outputs.
model = Net(D_in=784, H_1=196, H_2=49, D_out=10)
# Move the parameters to the GPU when one is available; the optimizer is
# created afterwards so it references the parameters on their final device.
if torch.cuda.is_available():
    model = model.cuda()

crit = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [6]:
def train(epoch):
    """Run one pass over ``train_loader`` and update ``model`` in place.

    Relies on the notebook-level ``model``, ``crit``, ``optimizer`` and
    ``train_loader``. The loss of every 100th batch is printed.

    Args:
        epoch: epoch index; used only in the progress message.
    """
    # Be explicit about the mode so training still behaves correctly if the
    # model was switched to eval mode elsewhere.
    model.train()
    # CUDA availability does not change between batches, so query it once.
    use_cuda = torch.cuda.is_available()

    for batch_id, (data, y) in enumerate(train_loader):
        # Flatten every sample to a 1-D feature vector for the linear layers.
        data = data.view(data.shape[0], -1)
        if use_cuda:
            data = data.cuda()
            y = y.cuda()
        y_pred = model(data)
        loss = crit(y_pred, y)
        if batch_id % 100 == 0:
            print('Epoch: {}, Batch: {}, Loss:{}'.format(epoch, batch_id, loss.item()))

        # Clear gradients left from the previous step before back-propagating.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [7]:
def test():
    """Print the classification accuracy of ``model`` on ``test_loader``.

    Relies on the notebook-level ``model`` and ``test_loader``. The printed
    value is the number of correct predictions divided by
    ``len(test_loader.dataset)``; ``0.0`` is printed for an empty dataset.
    """
    use_cuda = torch.cuda.is_available()
    was_training = model.training
    model.eval()
    correct = 0
    try:
        # Evaluation needs no gradients; skipping graph construction saves
        # memory and time.
        with torch.no_grad():
            for data, y in test_loader:
                if use_cuda:
                    data = data.cuda()
                    y = y.cuda()
                # Flatten every sample before feeding the linear layers.
                prob = model(data.view(data.shape[0], -1))
                y_pred = prob.argmax(dim=1)
                # Accumulate a plain int so an empty loader cannot break
                # the final division.
                correct += (y_pred == y).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    total = len(test_loader.dataset)
    print (correct / total if total else 0.0)

In [8]:
# Total number of passes over the training set.
NUM_EPOCH = 10

for epoch in range(NUM_EPOCH):
    train(epoch)

Epoch: 0, Batch: 0, Loss:2.3138883113861084
Epoch: 0, Batch: 100, Loss:2.020395040512085
Epoch: 0, Batch: 200, Loss:1.1703168153762817
Epoch: 0, Batch: 300, Loss:0.7072461247444153
Epoch: 0, Batch: 400, Loss:0.6283529996871948
Epoch: 0, Batch: 500, Loss:0.5561915040016174
Epoch: 0, Batch: 600, Loss:0.49006593227386475
Epoch: 0, Batch: 700, Loss:0.46359163522720337
Epoch: 0, Batch: 800, Loss:0.2847071588039398
Epoch: 0, Batch: 900, Loss:0.3571622669696808
Epoch: 1, Batch: 0, Loss:0.36635226011276245
Epoch: 1, Batch: 100, Loss:0.3533521294593811
Epoch: 1, Batch: 200, Loss:0.4699513912200928
Epoch: 1, Batch: 300, Loss:0.4315120577812195
Epoch: 1, Batch: 400, Loss:0.5315122008323669
Epoch: 1, Batch: 500, Loss:0.41508474946022034
Epoch: 1, Batch: 600, Loss:0.36075714230537415
Epoch: 1, Batch: 700, Loss:0.16152268648147583
Epoch: 1, Batch: 800, Loss:0.2698308527469635
Epoch: 1, Batch: 900, Loss:0.2778359055519104
Epoch: 2, Batch: 0, Loss:0.21237421035766602
Epoch: 2, Batch: 100, Loss:0.25673

In [9]:
# Evaluate the trained model on the held-out test split.
test()

0.964
