In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms


In [5]:
# hipy parameters

input_dim = 28 * 28
hidden1_dim = 100
hidden2_dim = 100
output_dim = 10
learning_rate = 0.01
num_epochs = 5
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



# Load Data
train_dataset = dsets.MNIST(root='../data/', train=True, download=True, transform=transforms.ToTensor())
valid_dataset = dsets.MNIST(root='../data/', train=False, download=True, transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=100, shuffle=False)




# Build model
class Net(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)
        self.bn1 = nn.BatchNorm1d(H1)
        self.bn2 = nn.BatchNorm1d(H2)
        
    def forward(self, x):
        x = self.bn1(torch.relu(self.linear1(x)))
        x = self.bn2(torch.relu(self.linear2(x)))
        x = self.linear3(x)
        return x
    
    

# Create a model
model = Net(input_dim, hidden1_dim, hidden2_dim, output_dim).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Training
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (x, y) in enumerate(train_loader):
        # to device
        x = x.view(-1, 28 * 28).to(device)
        y = y.to(device)
        # forward pass
        yhat = model(x)
        loss = criterion(yhat, y)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # print loss
        if i % 100 == 0:
            print('Epoch [{}/{}], Iteration [{}/{}], loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Validating
    correct = 0
    total = 0
    for x, y in valid_loader:
        x = x.view(-1, 28 * 28).to(device)
        y = y.to(device)
        z = model(x)
        _, label = torch.max(z.data, 1)
        total += y.size(0)
        correct += (label == y).sum().item()
        accuracy = 100 * correct / total
    print('Accuracy: {:.4f}'.format(accuracy))



Epoch [1/5], Iteration [1/600], loss: 2.3942
Epoch [1/5], Iteration [101/600], loss: 1.0774
Epoch [1/5], Iteration [201/600], loss: 0.5714
Epoch [1/5], Iteration [301/600], loss: 0.4188
Epoch [1/5], Iteration [401/600], loss: 0.4221
Epoch [1/5], Iteration [501/600], loss: 0.2675
Accuracy: 93.3200
Epoch [2/5], Iteration [1/600], loss: 0.2838
Epoch [2/5], Iteration [101/600], loss: 0.2781
Epoch [2/5], Iteration [201/600], loss: 0.3595
Epoch [2/5], Iteration [301/600], loss: 0.1979
Epoch [2/5], Iteration [401/600], loss: 0.2960
Epoch [2/5], Iteration [501/600], loss: 0.1954
Accuracy: 94.9600
Epoch [3/5], Iteration [1/600], loss: 0.2337
Epoch [3/5], Iteration [101/600], loss: 0.1629
Epoch [3/5], Iteration [201/600], loss: 0.1945
Epoch [3/5], Iteration [301/600], loss: 0.2200
Epoch [3/5], Iteration [401/600], loss: 0.2002
Epoch [3/5], Iteration [501/600], loss: 0.1153
Accuracy: 95.9000
Epoch [4/5], Iteration [1/600], loss: 0.1368
Epoch [4/5], Iteration [101/600], loss: 0.1430
Epoch [4/5], I