In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torch.utils.data
import torchvision.transforms as transforms

In [2]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# with mean 0.1307 and std 0.3081. The statistics are written as one-element
# tuples (note the trailing commas) because Normalize documents per-channel
# sequences; `(0.1307)` without a comma is just a parenthesised float.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

In [3]:
# MNIST training and test splits, stored under (and downloaded into) the
# local 'mnist' directory; both splits share the `transform` pipeline.
trainset = torchvision.datasets.MNIST(
    'mnist',
    train=True,
    download=True,
    transform=transform,
)
testset = torchvision.datasets.MNIST(
    'mnist',
    train=False,
    download=True,
    transform=transform,
)

In [4]:
# Mini-batch loaders for the two splits. Batch sizes to experiment with:
# 32, 64, 128. Only the training loader shuffles; the test loader does not.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,
    num_workers=0,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=128,
    shuffle=False,
    num_workers=0,
)

In [5]:
# Fully connected network whose depth and widths are given by a list of sizes
class Net(nn.Module):
    """Stack of ``nn.Linear`` layers with ReLU between them.

    Args:
        layers: Layer sizes, e.g. ``[784, 100, 10]``. Every consecutive pair
            ``(layers[i], layers[i + 1])`` becomes one ``nn.Linear`` layer,
            so ``len(layers) - 1`` linear layers are created.
    """

    def __init__(self, layers):
        super().__init__()
        # Pair each size with its successor to get (in_features, out_features).
        size_pairs = zip(layers, layers[1:])
        self.hidden = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in size_pairs]
        )

    def forward(self, x):
        """Run ``x`` through every layer; ReLU follows all layers but the last.

        The output of the final linear layer is returned without activation.
        """
        last_index = len(self.hidden) - 1
        for index, layer in enumerate(self.hidden):
            x = layer(x)
            if index != last_index:
                x = F.relu(x)
        return x

In [9]:
# Layer-size configurations to try out, grouped by number of hidden layers.
# Each entry is the list of sizes handed to Net.
try_list = [
    # 2 hidden layers
    [784, 10, 10, 10],
    [784, 100, 10, 10],
    [784, 100, 100, 10],
    # 3 hidden layers
    [784, 10, 10, 10, 10],
    [784, 100, 10, 10, 10],
    [784, 100, 100, 10, 10],
    [784, 100, 100, 100, 10],
    # 4 hidden layers
    [784, 10, 10, 10, 10, 10],
    [784, 100, 10, 10, 10, 10],
    [784, 100, 100, 100, 10, 10],
]

In [12]:
# Train one fresh network per layer configuration and print its test accuracy.
# One line is printed per configuration, in the same order as try_list.
for layers in try_list:
    # The model, Adam optimizer and cross-entropy loss are created inside the
    # loop so that no weights or optimizer state carry over between runs.
    model = Net(layers)
    optimizer = optim.Adam(model.parameters(), lr=3e-4)  # Try lr = .....
    criterion = nn.CrossEntropyLoss()

    for epoch in range(10):
        for data, target in trainloader:
            # Put each image into a vector of 28 * 28 values
            data = data.view(-1, 28 * 28)
            optimizer.zero_grad()
            # Complete a forward pass
            output = model(data)
            # Compute the loss, gradients and change the weights
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

    correct, total = 0, 0
    model.eval()

    # Evaluation needs no gradients; no_grad() stops autograd from recording
    # a graph for every test batch.
    with torch.no_grad():
        for inputs, labels in testloader:
            # Put each image into a vector
            inputs = inputs.view(-1, 28 * 28)

            # Do the forward pass and get the predictions: the predicted class
            # is the index of the largest output along dimension 1.
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # %d truncates the percentage to an integer.
    print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))

The testing set accuracy of the network is: 92 %
The testing set accuracy of the network is: 91 %
The testing set accuracy of the network is: 97 %
The testing set accuracy of the network is: 97 %
The testing set accuracy of the network is: 97 %
The testing set accuracy of the network is: 97 %
The testing set accuracy of the network is: 97 %
The testing set accuracy of the network is: 96 %
The testing set accuracy of the network is: 96 %
The testing set accuracy of the network is: 97 %
