In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
torch.manual_seed(189898) # Last 6 digits of my A# without the leading zeros

cuda


<torch._C.Generator at 0x26fdb704170>

In [2]:
# Check your Current Working Directory
!pwd

'pwd' is not recognized as an internal or external command,
operable program or batch file.


In [3]:
#Set Batch Size
batch_size = 20

# Download the MNIST dataset to local drive. A new folder "data" will be created in teh current directory to store data
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

# Use a data loader to shuffle and batch
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [6]:
# Hyper-parameters
# Network Architecture
input_size = 784
num_classes = 10

# Training Parameters
num_epochs = 10

# Fully connected neural netowrk with two hidden layers
class NeuralNet(nn.Module):
    def __init__(self, input_size, h1, h2, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.relu = nn.ReLU()
        self.fc3 = nn.Linear(h2, num_classes)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out
    
# Define the Loss Function and Optimizer
criterion = torch.nn.CrossEntropyLoss()

In [8]:
# Train the model
learning_rates = [0.05, 0.1]
hidden_sizes = [[1568, 1568], [3136, 1568], [1568, 784]] # These values should all produce 97+% accuracy

for hidden_size in hidden_sizes:
    for learning_rate in learning_rates:
        total_step = len(train_loader)
        # Define the model object and the optimizer
        model = NeuralNet(input_size, hidden_size[0], hidden_size[1], num_classes).to(device)
        optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
        for epoch in range(num_epochs):
            for i, (images, labels) in enumerate(train_loader):
                # Move tensors to the configured device
                images = images.reshape(-1, 28*28).to(device)
                labels = labels.to(device)

                # Forward pass
                outputs = model.forward(images)
                loss = criterion(outputs, labels)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Print progress every 100 steps
                if (i+1) % 100 == 0:
                    print(f'Epoch[{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {round(float(loss.item()), 4)}')

    # Test the model once you finish training
        with torch.no_grad(): # In test phase we don't need to compute gradients (for memory efficiency)
            correct = 0
            total = 0
            for images, labels in test_loader:
                images = images.reshape(-1, 28*28).to(device)
                labels = labels.to(device)

                # get network outputs
                outputs = model.forward(images)
                throwaway, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            
            print(f"Accuracy of the network no the 10,000 test images: {(100 * correct / total)}%, with learning rate: {learning_rate}, and {hidden_size} hidden neurons")
            print("\n\n")


Epoch[1/10], Step [100/3000], Loss: 0.6115
Epoch[1/10], Step [200/3000], Loss: 0.2294
Epoch[1/10], Step [300/3000], Loss: 0.8505
Epoch[1/10], Step [400/3000], Loss: 0.8356
Epoch[1/10], Step [500/3000], Loss: 0.2663
Epoch[1/10], Step [600/3000], Loss: 0.9179
Epoch[1/10], Step [700/3000], Loss: 0.229
Epoch[1/10], Step [800/3000], Loss: 0.1723
Epoch[1/10], Step [900/3000], Loss: 0.4879
Epoch[1/10], Step [1000/3000], Loss: 0.2801
Epoch[1/10], Step [1100/3000], Loss: 0.0731
Epoch[1/10], Step [1200/3000], Loss: 0.9669
Epoch[1/10], Step [1300/3000], Loss: 0.1547
Epoch[1/10], Step [1400/3000], Loss: 0.0771
Epoch[1/10], Step [1500/3000], Loss: 0.5352
Epoch[1/10], Step [1600/3000], Loss: 0.4816
Epoch[1/10], Step [1700/3000], Loss: 0.34
Epoch[1/10], Step [1800/3000], Loss: 0.6817
Epoch[1/10], Step [1900/3000], Loss: 0.4901
Epoch[1/10], Step [2000/3000], Loss: 0.4228
Epoch[1/10], Step [2100/3000], Loss: 0.2214
Epoch[1/10], Step [2200/3000], Loss: 0.0388
Epoch[1/10], Step [2300/3000], Loss: 0.3663


In [None]:
# originally tested a range from 0.001 to 10 with order of magnitude increases for lr, 1578*1578, 784*1568, and 1568*784 hidden layer sizes with batch size of 20
# 98.16%, 0.1, [1568,784]
# 96.57%, 0.01, [1568,784]
# 91.27%, 0.001, [1568,784]
# 97.62%, 0.1, [784, 1568]
# 96.53%, 0.01, [784, 1568]
# 91.20%, 0.001, [784, 1568]
# 98.09%, 0.1, [1568, 1568]
# 96.89%, 0.01, [1568, 1568]
# 91.24%, 0.001, [1568, 1568]


In [None]:
# Save the model checkpoint for future use
torch.save(model.state_dict(), 'model.ckpt')