In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets 
import torchvision.transforms as transforms

In [2]:
# Hyperparameters shared by the model definition and the training loop.
input_size = 28 * 28    # Input features: one per pixel of a flattened 28x28 image.
hidden_size = 400       # Width of each hidden layer.
out_size = 10           # One output score per digit class (0-9).
epochs = 10             # Full passes over the training dataset.
batch_size = 100        # Samples per mini-batch.
learning_rate = 1e-3    # Step size handed to the optimizer.

In [3]:
# MNIST train and test splits, both stored under ./data.
# Each image is passed through ToTensor() so samples arrive as PyTorch tensors.

# Training split; download=True fetches the files when they are not already
# present in the root directory.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Test split: same root and transform, selected with train=False.
# No download is requested here.
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|███████████████████████████████████████████████████████████████████| 9912422/9912422 [00:01<00:00, 8910934.46it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████████████████████████████████████████████████████████████████| 28881/28881 [00:00<00:00, 28773323.95it/s]

Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz



100%|███████████████████████████████████████████████████████████████████| 1648877/1648877 [00:00<00:00, 7410402.37it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|█████████████████████████████████████████████████████████████████████████| 4542/4542 [00:00<00:00, 4598244.94it/s]


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw



In [4]:
# Mini-batch iterators over the two MNIST splits.

# Training batches are reshuffled every epoch so the model does not see the
# samples in the same order on each pass.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Evaluation does not depend on sample order, so the test split is read sequentially.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

![nn mnist](https://user-images.githubusercontent.com/30661597/61593615-5eb8bf00-ac14-11e9-8087-f880971b3543.png)


In [5]:
# Define the neural network architecture for MNIST classification:

class Net(nn.Module):
    """Fully connected classifier: two ReLU-activated hidden layers and a linear output layer.

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Number of units in each of the two hidden layers.
        out_size: Number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, out_size):
        super().__init__()
        # Layers are created in the same order forward() applies them.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, out_size)
        self.relu = nn.ReLU()

        self.init_weights()

    def init_weights(self):
        """Re-initialise the fc1 and fc2 weight matrices with Kaiming-normal values.

        fc3 and every bias vector keep whatever nn.Linear assigned at construction.
        """
        for layer in (self.fc1, self.fc2):
            nn.init.kaiming_normal_(layer.weight)

    def forward(self, x):
        """Return the raw fc3 outputs for ``x``; no softmax is applied here."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [6]:
# Build the model, then the loss function and optimizer used to train it.

net = Net(input_size, hidden_size, out_size)

# CUDA records whether a GPU is usable; the model is moved there only when it is.
# Later cells reuse this flag to move each batch to the same device.
CUDA = torch.cuda.is_available()
net = net.cuda() if CUDA else net

# CrossEntropyLoss applies log-softmax itself, so it is fed the network's raw
# output scores and no separate Softmax layer is needed.
criterion = nn.CrossEntropyLoss()

# Adam over every parameter of the network, using the configured learning rate.
optimizer = torch.optim.Adam(params=net.parameters(), lr=learning_rate)

In [7]:
# Training loop: one full pass over train_loader per epoch, then a one-line
# report of the mean per-batch loss and the training accuracy for that epoch.

# Select training mode explicitly. Net contains only Linear and ReLU layers, so
# this changes nothing today, but it keeps the cell correct if it is re-run
# after an evaluation pass or if mode-dependent layers are added later.
net.train()

for epoch in range(epochs):
    correct_train = 0    # Correct predictions this epoch, kept as a plain Python int.
    running_loss = 0.0   # Sum of the per-batch loss values this epoch.

    for images, labels in train_loader:
        # Flatten each (1, 28, 28) image into a 784-element row so the batch
        # has the (batch, 784) layout the first linear layer expects.
        images = images.view(-1, 28*28)

        # Move the batch to the GPU when the model lives there.
        if CUDA:
            images = images.cuda()
            labels = labels.cuda()

        outputs = net(images)              # Forward pass: raw class scores.
        loss = criterion(outputs, labels)  # Loss between the scores and the true labels.

        optimizer.zero_grad()              # Clear gradients left over from the previous batch.
        loss.backward()                    # Backpropagation.
        optimizer.step()                   # Update the weights.

        running_loss += loss.item()

        # `outputs` was computed before the weight update, so these are the
        # predictions the loss above was measured on. argmax returns integer
        # class indices, so no autograd history is involved and `.data` is
        # not needed.
        predicted = outputs.argmax(dim=1)
        # .item() turns the count into a Python int, matching the evaluation cell.
        correct_train += (predicted == labels).sum().item()

    print('Epoch [{}/{}], Training Loss: {:.3f}, Training Accuracy: {:.3f}%'.format
          (epoch+1, epochs, running_loss/len(train_loader), (100*correct_train/len(train_dataset))))
print("DONE TRAINING!")

Epoch [1/10], Training Loss: 0.236, Training Accuracy: 93.037%
Epoch [2/10], Training Loss: 0.086, Training Accuracy: 97.350%
Epoch [3/10], Training Loss: 0.054, Training Accuracy: 98.293%
Epoch [4/10], Training Loss: 0.039, Training Accuracy: 98.730%
Epoch [5/10], Training Loss: 0.029, Training Accuracy: 99.042%
Epoch [6/10], Training Loss: 0.021, Training Accuracy: 99.283%
Epoch [7/10], Training Loss: 0.021, Training Accuracy: 99.292%
Epoch [8/10], Training Loss: 0.016, Training Accuracy: 99.463%
Epoch [9/10], Training Loss: 0.012, Training Accuracy: 99.598%
Epoch [10/10], Training Loss: 0.017, Training Accuracy: 99.425%
DONE TRAINING!


In [8]:
# Evaluate the trained network on the test dataset and report its accuracy.

# Switch to evaluation mode. Net contains only Linear and ReLU layers, so this
# changes nothing today, but it is required as soon as mode-dependent layers
# (e.g. dropout or batch norm) are added. The model stays in eval mode afterwards.
net.eval()

# no_grad() disables gradient tracking: no backward pass happens here.
with torch.no_grad():
    correct = 0  # Correct predictions, accumulated as a plain Python int.

    for images, labels in test_loader:
        # Move the batch to the GPU when the model lives there.
        if CUDA:
            images = images.cuda()
            labels = labels.cuda()

        # Flatten each image to a 784-element row to match the network input.
        images = images.view(-1, 28*28)

        # Predicted class = index of the largest output score in each row.
        predicted = net(images).argmax(dim=1)

        correct += (predicted == labels).sum().item()

# The image count in the message is read from the dataset instead of being
# hard-coded, so the text stays consistent with the denominator of the accuracy.
print('Accuracy of the network on the {} test images: {} %'.format(
    len(test_dataset), 100 * correct / len(test_dataset)))

Accuracy of the network on the 10000 test images: 97.83 %
