<a href="https://colab.research.google.com/github/Muhdhammad/deep-learning/blob/main/ann-mnist-classification/ann.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

#### A fully connected neural network


In [None]:
class NN(nn.Module):
  """Fully connected classifier with a single ReLU hidden layer.

  Args:
    input_size: Number of features in each (already flattened) input sample.
    num_classes: Number of output classes, i.e. the size of the score vector.
    hidden_size: Width of the hidden layer. Defaults to 100, the value that
      used to be hard-coded, so existing ``NN(input_size, num_classes)``
      calls build exactly the same architecture.
  """

  def __init__(self, input_size, num_classes, hidden_size=100):
    super().__init__()
    self.fc1 = nn.Linear(input_size, hidden_size)     # hidden layer
    self.fc2 = nn.Linear(hidden_size, num_classes)    # output layer

  def forward(self, x):
    """Return the raw class scores (logits) for a batch of flattened inputs.

    Args:
      x: Tensor whose last dimension equals ``input_size``.

    Returns:
      Tensor with last dimension ``num_classes``. No softmax is applied
      here; the output of the second linear layer is returned as-is.
    """
    x = F.relu(self.fc1(x))
    return self.fc2(x)

In [None]:
# Smoke test: push a random batch of 64 flattened samples through a freshly
# built network and show the shape of the scores that come out.
model = NN(input_size=784, num_classes=10)
x = torch.randn(64, 784)
print(model(x).size())

torch.Size([64, 10])


In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')


#### Hyperparameters

In [None]:
input_size = 784        # features per sample after flattening (28 x 28 MNIST image)
num_classes = 10        # size of the model's output score vector
learning_rate = 0.001   # step size handed to the optimizer
batch_size = 64         # samples per DataLoader batch
num_epochs = 2          # full passes over the training set

# Seed PyTorch's global RNG so that weight initialisation and DataLoader
# shuffling are repeatable across "Restart & Run All".
seed = 42
torch.manual_seed(seed)

#### Loading the Data

In [None]:
# MNIST training split, downloaded into ./data on first use and converted
# to tensors by the ToTensor transform.
train_dataset = datasets.MNIST(
  root='./data',
  train=True,
  download=True,
  transform=transforms.ToTensor(),
)

# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(
  dataset=train_dataset,
  shuffle=True,
  batch_size=batch_size,
)

In [None]:
# MNIST test split (train=False), downloaded into ./data on first use.
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

# Accuracy does not depend on sample order, so the evaluation loader is left
# unshuffled: batches come out in the same order on every run.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

#### Initializing the network

In [None]:
# Build the network and move its parameters to the selected device. The
# training and evaluation loops move every batch to `device`, so a model left
# on the CPU fails with a device-mismatch error whenever CUDA is available.
model = NN(input_size=input_size, num_classes=num_classes).to(device)

#### Loss and Optimizer

Loss function:
- ```criterion = nn.CrossEntropyLoss()``` → Define the loss function for multi-class classification, combining softmax and cross-entropy.

Optimizer:
- ```optimizer = optim.Adam(model.parameters(), lr=learning_rate)``` → Initialize the Adam optimizer to update the model's parameters based on the computed gradients, with a specified learning rate.

In [None]:
# Adam updates every parameter registered on the model.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Cross-entropy loss computed from the raw scores and integer class targets.
criterion = nn.CrossEntropyLoss()

#### Training the network

1. Forward pass: ```scores = model(data)``` → Compute the output.

2. Loss computation:  ```loss = criterion(scores, target)``` → Compare the output to the target and compute the loss.

3. Backward pass:
 - ```optimizer.zero_grad()``` → Clear the previous gradients before the new backward pass.

 - ```loss.backward()``` → Compute the gradients of the loss with respect to the model parameters.

4. Optimization step: ```optimizer.step()``` → Update the model parameters using the gradients.

In [None]:
# Put the model in training mode explicitly so this cell behaves the same
# regardless of the mode an earlier cell left the model in.
model.train()

for epoch in range(num_epochs):
  for data, target in train_loader:

    # Move the batch to the selected device (CUDA if available)
    data = data.to(device=device)
    target = target.to(device=device)

    # Flatten every sample to one row: (batch, ...) -> (batch, features)
    data = data.reshape(data.shape[0], -1)

    # Forward pass
    scores = model(data)

    # Compute the loss
    loss = criterion(scores, target)

    # Backward pass: clear gradients left over from the previous step,
    # then backpropagate the new loss
    optimizer.zero_grad()
    loss.backward()

    # Update the parameters from the freshly computed gradients
    optimizer.step()

#### Checking accuracy on the training and test sets to evaluate model performance

-  ```scores = model(data)``` → Compute the model's output predictions (logits) for the input data.

- ```_, predictions = scores.max(1)``` → Get the index of the class with the highest score for each image (predicted class).

- ```correct += (predictions == target).sum()``` → Count the number of correct predictions by comparing predictions to the true labels.

- ```total_samples += predictions.size(0)```  → Keep track of the total number of samples processed.


In [None]:
def check_accuracy(loader, model, device=None):
  """Print how many samples in `loader` the model classifies correctly.

  The model is switched to evaluation mode for the duration of the check and
  is put back into whatever mode (train or eval) it was in on entry.

  Args:
    loader: Iterable of ``(data, target)`` batches. If ``loader.dataset`` has
      a boolean ``train`` attribute it is used to label the printed report.
    model: The ``nn.Module`` to evaluate. Each batch is flattened to
      ``(batch, -1)`` before being passed to it.
    device: Device the batches are moved to. Defaults to the device of the
      model's first parameter (CPU if the model has no parameters), so the
      inputs always land where the model lives.

  Returns:
    None. The split label, ``correct/total`` and the accuracy percentage are
    printed.
  """
  if device is None:
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

  # Datasets without a `train` flag get a neutral label instead of raising
  # AttributeError.
  is_train = getattr(loader.dataset, 'train', None)
  if is_train is None:
    print('Checking accuracy on data')
  elif is_train:
    print('Checking accuracy on training data')
  else:
    print('Checking accuracy on test data')

  correct = 0
  total_samples = 0

  was_training = model.training   # remember the caller's mode
  model.eval()                    # Set model in evaluation mode

  try:
    with torch.no_grad():  # No need to calculate gradient
      for data, target in loader:
        data = data.to(device)
        target = target.to(device)

        data = data.reshape(data.shape[0], -1)

        scores = model(data)
        _, predictions = scores.max(1)
        # .item() keeps the running count a plain Python int
        correct += (predictions == target).sum().item()
        total_samples += predictions.size(0)
  finally:
    # Restore the original mode even if evaluation raised
    model.train(was_training)

  # An empty loader reports 0.00% instead of dividing by zero
  accuracy = (correct / total_samples) * 100 if total_samples else 0.0
  print(f'{correct}/{total_samples}')
  print(f'The accuracy of model is {accuracy:.2f}%')

# Report accuracy on the training split first, then on the held-out test split.
for loader in (train_loader, test_loader):
  check_accuracy(loader, model)

Checking accuracy on training data
57906/60000
The accuracy of model is 96.51%
Checking accuracy on test data
9612/10000
The accuracy of model is 96.12%
