<a href="https://colab.research.google.com/github/AyubQuadri/Assignment/blob/main/TASK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Save the model files
from google.colab import drive
drive.mount('/content/gdrive')
PATH = '/content/gdrive/My Drive/Colab Notebooks/chkpt_training_bn/'
# Define the neural network with 3 Linear layers and SiLU activation
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.flatten = nn.Flatten()
        self.silu = nn.SiLU()
        self.fc1 = nn.Linear(28*28, 512 ,bias=False)  # Input layer (flattened 28x28 images), 512 neurons
        self.fc2 = nn.Linear(512, 256, bias=False)     # Hidden layer with 256 neurons
        self.fc3 = nn.Linear(256, 10, bias=False)      # Output layer (10 classes for digits 0-9)

    def forward(self, x):
        x = self.flatten(x)  # Flatten the input image
        x = self.silu(self.fc1(x))
        x = self.silu(self.fc2(x))
        x = self.fc3(x)        # Output without activation for classification
        return x

class MLP_BN(nn.Module):
    def __init__(self):
        super(MLP_BN, self).__init__()
        self.flatten = nn.Flatten()
        self.silu = nn.SiLU()
        self.fc1 = nn.Linear(28*28, 512 ,bias=False)  # Input layer (flattened 28x28 images), 512 neurons
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256, bias=False)     # Hidden layer with 256 neurons
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 10, bias=False)      # Output layer (10 classes for digits 0-9)

    def forward(self, x):
        x = self.flatten(x)  # Flatten the input image
        x = self.silu(self.bn1(self.fc1(x)))
        x = self.silu(self.bn2(self.fc2(x)))
        x = self.fc3(x)        # Output without activation for classification
        return x

# Define transformations for the dataset
transform = transforms.Compose([
    transforms.RandomRotation(10),
    transforms.RandomAffine(0, translate=(0.1,0.1)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # Normalize with mean and std of MNIST
])

# Load the MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Initialize model, loss function, and optimizer
# model = MLP().to(device)
model = MLP_BN.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train(model, device, train_loader, optimizer, criterion, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx*len(data)}/{len(train_loader.dataset)}] Loss: {loss.item():.6f}')

# Test function
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()  # Sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n')

# Save checkpoint function
def save_checkpoint(epoch, model, optimizer, path):
  torch.save({
      'epoch': epoch,
      'model_state_dict': model.state_dict(),
      'optimizer_state_dict': optimizer.state_dict()
  }, path)
# Main training and testing loop
for epoch in range(1, 11):  # Train for 5 epochs
    train(model, device, train_loader, optimizer, criterion, epoch)
    test(model, device, test_loader)
    checkpoint_path= PATH + f'checkpoint_epoch_{epoch}.pth'
    save_checkpoint(epoch, model, optimizer, checkpoint_path)


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Train Epoch: 1 [0/60000] Loss: 2.311563
Train Epoch: 1 [6400/60000] Loss: 0.347010
Train Epoch: 1 [12800/60000] Loss: 0.188526
Train Epoch: 1 [19200/60000] Loss: 0.083934
Train Epoch: 1 [25600/60000] Loss: 0.102786
Train Epoch: 1 [32000/60000] Loss: 0.035130
Train Epoch: 1 [38400/60000] Loss: 0.041540
Train Epoch: 1 [44800/60000] Loss: 0.272029
Train Epoch: 1 [51200/60000] Loss: 0.090918
Train Epoch: 1 [57600/60000] Loss: 0.210721

Test set: Average loss: 0.0001, Accuracy: 9660/10000 (96.60%)

Train Epoch: 2 [0/60000] Loss: 0.286785
Train Epoch: 2 [6400/60000] Loss: 0.021039
Train Epoch: 2 [12800/60000] Loss: 0.205284
Train Epoch: 2 [19200/60000] Loss: 0.108772
Train Epoch: 2 [25600/60000] Loss: 0.091766
Train Epoch: 2 [32000/60000] Loss: 0.035830
Train Epoch: 2 [38400/60000] Loss: 0.104529
Train Epoch: 2 [44800/60000] Loss: 0.054115
Train Epoch: 2 [51200/6

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device used for traniing is: ",device)
# Save the model files
from google.colab import drive
drive.mount('/content/gdrive')
PATH = '/content/gdrive/My Drive/Colab Notebooks/chkpt_training_BN/'
# Define the neural network with 3 Linear layers and SiLU activation
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.flatten = nn.Flatten()
        self.silu = nn.SiLU()
        self.fc1 = nn.Linear(28*28, 512 ,bias=False)  # Input layer (flattened 28x28 images), 512 neurons
        self.fc2 = nn.Linear(512, 256, bias=False)     # Hidden layer with 256 neurons
        self.fc3 = nn.Linear(256, 10, bias=False)      # Output layer (10 classes for digits 0-9)

    def forward(self, x):
        x = self.flatten(x)  # Flatten the input image
        x = self.silu(self.fc1(x))
        x = self.silu(self.fc2(x))
        x = self.fc3(x)        # Output without activation for classification
        return x

class MLP_BN(nn.Module):
    def __init__(self):
        super(MLP_BN, self).__init__()
        self.flatten = nn.Flatten()
        self.silu = nn.SiLU()
        self.fc1 = nn.Linear(28*28, 512 ,bias=False)  # Input layer (flattened 28x28 images), 512 neurons
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256, bias=False)     # Hidden layer with 256 neurons
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 10, bias=False)      # Output layer (10 classes for digits 0-9)

    def forward(self, x):
        x = self.flatten(x)  # Flatten the input image
        x = self.silu(self.bn1(self.fc1(x)))
        x = self.silu(self.bn2(self.fc2(x)))
        x = self.fc3(x)        # Output without activation for classification
        return x

# Define transformations for the dataset
transform = transforms.Compose([
    # transforms.RandomRotation(10),
    # transforms.RandomAffine(0, translate=(0.1,0.1)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # Normalize with mean and std of MNIST
])

# Load the MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Initialize model, loss function, and optimizer
# model = MLP().to(device)
model = MLP_BN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train(model, device, train_loader, optimizer, criterion, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx*len(data)}/{len(train_loader.dataset)}] Loss: {loss.item():.6f}')

# Test function
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()  # Sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n')

# Save checkpoint function
def save_checkpoint(epoch, model, optimizer, path):
  torch.save({
      'epoch': epoch,
      'model_state_dict': model.state_dict(),
      'optimizer_state_dict': optimizer.state_dict()
  }, path)
# Main training and testing loop
for epoch in range(1, 11):  # Train for 5 epochs
    train(model, device, train_loader, optimizer, criterion, epoch)
    test(model, device, test_loader)
    checkpoint_path= PATH + f'checkpoint_epoch_{epoch}.pth'
    save_checkpoint(epoch, model, optimizer, checkpoint_path)


Device used for traniing is:  cuda
Mounted at /content/gdrive
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 5079111.38it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 133930.92it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1268574.03it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3965555.53it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Train Epoch: 1 [0/60000] Loss: 2.466960
Train Epoch: 1 [6400/60000] Loss: 0.271593
Train Epoch: 1 [12800/60000] Loss: 0.112380
Train Epoch: 1 [19200/60000] Loss: 0.167503
Train Epoch: 1 [25600/60000] Loss: 0.121213
Train Epoch: 1 [32000/60000] Loss: 0.051635
Train Epoch: 1 [38400/60000] Loss: 0.279183
Train Epoch: 1 [44800/60000] Loss: 0.195244
Train Epoch: 1 [51200/60000] Loss: 0.182917
Train Epoch: 1 [57600/60000] Loss: 0.207860

Test set: Average loss: 0.0001, Accuracy: 9692/10000 (96.92%)

Train Epoch: 2 [0/60000] Loss: 0.136522
Train Epoch: 2 [6400/60000] Loss: 0.151541
Train Epoch: 2 [12800/60000] Loss: 0.023403
Train Epoch: 2 [19200/60000] Loss: 0.165181
Train Epoch: 2 [25600/60000] Loss: 0.107602
Train Epoch: 2 [32000/60000] Loss: 0.135694
Train Epoch: 2 [38400/60000] Loss: 0.027912
Train Epoch: 2 [44800/60000] Loss: 0.102360
Train Epoch: 2 [51200/60000] Loss: 0.192512
Train Epoch: 2 [57600/60000] Loss: 

In [None]:
# Optimizing the model to reach 99% accuracy via batch Normailzation and adding preprocessing techniques
