In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import wandb
wandb.login()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mb23ee1006[0m ([33mb23ee1006-indian-institute-of-technology-jodhpur[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
device=torch.device("cuda:0"if torch.cuda.is_available() else "cpu")

In [3]:
transform_train = transforms.Compose([
    transforms.RandomRotation(degrees=(-10, 10)),  # Random rotation between -10 and 10 degrees
    transforms.ToTensor(),                       # Converting img to tensor
    transforms.Normalize((0.5,), (0.5,))         # Normalizing to 0.5 mean and std deviation
])

transform_test = transforms.Compose([
    transforms.ToTensor(),                       # Converting img to tensor
    transforms.Normalize((0.5,), (0.5,))         # Normalizing to 0.5 mean and std deviation
])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)

train_size = int(0.8 * len(trainset)) #80-20 split between training data and cross-validation
val_size = len(trainset) - train_size
trainset, cvset = random_split(trainset, [train_size, val_size])

batch_size=64
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)  # Training DataLoader
cv_loader = DataLoader(cvset, batch_size=batch_size, shuffle=False)  # Cross-validation DataLoader
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False) # Test DataLoader



In [11]:
class CNN(nn.Module):
    def __init__(self):     #defining architecture
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=12, kernel_size=3, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(12)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=13, kernel_size=3, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(13)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=13, out_channels=14, kernel_size=3, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(14)
        self.conv4 = nn.Conv2d(in_channels=14, out_channels=16, kernel_size=3, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(16)
        self.conv5=nn.Conv2d(in_channels=16, out_channels=16, kernel_size=1, stride=1, padding=0)
        self.bn5 = nn.BatchNorm2d(16)
        self.fc = nn.Linear(16* 4 * 4, 10)
        self.dropout = nn.Dropout(p=0.2)
        # self.dropout2 = nn.Dropout(p=0.15)

    def forward(self, x):                       #forward prop steps
        x = F.leaky_relu(self.bn1(self.conv1(x)))
        x = F.leaky_relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        # x= self.dropout2(x)
        x = F.leaky_relu(self.bn3(self.conv3(x)))
        x = F.leaky_relu(self.bn4(self.conv4(x)))
        x = F.leaky_relu(self.bn5(self.conv5(x)))
        x = self.pool(x)
        x = x.view(x.size(0), -1)  # Flattening the tensor
        x = self.dropout(x)
        x = self.fc(x)
        
        return F.softmax(x, dim=1)


# Creating instance of the model
model = CNN().to(device)

# Defining optimizer as Adam and loss function as CrossEntropy
optimizer = optim.Adam(model.parameters(), lr=0.0008)
criterion = nn.CrossEntropyLoss()


num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Number of parameters: {num_params}')      #printing number of params




Number of parameters: 8205


In [12]:

wandb.init(project='MNIST improvement')
epochs=15
for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    correct_train = 0

    for inputs, targets in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{epochs}'):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()     #resetting gradients to 0
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()     #backprop
        optimizer.step()    #taking one optimization step

        train_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == targets).sum().item()

    train_loss /= len(train_loader.dataset)
    train_accuracy = correct_train / len(train_loader.dataset)

    # Log training loss and accuracy
    wandb.log({'Train Loss': train_loss, 'Train Accuracy': train_accuracy})

    # Validation loop
    model.eval()
    val_loss = 0.0
    correct_val = 0

    with torch.no_grad():
        for inputs, targets in cv_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_val += (predicted == targets).sum().item()

    val_loss /= len(cv_loader.dataset)
    val_accuracy = correct_val / len(cv_loader.dataset)

    # Log validation loss and accuracy
    wandb.log({'Validation Loss': val_loss, 'Validation Accuracy': val_accuracy})


# Testing loop
model.eval()
test_loss = 0.0
correct_test = 0

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_test += (predicted == targets).sum().item()

test_loss /= len(test_loader.dataset)
test_accuracy = correct_test / len(test_loader.dataset)

# Log test loss and accuracy
wandb.log({'Test Loss': test_loss, 'Test Accuracy': test_accuracy})

# Print final test accuracy
print(f'Test Accuracy: {test_accuracy:.4f}')

# Finish wandb run
wandb.finish()

Epoch 1/15: 100%|██████████| 750/750 [00:36<00:00, 20.69it/s]
Epoch 2/15: 100%|██████████| 750/750 [00:35<00:00, 20.90it/s]
Epoch 3/15: 100%|██████████| 750/750 [00:51<00:00, 14.54it/s]
Epoch 4/15: 100%|██████████| 750/750 [01:09<00:00, 10.86it/s]
Epoch 5/15: 100%|██████████| 750/750 [01:14<00:00, 10.09it/s]
Epoch 6/15: 100%|██████████| 750/750 [00:56<00:00, 13.32it/s]
Epoch 7/15: 100%|██████████| 750/750 [00:35<00:00, 21.17it/s]
Epoch 8/15: 100%|██████████| 750/750 [00:48<00:00, 15.45it/s]
Epoch 9/15: 100%|██████████| 750/750 [01:04<00:00, 11.71it/s]
Epoch 10/15: 100%|██████████| 750/750 [01:08<00:00, 11.01it/s]
Epoch 11/15: 100%|██████████| 750/750 [01:13<00:00, 10.21it/s]
Epoch 12/15: 100%|██████████| 750/750 [01:01<00:00, 12.13it/s]
Epoch 13/15: 100%|██████████| 750/750 [01:02<00:00, 11.92it/s]
Epoch 14/15: 100%|██████████| 750/750 [00:56<00:00, 13.39it/s]
Epoch 15/15: 100%|██████████| 750/750 [01:05<00:00, 11.44it/s]


Test Accuracy: 0.9915


0,1
Test Accuracy,▁
Test Loss,▁
Train Accuracy,▁▇▇▇███████████
Train Loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁
Validation Accuracy,▁▄▅▆▆▆▇▇▇▇███▇█
Validation Loss,█▅▄▃▃▂▂▂▂▂▁▁▁▂▁

0,1
Test Accuracy,0.9915
Test Loss,1.46983
Train Accuracy,0.98554
Train Loss,1.47668
Validation Accuracy,0.98725
Validation Loss,1.47443
