<a href="https://colab.research.google.com/github/Hamza-10x/dl_asgn_1/blob/main/Assignment_1_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Define the Model Architecture**

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim

class BestNet(nn.Module):
    def __init__(self, dropout_p=0.2):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Flatten(),
            # Layer 1
            nn.Linear(3*32*32, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_p),
            # Layer 2
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout_p),
            # Layer 3
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout_p),
            # Output
            nn.Linear(128, 10)
        )

        # Weight Initialization (He initialization)
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # This tells PyTorch: "When I call model(x), run it through the stack"
        return self.layers(x)

best_model = BestNet(dropout_p=0.2).cuda() # Move to GPU if available

#**Set up Optimizer and Loss**

In [3]:
optimizer = optim.Adam(best_model.parameters(),
                       lr=1e-3,
                       weight_decay=1e-5)

scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

criterion = nn.CrossEntropyLoss()

#**The Training Loop (The "Solver" Logic)**

In [4]:
def train_model(model, train_loader, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.cuda(), labels.cuda()

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward + Backward + Optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 100 == 99:
                print(f'[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 100:.3f}')
                running_loss = 0.0

        scheduler.step() # Decay learning rate every epoch

# Assuming you have a PyTorch DataLoader named 'train_loader'
# train_model(best_model, train_loader)

#**Data Preparation**

In [5]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# 1. Define transformations for the images
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalizes to range [-1, 1]
])

# 2. Download and load the training and test sets
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=512,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = DataLoader(testset, batch_size=512,
                                         shuffle=False, num_workers=2)

100%|██████████| 170M/170M [00:03<00:00, 49.6MB/s]


#**Execution**

In [10]:
# Check for GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Training on: {device}")

# Initialize model, optimizer, and loss
model = BestNet(dropout_p=0.2).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

# Run the training
train_model(model, trainloader, epochs=10)

Training on: cuda:0


#**Evaluating the Results**

In [11]:
def check_accuracy(loader, model):
    correct = 0
    total = 0
    model.eval() # Set model to evaluation mode (turns off Dropout/BatchNorm)
    with torch.no_grad(): # No need to track gradients during testing
        for data in loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy on the test images: {100 * correct / total:.2f}%')

check_accuracy(testloader, model)

Accuracy on the test images: 55.61%
