In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets,transforms

In [2]:
# Fix the global PyTorch RNG so that weight initialisation, DataLoader shuffling
# and dropout masks repeat on every "Restart Kernel -> Run All" (same machine and
# PyTorch version). Without this the with/without-dropout numbers change per run.
seed = 42
torch.manual_seed(seed)

# Hyperparameters shared by both experiments (with and without dropout).
batch_size = 128        # samples per DataLoader batch
epochs = 10             # number of passes over the training loader
learning_rate = 0.001   # learning rate handed to the Adam optimizer

In [3]:
# Preprocessing applied to every image: tensor conversion followed by
# normalisation with the (0.5,) / (0.5,) arguments for the single channel.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5,), (0.5,))
transform = transforms.Compose([to_tensor, normalize])

# Arguments common to the train and test splits (downloaded into ./data if absent).
_mnist_args = dict(root='./data', download=True, transform=transform)
train_dataset = datasets.MNIST(train=True, **_mnist_args)
test_dataset = datasets.MNIST(train=False, **_mnist_args)

# Only the training batches are shuffled; the test loader keeps the default order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size)

100%|██████████| 9.91M/9.91M [00:00<00:00, 16.6MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 527kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.95MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.61MB/s]


### Without Dropout

In [4]:
class MLP(nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 10.

    Args:
        dropout_prob: Probability passed to an ``nn.Dropout`` layer inserted
            after each hidden ReLU. With the default of ``0.0`` no dropout
            layers are added, so ``MLP()`` builds exactly the plain baseline
            stack (Flatten, Linear, ReLU, Linear, ReLU, Linear).

    Raises:
        ValueError: Propagated from ``nn.Dropout`` when a non-zero
            ``dropout_prob`` lies outside ``[0, 1]``.
    """

    def __init__(self, dropout_prob: float = 0.0):
        super().__init__()
        layers = [nn.Flatten()]                 # 1 x 28 x 28 → 784
        for in_features, out_features in ((28 * 28, 512), (512, 256)):
            layers += [nn.Linear(in_features, out_features), nn.ReLU()]
            # Skip the layer entirely for 0.0 so the default Sequential
            # indices (and state_dict keys) match the original baseline.
            if dropout_prob != 0:
                layers.append(nn.Dropout(dropout_prob))
        layers.append(nn.Linear(256, 10))       # 10 output classes
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the 10 class scores per sample."""
        return self.model(x)

model = MLP()

In [5]:
# Adam updates every parameter of the current `model`; the loss is cross-entropy
# computed on the model outputs and the integer class targets.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [6]:
# Train for `epochs` passes over train_loader, printing the per-epoch SUM of
# the batch losses (not the mean).
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()

        batch_loss = criterion(model(images), labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss:.4f}")

Epoch 1/10, Loss: 155.9951
Epoch 2/10, Loss: 65.5654
Epoch 3/10, Loss: 47.3655
Epoch 4/10, Loss: 36.7719
Epoch 5/10, Loss: 31.7373
Epoch 6/10, Loss: 27.3621
Epoch 7/10, Loss: 22.6663
Epoch 8/10, Loss: 22.5937
Epoch 9/10, Loss: 17.8873
Epoch 10/10, Loss: 17.5204


In [7]:
# Count correct predictions over the whole test loader with gradients disabled.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        # Predicted class = index of the largest score along dim 1.
        predicted = model(images).argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.shape[0]

accuracy = correct / total * 100
print(f"\nMNIST Test Accuracy WITHOUT Dropout: {accuracy:.2f}%")



MNIST Test Accuracy WITHOUT Dropout: 97.80%


### With Dropout

In [9]:
# Value handed to both nn.Dropout layers of the dropout MLP defined below.
dropout_prob = 0.5

In [10]:
class MLP(nn.Module):
    """784 -> 512 -> 256 -> 10 network with nn.Dropout after each hidden ReLU.

    The dropout probability is read from the module-level ``dropout_prob``
    at construction time.
    """

    def __init__(self):
        super().__init__()
        first_hidden = [nn.Linear(28*28, 512), nn.ReLU(), nn.Dropout(dropout_prob)]
        second_hidden = [nn.Linear(512, 256), nn.ReLU(), nn.Dropout(dropout_prob)]
        classifier = nn.Linear(256, 10)     # 10 output classes
        self.model = nn.Sequential(
            nn.Flatten(),                   # 1 x 28 x 28 → 784
            *first_hidden,
            *second_hidden,
            classifier,
        )

    def forward(self, x):
        """Pass ``x`` through the sequential stack and return its output."""
        return self.model(x)

model = MLP()

In [11]:
# Fresh optimizer bound to the parameters of the dropout model, plus the same
# cross-entropy loss as in the baseline experiment.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()


In [12]:
# Same training procedure as the baseline; model.train() keeps the dropout
# layers active. The printed value is the per-epoch SUM of the batch losses.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()

        batch_loss = criterion(model(images), labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss:.4f}")


Epoch 1/10, Loss: 241.4402
Epoch 2/10, Loss: 130.6708
Epoch 3/10, Loss: 115.0560
Epoch 4/10, Loss: 103.8376
Epoch 5/10, Loss: 94.3639
Epoch 6/10, Loss: 91.9530
Epoch 7/10, Loss: 84.9976
Epoch 8/10, Loss: 82.6456
Epoch 9/10, Loss: 79.7114
Epoch 10/10, Loss: 73.9274


In [13]:
# model.eval() switches the dropout layers off for evaluation; gradients are
# disabled while counting correct predictions over the test loader.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        # Predicted class = index of the largest score along dim 1.
        predicted = model(images).argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.shape[0]

accuracy = correct / total * 100
print(f"\nMNIST Test Accuracy with Dropout: {accuracy:.2f}%")


MNIST Test Accuracy with Dropout: 97.24%


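Here you can see that, even though the training loss at the last epoch is higher with dropout (73.93) than without dropout (17.52), the final test accuracy is about the same (97.24% vs. 97.80%). The training loss is computed while dropout is active, i.e. with units randomly dropped, so a higher value is expected. This can be important when we deal with large datasets and many more epochs, e.g. 1000.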