In [1]:
# !pip install torchvision

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Seed PyTorch's global RNG so that DataLoader shuffling and the weight
# initialisation of models created later are repeatable across
# "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Tunable settings, gathered in one place.
DATA_ROOT = './data'
TRAIN_BATCH_SIZE = 64
TEST_BATCH_SIZE = 1000

# Use MNIST for simplicity.
# Preprocessing: convert each image to a tensor, then normalise the single
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Training and test splits; download=True asks torchvision to fetch the files
# into DATA_ROOT.
train_data = datasets.MNIST(root=DATA_ROOT, train=True, download=True, transform=transform)
test_data = datasets.MNIST(root=DATA_ROOT, train=False, download=True, transform=transform)

# Shuffle only the training batches; evaluation order does not matter.
train_loader = DataLoader(train_data, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=TEST_BATCH_SIZE, shuffle=False)


In [19]:
class SimpleNN(nn.Module):
    """Plain fully connected classifier with two hidden layers (no skip connections).

    Args:
        in_features: Number of values per sample once flattened (default 28*28).
        hidden_size: Width of both hidden layers (default 256).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, in_features=28*28, hidden_size=256, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Flatten ``x`` to rows of ``in_features`` values and return raw logits."""
        # reshape() instead of view(): view() raises RuntimeError when the input
        # is not contiguous (e.g. after a transpose); reshape() copies if needed.
        x = x.reshape(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the caller applies the loss / argmax.
        x = self.fc3(x)
        return x


<img src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRQrSW9Tn-OD2uDzu_gko0IYQFmA9oAZZBffA&s">


In [20]:
class ResidualBlock(nn.Module):
    """Fully connected residual block: two Linear + BatchNorm1d layers with a skip connection.

    The block computes ``relu(x + scale * bn2(fc2(relu(bn1(fc1(x))))))``, so the
    input and output both have ``size`` features.

    Args:
        size: Number of input (and output) features.
        scale: Factor applied to the transformed branch before it is added to
            the skip connection (default 0.1, the value previously hard-coded).
    """

    def __init__(self, size, scale=0.1):
        super().__init__()
        self.fc1 = nn.Linear(size, size)
        self.bn1 = nn.BatchNorm1d(size)  # batch normalization after first layer
        self.fc2 = nn.Linear(size, size)
        self.bn2 = nn.BatchNorm1d(size)  # batch normalization after second layer
        self.scale = scale  # scale residual to stabilize training

    def forward(self, x):
        """Return the activated sum of ``x`` and the scaled transformed branch."""
        residual = x  # skip connection
        out = F.relu(self.bn1(self.fc1(x)))
        out = self.bn2(self.fc2(out))
        out = residual + self.scale * out  # scaled residual
        out = F.relu(out)  # final activation
        return out


class ResNetMini(nn.Module):
    """Small residual MLP: input projection, two ``ResidualBlock``s, linear head.

    Args:
        in_features: Number of values per sample once flattened (default 28*28).
        hidden_size: Width of the projection and of both residual blocks (default 256).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, in_features=28*28, hidden_size=256, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_size)
        self.resblock1 = ResidualBlock(hidden_size)
        self.resblock2 = ResidualBlock(hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Flatten ``x`` to rows of ``in_features`` values and return raw logits."""
        # reshape() instead of view(): view() raises RuntimeError when the input
        # is not contiguous (e.g. after a transpose); reshape() copies if needed.
        x = x.reshape(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        x = self.resblock1(x)
        x = self.resblock2(x)
        # No activation on the last layer: the caller applies the loss / argmax.
        x = self.fc2(x)
        return x



In [21]:
def train(model, optimizer, epochs=3, loader=None):
    """Train ``model`` with cross-entropy loss and print the mean loss per epoch.

    Args:
        model: Module mapping a batch of inputs to class logits.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``loader``.
        loader: Iterable of ``(inputs, targets)`` batches. Defaults to the
            notebook-level ``train_loader``.

    Returns:
        A list with one sample-weighted mean training loss per epoch.

    Raises:
        ValueError: If ``loader`` yields no samples during an epoch.
    """
    if loader is None:
        loader = train_loader  # notebook-level default, resolved at call time

    # Feed batches on whatever device the model lives on (no-op on CPU).
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device

    epoch_losses = []
    for epoch in range(epochs):
        model.train()
        loss_sum = 0.0
        n_seen = 0
        for X, y in loader:
            if device is not None:
                X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            output = model(X)
            loss = F.cross_entropy(output, y)
            loss.backward()
            optimizer.step()
            # cross_entropy returns the batch mean; weight it by the batch size
            # so a short final batch does not skew the epoch average.
            batch_size = y.size(0)
            loss_sum += loss.item() * batch_size
            n_seen += batch_size
        if n_seen == 0:
            raise ValueError("training loader produced no samples")
        epoch_loss = loss_sum / n_seen
        epoch_losses.append(epoch_loss)
        print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")
    return epoch_losses


In [22]:
def test(model):
    model.eval()
    correct = 0
    with torch.no_grad():
        for X, y in test_loader:
            output = model(X)
            pred = output.argmax(dim=1)
            correct += pred.eq(y).sum().item()
    print(f"Test Accuracy: {100. * correct / len(test_loader.dataset):.2f}%")


In [23]:
from datetime import datetime



In [24]:
# Baseline experiment: build, train and evaluate the plain fully connected
# network, timing the whole run (construction included) with the wall clock.
start_time = datetime.now()

model1 = SimpleNN()
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)

train(model1, optimizer1, epochs=3)
test(model1)

end_time = datetime.now()
elapsed = end_time - start_time

print("Time Taken : ", elapsed)

Epoch 1, Loss: 0.3244
Epoch 2, Loss: 0.1478
Epoch 3, Loss: 0.1100
Test Accuracy: 95.75%
Time Taken :  0:00:48.474472


In [25]:
# Residual experiment: same optimizer settings and epoch count as the baseline
# cell, but with the residual network, timed with the wall clock.
start_time = datetime.now()

model2 = ResNetMini()
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)

train(model2, optimizer2, epochs=3)
test(model2)

end_time = datetime.now()
elapsed = end_time - start_time

print("Time Taken : ", elapsed)

Epoch 1, Loss: 0.2729
Epoch 2, Loss: 0.1283
Epoch 3, Loss: 0.0956
Test Accuracy: 96.64%
Time Taken :  0:00:58.042012
