In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18  
import time  


# Run on the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}\n")


# Training-time CIFAR-10 pipeline: light random augmentation followed by
# conversion to a tensor and per-channel scaling from [0, 1] to [-1, 1].
transform_cifar10 = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Evaluation-time CIFAR-10 pipeline: same tensor conversion and normalisation
# but WITHOUT random augmentation, so the reported test accuracy is
# deterministic and measured on unmodified test images.
transform_cifar10_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# MNIST pipeline: replicate the single grey channel to 3 channels and resize
# to 32x32 so the images match what the CIFAR-10 pipeline feeds the network.
# It contains no random ops, so it is shared by the train and test splits.
transform_mnist = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    # One mean/std entry per output channel (the tensor has 3 channels here).
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Datasets are downloaded into ./data on first use.
train_mnist = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_mnist)
test_mnist = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_mnist)

train_cifar10 = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_cifar10)
test_cifar10 = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_cifar10_test)


# Only the training loaders shuffle; test order is kept fixed.
batch_size = 64
train_loader_mnist = torch.utils.data.DataLoader(train_mnist, batch_size=batch_size, shuffle=True)
test_loader_mnist = torch.utils.data.DataLoader(test_mnist, batch_size=batch_size, shuffle=False)

train_loader_cifar10 = torch.utils.data.DataLoader(train_cifar10, batch_size=batch_size, shuffle=True)
test_loader_cifar10 = torch.utils.data.DataLoader(test_cifar10, batch_size=batch_size, shuffle=False)


class ResNetFineTune(nn.Module):
    """ImageNet-pretrained ResNet-18 with a new classification head.

    The stock ``fc`` layer is replaced by BatchNorm1d -> Dropout(0.5) ->
    Linear(512, num_classes).

    Args:
        num_classes: Number of output logits produced by the new head.
        freeze_early_layers: When True, every backbone parameter is frozen
            first, so that only ``layer3``, ``layer4`` and the new head are
            trainable. The default (False) leaves the whole network
            trainable, which is the behaviour this class has always had.
    """

    def __init__(self, num_classes, freeze_early_layers=False):
        super().__init__()

        try:
            # torchvision >= 0.13 selects weights through an enum; the
            # ``pretrained`` flag is deprecated there.
            from torchvision.models import ResNet18_Weights
        except ImportError:
            # Older torchvision: fall back to the legacy flag.
            self.model = resnet18(pretrained=True)
        else:
            # IMAGENET1K_V1 is the checkpoint that ``pretrained=True`` loads.
            self.model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

        if freeze_early_layers:
            for param in self.model.parameters():
                param.requires_grad = False

        # Without the freeze above these loops change nothing, because
        # freshly loaded parameters already have requires_grad=True.
        for stage in (self.model.layer3, self.model.layer4):
            for param in stage.parameters():
                param.requires_grad = True

        # Created after any freezing, so the head is always trainable.
        self.model.fc = nn.Sequential(
            nn.BatchNorm1d(512),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Run ``x`` through the backbone and the new head; return the logits."""
        return self.model(x)

def count_parameters(model):
    """Return how many scalar values in ``model`` have ``requires_grad`` set."""
    trainable_sizes = [
        weight.numel()
        for weight in model.parameters()
        if weight.requires_grad
    ]
    return sum(trainable_sizes)


# One independent 10-class network per dataset, each moved to the chosen device.
resnet_mnist = ResNetFineTune(10).to(device)
resnet_cifar10 = ResNetFineTune(10).to(device)

# Report the trainable-parameter count of each network.
print(
    f"ðŸ”¹ Trainable Parameters in ResNet (MNIST): {count_parameters(resnet_mnist):,}",
    f"ðŸ”¹ Trainable Parameters in ResNet (CIFAR-10): {count_parameters(resnet_cifar10):,}\n",
    sep="\n",
)


def train_model(model, train_loader, test_loader, dataset_name, epochs=10,
                lr=0.0001, step_size=5, gamma=0.1):
    """Train ``model`` with Adam and a StepLR schedule, then evaluate it.

    Per epoch, prints the mean batch loss, the training accuracy and the
    learning rate in effect after the scheduler step. After the last epoch,
    prints the wall-clock training time and calls ``evaluate_model`` on
    ``test_loader``.

    Args:
        model: Network to train; it is moved to the module-level ``device``.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        test_loader: Iterable passed on to ``evaluate_model``.
        dataset_name: Label used in the printed messages.
        epochs: Number of passes over ``train_loader``.
        lr: Initial Adam learning rate.
        step_size: Number of epochs between learning-rate decays.
        gamma: Multiplicative decay factor applied every ``step_size`` epochs.
    """
    print(f"Training on {dataset_name}...\n{'='*40}")

    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        # Counted in the loop so loaders without __len__ also work.
        num_batches = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()
            total += labels.size(0)
            num_batches += 1

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        avg_loss = running_loss / max(num_batches, 1)
        train_acc = 100 * correct / max(total, 1)
        print(f"Epoch {epoch+1}/{epochs} | Loss: {avg_loss:.4f} | Train Accuracy: {train_acc:.2f}%")

        # The schedule advances once per epoch, after that epoch's optimizer steps.
        scheduler.step()
        print(f"ðŸ”¹ Learning Rate after Epoch {epoch+1}: {scheduler.get_last_lr()[0]:.6f}")

    # Measured before evaluation, so it covers the training epochs only.
    total_time = time.time() - start_time
    print(f"\nðŸ•’ Training Time for {dataset_name}: {total_time:.2f} seconds\n")

    evaluate_model(model, test_loader, dataset_name)


def evaluate_model(model, test_loader, dataset_name):
    """Measure and print the top-1 accuracy of ``model`` on ``test_loader``.

    The model is switched to eval mode (and left in it) and the forward
    passes run under ``torch.no_grad()``.

    Args:
        model: Network to evaluate.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        dataset_name: Label used in the printed messages.

    Returns:
        The accuracy as a percentage (float), or 0.0 when ``test_loader``
        yields no samples.
    """
    print(f"\nEvaluating {dataset_name} model...\n{'='*40}")

    # Send batches to wherever the model's weights live; a model with no
    # parameters falls back to the module-level ``device``.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(target_device), labels.to(target_device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()
            total += labels.size(0)

    # An empty loader reports 0.0 instead of raising ZeroDivisionError.
    test_acc = 100 * correct / total if total else 0.0
    print(f"âœ… {dataset_name} Accuracy: {test_acc:.2f}%")
    print(f"{'='*40}\n")
    return test_acc


# Fine-tune and evaluate each network on its own dataset, CIFAR-10 first.
train_model(
    resnet_cifar10,
    train_loader_cifar10,
    test_loader_cifar10,
    dataset_name="CIFAR-10",
    epochs=10,
)
train_model(
    resnet_mnist,
    train_loader_mnist,
    test_loader_mnist,
    dataset_name="MNIST",
    epochs=10,
)


Using device: cuda

ðŸ”¹ Trainable Parameters in ResNet (MNIST): 11,182,666
ðŸ”¹ Trainable Parameters in ResNet (CIFAR-10): 11,182,666

Training on CIFAR-10...
Epoch 1/10 | Loss: 1.2215 | Train Accuracy: 57.55%
ðŸ”¹ Learning Rate after Epoch 1: 0.000100
Epoch 2/10 | Loss: 0.7844 | Train Accuracy: 73.24%
ðŸ”¹ Learning Rate after Epoch 2: 0.000100
Epoch 3/10 | Loss: 0.6612 | Train Accuracy: 77.36%
ðŸ”¹ Learning Rate after Epoch 3: 0.000100
Epoch 4/10 | Loss: 0.5792 | Train Accuracy: 79.99%
ðŸ”¹ Learning Rate after Epoch 4: 0.000100
Epoch 5/10 | Loss: 0.5201 | Train Accuracy: 82.05%
ðŸ”¹ Learning Rate after Epoch 5: 0.000010
Epoch 6/10 | Loss: 0.4330 | Train Accuracy: 85.04%
ðŸ”¹ Learning Rate after Epoch 6: 0.000010
Epoch 7/10 | Loss: 0.4067 | Train Accuracy: 86.00%
ðŸ”¹ Learning Rate after Epoch 7: 0.000010
Epoch 8/10 | Loss: 0.3887 | Train Accuracy: 86.54%
ðŸ”¹ Learning Rate after Epoch 8: 0.000010
Epoch 9/10 | Loss: 0.3736 | Train Accuracy: 87.03%
ðŸ”¹ Learning Rate after Epoch 9: 0.0