In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import os
from torchvision import models

# ✅ Define Model
class EfficientNetB0(nn.Module):
    """EfficientNet-B0 backbone with a replaced classification head.

    The torchvision EfficientNet-B0 is built (optionally with its
    ImageNet-1K weights) and the final ``Linear`` layer of its classifier is
    swapped for a freshly initialised one with ``num_classes`` outputs.

    Args:
        num_classes: Number of output logits. Defaults to 10 (CIFAR-10),
            which is what the original hard-coded head used.
        pretrained: When True (default) load ``IMAGENET1K_V1`` weights for
            the backbone; when False start from random initialisation, which
            avoids the weight download.
    """

    def __init__(self, num_classes=10, pretrained=True):
        super().__init__()
        weights = models.EfficientNet_B0_Weights.IMAGENET1K_V1 if pretrained else None
        # Keep the wrapped network under ``self.model`` so state_dict keys
        # ("model....") stay compatible with previously saved checkpoints.
        self.model = models.efficientnet_b0(weights=weights)
        head_in_features = self.model.classifier[1].in_features
        self.model.classifier[1] = nn.Linear(head_in_features, num_classes)

    def forward(self, x):
        """Return the class logits produced by the wrapped network for ``x``."""
        return self.model(x)

# ✅ Load CIFAR-10 Dataset
def load_cifar10(batch_size=128, root='./data', num_workers=0,
                 mean=(0.5,), std=(0.5,)):
    """Build the CIFAR-10 train and test ``DataLoader`` objects.

    Every image is resized to 224x224, converted to a tensor and normalised
    with ``mean`` / ``std``. The same transform is used for both splits.

    Args:
        batch_size: Number of samples per batch for both loaders.
        root: Directory the dataset is downloaded to / read from.
        num_workers: Worker processes used by each ``DataLoader``
            (0 loads in the main process, as the original code did).
        mean: Per-channel mean passed to ``transforms.Normalize``. The
            default ``(0.5,)`` keeps the original behaviour.
        std: Per-channel standard deviation passed to
            ``transforms.Normalize``; default ``(0.5,)`` as before.

    Returns:
        ``(trainloader, testloader)``; only the training loader shuffles.

    Note:
        torchvision documents ImageNet statistics
        (mean ``(0.485, 0.456, 0.406)``, std ``(0.229, 0.224, 0.225)``) as the
        preprocessing for its ImageNet-pretrained weights. Pass those values
        to match that preprocessing when fine-tuning such a model.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    # Page-locked host memory only helps when batches are copied to a GPU.
    pin_memory = torch.cuda.is_available()

    trainset = torchvision.datasets.CIFAR10(
        root=root, train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True,
        num_workers=num_workers, pin_memory=pin_memory)

    testset = torchvision.datasets.CIFAR10(
        root=root, train=False, download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers, pin_memory=pin_memory)

    return trainloader, testloader

# ✅ Training Function (with Checkpoint Saving)
def _train_one_epoch(model, optimizer, criterion, loader, device):
    """Run one optimisation pass over ``loader``; return the mean per-batch loss."""
    model.train()
    running_loss = 0.0
    num_batches = 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Count batches while iterating so an empty loader cannot divide by zero.
    return running_loss / max(num_batches, 1)


def _evaluate(model, criterion, loader, device):
    """Return ``(mean batch loss, top-1 error, top-5 error)`` on ``loader``."""
    model.eval()
    loss_sum = 0.0
    num_batches = 0
    correct_top1, correct_top5, total = 0, 0, 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss_sum += criterion(outputs, labels).item()
            num_batches += 1

            # topk raises if k exceeds the number of classes, so clamp it.
            k = min(5, outputs.size(1))
            _, preds = torch.topk(outputs, k, dim=1)
            correct_top1 += (preds[:, 0] == labels).sum().item()
            # A label matches at most one of the k distinct predicted classes.
            correct_top5 += (preds == labels.view(-1, 1)).sum().item()
            total += labels.size(0)

    # With no samples both errors are reported as 1.0 instead of raising.
    total = max(total, 1)
    avg_loss = loss_sum / max(num_batches, 1)
    return avg_loss, 1 - (correct_top1 / total), 1 - (correct_top5 / total)


def _save_checkpoint(model, optimizer, epoch, checkpoint_path):
    """Write the checkpoint to a temp file, then swap it into place."""
    tmp_path = f"{checkpoint_path}.tmp"
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, tmp_path)
    # os.replace swaps the file in one step, so an interrupted save leaves
    # the previous checkpoint intact rather than a truncated file.
    os.replace(tmp_path, checkpoint_path)


def train_model(model, optimizer, criterion, num_epochs=100, start_epoch=1,
                checkpoint_path="checkpoint.pth", trainloader=None, testloader=None):
    """Train ``model``, evaluating after every epoch and checkpointing periodically.

    Args:
        model: Network to train; it is moved to CUDA when available, else CPU.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.
        num_epochs: Index of the last epoch to run (inclusive).
        start_epoch: Index of the first epoch to run; values above
            ``num_epochs`` make the loop a no-op.
        checkpoint_path: File that receives ``epoch``, ``model_state_dict``
            and ``optimizer_state_dict`` every 5 epochs and at the last epoch.
        trainloader: Optional iterable of ``(images, labels)`` batches used
            for training. Defaults to the CIFAR-10 loader from
            ``load_cifar10()``.
        testloader: Optional iterable of ``(images, labels)`` batches used
            for evaluation. Defaults to the CIFAR-10 test loader.

    Returns:
        None. Progress is printed once per epoch.

    Note:
        The "Top-5" figure uses ``min(5, number of classes)`` predictions so
        models with fewer than five outputs can still be evaluated.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Only touch the CIFAR-10 dataset when the caller did not supply loaders.
    if trainloader is None or testloader is None:
        default_train, default_test = load_cifar10()
        if trainloader is None:
            trainloader = default_train
        if testloader is None:
            testloader = default_test

    print("\n===== Training Start =====")
    for epoch in range(start_epoch, num_epochs + 1):
        avg_train_loss = _train_one_epoch(model, optimizer, criterion, trainloader, device)

        # ✅ Evaluate on test set
        avg_test_loss, top1_error, top5_error = _evaluate(model, criterion, testloader, device)

        print(f'Epoch [{epoch}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Test Loss: {avg_test_loss:.4f}, Top-1 Error: {top1_error:.4f}, Top-5 Error: {top5_error:.4f}')

        # ✅ Save a checkpoint every 5 epochs & at last epoch
        if epoch % 5 == 0 or epoch == num_epochs:
            _save_checkpoint(model, optimizer, epoch, checkpoint_path)
            print(f"✅ Checkpoint saved at epoch {epoch}\n")

# ✅ Training Setup (Run This After Defining train_model)
num_epochs = 50
learning_rate = 0.001
checkpoint_path = "checkpoint.pth"

criterion = nn.CrossEntropyLoss()

# Same device rule that train_model applies internally.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ Initialize EfficientNet-B0
efficientnet_model = EfficientNetB0()
# Move the model BEFORE the optimizer state is restored: load_state_dict
# places the optimizer's buffers on the device the parameters are on at that
# moment, so restoring on CPU and moving the model to CUDA afterwards leaves
# Adam's moment estimates on the wrong device and the first step() fails.
efficientnet_model.to(device)
optimizer_effnet = optim.Adam(efficientnet_model.parameters(), lr=learning_rate)

# ✅ Resume training if checkpoint exists
if os.path.exists(checkpoint_path):
    # map_location lets a checkpoint written on a GPU be resumed on a
    # CPU-only machine (and the other way round).
    checkpoint = torch.load(checkpoint_path, map_location=device)
    efficientnet_model.load_state_dict(checkpoint['model_state_dict'])
    optimizer_effnet.load_state_dict(checkpoint['optimizer_state_dict'])
    # The checkpoint stores the last completed epoch, so continue with the next.
    start_epoch = checkpoint['epoch'] + 1
    print(f"🔄 Resuming training from epoch {start_epoch}")
else:
    start_epoch = 1
    print("🚀 Starting training from epoch 1")

# ✅ Start or Resume Training
train_model(efficientnet_model, optimizer_effnet, criterion, num_epochs, start_epoch, checkpoint_path)


🚀 Starting training from epoch 1
Files already downloaded and verified
Files already downloaded and verified

===== Training Start =====
Epoch [1/50], Train Loss: 0.3327, Test Loss: 0.2019, Top-1 Error: 0.0651, Top-5 Error: 0.0015
Epoch [2/50], Train Loss: 0.1553, Test Loss: 0.1715, Top-1 Error: 0.0571, Top-5 Error: 0.0008
Epoch [3/50], Train Loss: 0.1138, Test Loss: 0.1876, Top-1 Error: 0.0584, Top-5 Error: 0.0012
Epoch [4/50], Train Loss: 0.0898, Test Loss: 0.1829, Top-1 Error: 0.0532, Top-5 Error: 0.0008
Epoch [5/50], Train Loss: 0.0774, Test Loss: 0.2069, Top-1 Error: 0.0617, Top-5 Error: 0.0014
✅ Checkpoint saved at epoch 5

Epoch [6/50], Train Loss: 0.0663, Test Loss: 0.1973, Top-1 Error: 0.0546, Top-5 Error: 0.0009
Epoch [7/50], Train Loss: 0.0602, Test Loss: 0.1886, Top-1 Error: 0.0509, Top-5 Error: 0.0009
Epoch [8/50], Train Loss: 0.0534, Test Loss: 0.1976, Top-1 Error: 0.0559, Top-5 Error: 0.0013
Epoch [9/50], Train Loss: 0.0498, Test Loss: 0.2179, Top-1 Error: 0.0604, Top-5 