In [None]:
import torch
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Evaluation-time preprocessing: convert each image to a tensor, then
# normalize every RGB channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# CIFAR-100 train and test splits, stored under ./data with download enabled.
# Both use the plain (non-augmenting) transform defined above.
cifar_kwargs = dict(root='./data', download=True, transform=transform)
trainset = torchvision.datasets.CIFAR100(train=True, **cifar_kwargs)
testset = torchvision.datasets.CIFAR100(train=False, **cifar_kwargs)

In [None]:
# Split sizes: 45,000 samples for training, 5,000 held out for validation.
train_size = 45000
val_size = 5000

# Seed the split explicitly so the train/validation partition is identical on
# every "Restart Kernel -> Run All"; without a generator the partition depends
# on whatever the global RNG state happens to be.
split_generator = torch.Generator().manual_seed(42)
train_subset, val_subset = random_split(
    trainset, [train_size, val_size], generator=split_generator
)

In [None]:
# Training-time augmentation: random 32x32 crop after 4-pixel padding and a
# random horizontal flip, followed by the same tensor conversion and
# normalization that the plain `transform` applies.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)

# `train_subset` was produced by random_split on the *un-augmented* dataset, so
# rebinding `trainset` alone leaves the training loader without augmentation.
# Re-wrap the very same sample indices around the augmented dataset instead.
# `val_subset` is deliberately left untouched so validation keeps the plain
# transform and the train/validation partition stays exactly as it was split.
train_subset = torch.utils.data.Subset(trainset, train_subset.indices)

In [None]:
# Mini-batch loaders for the three splits. Only the training loader shuffles;
# the validation and test loaders are created with shuffle=False.
loader_kwargs = dict(batch_size=128, num_workers=2)
trainloader = DataLoader(train_subset, shuffle=True, **loader_kwargs)
valloader = DataLoader(val_subset, shuffle=False, **loader_kwargs)
testloader = DataLoader(testset, shuffle=False, **loader_kwargs)

In [None]:
# ResNet-18 trained from scratch for a true baseline. Calling the constructor
# with no arguments yields randomly initialised weights; the legacy
# `pretrained=False` flag is deprecated in recent torchvision releases and only
# triggers a warning there, so it is omitted.
model = models.resnet18()

# Swap the classifier head for one with 100 outputs (one per CIFAR-100 class),
# keeping the input width of the original fully connected layer.
num_features = model.fc.in_features
model.fc = torch.nn.Linear(num_features, 100)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Place the model on the selected device before the optimizer is created.
model = model.to(device)

In [None]:
# Cross-entropy objective for the multi-class classification task.
criterion = torch.nn.CrossEntropyLoss()

# SGD with momentum over all model parameters.
sgd_settings = {"lr": 0.01, "momentum": 0.9}
optimizer = optim.SGD(model.parameters(), **sgd_settings)

In [None]:
# Scheduler that lowers the learning rate when the monitored value (the
# validation loss passed to `step` below) stops decreasing ('min' mode),
# configured with patience=5.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)

for epoch in range(50):  # Adjust epochs as needed
    # ---- Training pass ----
    model.train()
    running_loss = 0.0  # sum of per-sample losses over the epoch
    seen = 0            # number of training samples processed
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # `criterion` is built with its default reduction (a batch mean), so
        # weight it by the batch size: the last batch is smaller than 128 and
        # would otherwise count as much as a full batch in the epoch average.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        seen += batch_size
    print(f"Epoch {epoch+1}, Loss: {running_loss/seen}")

    # ---- Validation pass (no gradient tracking) ----
    model.eval()
    val_loss = 0.0  # sum of per-sample validation losses
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in valloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            batch_size = labels.size(0)
            # Same per-sample weighting as in training; with 5000 validation
            # samples and batches of 128 the final batch holds only 8 samples.
            val_loss += criterion(outputs, labels).item() * batch_size
            _, predicted = torch.max(outputs, 1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    # Per-sample mean validation loss for this epoch.
    avg_val_loss = val_loss/total
    print(f"Validation Accuracy: {100 * correct / total}%, Loss: {avg_val_loss}")

    # Update learning rate based on validation loss
    scheduler.step(avg_val_loss)

In [None]:
# Final evaluation on the test loader: count how many top-1 predictions match
# the labels, with the model in eval mode and gradient tracking disabled.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # Index of the largest logit along the class dimension.
        predicted = outputs.max(dim=1).indices
        hits = (predicted == labels).sum().item()
        correct += hits
        total += labels.size(0)
print(f"Baseline Test Accuracy: {100 * correct / total}%")

In [None]:
# Persist the model's state_dict (its parameters and buffers) rather than the
# module object itself.
baseline_state = model.state_dict()
torch.save(baseline_state, 'baseline_cifar100.pth')