### Task 2: Use the converted weights and compare the fine-tuning results on the CIFAR-10 dataset.
* Training from scratch
* ImageNet weights
* SimCLR pre-trained model 

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import timm

# Define the transformation: convert images to tensors, then normalize each
# channel with the mean/std values below.
# NOTE(review): these look like the usual ImageNet statistics -- confirm they
# are the intended normalization for CIFAR-10 inputs.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load CIFAR-10 dataset (downloaded into ./data on first use). Only the
# training loader is shuffled.
train_dataset = datasets.CIFAR10(root='data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100, shuffle=True, num_workers=4)

val_dataset = datasets.CIFAR10(root='data', train=False, download=True, transform=transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=100, shuffle=False, num_workers=4)

# Create the models: one per initialization strategy being compared.
model_scratch = models.resnet50(pretrained=False)
model_imagenet = models.resnet50(pretrained=True)
# NOTE(review): the recorded output of this cell ends with a RuntimeError while
# loading a ResNet state_dict: conv1.weight is [64, 3, 3, 3] in the checkpoint
# but [64, 3, 7, 7] in the model. That failure appears to come from this call;
# the model definition must match the checkpoint's stem before these weights
# can be loaded -- TODO resolve.
model_simclr = timm.create_model('resnet50_simclr_cifar10', pretrained=True)

# Replace the last layer with a 10-way classifier.
# The input width is read from model_scratch and reused for all three models.
num_ftrs = model_scratch.fc.in_features
model_scratch.fc = nn.Linear(num_ftrs, 10)
model_imagenet.fc = nn.Linear(num_ftrs, 10)
model_simclr.fc = nn.Linear(num_ftrs, 10)

# Move models to GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_scratch = model_scratch.to(device)
model_imagenet = model_imagenet.to(device)
model_simclr = model_simclr.to(device)

# Define loss function and optimizer.
# The criterion follows `device` like the models do, instead of a hard-coded
# .cuda() that bypasses the CPU fallback chosen above.
criterion = nn.CrossEntropyLoss().to(device)
optimizer_scratch = optim.SGD(model_scratch.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
optimizer_imagenet = optim.SGD(model_imagenet.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
optimizer_simclr = optim.SGD(model_simclr.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

# Training function
def train_model(model, optimizer, num_epochs=10):
    """Train ``model`` on ``train_loader`` and return the mean loss per epoch.

    Uses the module-level ``train_loader``, ``criterion`` and ``device``.

    Args:
        model: Network to train; it is put into training mode.
        optimizer: Optimizer whose ``zero_grad()``/``step()`` are called once
            per batch.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        A list with one float per epoch: the sum of that epoch's batch losses
        divided by the number of batches.
    """
    model.train()
    num_batches = len(train_loader)
    loss_history = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Record the epoch mean once per epoch. The original called .append()
        # on the running float inside the batch loop, which raises
        # AttributeError and never filled loss_history.
        epoch_loss = running_loss / num_batches
        loss_history.append(epoch_loss)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
    return loss_history

# Validation function
def validate_model(model):
    """Evaluate ``model`` on ``val_loader`` and report its accuracy.

    Uses the module-level ``val_loader`` and ``device``. The model is put into
    evaluation mode and gradients are disabled for the whole pass.

    Args:
        model: Network to evaluate.

    Returns:
        A single-element list holding the accuracy as a percentage
        (100 * correct predictions / samples seen).
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            # Predicted class = index of the largest score along dim 1.
            predictions = logits.max(dim=1).indices
            n_seen += labels.size(0)
            n_correct += (predictions == labels).sum().item()

    accuracy = 100 * n_correct / n_seen
    print(f"Validation Accuracy: {accuracy:.2f}%")
    return [accuracy]

# Train and validate models
# Each variant is trained with train_model's default epoch count and then
# evaluated once on the validation loader.
# NOTE(review): the result names keep their original spelling ("loos",
# "pretraint") in case later cells reference them.
print("Training model from scratch:")
resnet50_no_pretrain_loos = train_model(model_scratch, optimizer_scratch)
resnet50_no_pretraint_acc = validate_model(model_scratch)

print("Training model with ImageNet pre-trained weights:")
resnet50_pretraint_loss = train_model(model_imagenet, optimizer_imagenet)
resnet50_pretraint_acc = validate_model(model_imagenet)

# Header restored so this run's log output is labeled like the two above.
print("Training model with SimCLR pre-trained weights:")
resnet50_simclr_loss = train_model(model_simclr, optimizer_simclr)
resnet50_simclr_acc = validate_model(model_simclr)

# Freezing/unfreezing layers experiment
def freeze_layers(model):
    """Turn off gradient tracking for every parameter of ``model`` in place.

    Args:
        model: Module whose parameters should stop receiving gradients.
    """
    for weight in model.parameters():
        weight.requires_grad_(False)

# Stage 1: freeze every existing parameter, then attach a new 10-way head so
# that only the head is optimized.
# NOTE(review): model_imagenet has already been trained by the earlier
# train_model call in this cell, so the frozen backbone is no longer the
# plain ImageNet checkpoint -- confirm this is intended.
print("Freezing layers of the ImageNet pre-trained model:")
freeze_layers(model_imagenet)
# The head is created after freezing, so its parameters are still trainable;
# it is moved to the same device as the rest of the model.
model_imagenet.fc = nn.Linear(num_ftrs, 10).to(device)
# Only the new head's parameters are handed to the optimizer.
optimizer_imagenet = optim.SGD(
    model_imagenet.fc.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
)
resnet50_pretraint_freeze_loss = train_model(model_imagenet, optimizer_imagenet)
resnet50_pretraint_freeze_acc = validate_model(model_imagenet)

# Stage 2: re-enable gradients for the whole network and continue training
# all parameters with a smaller learning rate (0.01 instead of 0.1).
print("Unfreezing layers of the ImageNet pre-trained model:")
model_imagenet.requires_grad_(True)
optimizer_imagenet = optim.SGD(
    model_imagenet.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
)
resnet50_pretraint_unfreeze_loss = train_model(model_imagenet, optimizer_imagenet)
resnet50_pretraint_unfreeze_acc= validate_model(model_imagenet)


Files already downloaded and verified
Files already downloaded and verified


config.json:   0%|          | 0.00/880 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/94.4M [00:00<?, ?B/s]

RuntimeError: Error(s) in loading state_dict for ResNet:
	size mismatch for conv1.weight: copying a param with shape torch.Size([64, 3, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 3, 7, 7]).