In [6]:

"""
Fine-tuning pretrained Swin Transformers (Tiny and Small) on CIFAR-100
and preparing for comparison with training from scratch. (Corrected)
"""


import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from transformers import SwinForImageClassification, AutoImageProcessor
import time
import copy
import pandas as pd
from tqdm.auto import tqdm # For progress bars


# --- Configuration ---
# Hugging Face Hub checkpoints of the pretrained Swin variants to fine-tune
# (Tiny first, then Small).
model_checkpoints = [
    f"microsoft/swin-{variant}-patch4-window7-224"
    for variant in ("tiny", "small")
]


# Training hyperparameters
BATCH_SIZE = 32    # mini-batch size used by both data loaders
EPOCHS = 3         # number of fine-tuning epochs (2-5 was the requested range)
LR = 2e-5          # learning rate handed to the optimizer
NUM_CLASSES = 100  # CIFAR-100 has 100 target classes

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")


print(f"Using device: {DEVICE}")


# --- Data Loading and Preprocessing ---
# CIFAR-100 images are resized to the resolution the pretrained checkpoint's
# image processor reports, and normalized with that processor's mean/std.
# NOTE(review): the statistics of the FIRST checkpoint are reused for every
# model in `model_checkpoints` -- confirm all checkpoints share them.
processor = AutoImageProcessor.from_pretrained(model_checkpoints[0])
image_mean = processor.image_mean
image_std = processor.image_std
size = processor.size["height"]  # Should be 224


normalize = transforms.Normalize(mean=image_mean, std=image_std)
_transform = transforms.Compose([
    transforms.Resize((size, size)),
    transforms.ToTensor(),
    normalize,
])


# Pinned host memory only helps when batches are copied to a CUDA device.
# DEVICE is a `torch.device`, so it must be inspected through `.type`;
# comparing the object itself with the string 'cuda' is always False, which
# silently disabled pinning even on GPU runs.
_pin_memory = DEVICE.type == "cuda"


# Apply transforms to CIFAR-100
train_dataset = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=True, transform=_transform
)
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=_pin_memory,
)


test_dataset = torchvision.datasets.CIFAR100(
    root='./data', train=False, download=True, transform=_transform
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
    pin_memory=_pin_memory,
)




# --- Model Loading and Modification ---
def load_and_prepare_model(checkpoint, num_labels, freeze_backbone=True):
    """Load a pretrained Swin classifier and optionally freeze its backbone.

    Args:
        checkpoint: Model identifier passed to
            ``SwinForImageClassification.from_pretrained``.
        num_labels: Number of output classes requested for the model.
        freeze_backbone: When true, every parameter under ``model.swin`` has
            ``requires_grad`` disabled and every parameter under
            ``model.classifier`` has it enabled. When false, no
            ``requires_grad`` flag is touched.

    Returns:
        The loaded model. Total and trainable parameter counts (in millions)
        are printed as a side effect.
    """
    print(f"Loading model: {checkpoint}")
    # ignore_mismatched_sizes lets the checkpoint load even though the
    # classifier shape differs from the one requested via num_labels.
    net = SwinForImageClassification.from_pretrained(
        checkpoint,
        num_labels=num_labels,
        ignore_mismatched_sizes=True,
    )

    if not freeze_backbone:
        print("Training entire model (backbone unfrozen)...")
    else:
        print("Freezing backbone weights...")
        # Module.requires_grad_ flips the flag on every parameter it owns.
        net.swin.requires_grad_(False)
        net.classifier.requires_grad_(True)

    # Tally total and trainable parameter counts in a single pass.
    n_total = 0
    n_trainable = 0
    for weight in net.parameters():
        count = weight.numel()
        n_total += count
        if weight.requires_grad:
            n_trainable += count
    print(f"Total Parameters: {n_total/1e6:.2f} M")
    print(f"Trainable Parameters: {n_trainable/1e6:.2f} M")

    return net


# --- Training and Evaluation Loop ---
def _train_one_epoch(model, loader, optimizer, criterion, device, desc):
    """Run one optimization pass over ``loader`` and return the mean loss per sample."""
    model.train()
    loss_sum = 0.0
    seen = 0
    progress_bar = tqdm(loader, desc=desc, leave=False)
    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        # The model output exposes the class scores as `.logits`.
        logits = model(inputs).logits
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        batch_size = labels.size(0)
        # Weight by batch size so a smaller final batch is not over-counted.
        loss_sum += batch_loss * batch_size
        seen += batch_size
        progress_bar.set_postfix({'loss': batch_loss})

    # An empty loader has no defined loss; report NaN instead of dividing by 0.
    return loss_sum / seen if seen else float("nan")


def _evaluate(model, loader, criterion, device):
    """Return ``(mean loss per sample, accuracy in percent)`` of ``model`` on ``loader``."""
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs).logits
            batch_size = labels.size(0)
            loss_sum += criterion(logits, labels).item() * batch_size
            correct += (logits.argmax(dim=1) == labels).sum().item()
            total += batch_size

    if total == 0:
        # Nothing was evaluated: undefined loss, zero accuracy.
        return float("nan"), 0.0
    return loss_sum / total, 100 * correct / total


def train_model(model, model_name, trainloader, testloader, optimizer, epochs, device):
    """Train ``model`` for ``epochs`` epochs, evaluating on ``testloader`` after each.

    Both loaders must yield ``(inputs, labels)`` batches, and calling
    ``model(inputs)`` must return an object with a ``.logits`` attribute.
    Cross-entropy is used as the loss. Reported losses are averaged per
    sample, so a smaller final batch does not skew them. One progress line is
    printed per epoch and a summary is printed at the end.

    Args:
        model: Model to train; it is moved to ``device`` in place.
        model_name: Label used only in the printed messages.
        trainloader: Iterable of training batches.
        testloader: Iterable of evaluation batches.
        optimizer: Optimizer already bound to the parameters to update.
        epochs: Number of passes over ``trainloader``.
        device: Device the model and every batch are moved to.

    Returns:
        ``(final_acc, avg_epoch_time)``: the test accuracy (percent) after the
        last epoch and the mean wall-clock seconds per epoch, where an epoch
        covers both the training pass and the evaluation pass. Both are ``0``
        when ``epochs`` is 0.
    """
    print(f"\n--- Fine-tuning {model_name} ---")
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    results = {'train_loss': [], 'test_loss': [], 'test_acc': [], 'epoch_time': []}
    best_acc = 0.0

    for epoch in range(epochs):
        start_time = time.perf_counter()

        epoch_loss = _train_one_epoch(
            model, trainloader, optimizer, criterion, device,
            desc=f"Epoch {epoch+1}/{epochs}",
        )
        epoch_test_loss, epoch_test_acc = _evaluate(model, testloader, criterion, device)

        epoch_duration = time.perf_counter() - start_time

        results['train_loss'].append(epoch_loss)
        results['test_loss'].append(epoch_test_loss)
        results['test_acc'].append(epoch_test_acc)
        results['epoch_time'].append(epoch_duration)
        best_acc = max(best_acc, epoch_test_acc)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {epoch_loss:.4f}, Test Loss: {epoch_test_loss:.4f}, Test Acc: {epoch_test_acc:.2f}%, Time: {epoch_duration:.2f}s")

    print(f"Finished Training {model_name}. Best Test Accuracy: {best_acc:.2f}%")
    avg_epoch_time = sum(results['epoch_time']) / len(results['epoch_time']) if results['epoch_time'] else 0
    # The final-epoch accuracy (not the best one) is what gets reported.
    final_acc = results['test_acc'][-1] if results['test_acc'] else 0
    print(f"Final Epoch Test Accuracy: {final_acc:.2f}%")
    return final_acc, avg_epoch_time


# --- Main Execution ---
# One summary row (a dict) is collected per fine-tuned checkpoint.
results_data = []


print("\n=== Processing Pretrained Swin Models ===")
for checkpoint in model_checkpoints:
    # Use the part after the organization prefix as the display name.
    model_name = checkpoint.split('/')[-1]
    model = load_and_prepare_model(checkpoint, NUM_CLASSES, freeze_backbone=True)

    # Only the classifier head's parameters are handed to the optimizer;
    # the backbone was frozen by load_and_prepare_model above.
    optimizer = optim.AdamW(model.classifier.parameters(), lr=LR)

    # Train the model
    final_acc, avg_epoch_time = train_model(model, model_name, train_loader, test_loader, optimizer, EPOCHS, DEVICE)

    results_data.append({
        "Model Configuration": model_name + " (Fine-tuned)",
        "Avg Epoch Time (s)": f"{avg_epoch_time:.2f}",
        f"Test Acc (%) @{EPOCHS} epochs": f"{final_acc:.2f}"
    })

    del model  # Free up memory before the next checkpoint is loaded
    # DEVICE is a torch.device, so test its `.type`; comparing the object with
    # the string 'cuda' is always False and the cache was never cleared.
    if DEVICE.type == 'cuda':
        torch.cuda.empty_cache()




# --- Placeholder for Scratch Model Comparison ---
# The from-scratch run is not part of this cell; its row carries "N/A" in
# both metric columns so the table keeps the same layout as the real rows.
_acc_column = f"Test Acc (%) @{EPOCHS} epochs"
_scratch_row = {
    "Model Configuration": "Swin-Tiny (From Scratch - Placeholder)",
    "Avg Epoch Time (s)": "N/A",
    _acc_column: "N/A",
}
results_data.append(_scratch_row)




# --- Display Summary Table ---
# Turn the collected rows into a DataFrame and print it without the index.
results_df = pd.DataFrame(results_data)
print("\n--- Results Summary (Fine-tuning vs Scratch) ---")
print(results_df.to_string(index=False))





Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified


Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-tiny-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([100]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([100, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



=== Processing Pretrained Swin Models ===
Loading model: microsoft/swin-tiny-patch4-window7-224
Freezing backbone weights...
Total Parameters: 27.60 M
Trainable Parameters: 0.08 M

--- Fine-tuning swin-tiny-patch4-window7-224 ---


                                                                         

Epoch 1/3, Train Loss: 4.0444, Test Loss: 3.4833, Test Acc: 47.31%, Time: 153.81s


                                                                         

Epoch 2/3, Train Loss: 3.0527, Test Loss: 2.6567, Test Acc: 58.63%, Time: 107.84s


                                                                         

Epoch 3/3, Train Loss: 2.3741, Test Loss: 2.1184, Test Acc: 62.47%, Time: 108.22s
Finished Training swin-tiny-patch4-window7-224. Best Test Accuracy: 62.47%
Final Epoch Test Accuracy: 62.47%
Loading model: microsoft/swin-small-patch4-window7-224


Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-small-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([100, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([100]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Freezing backbone weights...
Total Parameters: 48.91 M
Trainable Parameters: 0.08 M

--- Fine-tuning swin-small-patch4-window7-224 ---


                                                                         

Epoch 1/3, Train Loss: 3.9770, Test Loss: 3.3574, Test Acc: 53.54%, Time: 172.37s


                                                                         

Epoch 2/3, Train Loss: 2.8896, Test Loss: 2.4638, Test Acc: 63.23%, Time: 172.49s


                                                                         

Epoch 3/3, Train Loss: 2.1679, Test Loss: 1.9089, Test Acc: 66.97%, Time: 172.34s
Finished Training swin-small-patch4-window7-224. Best Test Accuracy: 66.97%
Final Epoch Test Accuracy: 66.97%

--- Results Summary (Fine-tuning vs Scratch) ---
                       Model Configuration Avg Epoch Time (s) Test Acc (%) @3 epochs
 swin-tiny-patch4-window7-224 (Fine-tuned)             123.29                  62.47
swin-small-patch4-window7-224 (Fine-tuned)             172.40                  66.97
    Swin-Tiny (From Scratch - Placeholder)                N/A                    N/A
