In [None]:
!pip install transformers timm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->timm)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->timm)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->timm)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch->timm)

In [None]:

from transformers import SwinForImageClassification, SwinConfig, AutoImageProcessor
from torchvision.datasets import CIFAR100
from torchvision import transforms
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import time

# Device shared by the training and evaluation code below: the GPU when CUDA
# is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load_data(processor, batch_size=32, root='./data', num_workers=0):
    """Build CIFAR-100 train and test data loaders.

    Every image is resized to 224x224, converted to a tensor and normalized
    with the mean/std stored on ``processor``. The dataset is downloaded into
    ``root`` when it is not already present there.

    Args:
        processor: Object exposing ``image_mean`` and ``image_std``
            (the per-channel normalization statistics).
        batch_size: Batch size used by both loaders.
        root: Directory where CIFAR-100 is stored / downloaded.
        num_workers: Number of loader worker processes; 0 loads the data in
            the main process.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its samples.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=processor.image_mean, std=processor.image_std),
    ])
    train_set = CIFAR100(root=root, train=True, transform=transform, download=True)
    test_set = CIFAR100(root=root, train=False, transform=transform, download=True)
    train_loader = DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )
    # The test split is iterated in its stored order (no shuffling).
    test_loader = DataLoader(test_set, batch_size=batch_size, num_workers=num_workers)
    return train_loader, test_loader

def freeze_backbone(model):
    """Turn off gradient tracking for every parameter under ``model.swin``.

    The model is modified in place; parameters that live outside
    ``model.swin`` are not touched. Returns ``None``.
    """
    # Module.requires_grad_ applies the flag to all parameters of the submodule.
    model.swin.requires_grad_(False)

def evaluate(model, test_loader):
    """Return the top-1 accuracy of ``model`` on ``test_loader`` in percent.

    The model is switched to eval mode (and left in it) and batches are
    moved to the module-level ``device`` before the forward pass, so the
    model is expected to already live on that device.

    Args:
        model: Callable whose output exposes a ``logits`` attribute.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a percentage, or ``0.0`` when the loader yields no
        samples.
    """
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            preds = model(imgs).logits.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    if total == 0:
        # An empty loader would otherwise divide by zero.
        return 0.0
    return 100 * correct / total

def train(model, train_loader, test_loader, epochs=3, lr=2e-5):
    """Train ``model`` with Adam and cross-entropy, evaluating after each epoch.

    The model is moved to the module-level ``device``. After every epoch the
    summed batch loss, the validation accuracy and the epoch's training time
    are printed. The measured time covers the training loop only, not the
    evaluation pass.

    Args:
        model: Model whose forward output exposes a ``logits`` attribute.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        test_loader: Iterable passed to ``evaluate`` after each epoch.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for the Adam optimizer.

    Returns:
        ``(avg_epoch_seconds, accuracy)`` where ``accuracy`` is the value
        measured after the last epoch. When no epoch runs (``epochs <= 0``)
        the result is ``(0.0, accuracy of the untrained model)``.
    """
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    train_times = []
    acc = None
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() can.
        start = time.perf_counter()
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs).logits
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        train_times.append(time.perf_counter() - start)
        acc = evaluate(model, test_loader)
        print(f"Epoch {epoch+1}/{epochs} | Loss: {total_loss:.2f} | Val Acc: {acc:.2f}% | Time: {train_times[-1]:.2f}s")
    if not train_times:
        # No epoch ran: avoid dividing by zero and still report an accuracy.
        return 0.0, evaluate(model, test_loader)
    return sum(train_times) / len(train_times), acc

def run_experiment(name, model_type="pretrained", train_flag=True):
    """Run one CIFAR-100 experiment for a Swin checkpoint and summarize it.

    Args:
        name: Checkpoint identifier passed to the ``from_pretrained`` loaders.
        model_type: ``"pretrained"`` loads the checkpoint weights with a
            100-class head and freezes ``model.swin``; ``"scratch"`` builds
            the model from the checkpoint's config only, with nothing frozen.
        train_flag: When true, train for 3 epochs; otherwise only evaluate
            the model as constructed.

    Returns:
        A dict with keys ``"Model"`` (``name``, suffixed with ``" (scratch)"``
        for scratch models), ``"Accuracy"`` (percent) and ``"AvgTime"``
        (average seconds per training epoch, ``0.0`` without training).

    Raises:
        ValueError: If ``model_type`` is not ``"pretrained"`` or ``"scratch"``.
    """
    # Fail fast: an unknown type would otherwise leave `model` unbound and
    # surface later as a confusing UnboundLocalError.
    if model_type not in ("pretrained", "scratch"):
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected 'pretrained' or 'scratch'."
        )

    print(f"\n=== Running {name} ({model_type}) ===")
    if model_type == "pretrained":
        model = SwinForImageClassification.from_pretrained(name, num_labels=100, ignore_mismatched_sizes=True)
        freeze_backbone(model)
    else:
        config = SwinConfig.from_pretrained(name)
        config.num_labels = 100
        model = SwinForImageClassification(config)
    # Both variants use the checkpoint's own preprocessing statistics.
    processor = AutoImageProcessor.from_pretrained(name)

    train_loader, test_loader = load_data(processor)

    if train_flag:
        avg_time, acc = train(model, train_loader, test_loader, epochs=3)
    else:
        acc = evaluate(model.to(device), test_loader)
        avg_time = 0.0

    return {
        "Model": name + (" (scratch)" if model_type == "scratch" else ""),
        "Accuracy": acc,
        "AvgTime": avg_time,
    }

# One (checkpoint, model_type, train_flag) entry per run, in execution order.
experiments = [
    ("microsoft/swin-tiny-patch4-window7-224", "scratch", False),
    ("microsoft/swin-tiny-patch4-window7-224", "pretrained", True),
    ("microsoft/swin-small-patch4-window7-224", "pretrained", True),
]

# Append one result at a time so finished runs stay in `results` even if a
# later experiment fails.
results = []
for checkpoint, mode, do_train in experiments:
    results.append(run_experiment(checkpoint, mode, train_flag=do_train))


import pandas as pd
# Tabulate the per-experiment summary dicts for display.
df = pd.DataFrame(results)
print("\n=== Final Results ===")
print(df)


=== Running microsoft/swin-tiny-patch4-window7-224 (scratch) ===

=== Running microsoft/swin-tiny-patch4-window7-224 (pretrained) ===


Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-tiny-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([100]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([100, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3 | Loss: 6345.68 | Val Acc: 46.84% | Time: 275.27s
Epoch 2/3 | Loss: 4781.13 | Val Acc: 58.29% | Time: 276.88s
Epoch 3/3 | Loss: 3714.58 | Val Acc: 62.62% | Time: 277.80s

=== Running microsoft/swin-small-patch4-window7-224 (pretrained) ===


config.json:   0%|          | 0.00/71.8k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/199M [00:00<?, ?B/s]

Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-small-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([100, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([100]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


preprocessor_config.json:   0%|          | 0.00/255 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/199M [00:00<?, ?B/s]

Epoch 1/3 | Loss: 6258.93 | Val Acc: 52.20% | Time: 443.10s
Epoch 2/3 | Loss: 4536.26 | Val Acc: 63.44% | Time: 443.19s
Epoch 3/3 | Loss: 3396.60 | Val Acc: 66.71% | Time: 443.46s

=== Final Results ===
                                              Model  Accuracy     AvgTime
0  microsoft/swin-tiny-patch4-window7-224 (scratch)      0.74    0.000000
1            microsoft/swin-tiny-patch4-window7-224     62.62  276.650857
2           microsoft/swin-small-patch4-window7-224     66.71  443.253162
