# Installation

In [None]:
# Installation
!pip install -q torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers
!pip install -q scikit-learn
!pip install -q accelerate
!pip install -q torchmetrics
!pip install -q torch torchvision transformers scikit-learn accelerate

In [None]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from transformers import AutoImageProcessor, AutoModel, AutoModelForImageClassification
from transformers import AutoConfig, AutoModelForImageClassification
from torch.optim.lr_scheduler import ReduceLROnPlateau
from PIL import Image
import numpy as np
from tqdm import tqdm
import random
import os
import torch.optim as optim # import optim
from torch.amp import autocast, GradScaler
import matplotlib.pyplot as plt
from umap import UMAP  # pip install umap-learn
import matplotlib.pyplot as plt

In [None]:
# Report whether a CUDA device is visible and, if so, the name of device 0.
has_cuda = torch.cuda.is_available()
print("CUDA Available:", has_cuda)
if has_cuda:
    device_label = torch.cuda.get_device_name(0)
else:
    device_label = "CPU"
print("Device:", device_label)

In [None]:
# Seed every random number generator used in this notebook so runs are repeatable
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and set the cuDNN determinism flags.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    # Python and NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU, then CUDA)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # cuDNN flags: benchmark mode off, deterministic mode on
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Fine-Tune with Scheduler (Freeze Backbone)

In [None]:
# Per-sample preprocessing. The result stays a PIL image; conversion to tensors
# is left to the Hugging Face image processor used in the train/eval code.
transform_steps = [
    transforms.Resize((224, 224)),
    transforms.Lambda(lambda img: img.convert("RGB")),  # force 3-channel RGB PIL image
]
transform = transforms.Compose(transform_steps)

In [None]:
# Custom collate_fn: pass the images through untouched and only tensorize the labels
def collate_pil(batch):
    """Collate ``(image, label)`` samples without stacking the images.

    Args:
        batch: Non-empty sequence of ``(image, label)`` pairs.

    Returns:
        Tuple ``(images, labels)`` where ``images`` is a list of the images in
        batch order and ``labels`` is a tensor built from the labels.
    """
    images, labels = map(list, zip(*batch))
    return images, torch.tensor(labels)

In [None]:
# Seed every RNG (Python, NumPy, torch) before touching the data.
# random.seed() alone does not affect the torch generator used by random_split,
# so without this the train/val split would change from run to run.
SPLIT_SEED = 42
set_seed(SPLIT_SEED)

# Download CIFAR-100 (train and test) and apply the PIL-preserving transform
full_train = datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
test_set = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Hold out 10% of the training images for validation
val_ratio = 0.1
val_size = int(len(full_train) * val_ratio)
train_size = len(full_train) - val_size

# A dedicated, explicitly seeded generator keeps the split identical even if
# other code consumes random numbers from the global torch RNG beforehand.
train_set, val_set = random_split(
    full_train,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
assert len(train_set) + len(val_set) == len(full_train)

In [None]:
# Settings shared by all three loaders: batch size 128, the PIL-preserving
# collate function, and 2 worker processes to speed up loading.
loader_kwargs = dict(batch_size=128, collate_fn=collate_pil, num_workers=2, pin_memory=True)

# Only the training data is shuffled; validation and test keep dataset order.
train_loader = DataLoader(train_set, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_set, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_set, shuffle=False, **loader_kwargs)

In [None]:
# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the processor from the base model (not the fine-tuned one: fine-tuning
# did not change the preprocessing).
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")

# Load the base DINOv2 architecture with a 100-class head (CIFAR-100).
model = AutoModelForImageClassification.from_pretrained(
    "facebook/dinov2-base",
    num_labels=100,
    ignore_mismatched_sizes=True
).to(device)

# Load the weights fine-tuned on CIFAR-10.
# map_location lets a checkpoint saved on GPU load on a CPU-only runtime;
# weights_only=True restricts unpickling to tensors and plain containers.
cifar10_state_dict = torch.load(
    "/content/drive/MyDrive/rec_model/dinov2/dinov2_finetuned_cifar10.pth",
    map_location=device,
    weights_only=True,
)

# Drop the classifier weights: the saved head has 10 outputs while the
# current model's head has 100.
filtered_state_dict = {k: v for k, v in cifar10_state_dict.items() if 'classifier' not in k}

# strict=False is required because the classifier keys are absent, but it also
# hides any other mismatch, so report everything that is not the classifier.
load_result = model.load_state_dict(filtered_state_dict, strict=False)
unexpected_missing = [k for k in load_result.missing_keys if 'classifier' not in k]
if unexpected_missing or load_result.unexpected_keys:
    print(
        "WARNING: checkpoint does not fully match the model. "
        f"Missing: {unexpected_missing} | Unexpected: {list(load_result.unexpected_keys)}"
    )

# Freeze encoder layers 0-8 BEFORE defining the optimizer; every other
# parameter (including encoder layers 9+ and the classifier) stays trainable.
for name, param in model.named_parameters():
    if "encoder.layer" in name:
        layer_num = int(name.split("encoder.layer.")[1].split(".")[0])
        if layer_num < 9:
            param.requires_grad = False

optimizer = optim.AdamW(model.parameters(), lr=5e-5)
# mode='max' because the scheduler is stepped with validation accuracy.
# `verbose` is deprecated in recent PyTorch releases (and may be rejected by
# newer ones); the training loop prints the learning rate itself.
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)
loss_fn = nn.CrossEntropyLoss()

# Gradient scaler for automatic mixed precision; disabled when no GPU is present.
scaler = GradScaler("cuda", enabled=(device.type == "cuda"))

In [None]:
def evaluate(model, loader):
    """Return the top-1 accuracy of ``model`` over every batch in ``loader``.

    Args:
        model: Classifier called as ``model(**inputs)`` whose output exposes
            a ``.logits`` tensor.
        loader: Iterable of ``(images, labels)`` batches. Images are encoded
            with the module-level ``processor``; labels must be a tensor.

    Returns:
        Fraction of correctly classified samples as a float.
    """
    model.eval()
    hits, seen = 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_labels = batch_labels.to(device)
            encoded = processor(images=batch_images, return_tensors="pt").to(device)
            logits = model(**encoded).logits
            # Predicted class = index of the largest logit per sample
            hits += logits.argmax(dim=1).eq(batch_labels).sum().item()
            seen += batch_labels.size(0)
    return hits / seen


In [None]:
best_val_acc = 0.0
no_improvement = 0
early_stop_patience = 5  # stop after 5 consecutive epochs without a new best val accuracy
save_path = "/content/drive/MyDrive/cifar100model/dinov2_finetuned_cifar100.pth"
# Create the checkpoint folder up front so the first torch.save cannot fail
# after a full epoch of training.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Per-epoch history, consumed by the plotting and CSV-logging cells
train_losses = []
train_accuracies = []
val_accuracies = []
lr_history = []

# Mixed precision only makes sense on a GPU; fall back to full precision on CPU.
use_amp = device.type == "cuda"

for epoch in range(40):  # upper bound; the early-stopping check below may end training sooner
    model.train()
    epoch_loss = 0.0
    correct_train = 0
    total_train = 0

    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        inputs = processor(images=imgs, return_tensors="pt").to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        with autocast(device.type, enabled=use_amp):
            outputs = model(**inputs)
            loss = loss_fn(outputs.logits, labels)

        # Scaled backward pass + optimizer step (GradScaler handles unscaling)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        epoch_loss += loss.item()

        # Train accuracy tracking
        preds = outputs.logits.argmax(dim=1)
        correct_train += (preds == labels).sum().item()
        total_train += labels.size(0)

    avg_loss = epoch_loss / len(train_loader)
    train_acc = correct_train / total_train
    val_acc = evaluate(model, val_loader)

    # Log values. The learning rate is recorded BEFORE the scheduler step so
    # each row holds the rate that was actually used during this epoch.
    train_losses.append(avg_loss)
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)
    lr_history.append(optimizer.param_groups[0]['lr'])
    scheduler.step(val_acc)

    print(f"[Epoch {epoch+1}] Avg Loss: {avg_loss:.4f} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

    # Learning rate after the scheduler step, i.e. the rate for the next epoch
    current_lr = optimizer.param_groups[0]['lr']
    print(f"[Epoch {epoch+1}] Current LR: {current_lr:.6f}")

    if val_acc > best_val_acc:
        print(f"New best val acc! Saving model to {save_path}")
        best_val_acc = val_acc
        no_improvement = 0
        torch.save(model.state_dict(), save_path)
    else:
        no_improvement += 1
        print(f"No improvement for {no_improvement} epoch(s).")

    if no_improvement >= early_stop_patience:
        print("Early stopping triggered.")
        break

In [None]:
# Training vs. validation accuracy, one point per completed epoch
fig, ax = plt.subplots()
ax.plot(train_accuracies, label="Train Accuracy")
ax.plot(val_accuracies, label="Validation Accuracy")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.set_title("Training vs Validation Accuracy")
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Average training loss per completed epoch
fig, ax = plt.subplots()
ax.plot(train_losses, label="Training Loss")
ax.set_title("Training Loss Curve")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.grid(True)
plt.show()


In [None]:
# Restore the best checkpoint written by the training loop and score it on the test set.
# map_location lets a GPU-saved checkpoint load on a CPU-only runtime;
# weights_only=True restricts unpickling to tensors and plain containers.
best_state_dict = torch.load(save_path, map_location=device, weights_only=True)
model.load_state_dict(best_state_dict)
test_acc = evaluate(model, test_loader)
print(f"Final Test Accuracy: {test_acc:.4f}")

In [None]:
import pandas as pd

# One row per completed epoch; epochs are numbered starting at 1.
epoch_numbers = list(range(1, len(train_losses) + 1))
log_columns = {
    "epoch": epoch_numbers,
    "train_loss": train_losses,
    "train_acc": train_accuracies,
    "val_acc": val_accuracies,
    "lr": lr_history,
}
log_df = pd.DataFrame(log_columns)

# Persist the history next to the model checkpoint, then preview the first rows.
log_df.to_csv("/content/drive/MyDrive/cifar100model/dinov2_training_log.csv", index=False)
log_df.head()