In [2]:
!pip install datasets torch torchvision

Collecting datasets
  Downloading datasets-3.0.1-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.1-py3-none-any.whl (471 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.6/471.6 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:

In [3]:
import torch
import torch.nn as nn

model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14_lc')

in_features = model.linear_head.in_features
model.linear_head = nn.Linear(in_features, 200)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vits14_pretrain.pth
100%|██████████| 84.2M/84.2M [00:00<00:00, 201MB/s]
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_linear4_head.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vits14_linear4_head.pth
100%|██████████| 7.33M/7.33M [00:00<00:00, 154MB/s]


_LinearClassifierWrapper(
  (backbone): DinoVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (blocks): ModuleList(
      (0-11): 12 x NestedTensorBlock(
        (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (attn): MemEffAttention(
          (qkv): Linear(in_features=384, out_features=1152, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=384, out_features=384, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): LayerScale()
        (drop_path1): Identity()
        (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=384, out_features=1536, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=1536, out_features=384, bias=True)
          (drop): Dropout(p=0.0, inplace=Fals

In [11]:
from datasets import load_dataset
from torchvision import transforms
import torch
from torch.utils.data import DataLoader
import os


dataset = load_dataset("zh-plus/tiny-imagenet")

In [5]:
import random

sample_size = 5000
random.seed(42)

train_data_sample = dataset['train'].select(random.sample(range(len(dataset['train'])), sample_size))
val_data_sample = dataset['valid'].select(random.sample(range(len(dataset['valid'])), sample_size // 10))


In [6]:

preprocess = transforms.Compose([
    transforms.Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [7]:

def preprocess_data(example):
    example['image'] = preprocess(example['image'])
    return example

train_data_processed = train_data_sample.map(preprocess_data)
val_data_processed = val_data_sample.map(preprocess_data)

train_loader = DataLoader(train_data_processed, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data_processed, batch_size=64, shuffle=False)

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [9]:
def collate_fn(batch):

    images = torch.stack([torch.tensor(item['image']) for item in batch])
    labels = torch.tensor([item['label'] for item in batch])
    return images, labels

train_loader2 = DataLoader(train_data_processed, batch_size=64, shuffle=True, collate_fn=collate_fn)
val_loader2 = DataLoader(val_data_processed, batch_size=64, shuffle=False, collate_fn=collate_fn)






In [13]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


def accuracy(output, target, topk=(1, 5)):
    """计算Top-1和Top-5准确率"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


In [14]:

def train_model(model, train_loader, criterion, optimizer, device, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        top1_correct = 0
        top5_correct = 0
        total = 0
        for batch in train_loader:
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            top1, top5 = accuracy(outputs, labels, topk=(1, 5))
            top1_correct += top1.item()
            top5_correct += top5.item()
            total += labels.size(0)

        avg_loss = running_loss / len(train_loader)
        avg_top1_acc = top1_correct / len(train_loader)
        avg_top5_acc = top5_correct / len(train_loader)

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Top-1 Accuracy: {avg_top1_acc:.2f}%, Top-5 Accuracy: {avg_top5_acc:.2f}%")

def validate_model(model, val_loader, device):
    model.eval()
    running_loss = 0.0
    top1_correct = 0
    top5_correct = 0
    total = 0
    with torch.no_grad():
        for batch in val_loader:
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()

            top1, top5 = accuracy(outputs, labels, topk=(1, 5))
            top1_correct += top1.item()
            top5_correct += top5.item()
            total += labels.size(0)

    avg_loss = running_loss / len(val_loader)
    avg_top1_acc = top1_correct / len(val_loader)
    avg_top5_acc = top5_correct / len(val_loader)

    print(f"Validation Loss: {avg_loss:.4f}, Top-1 Accuracy: {avg_top1_acc:.2f}%, Top-5 Accuracy: {avg_top5_acc:.2f}%")
    return avg_top1_acc, avg_top5_acc


In [15]:

train_model(model, train_loader2, criterion, optimizer, device, epochs=10)

validate_model(model, val_loader2, device)

Epoch [1/10], Loss: 5.8905, Top-1 Accuracy: 0.51%, Top-5 Accuracy: 2.67%
Epoch [2/10], Loss: 5.2828, Top-1 Accuracy: 1.09%, Top-5 Accuracy: 4.79%
Epoch [3/10], Loss: 5.1537, Top-1 Accuracy: 1.50%, Top-5 Accuracy: 6.63%
Epoch [4/10], Loss: 5.0789, Top-1 Accuracy: 1.86%, Top-5 Accuracy: 8.09%
Epoch [5/10], Loss: 4.9885, Top-1 Accuracy: 2.85%, Top-5 Accuracy: 10.07%
Epoch [6/10], Loss: 4.9242, Top-1 Accuracy: 3.32%, Top-5 Accuracy: 11.57%
Epoch [7/10], Loss: 4.8765, Top-1 Accuracy: 4.23%, Top-5 Accuracy: 13.65%
Epoch [8/10], Loss: 4.7939, Top-1 Accuracy: 4.15%, Top-5 Accuracy: 14.50%
Epoch [9/10], Loss: 4.7436, Top-1 Accuracy: 4.47%, Top-5 Accuracy: 15.64%
Epoch [10/10], Loss: 4.7111, Top-1 Accuracy: 5.16%, Top-5 Accuracy: 16.65%
Validation Loss: 4.8580, Top-1 Accuracy: 6.58%, Top-5 Accuracy: 15.31%


(6.5805288553237915, 15.309495210647583)

In [18]:

def train_model_5(model, train_loader, criterion, optimizer, device, epochs=5):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        top1_correct = 0
        top5_correct = 0
        total = 0
        for batch in train_loader:
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            top1, top5 = accuracy(outputs, labels, topk=(1, 5))
            top1_correct += top1.item()
            top5_correct += top5.item()
            total += labels.size(0)

        avg_loss = running_loss / len(train_loader)
        avg_top1_acc = top1_correct / len(train_loader)
        avg_top5_acc = top5_correct / len(train_loader)

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Top-1 Accuracy: {avg_top1_acc:.2f}%, Top-5 Accuracy: {avg_top5_acc:.2f}%")

def validate_model_5(model, val_loader, device):
    model.eval()
    running_loss = 0.0
    top1_correct = 0
    top5_correct = 0
    total = 0
    with torch.no_grad():
        for batch in val_loader:
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()

            top1, top5 = accuracy(outputs, labels, topk=(1, 5))
            top1_correct += top1.item()
            top5_correct += top5.item()
            total += labels.size(0)

    avg_loss = running_loss / len(val_loader)
    avg_top1_acc = top1_correct / len(val_loader)
    avg_top5_acc = top5_correct / len(val_loader)

    print(f"Validation Loss: {avg_loss:.4f}, Top-1 Accuracy: {avg_top1_acc:.2f}%, Top-5 Accuracy: {avg_top5_acc:.2f}%")
    return avg_top1_acc, avg_top5_acc


In [19]:

train_model_5(model, train_loader2, criterion, optimizer, device, epochs=5)

validate_model_5(model, val_loader2, device)

Epoch [1/5], Loss: 4.6283, Top-1 Accuracy: 6.13%, Top-5 Accuracy: 19.11%
Epoch [2/5], Loss: 4.6255, Top-1 Accuracy: 6.23%, Top-5 Accuracy: 18.67%
Epoch [3/5], Loss: 4.6199, Top-1 Accuracy: 5.64%, Top-5 Accuracy: 18.02%
Epoch [4/5], Loss: 4.6431, Top-1 Accuracy: 5.44%, Top-5 Accuracy: 18.85%
Epoch [5/5], Loss: 4.6106, Top-1 Accuracy: 6.31%, Top-5 Accuracy: 19.03%
Validation Loss: 4.8926, Top-1 Accuracy: 6.78%, Top-5 Accuracy: 17.10%


(6.7758413553237915, 17.097355604171753)

In [None]:

train_data = dataset['train']
val_data = dataset['valid']

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False)
