## 深度学习报告
作业报告基于B榜最优分数编写，用中文完成（可附带英文版本），整合在Jupyter notebook文件，至少包括以下部分：
- 数据预处理
- 数据可视化
- 模型构建
- 模型训练
- 模型评估
- 陈述总结
- 参考文献，注意全部列出的参考文献需在文中引用。


---
# 1. 库


In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.models as models
from torchtoolbox.transform import Cutout
import os
from PIL import Image 
import pandas as pd
from PIL import Image 
import pandas
import datetime
from torch.utils.data import random_split
from torch.utils.data import DataLoader, random_split, Subset
from torchvision.transforms import AutoAugment, AutoAugmentPolicy

from transformers import ViTFeatureExtractor, ViTForImageClassification,ViTImageProcessor


  from .autonotebook import tqdm as notebook_tqdm


---
# 2. 数据导入与处理

## 2.1 数据预处理

In [2]:
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    AutoAugment(AutoAugmentPolicy.IMAGENET),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.RandomVerticalFlip(),
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1)),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    Cutout(), #遮挡增强
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

## 2.2 学号信息

In [3]:
#22211360121-李凯荣-22人工智能1班
student_id = '22211360121'
#生成路径
subdir = ''

## 2.3 训练数据集

In [4]:
# 加载数据集
full_dataset = torchvision.datasets.ImageFolder(root='new data/train', transform=transform)


In [5]:
# 训练数据集划分
train_size=int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

In [6]:
#应用数据增强
val_dataset=Subset(full_dataset,val_dataset.indices)
val_dataset.dataset.transform=val_transforms

In [7]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4,pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

In [8]:
print(f"训练集大小: {len(train_dataset)}, 验证集大小: {len(val_dataset)}")

训练集大小: 2844, 验证集大小: 711


In [9]:
print(f"Number of batches in train_loader: {len(train_loader)}")

Number of batches in train_loader: 89


In [10]:
print(f"Dataset size: {len(train_loader.dataset)}")

Dataset size: 2844


## 2.4 测试数据集 A

In [11]:
# 加载测试集
test_folder = 'new data/testA'
test_images = [img for img in os.listdir(test_folder) if img.endswith('.jpg')]

## 2.5 其他

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


---
# 3. 数据可视化

---
# 4. 模型构建

## 4.1 模型

In [13]:
model = ViTForImageClassification.from_pretrained(
    'google/vit-large-patch16-224-in21k',
    num_labels=100,  # 100个鸟类类别
    ignore_mismatched_sizes=True,
)

processor = ViTImageProcessor.from_pretrained("google/vit-large-patch16-224-in21k")

Some weights of the model checkpoint at google/vit-large-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-large-patch16-224-in21k and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
# 冻结除最后4层外的所有层
for name, param in model.named_parameters():
    # 解冻分类头层
    if 'classifier' in name:
        param.requires_grad = True
    # 解冻最后4个Transformer块
    elif 'encoder.layer.20' in name or 'encoder.layer.21' in name or \
        'encoder.layer.22' in name or 'encoder.layer.23' in name:
        param.requires_grad = True
    else:
        param.requires_grad = False  # 冻结其他层

# 将模型移至设备
model = model.to(device)

## 4.2 损失函数与优化器

In [15]:
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
    model.parameters(), 
    lr=1e-5,  # 更小的学习率
    weight_decay=0.05,  # 更强的权重衰减
    eps=1e-8  # 数值稳定性
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, 
    T_max=50,  # 半周期长度
    eta_min=1e-6  # 最小学习率
)

---
# 5. 模型训练

## 5.1 设备

In [16]:
#使用GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

## 5. 训练

In [17]:
#早停机制
class EarlyStopping:
    def __init__(self, patience=5, verbose=True, delta=0, path='best_model.pth'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')
        self.delta = delta
        self.path = path
        
    def __call__(self, val_loss, model):
        score = -val_loss
        
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter}/{self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
            
    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'验证损失改善 ({self.val_loss_min:.4f} → {val_loss:.4f}). 保存模型...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

# 初始化时指定保存路径
early_stopping = EarlyStopping(patience=5, verbose=True, path='best_model.pth')

In [18]:
from tqdm import tqdm
#轮数
num_epochs=100

# 训练模型
for epoch in range(num_epochs):  # 假设训练5个epoch
    print(f"Starting epoch {epoch+1}/{num_epochs}")
    model.train()
    train_loss=0.0
    train_correct=0
    train_total=0
    
    
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1} Training")

    for inputs, labels in train_bar:
        inputs = inputs.to(device)  # 将输入数据移动到 GPU
        labels = labels.to(device)  # 将标签数据移动到 GPU

        optimizer.zero_grad() #清零梯度
        outputs = model(inputs) #前向传播
        logits=outputs.logits

        loss = criterion(logits, labels) #损失计算
        loss.backward() #反向传播
        optimizer.step() #更新参数

        train_loss+=loss.item()
        _, predicted = torch.max(logits, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

        # 更新进度条
        train_bar.set_postfix(loss=loss.item())
        train_bar.update(1)

    train_loss = train_loss / len(train_loader)
    train_acc = 100 * train_correct / train_total
    
    # 验证阶段
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    
    val_bar = tqdm(val_loader, desc="Validating")
    with torch.no_grad():
        for inputs, labels in val_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            logits=outputs.logits

            loss = criterion(logits, labels)
            
            val_loss += loss.item()
            _, predicted = torch.max(logits, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
            
            val_bar.set_postfix(loss=loss.item())

    val_loss = val_loss / len(val_loader)
    val_acc = 100 * val_correct / val_total
    
    print(f'Epoch {epoch+1}/{num_epochs}')
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
    print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
    
    scheduler.step()

    # 早停机制检查
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("早停触发，停止训练")
        break

    



Starting epoch 1/100


Epoch 1 Training: 100%|██████████| 89/89 [01:40<00:00,  1.13s/it, loss=4.13]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=4.02]


Epoch 1/100
Train Loss: 4.3848, Train Acc: 10.69%
Val Loss: 4.0916, Val Acc: 28.69%
验证损失改善 (inf → 4.0916). 保存模型...
Starting epoch 2/100


Epoch 2 Training: 100%|██████████| 89/89 [01:40<00:00,  1.13s/it, loss=3.31]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.61s/it, loss=3.37]


Epoch 2/100
Train Loss: 3.6953, Train Acc: 54.68%
Val Loss: 3.4559, Val Acc: 61.04%
验证损失改善 (4.0916 → 3.4559). 保存模型...
Starting epoch 3/100


Epoch 3 Training: 100%|██████████| 89/89 [01:38<00:00,  1.11s/it, loss=2.72]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=2.83]


Epoch 3/100
Train Loss: 2.9657, Train Acc: 79.64%
Val Loss: 2.8572, Val Acc: 72.29%
验证损失改善 (3.4559 → 2.8572). 保存模型...
Starting epoch 4/100


Epoch 4 Training: 100%|██████████| 89/89 [01:38<00:00,  1.11s/it, loss=2.14]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=2.44]


Epoch 4/100
Train Loss: 2.3204, Train Acc: 87.55%
Val Loss: 2.3706, Val Acc: 77.64%
验证损失改善 (2.8572 → 2.3706). 保存模型...
Starting epoch 5/100


Epoch 5 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=1.53]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=2.1] 


Epoch 5/100
Train Loss: 1.8053, Train Acc: 91.21%
Val Loss: 1.9938, Val Acc: 80.45%
验证损失改善 (2.3706 → 1.9938). 保存模型...
Starting epoch 6/100


Epoch 6 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=1.18]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.88]


Epoch 6/100
Train Loss: 1.4094, Train Acc: 93.04%
Val Loss: 1.7132, Val Acc: 82.56%
验证损失改善 (1.9938 → 1.7132). 保存模型...
Starting epoch 7/100


Epoch 7 Training: 100%|██████████| 89/89 [01:40<00:00,  1.13s/it, loss=1.04] 
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.64]


Epoch 7/100
Train Loss: 1.1157, Train Acc: 94.62%
Val Loss: 1.5074, Val Acc: 84.39%
验证损失改善 (1.7132 → 1.5074). 保存模型...
Starting epoch 8/100


Epoch 8 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.854]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.52]


Epoch 8/100
Train Loss: 0.8927, Train Acc: 95.92%
Val Loss: 1.3530, Val Acc: 85.09%
验证损失改善 (1.5074 → 1.3530). 保存模型...
Starting epoch 9/100


Epoch 9 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.659]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.4] 


Epoch 9/100
Train Loss: 0.7239, Train Acc: 97.15%
Val Loss: 1.2352, Val Acc: 85.65%
验证损失改善 (1.3530 → 1.2352). 保存模型...
Starting epoch 10/100


Epoch 10 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.568]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.32]


Epoch 10/100
Train Loss: 0.5951, Train Acc: 97.75%
Val Loss: 1.1429, Val Acc: 86.22%
验证损失改善 (1.2352 → 1.1429). 保存模型...
Starting epoch 11/100


Epoch 11 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.361]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.24] 


Epoch 11/100
Train Loss: 0.4947, Train Acc: 98.31%
Val Loss: 1.0701, Val Acc: 86.50%
验证损失改善 (1.1429 → 1.0701). 保存模型...
Starting epoch 12/100


Epoch 12 Training: 100%|██████████| 89/89 [01:39<00:00,  1.11s/it, loss=0.426]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.17] 


Epoch 12/100
Train Loss: 0.4153, Train Acc: 98.80%
Val Loss: 1.0137, Val Acc: 86.78%
验证损失改善 (1.0701 → 1.0137). 保存模型...
Starting epoch 13/100


Epoch 13 Training: 100%|██████████| 89/89 [01:38<00:00,  1.11s/it, loss=0.451]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.09] 


Epoch 13/100
Train Loss: 0.3527, Train Acc: 99.12%
Val Loss: 0.9712, Val Acc: 87.20%
验证损失改善 (1.0137 → 0.9712). 保存模型...
Starting epoch 14/100


Epoch 14 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.561]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.65s/it, loss=1.12] 


Epoch 14/100
Train Loss: 0.3025, Train Acc: 99.47%
Val Loss: 0.9449, Val Acc: 86.22%
验证损失改善 (0.9712 → 0.9449). 保存模型...
Starting epoch 15/100


Epoch 15 Training: 100%|██████████| 89/89 [01:39<00:00,  1.11s/it, loss=0.202]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.12] 


Epoch 15/100
Train Loss: 0.2605, Train Acc: 99.61%
Val Loss: 0.9099, Val Acc: 86.64%
验证损失改善 (0.9449 → 0.9099). 保存模型...
Starting epoch 16/100


Epoch 16 Training: 100%|██████████| 89/89 [01:39<00:00,  1.11s/it, loss=0.195]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 16/100
Train Loss: 0.2267, Train Acc: 99.72%
Val Loss: 0.8809, Val Acc: 87.20%
验证损失改善 (0.9099 → 0.8809). 保存模型...
Starting epoch 17/100


Epoch 17 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.202]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.11] 


Epoch 17/100
Train Loss: 0.1994, Train Acc: 99.79%
Val Loss: 0.8613, Val Acc: 87.06%
验证损失改善 (0.8809 → 0.8613). 保存模型...
Starting epoch 18/100


Epoch 18 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.167]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.07] 


Epoch 18/100
Train Loss: 0.1769, Train Acc: 99.86%
Val Loss: 0.8395, Val Acc: 86.64%
验证损失改善 (0.8613 → 0.8395). 保存模型...
Starting epoch 19/100


Epoch 19 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.137]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.05] 


Epoch 19/100
Train Loss: 0.1580, Train Acc: 99.89%
Val Loss: 0.8247, Val Acc: 86.78%
验证损失改善 (0.8395 → 0.8247). 保存模型...
Starting epoch 20/100


Epoch 20 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.188]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.08] 


Epoch 20/100
Train Loss: 0.1425, Train Acc: 99.89%
Val Loss: 0.8111, Val Acc: 86.64%
验证损失改善 (0.8247 → 0.8111). 保存模型...
Starting epoch 21/100


Epoch 21 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.134] 
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.64s/it, loss=1.08] 


Epoch 21/100
Train Loss: 0.1294, Train Acc: 99.93%
Val Loss: 0.8007, Val Acc: 87.34%
验证损失改善 (0.8111 → 0.8007). 保存模型...
Starting epoch 22/100


Epoch 22 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.141] 
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.1]  


Epoch 22/100
Train Loss: 0.1183, Train Acc: 99.96%
Val Loss: 0.7929, Val Acc: 86.78%
验证损失改善 (0.8007 → 0.7929). 保存模型...
Starting epoch 23/100


Epoch 23 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0998]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.1]  


Epoch 23/100
Train Loss: 0.1089, Train Acc: 99.96%
Val Loss: 0.7813, Val Acc: 86.64%
验证损失改善 (0.7929 → 0.7813). 保存模型...
Starting epoch 24/100


Epoch 24 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.109] 
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.64s/it, loss=1.1]  


Epoch 24/100
Train Loss: 0.1009, Train Acc: 100.00%
Val Loss: 0.7712, Val Acc: 87.34%
验证损失改善 (0.7813 → 0.7712). 保存模型...
Starting epoch 25/100


Epoch 25 Training: 100%|██████████| 89/89 [01:41<00:00,  1.14s/it, loss=0.0911]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.1]  


Epoch 25/100
Train Loss: 0.0939, Train Acc: 100.00%
Val Loss: 0.7695, Val Acc: 86.92%
验证损失改善 (0.7712 → 0.7695). 保存模型...
Starting epoch 26/100


Epoch 26 Training: 100%|██████████| 89/89 [01:40<00:00,  1.13s/it, loss=0.0829]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.11] 


Epoch 26/100
Train Loss: 0.0878, Train Acc: 100.00%
Val Loss: 0.7614, Val Acc: 87.20%
验证损失改善 (0.7695 → 0.7614). 保存模型...
Starting epoch 27/100


Epoch 27 Training: 100%|██████████| 89/89 [01:42<00:00,  1.16s/it, loss=0.077] 
Validating: 100%|██████████| 23/23 [00:33<00:00,  1.44s/it, loss=1.1]  


Epoch 27/100
Train Loss: 0.0825, Train Acc: 100.00%
Val Loss: 0.7546, Val Acc: 87.20%
验证损失改善 (0.7614 → 0.7546). 保存模型...
Starting epoch 28/100


Epoch 28 Training: 100%|██████████| 89/89 [01:41<00:00,  1.14s/it, loss=0.0752]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.64s/it, loss=1.11] 


Epoch 28/100
Train Loss: 0.0780, Train Acc: 100.00%
Val Loss: 0.7497, Val Acc: 87.20%
验证损失改善 (0.7546 → 0.7497). 保存模型...
Starting epoch 29/100


Epoch 29 Training: 100%|██████████| 89/89 [01:40<00:00,  1.12s/it, loss=0.107] 
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.65s/it, loss=1.11] 


Epoch 29/100
Train Loss: 0.0739, Train Acc: 100.00%
Val Loss: 0.7449, Val Acc: 86.78%
验证损失改善 (0.7497 → 0.7449). 保存模型...
Starting epoch 30/100


Epoch 30 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0673]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.64s/it, loss=1.1]  


Epoch 30/100
Train Loss: 0.0703, Train Acc: 100.00%
Val Loss: 0.7405, Val Acc: 86.64%
验证损失改善 (0.7449 → 0.7405). 保存模型...
Starting epoch 31/100


Epoch 31 Training: 100%|██████████| 89/89 [01:40<00:00,  1.13s/it, loss=0.0585]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.64s/it, loss=1.1]  


Epoch 31/100
Train Loss: 0.0671, Train Acc: 100.00%
Val Loss: 0.7364, Val Acc: 86.92%
验证损失改善 (0.7405 → 0.7364). 保存模型...
Starting epoch 32/100


Epoch 32 Training: 100%|██████████| 89/89 [01:40<00:00,  1.13s/it, loss=0.0566]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.65s/it, loss=1.1]  


Epoch 32/100
Train Loss: 0.0643, Train Acc: 100.00%
Val Loss: 0.7324, Val Acc: 87.06%
验证损失改善 (0.7364 → 0.7324). 保存模型...
Starting epoch 33/100


Epoch 33 Training: 100%|██████████| 89/89 [01:42<00:00,  1.16s/it, loss=0.0777]
Validating: 100%|██████████| 23/23 [00:39<00:00,  1.73s/it, loss=1.1]  


Epoch 33/100
Train Loss: 0.0618, Train Acc: 100.00%
Val Loss: 0.7290, Val Acc: 86.92%
验证损失改善 (0.7324 → 0.7290). 保存模型...
Starting epoch 34/100


Epoch 34 Training: 100%|██████████| 89/89 [01:45<00:00,  1.18s/it, loss=0.0586]
Validating: 100%|██████████| 23/23 [00:38<00:00,  1.68s/it, loss=1.1]  


Epoch 34/100
Train Loss: 0.0595, Train Acc: 100.00%
Val Loss: 0.7275, Val Acc: 86.92%
验证损失改善 (0.7290 → 0.7275). 保存模型...
Starting epoch 35/100


Epoch 35 Training: 100%|██████████| 89/89 [01:42<00:00,  1.16s/it, loss=0.0524]
Validating: 100%|██████████| 23/23 [00:39<00:00,  1.70s/it, loss=1.09] 


Epoch 35/100
Train Loss: 0.0574, Train Acc: 100.00%
Val Loss: 0.7229, Val Acc: 86.92%
验证损失改善 (0.7275 → 0.7229). 保存模型...
Starting epoch 36/100


Epoch 36 Training: 100%|██████████| 89/89 [01:46<00:00,  1.19s/it, loss=0.0495]
Validating: 100%|██████████| 23/23 [00:39<00:00,  1.70s/it, loss=1.11] 


Epoch 36/100
Train Loss: 0.0556, Train Acc: 100.00%
Val Loss: 0.7229, Val Acc: 86.78%
EarlyStopping counter: 1/5
Starting epoch 37/100


Epoch 37 Training: 100%|██████████| 89/89 [01:44<00:00,  1.18s/it, loss=0.0568]
Validating: 100%|██████████| 23/23 [00:39<00:00,  1.70s/it, loss=1.11] 


Epoch 37/100
Train Loss: 0.0540, Train Acc: 100.00%
Val Loss: 0.7202, Val Acc: 86.92%
验证损失改善 (0.7229 → 0.7202). 保存模型...
Starting epoch 38/100


Epoch 38 Training: 100%|██████████| 89/89 [01:46<00:00,  1.19s/it, loss=0.0485]
Validating: 100%|██████████| 23/23 [00:38<00:00,  1.68s/it, loss=1.1]  


Epoch 38/100
Train Loss: 0.0524, Train Acc: 100.00%
Val Loss: 0.7182, Val Acc: 86.92%
验证损失改善 (0.7202 → 0.7182). 保存模型...
Starting epoch 39/100


Epoch 39 Training: 100%|██████████| 89/89 [01:44<00:00,  1.17s/it, loss=0.0476]
Validating: 100%|██████████| 23/23 [00:38<00:00,  1.69s/it, loss=1.1]  


Epoch 39/100
Train Loss: 0.0511, Train Acc: 100.00%
Val Loss: 0.7158, Val Acc: 86.92%
验证损失改善 (0.7182 → 0.7158). 保存模型...
Starting epoch 40/100


Epoch 40 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0458]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 40/100
Train Loss: 0.0499, Train Acc: 100.00%
Val Loss: 0.7143, Val Acc: 86.92%
验证损失改善 (0.7158 → 0.7143). 保存模型...
Starting epoch 41/100


Epoch 41 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0468]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 41/100
Train Loss: 0.0488, Train Acc: 100.00%
Val Loss: 0.7129, Val Acc: 86.92%
验证损失改善 (0.7143 → 0.7129). 保存模型...
Starting epoch 42/100


Epoch 42 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0471]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 42/100
Train Loss: 0.0478, Train Acc: 100.00%
Val Loss: 0.7111, Val Acc: 86.78%
验证损失改善 (0.7129 → 0.7111). 保存模型...
Starting epoch 43/100


Epoch 43 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.045] 
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 43/100
Train Loss: 0.0469, Train Acc: 100.00%
Val Loss: 0.7095, Val Acc: 86.64%
验证损失改善 (0.7111 → 0.7095). 保存模型...
Starting epoch 44/100


Epoch 44 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0416]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 44/100
Train Loss: 0.0460, Train Acc: 100.00%
Val Loss: 0.7089, Val Acc: 86.64%
验证损失改善 (0.7095 → 0.7089). 保存模型...
Starting epoch 45/100


Epoch 45 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.046] 
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.1]  


Epoch 45/100
Train Loss: 0.0452, Train Acc: 100.00%
Val Loss: 0.7073, Val Acc: 86.78%
验证损失改善 (0.7089 → 0.7073). 保存模型...
Starting epoch 46/100


Epoch 46 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0395]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 46/100
Train Loss: 0.0445, Train Acc: 100.00%
Val Loss: 0.7066, Val Acc: 86.64%
验证损失改善 (0.7073 → 0.7066). 保存模型...
Starting epoch 47/100


Epoch 47 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0483]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 47/100
Train Loss: 0.0438, Train Acc: 100.00%
Val Loss: 0.7052, Val Acc: 86.64%
验证损失改善 (0.7066 → 0.7052). 保存模型...
Starting epoch 48/100


Epoch 48 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.04]  
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 48/100
Train Loss: 0.0432, Train Acc: 100.00%
Val Loss: 0.7045, Val Acc: 86.64%
验证损失改善 (0.7052 → 0.7045). 保存模型...
Starting epoch 49/100


Epoch 49 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0421]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.1]  


Epoch 49/100
Train Loss: 0.0426, Train Acc: 100.00%
Val Loss: 0.7038, Val Acc: 86.78%
验证损失改善 (0.7045 → 0.7038). 保存模型...
Starting epoch 50/100


Epoch 50 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.038] 
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 50/100
Train Loss: 0.0420, Train Acc: 100.00%
Val Loss: 0.7027, Val Acc: 86.64%
验证损失改善 (0.7038 → 0.7027). 保存模型...
Starting epoch 51/100


Epoch 51 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0406]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 51/100
Train Loss: 0.0414, Train Acc: 100.00%
Val Loss: 0.7017, Val Acc: 86.64%
验证损失改善 (0.7027 → 0.7017). 保存模型...
Starting epoch 52/100


Epoch 52 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0406]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 52/100
Train Loss: 0.0408, Train Acc: 100.00%
Val Loss: 0.7011, Val Acc: 86.64%
验证损失改善 (0.7017 → 0.7011). 保存模型...
Starting epoch 53/100


Epoch 53 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0388]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.61s/it, loss=1.1]  


Epoch 53/100
Train Loss: 0.0402, Train Acc: 100.00%
Val Loss: 0.7000, Val Acc: 86.64%
验证损失改善 (0.7011 → 0.7000). 保存模型...
Starting epoch 54/100


Epoch 54 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0511]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 54/100
Train Loss: 0.0396, Train Acc: 100.00%
Val Loss: 0.6989, Val Acc: 86.64%
验证损失改善 (0.7000 → 0.6989). 保存模型...
Starting epoch 55/100


Epoch 55 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0446]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 55/100
Train Loss: 0.0390, Train Acc: 100.00%
Val Loss: 0.6970, Val Acc: 86.64%
验证损失改善 (0.6989 → 0.6970). 保存模型...
Starting epoch 56/100


Epoch 56 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0385]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 56/100
Train Loss: 0.0383, Train Acc: 100.00%
Val Loss: 0.6969, Val Acc: 86.64%
验证损失改善 (0.6970 → 0.6969). 保存模型...
Starting epoch 57/100


Epoch 57 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0414]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 57/100
Train Loss: 0.0376, Train Acc: 100.00%
Val Loss: 0.6954, Val Acc: 86.64%
验证损失改善 (0.6969 → 0.6954). 保存模型...
Starting epoch 58/100


Epoch 58 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0329]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 58/100
Train Loss: 0.0367, Train Acc: 100.00%
Val Loss: 0.6941, Val Acc: 86.78%
验证损失改善 (0.6954 → 0.6941). 保存模型...
Starting epoch 59/100


Epoch 59 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0353]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.1]  


Epoch 59/100
Train Loss: 0.0359, Train Acc: 100.00%
Val Loss: 0.6923, Val Acc: 86.64%
验证损失改善 (0.6941 → 0.6923). 保存模型...
Starting epoch 60/100


Epoch 60 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0326]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 60/100
Train Loss: 0.0350, Train Acc: 100.00%
Val Loss: 0.6905, Val Acc: 86.78%
验证损失改善 (0.6923 → 0.6905). 保存模型...
Starting epoch 61/100


Epoch 61 Training: 100%|██████████| 89/89 [01:40<00:00,  1.13s/it, loss=0.0327]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 61/100
Train Loss: 0.0340, Train Acc: 100.00%
Val Loss: 0.6885, Val Acc: 86.92%
验证损失改善 (0.6905 → 0.6885). 保存模型...
Starting epoch 62/100


Epoch 62 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0287]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 62/100
Train Loss: 0.0330, Train Acc: 100.00%
Val Loss: 0.6882, Val Acc: 86.92%
验证损失改善 (0.6885 → 0.6882). 保存模型...
Starting epoch 63/100


Epoch 63 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0296]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 63/100
Train Loss: 0.0319, Train Acc: 100.00%
Val Loss: 0.6852, Val Acc: 86.78%
验证损失改善 (0.6882 → 0.6852). 保存模型...
Starting epoch 64/100


Epoch 64 Training: 100%|██████████| 89/89 [01:38<00:00,  1.11s/it, loss=0.0328]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 64/100
Train Loss: 0.0307, Train Acc: 100.00%
Val Loss: 0.6825, Val Acc: 86.92%
验证损失改善 (0.6852 → 0.6825). 保存模型...
Starting epoch 65/100


Epoch 65 Training: 100%|██████████| 89/89 [01:39<00:00,  1.12s/it, loss=0.0288]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.63s/it, loss=1.11] 


Epoch 65/100
Train Loss: 0.0295, Train Acc: 100.00%
Val Loss: 0.6818, Val Acc: 86.92%
验证损失改善 (0.6825 → 0.6818). 保存模型...
Starting epoch 66/100


Epoch 66 Training: 100%|██████████| 89/89 [01:40<00:00,  1.13s/it, loss=0.0283]
Validating: 100%|██████████| 23/23 [00:37<00:00,  1.62s/it, loss=1.11] 


Epoch 66/100
Train Loss: 0.0283, Train Acc: 100.00%
Val Loss: 0.6775, Val Acc: 86.64%
验证损失改善 (0.6818 → 0.6775). 保存模型...
Starting epoch 67/100


Epoch 67 Training:  25%|██▍       | 22/89 [00:42<02:09,  1.93s/it, loss=0.0304]


KeyboardInterrupt: 

---
# 6. 模型评估

## 6.1 模型加载

In [19]:
# 模型加载修改
model = ViTForImageClassification.from_pretrained(
    'google/vit-large-patch16-224-in21k',
    num_labels=100,
    ignore_mismatched_sizes=True
)
model.load_state_dict(torch.load('best_model.pth'))
model = model.to(device)

Some weights of the model checkpoint at google/vit-large-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-large-patch16-224-in21k and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## 6.2 推理预测

In [20]:
model=model.to(device)
model.eval()
predicts = []
idx = []

# 对测试集中的每张图像进行预测
with torch.no_grad():
    for img_name in test_images:
        img_path = os.path.join(test_folder, img_name)        
        image = Image.open(img_path).convert('RGB')  # 以RGB模式打开图像
        image = transform(image).unsqueeze(0).to(device)  # 应用预处理并增加batch维度
        outputs = model(image)
        logits=outputs.logits

        _, predicted = torch.max(logits, 1)
        predicts.append(predicted.item())
        idx.append(img_name.replace('.jpg', ''))  # 去掉文件扩展名作为ID


## 6.3 结果保存

In [21]:
# 保存预测结果到CSV文件
submission = pd.DataFrame({'id': idx, 'label': predicts})
submission['id']=submission['id'].astype(int)
submission=submission.sort_values(by='id')
submission

Unnamed: 0,id,label
0,0,65
1,1,39
237,2,87
348,3,94
459,4,90
...,...,...
137,1120,6
138,1121,54
139,1122,6
140,1123,65


In [22]:
submission.to_csv(subdir + student_id + 'submission_{}.csv'.format(
    datetime.datetime.now().strftime('%Y%m%d_%H%M%S')),
                index=False)

---
# 7. 陈述总结