## 深度学习报告
作业报告基于B榜最优分数编写，用中文完成（可附带英文版本），整合在Jupyter notebook文件，至少包括以下部分：
- 数据预处理
- 数据可视化
- 模型构建
- 模型训练
- 模型评估
- 陈述总结
- 参考文献，注意全部列出的参考文献需在文中引用。


---
# 1. 库


In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.models as models
from torchtoolbox.transform import Cutout
import os
from PIL import Image 
import pandas as pd
from PIL import Image 
import pandas
import datetime
from torch.utils.data import random_split

---
# 2. 数据导入与处理

## 2.1 数据预处理

In [3]:
# Preprocessing / augmentation pipeline applied to every image loaded with
# `transform`. The steps run in list order.
# NOTE(review): the mean/std values are presumably the standard ImageNet
# statistics - confirm they match the pretrained backbone.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

_pipeline_steps = [
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    Cutout(),  # occlusion augmentation
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
transform = transforms.Compose(_pipeline_steps)

## 2.2 学号信息

In [4]:
# Student record: 22211360121 - Li Kairong - Artificial Intelligence class 1, 2022 cohort
student_id = '22211360121'
# Path prefix for generated files; it is prepended to the submission file name
# when the CSV is written (empty string: no prefix).
subdir = ''

## 2.3 训练数据集

In [5]:
# Load the labelled images under 'new data/train'; `transform` is applied to
# every sample that is read from this dataset.
full_dataset = torchvision.datasets.ImageFolder(root='new data/train', transform=transform)


In [6]:
# Split the labelled data 80/20 into training and validation subsets.
# A fixed-seed generator makes the split reproducible: without it, re-running
# the notebook (e.g. before reloading a checkpoint to continue training)
# reshuffles the subsets, so images the saved model was trained on can end up
# in the new validation set.
# NOTE(review): both subsets are views of `full_dataset`, so validation images
# also go through the random training augmentations in `transform`.
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=split_generator,
)

In [7]:
# Batch loaders for the two subsets. They share every option except
# shuffling: training batches are shuffled, validation batches are not.
loader_options = dict(batch_size=32, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

In [8]:
# Report how many samples ended up in the training and validation subsets.
print(f"训练集大小: {len(train_dataset)}, 验证集大小: {len(val_dataset)}")

训练集大小: 2844, 验证集大小: 711


In [9]:
# Report the number of batches the training loader yields per epoch.
print(f"Number of batches in train_loader: {len(train_loader)}")

Number of batches in train_loader: 89


In [10]:
# Report the number of samples behind the training loader.
print(f"Dataset size: {len(train_loader.dataset)}")

Dataset size: 2844


## 2.4 测试数据集 A

In [11]:
# Collect the file names in the test folder that end with '.jpg'.
test_folder = 'new data/testA'
test_images = list(
    filter(lambda file_name: file_name.endswith('.jpg'), os.listdir(test_folder))
)

## 2.5 其他

In [12]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


---
# 3. 数据可视化

---
# 4. 模型构建

## 4.1 模型

In [None]:
# Load the pretrained image classifier and its matching image processor from
# the same checkpoint identifier.
# NOTE(review): `processor` is not referenced by any other cell visible here.
# NOTE(review): later cells index `model.classifier[2]` and reload the saved
# weights into torchvision's `convnext_base`; confirm that this checkpoint's
# architecture is compatible with both.
from transformers import AutoImageProcessor, AutoModelForImageClassification

checkpoint_name = "Hemg/Birds-Species-classification"
processor = AutoImageProcessor.from_pretrained(checkpoint_name)
model = AutoModelForImageClassification.from_pretrained(checkpoint_name)


In [14]:
# Freeze every parameter that belongs to a direct child of the model, except
# for the last two children, which stay trainable.
all_layers = list(model.children())
frozen_parameters = (
    weight
    for child in all_layers[:-2]
    for weight in child.parameters()
)
for weight in frozen_parameters:
    weight.requires_grad = False

In [15]:
# Swap the layer at index 2 of the classifier for a fresh 100-class linear
# layer with the same input width (the original note describes this as the
# ConvNeXt classifier head).
old_head = model.classifier[2]
num_ftrs = old_head.in_features  # input feature count of the replaced layer
model.classifier[2] = nn.Linear(in_features=num_ftrs, out_features=100)

## 4.2 损失函数与优化器

In [23]:
# Loss function, optimiser and learning-rate schedule used for training.
criterion = nn.CrossEntropyLoss()

# AdamW with a small learning rate, fairly strong weight decay, and an
# explicit epsilon for numerical stability.
optimizer = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.05, eps=1e-8)

# Cosine annealing from the optimiser's learning rate down to `eta_min`,
# with `T_max` scheduler steps per half cycle.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6)

---
# 5. 模型训练

## 5.1 设备

In [17]:
# Pick the GPU when CUDA is available, then move the model onto that device.
has_gpu = torch.cuda.is_available()
device = torch.device("cuda" if has_gpu else "cpu")
model = model.to(device)

## 5.2 训练

In [18]:
# Early-stopping helper
import math


class EarlyStopping:
    """Track the validation loss and stop training once it stops improving.

    Each call compares the new validation loss with the best one seen so far.
    An improvement saves the model's ``state_dict`` to ``path`` and resets the
    patience counter; anything else increments the counter, and
    ``early_stop`` becomes ``True`` once the counter reaches ``patience``.

    Args:
        patience: Number of consecutive non-improving calls tolerated before
            ``early_stop`` is set.
        verbose: When true, print a message on every checkpoint save and on
            every non-improving call.
        delta: Margin added to the best score; a call only counts as an
            improvement when ``-val_loss >= best_score + delta``.
        path: File the best ``state_dict`` is written to.
    """

    def __init__(self, patience=5, verbose=True, delta=0, path='best_model.pth'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and checkpoint on improvement."""
        # NaN compares False against everything, so a diverged epoch would
        # otherwise fall through to the "improved" branch, overwrite the best
        # checkpoint and poison `best_score` for every later comparison.
        if math.isnan(val_loss):
            self._register_no_improvement()
            return

        score = -val_loss
        if self.best_score is None or score >= self.best_score + self.delta:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
        else:
            self._register_no_improvement()

    def _register_no_improvement(self):
        """Count one non-improving epoch and flag the stop once patience runs out."""
        self.counter += 1
        if self.verbose:
            print(f'EarlyStopping counter: {self.counter}/{self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        """Write the model's ``state_dict`` to ``path`` and remember ``val_loss``."""
        if self.verbose:
            print(f'验证损失改善 ({self.val_loss_min:.4f} → {val_loss:.4f}). 保存模型...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss


# Create the tracker with the checkpoint path used by the rest of the notebook.
early_stopping = EarlyStopping(patience=5, verbose=True, path='best_bird_model.pth')

In [21]:
# Maximum number of training epochs (the training loop iterates over range(num_epochs)).
num_epochs=60

In [19]:

# Train the model for at most `num_epochs` epochs, validating after each one.
# Early stopping may end the run sooner.
for epoch in range(num_epochs):
    print(f"Starting epoch {epoch+1}/{num_epochs}")

    # ---- training phase ----
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0

    for inputs, labels in train_loader:
        # Move the batch to the selected device
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # clear gradients from the previous step
        outputs = model(inputs)  # forward pass
        # Some models return an output object that carries `.logits` instead
        # of a plain tensor; unwrap it so the loss and the argmax receive
        # tensors (same handling as the continue-training loop).
        logits = outputs.logits if hasattr(outputs, 'logits') else outputs
        loss = criterion(logits, labels)
        loss.backward()  # backward pass
        optimizer.step()  # parameter update

        train_loss += loss.item()
        _, predicted = torch.max(logits, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    train_loss = train_loss / len(train_loader)
    train_acc = 100 * train_correct / train_total

    # ---- validation phase ----
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            logits = outputs.logits if hasattr(outputs, 'logits') else outputs
            loss = criterion(logits, labels)

            val_loss += loss.item()
            _, predicted = torch.max(logits, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_loss = val_loss / len(val_loader)
    val_acc = 100 * val_correct / val_total

    print(f'Epoch {epoch+1}/{num_epochs} | '
            f'Train Loss: {train_loss:.4f}, Acc: {train_acc:.2f}% | '
            f'Val Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%')

    # Advance the learning-rate schedule once per epoch, as the
    # continue-training loop does; otherwise the configured scheduler never
    # takes effect during this run.
    if scheduler is not None:
        scheduler.step()

    # Early-stopping check (also checkpoints the model on improvement)
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("早停触发，停止训练")
        break



Starting epoch 1/60
Epoch 1/60 | Train Loss: 4.3375, Acc: 13.71% | Val Loss: 4.2471, Acc: 16.88%
验证损失改善 (inf → 4.2471). 保存模型...
Starting epoch 2/60
Epoch 2/60 | Train Loss: 4.1514, Acc: 23.24% | Val Loss: 4.0897, Acc: 22.08%
验证损失改善 (4.2471 → 4.0897). 保存模型...
Starting epoch 3/60
Epoch 3/60 | Train Loss: 3.9791, Acc: 28.38% | Val Loss: 3.9142, Acc: 25.88%
验证损失改善 (4.0897 → 3.9142). 保存模型...
Starting epoch 4/60
Epoch 4/60 | Train Loss: 3.8118, Acc: 33.68% | Val Loss: 3.7600, Acc: 27.57%
验证损失改善 (3.9142 → 3.7600). 保存模型...
Starting epoch 5/60
Epoch 5/60 | Train Loss: 3.6527, Acc: 36.78% | Val Loss: 3.6137, Acc: 29.82%
验证损失改善 (3.7600 → 3.6137). 保存模型...
Starting epoch 6/60
Epoch 6/60 | Train Loss: 3.5053, Acc: 39.49% | Val Loss: 3.4866, Acc: 32.07%
验证损失改善 (3.6137 → 3.4866). 保存模型...
Starting epoch 7/60
Epoch 7/60 | Train Loss: 3.3648, Acc: 42.90% | Val Loss: 3.3515, Acc: 33.19%
验证损失改善 (3.4866 → 3.3515). 保存模型...
Starting epoch 8/60
Epoch 8/60 | Train Loss: 3.2264, Acc: 43.46% | Val Loss: 3.2086, A

In [19]:
# Report GPU memory usage, release cached blocks, then report usage again.
if device.type == 'cuda':
    total_gb = torch.cuda.get_device_properties(device).total_memory / 1e9
    used_gb = torch.cuda.memory_allocated(device) / 1e9
    peak_gb = torch.cuda.max_memory_allocated(device) / 1e9
    print(f"GPU内存总量: {total_gb:.2f} GB")
    print(f"当前已用GPU内存: {used_gb:.2f} GB")
    print(f"GPU内存峰值: {peak_gb:.2f} GB")

    # Release cached memory
    torch.cuda.empty_cache()
    print("已清理GPU缓存")

    # Measure again after the cleanup
    used_after_gb = torch.cuda.memory_allocated(device) / 1e9
    print(f"清理后已用GPU内存: {used_after_gb:.2f} GB")

GPU内存总量: 8.59 GB
当前已用GPU内存: 0.35 GB
GPU内存峰值: 0.35 GB
已清理GPU缓存
清理后已用GPU内存: 0.35 GB


In [24]:
# ==============================
# Continue-training cell (with progress bars): setup
# ==============================

import time
from tqdm import tqdm

# 0. Progress-bar configuration
progress_bar = True  # set to False to disable the progress bars
refresh_rate = 10   # passed to tqdm as `mininterval` (seconds between refreshes)

# 1. Show which device is selected and where the model currently lives
print(f"\n当前使用设备: {device}")
print(f"模型位置: {next(model.parameters()).device}")

# 2. Restore the best checkpoint written by the earlier training run
print("\n加载之前训练的最佳模型...")
model.load_state_dict(torch.load('best_bird_model.pth', map_location=device))
model = model.to(device)  # make sure the model sits on the selected device
print(f"模型已加载到 {device}")

# 3. Rebuild the data loaders; memory pinning is enabled only on CUDA
pin_batches = device.type == 'cuda'

train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,  # load batches with 4 worker processes
    pin_memory=pin_batches,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,  # load batches with 4 worker processes
    pin_memory=pin_batches,
)

# 4. Ask the user how to continue training
print("\n" + "="*50)
print("继续训练选项:")
print(f"当前模型训练轮次: {num_epochs}")
print(f"当前验证损失: {early_stopping.val_loss_min:.4f}")

# Read the continuation parameters. Whitespace is stripped so that an input
# of only spaces counts as "keep the current learning rate".
additional_epochs = int(input("请输入要增加的训练轮次数: ").strip())
if additional_epochs < 0:
    # A negative value would silently shrink the recorded epoch count.
    raise ValueError(f"训练轮次数不能为负数: {additional_epochs}")
new_lr = input("请输入新的学习率 (留空则保持当前学习率): ").strip()

# Apply the new learning rate to every parameter group
if new_lr:
    new_lr = float(new_lr)
    # Rejects zero, negative, infinite and NaN values (NaN fails both comparisons).
    if not 0 < new_lr < float('inf'):
        raise ValueError(f"学习率必须为有限正数: {new_lr}")
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
    print(f"学习率已设置为: {new_lr}")

# 5. Reset the early-stopping patience only.
# `best_score` and `val_loss_min` are deliberately kept: the early-stopping
# object saves to the same checkpoint file that was just reloaded, so wiping
# the recorded best would let the first resumed epoch overwrite that
# checkpoint even when its validation loss is worse.
print("重置早停计数器")
early_stopping.counter = 0
early_stopping.early_stop = False

# 6. Continue-training loop (with progress bars).
# Epoch numbering resumes at `num_epochs` and runs up to `new_total_epochs`.
print(f"\n开始继续训练 {additional_epochs} 轮...")
new_total_epochs = num_epochs + additional_epochs

for epoch in range(num_epochs, new_total_epochs):
    epoch_start_time = time.time()
    print(f"\n继续训练: 轮次 {epoch+1}/{new_total_epochs}")
    
    # Training phase
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    
    # Wrap the loader in a tqdm progress bar when enabled; either way the
    # iterable yields (batch_index, (inputs, labels)) pairs.
    if progress_bar:
        train_bar = tqdm(
            enumerate(train_loader),
            total=len(train_loader),
            desc="训练批次",
            mininterval=refresh_rate,
            dynamic_ncols=True
        )
    else:
        train_bar = enumerate(train_loader)
    
    for i, (inputs, labels) in train_bar:
        # Move the batch to the selected device
        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        
        # The model may return an object carrying `.logits` (the original
        # note mentions the ViT output format) or a plain tensor.
        logits = outputs.logits if hasattr(outputs, 'logits') else outputs
        
        # Compute the loss
        loss = criterion(logits, labels)
        
        # Backward pass
        loss.backward()
        
        # Parameter update
        optimizer.step()
        
        # Accumulate running statistics for the epoch
        train_loss += loss.item()
        _, predicted = torch.max(logits, 1)
        batch_correct = (predicted == labels).sum().item()
        train_correct += batch_correct
        train_total += labels.size(0)
        
        # Show the current batch's loss and accuracy on the progress bar
        if progress_bar:
            train_bar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'acc': f'{100 * batch_correct / labels.size(0):.2f}%'
            })
    
    # Mean loss per batch and overall accuracy (percent) for the epoch
    train_loss = train_loss / len(train_loader)
    train_acc = 100 * train_correct / train_total
    
    # Validation phase
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    
    # Progress bar for the validation batches (same scheme as for training)
    if progress_bar:
        val_bar = tqdm(
            enumerate(val_loader),
            total=len(val_loader),
            desc="验证批次",
            mininterval=refresh_rate,
            dynamic_ncols=True
        )
    else:
        val_bar = enumerate(val_loader)
    
    # Gradients are not needed for validation
    with torch.no_grad():
        for i, (inputs, labels) in val_bar:
            inputs = inputs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            
            outputs = model(inputs)
            logits = outputs.logits if hasattr(outputs, 'logits') else outputs
            loss = criterion(logits, labels)
            
            val_loss += loss.item()
            _, predicted = torch.max(logits, 1)
            batch_correct = (predicted == labels).sum().item()
            val_correct += batch_correct
            val_total += labels.size(0)
            
            # Show the current batch's loss and accuracy on the progress bar
            if progress_bar:
                val_bar.set_postfix({
                    'loss': f'{loss.item():.4f}',
                    'acc': f'{100 * batch_correct / labels.size(0):.2f}%'
                })
    
    val_loss = val_loss / len(val_loader)
    val_acc = 100 * val_correct / val_total
    
    # Wall-clock duration of this epoch
    epoch_time = time.time() - epoch_start_time
    
    # Print this epoch's results
    print("\n训练结果:")
    print(f"训练损失: {train_loss:.4f}, 训练准确率: {train_acc:.2f}%")
    print(f"验证损失: {val_loss:.4f}, 验证准确率: {val_acc:.2f}%")
    print(f"本轮耗时: {epoch_time:.2f}秒")
    
    # Advance the learning-rate scheduler once per epoch and report the
    # learning rate of the first parameter group
    if scheduler is not None:
        scheduler.step()
        current_lr = optimizer.param_groups[0]['lr']
        print(f"当前学习率: {current_lr:.8f}")
    
    # Early-stopping check (also checkpoints the model on improvement)
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("早停触发，停止训练")
        break

# 7. Save the final model weights.
# NOTE(review): this writes the weights as they are after the last executed
# epoch, which is not necessarily the best checkpoint kept by early stopping.
final_model_path = 'final_bird_model.pth'
torch.save(model.state_dict(), final_model_path)
print(f"\n训练完成! 最终模型已保存至: {final_model_path}")

# 8. Release cached GPU memory
if device.type == 'cuda':
    torch.cuda.empty_cache()
    print("已清理GPU缓存")

# 9. Update the total epoch counter and print the last epoch's metrics.
# NOTE(review): the counter is set to the planned total even when early
# stopping ended the loop sooner - confirm this is intended.
num_epochs = new_total_epochs
print(f"总训练轮次: {num_epochs}")
print(f"最终验证损失: {val_loss:.4f}, 验证准确率: {val_acc:.2f}%")


当前使用设备: cuda
模型位置: cuda:0

加载之前训练的最佳模型...
模型已加载到 cuda

继续训练选项:
当前模型训练轮次: 60
当前验证损失: inf
学习率已设置为: 1.4029
重置早停计数器

开始继续训练 60 轮...

继续训练: 轮次 61/120


训练批次: 100%|██████████| 89/89 [00:19<00:00,  4.51it/s, loss=418.6357, acc=10.71%]
验证批次: 100%|██████████| 23/23 [00:05<00:00,  3.95it/s, loss=392.6264, acc=0.00%]



训练结果:
训练损失: 568.5861, 训练准确率: 9.95%
验证损失: 420.1157, 验证准确率: 8.02%
本轮耗时: 44.57秒
当前学习率: 1.40151585
验证损失改善 (inf → 420.1157). 保存模型...

继续训练: 轮次 62/120


训练批次: 100%|██████████| 89/89 [00:19<00:00,  4.54it/s, loss=347.2461, acc=0.00%] 
验证批次: 100%|██████████| 23/23 [00:05<00:00,  3.88it/s, loss=458.1166, acc=0.00%]



训练结果:
训练损失: 341.3055, 训练准确率: 8.37%
验证损失: 326.4205, 验证准确率: 12.94%
本轮耗时: 45.23秒
当前学习率: 1.39736886
验证损失改善 (420.1157 → 326.4205). 保存模型...

继续训练: 轮次 63/120


训练批次:  28%|██▊       | 25/89 [00:06<00:15,  4.16it/s, loss=478.2703, acc=0.00%]


KeyboardInterrupt: 

---
# 6. 模型评估

## 6.1 模型加载

In [21]:
# Rebuild the network and restore the best checkpoint for evaluation.
# NOTE(review): this assumes the checkpoint was produced by a torchvision
# ConvNeXt-base model with a 100-class head; confirm it matches the model
# that was actually trained.
model = models.convnext_base(pretrained=False)
# Read the head's input width from the freshly built model so this cell does
# not depend on `num_ftrs` from the training cells.
model.classifier[2] = nn.Linear(model.classifier[2].in_features, 100)
# `map_location` lets a checkpoint saved on the GPU load on a CPU-only
# machine, matching how the continue-training cell loads the same file.
model.load_state_dict(torch.load('best_bird_model.pth', map_location=device))



<All keys matched successfully>

## 6.2 推理预测

In [22]:
# Deterministic preprocessing for inference. The training `transform`
# contains random crop / flip / colour jitter / Cutout steps; applying it here
# would make predictions change from run to run and hide parts of the test
# images. Only the resize, a centre crop of the same size, the tensor
# conversion and the same normalisation are kept.
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

model = model.to(device)
model.eval()
predicts = []  # predicted class index per test image
idx = []       # image id per test image (file name without extension)

# Predict a label for every test image, one image at a time
with torch.no_grad():
    for img_name in test_images:
        img_path = os.path.join(test_folder, img_name)
        # The context manager closes the file handle; `convert` returns a
        # new in-memory RGB image that stays valid afterwards.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        # Preprocess, add the batch dimension and move to the device
        image = eval_transform(image).unsqueeze(0).to(device)
        outputs = model(image)
        # Accept both plain tensors and output objects carrying `.logits`
        logits = outputs.logits if hasattr(outputs, 'logits') else outputs
        _, predicted = torch.max(logits, 1)
        predicts.append(predicted.item())
        # Strip only the trailing extension to obtain the image id
        idx.append(os.path.splitext(img_name)[0])


## 6.3 结果保存

In [23]:
# Assemble the predictions into a table, convert the ids to integers and
# order the rows by id; the trailing expression displays the table.
submission = (
    pd.DataFrame({'id': idx, 'label': predicts})
    .astype({'id': int})
    .sort_values(by='id')
)
submission

Unnamed: 0,id,label
0,0,66
1,1,39
237,2,22
348,3,63
459,4,52
...,...,...
137,1120,47
138,1121,54
139,1122,86
140,1123,17


In [24]:
# Write the table to a CSV file whose name is the path prefix, the student id
# and a timestamp of the current local time; the index column is omitted.
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
submission_path = f"{subdir}{student_id}submission_{timestamp}.csv"
submission.to_csv(submission_path, index=False)

---
# 7. 陈述总结