In [61]:
import os
import random
from typing import List, Tuple
from pathlib import Path
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.nn.functional import cross_entropy
from torchvision.datasets import ImageFolder
from sklearn.model_selection import train_test_split
from open_clip import create_model_from_pretrained
from tqdm.notebook import tqdm
from torchvision import transforms
import timm
# ========== 1. 设备 ==========
device = "cuda" if torch.cuda.is_available() else "cpu"
print("device:", device)

# ========== 2. 数据路径 ==========
# ❗❗❗ 把这个改成你“四个类别”所在的文件夹 ❗❗❗
root_dir = r"D:/OneDriveFiles/OneDrive/人工智能基础期末/dataset2/"

# 目录结构要求：
# root_dir/
#   classA/
#   classB/
#   classC/
#   classD/

# ========== 3. 训练超参数 ==========
batch_size   = 64
num_workers  = 0
num_epochs   = 30
lr           = 1e-3      # 只训练线性头，可以稍微大一点
weight_decay = 1e-2

# ========== 4. 随机种子（保证每次划分一致） ==========
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if device == "cuda":
    torch.cuda.manual_seed_all(seed)


device: cuda


In [54]:
# 1. 从 HuggingFace 下载并加载 BiomedCLIP 模型和预处理
model_id = "hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224"

clip_model, preprocess = create_model_from_pretrained(model_id)

clip_model.to(device)
clip_model.eval()

# 2. 统一用一个 preprocess，当成 train / val 都可以
preprocess_train = preprocess
preprocess_val = preprocess

# 3. 用 encode_image 跑一个假输入，自动推特征维度
with torch.no_grad():
    dummy = torch.zeros(1, 3, 224, 224).to(device)
    dummy_feat = clip_model.encode_image(dummy)   # [1, D]
embed_dim = dummy_feat.shape[-1]
print("embed_dim:", embed_dim)

embed_dim: 512


In [55]:
train_tfm = transforms.Compose([
    transforms.Resize(224),
    # 轻微随机裁剪 + 缩放，让构图有一点变化
    transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),
    # 轻微旋转，胸片一般 ±5 度问题不大
    transforms.RandomRotation(degrees=5),
    # 如果你不放心左右翻转，就先关掉；要开的话可以：
    # T.RandomHorizontalFlip(p=0.5),

    # 亮度/对比度轻微扰动，增强对曝光/机器差异的鲁棒性
    transforms.ColorJitter(brightness=0.1, contrast=0.1),

    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

val_tfm = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# 数据增强

train_set = ImageFolder(str("D:/OneDriveFiles/OneDrive/人工智能基础期末/data_split/train"), transform=train_tfm)
val_set   = ImageFolder(str("D:/OneDriveFiles/OneDrive/人工智能基础期末/data_split/val"),   transform=val_tfm)

train_loader = DataLoader(train_set, batch_size=64, shuffle=True,  num_workers=8)
val_loader   = DataLoader(val_set,   batch_size=64, shuffle=False, num_workers=8)

print("类别映射：", train_set.class_to_idx)
print("训练集大小：", len(train_set))
print("验证集大小：", len(val_set))

类别映射： {'class 0': 0, 'class 1': 1, 'class 2': 2, 'class 3': 3}
训练集大小： 5841
验证集大小： 1462


In [56]:
class BiomedCLIPClassifier(nn.Module):
    def __init__(self, clip_model, embed_dim, num_classes):
        super().__init__()
        self.clip = clip_model
        self.head = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        feats = self.clip.encode_image(x)          # [B, embed_dim]
        feats = feats / (feats.norm(dim=-1, keepdim=True) + 1e-6)
        logits = self.head(feats)
        return logits


num_classes = 4   # ❗ 改成你自己的类别数
model = BiomedCLIPClassifier(clip_model, embed_dim, num_classes).to(device)
# 先全冻
for p in model.parameters():
    p.requires_grad = False

# 拿到 blocks 列表（12 层）
blocks = model.clip.visual.trunk.blocks
print("blocks 数量:", len(blocks))  # 应该是 12

for i, block in enumerate(blocks):
    for p in block.parameters():
        if i < 9:
            p.requires_grad = False
        else:
            p.requires_grad = True

# 头也要解冻
for p in model.head.parameters():
    p.requires_grad = True

# 看一下现在可训练参数量
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("总参数量:", f"{total_params:,}")
print("可训练参数量（后几层 + head）:", f"{trainable_params:,}")


blocks 数量: 12
总参数量: 195,904,773
可训练参数量（后几层 + head）: 21,265,668


In [57]:
lr           = 1e-5      # 微调 backbone，lr 小一点
weight_decay = 1e-2

optimizer = AdamW(
    [p for p in model.parameters() if p.requires_grad],
    lr=lr,
    weight_decay=weight_decay
)

print("优化器中参数个数:", sum(p.numel() for p in optimizer.param_groups[0]["params"]))


优化器中参数个数: 21265668


In [58]:
def train_one_epoch(epoch: int):
    model.train()
    total_loss = 0.0
    total_correct = 0
    total = 0

    # ✅ 用 tqdm 包 train_loader
    for imgs, labels in tqdm(train_loader, desc=f"Train {epoch}", leave=False):
        imgs = imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(imgs)
        loss = cross_entropy(logits, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * imgs.size(0)
        preds = logits.argmax(dim=1)
        total_correct += (preds == labels).sum().item()
        total += imgs.size(0)

    print(f"[Epoch {epoch}] Train loss: {total_loss / total:.4f} | "
          f"acc: {total_correct / total:.4f}")


@torch.no_grad()
def eval_one_epoch(epoch: int):
    model.eval()
    total_loss = 0.0
    total_correct = 0
    total = 0

    # ✅ 用 tqdm 包 val_loader
    for imgs, labels in tqdm(val_loader, desc=f"Val   {epoch}", leave=False):
        imgs = imgs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        logits = model(imgs)
        loss = cross_entropy(logits, labels)

        total_loss += loss.item() * imgs.size(0)
        preds = logits.argmax(dim=1)
        total_correct += (preds == labels).sum().item()
        total += imgs.size(0)

    print(f"[Epoch {epoch}] Val   loss: {total_loss / total:.4f} | "
          f"acc: {total_correct / total:.4f}")


In [45]:
print("torch.cuda.is_available():", torch.cuda.is_available())
print("device 变量:", device)

if torch.cuda.is_available():
    print("GPU 数量:", torch.cuda.device_count())
    print("当前默认 GPU:", torch.cuda.current_device())

torch.cuda.is_available(): True
device 变量: cuda
GPU 数量: 1
当前默认 GPU: 0


In [46]:
print("len(train_loader) =", len(train_loader))
print("len(val_loader)   =", len(val_loader))


len(train_loader) = 92
len(val_loader)   = 23


In [59]:
for epoch in range(1, num_epochs + 1):
    train_one_epoch(epoch)
    eval_one_epoch(epoch)


Train 1:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 1] Train loss: 1.3202 | acc: 0.5330


Val   1:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 1] Val   loss: 1.2421 | acc: 0.7866


Train 2:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 2] Train loss: 1.2073 | acc: 0.8175


Val   2:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 2] Val   loss: 1.1919 | acc: 0.8393


Train 3:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 3] Train loss: 1.1746 | acc: 0.8543


Val   3:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 3] Val   loss: 1.1671 | acc: 0.8516


Train 4:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 4] Train loss: 1.1535 | acc: 0.8610


Val   4:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 4] Val   loss: 1.1518 | acc: 0.8447


Train 5:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 5] Train loss: 1.1359 | acc: 0.8636


Val   5:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 5] Val   loss: 1.1331 | acc: 0.8543


Train 6:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 6] Train loss: 1.1174 | acc: 0.8735


Val   6:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 6] Val   loss: 1.1142 | acc: 0.8625


Train 7:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 7] Train loss: 1.1011 | acc: 0.8771


Val   7:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 7] Val   loss: 1.0979 | acc: 0.8618


Train 8:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 8] Train loss: 1.0835 | acc: 0.8815


Val   8:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 8] Val   loss: 1.0820 | acc: 0.8632


Train 9:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 9] Train loss: 1.0656 | acc: 0.8812


Val   9:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 9] Val   loss: 1.0657 | acc: 0.8611


Train 10:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 10] Train loss: 1.0456 | acc: 0.8863


Val   10:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 10] Val   loss: 1.0475 | acc: 0.8591


Train 11:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 11] Train loss: 1.0233 | acc: 0.8963


Val   11:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 11] Val   loss: 1.0263 | acc: 0.8735


Train 12:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 12] Train loss: 1.0031 | acc: 0.9000


Val   12:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 12] Val   loss: 1.0083 | acc: 0.8687


Train 13:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 13] Train loss: 0.9846 | acc: 0.8993


Val   13:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 13] Val   loss: 0.9920 | acc: 0.8680


Train 14:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 14] Train loss: 0.9665 | acc: 0.9004


Val   14:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 14] Val   loss: 0.9756 | acc: 0.8646


Train 15:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 15] Train loss: 0.9482 | acc: 0.9074


Val   15:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 15] Val   loss: 0.9584 | acc: 0.8666


Train 16:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 16] Train loss: 0.9310 | acc: 0.9110


Val   16:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 16] Val   loss: 0.9467 | acc: 0.8577


Train 17:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 17] Train loss: 0.9128 | acc: 0.9188


Val   17:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 17] Val   loss: 0.9306 | acc: 0.8639


Train 18:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 18] Train loss: 0.8959 | acc: 0.9233


Val   18:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 18] Val   loss: 0.9230 | acc: 0.8618


Train 19:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 19] Train loss: 0.8831 | acc: 0.9209


Val   19:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 19] Val   loss: 0.9059 | acc: 0.8714


Train 20:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 20] Train loss: 0.8707 | acc: 0.9154


Val   20:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 20] Val   loss: 0.8890 | acc: 0.8735


Train 21:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 21] Train loss: 0.8515 | acc: 0.9278


Val   21:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 21] Val   loss: 0.8809 | acc: 0.8707


Train 22:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 22] Train loss: 0.8375 | acc: 0.9300


Val   22:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 22] Val   loss: 0.8755 | acc: 0.8570


Train 23:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 23] Train loss: 0.8211 | acc: 0.9367


Val   23:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 23] Val   loss: 0.8575 | acc: 0.8700


Train 24:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 24] Train loss: 0.8076 | acc: 0.9377


Val   24:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 24] Val   loss: 0.8418 | acc: 0.8748


Train 25:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 25] Train loss: 0.7943 | acc: 0.9392


Val   25:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 25] Val   loss: 0.8301 | acc: 0.8735


Train 26:   0%|          | 0/92 [00:00<?, ?it/s]

[Epoch 26] Train loss: 0.7794 | acc: 0.9437


Val   26:   0%|          | 0/23 [00:00<?, ?it/s]

[Epoch 26] Val   loss: 0.8217 | acc: 0.8810


Train 27:   0%|          | 0/92 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [51]:
## 分析
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import torch

model.eval()
all_labels, all_preds = [], []

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)
        logits = model(imgs)
        preds = logits.argmax(dim=1)
        all_labels.append(labels.cpu().numpy())
        all_preds.append(preds.cpu().numpy())

all_labels = np.concatenate(all_labels)
all_preds  = np.concatenate(all_preds)

print(classification_report(all_labels, all_preds, target_names=classes))
print(confusion_matrix(all_labels, all_preds))


NameError: name 'classes' is not defined