In [22]:
import os
import random
from typing import List, Tuple
from pathlib import Path
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.nn.functional import cross_entropy
from torchvision.datasets import ImageFolder
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from torchvision import transforms
from transformers import AutoProcessor, AutoModel
import timm
import shutil

# ========== 1. 设备 ==========
device = "cuda" if torch.cuda.is_available() else "cpu"
print("device:", device)

# ========== 2. 数据路径 ==========
# ❗❗❗ 把这个改成你“四个类别”所在的文件夹 ❗❗❗
root_dir = r"D:/OneDriveFiles/OneDrive/人工智能基础期末/dataset2/"

# 目录结构要求：
# root_dir/
#   classA/
#   classB/
#   classC/
#   classD/

# ========== 3. 训练超参数 ==========
batch_size   = 32
num_workers  = 0
num_epochs   = 30
lr           = 1e-3      # 只训练线性头，可以稍微大一点
weight_decay = 1e-2

# ========== 4. 随机种子（保证每次划分一致） ==========
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if device == "cuda":
    torch.cuda.manual_seed_all(seed)

local_model_dir = r"/root/人工智能基础期末作业/medsig_model"  # TODO: 改成你的路径

train_dir = r"/root/人工智能基础期末作业/data_split/train"
val_dir = r"/root/人工智能基础期末作业/data_split/val"
image_size = 448

device: cuda


In [23]:
RAW_DIR = Path("/root/人工智能基础期末作业/dataset2/")
OUT_DIR = Path("/root/人工智能基础期末作业/data_split")    # 拆分后的 train/val 会放在这里

classes = os.listdir(RAW_DIR)
classes

['class 0', 'class 1', 'class 2', 'class 3', '.DS_Store']

In [24]:
classes = [d for d in os.listdir(RAW_DIR) if (RAW_DIR / d).is_dir()]
print("发现的类别：", classes)

for phase in ["train", "val"]:
    for cls in classes:
        (OUT_DIR / phase / cls).mkdir(parents=True, exist_ok=True)


发现的类别： ['class 0', 'class 1', 'class 2', 'class 3']


In [25]:
# train_ratio = 0.8   # 训练集占 80%
# val_ratio   = 0.2   # 验证集占 20%，train_ratio + val_ratio 应该 = 1
# device = "cuda" if torch.cuda.is_available() else "cpu"
# print("使用设备:", device)


# for cls in classes:
#     src_dir = RAW_DIR / cls
#     files = [f for f in os.listdir(src_dir)
#              if f.lower().endswith((".png", ".jpg", ".jpeg"))]

#     random.shuffle(files)
#     n = len(files)
#     n_train = int(n * train_ratio)
#     # val 集就是剩下的
#     train_files = files[:n_train]
#     val_files   = files[n_train:]

#     print(f"{cls}: 总数 {n}, 训练 {len(train_files)}, 验证 {len(val_files)}")

#     # 复制到目标文件夹（想省空间可以用 shutil.move）
#     for fname in train_files:
#         shutil.copy(src_dir / fname, OUT_DIR / "train" / cls / fname)
#     for fname in val_files:
#         shutil.copy(src_dir / fname, OUT_DIR / "val" / cls / fname)

# print("划分完成，已保存到", OUT_DIR)


In [26]:
# %% 从本地加载 MedSigLIP，多模态模型中抽出视觉塔
print("Loading base model from local dir:", local_model_dir)

raw_model = AutoModel.from_pretrained(
    local_model_dir,
    local_files_only=True,
)
print("raw_model class:", type(raw_model))

# ---- 关键：只抽出“视觉 encoder” ----
if hasattr(raw_model, "vision_model"):
    # 大多数 CLIP/SigLIP 多模态模型的视觉塔都叫 vision_model
    img_encoder = raw_model.vision_model
    print("Use raw_model.vision_model as image encoder.")
elif hasattr(raw_model, "get_image_features"):
    # 有些实现是直接在 model 上提供 get_image_features
    img_encoder = raw_model
    print("Use raw_model itself as image encoder (get_image_features).")
else:
    raise RuntimeError(
        "在 raw_model 里找不到 vision_model 或 get_image_features，"
        "请 print(raw_model) 看看结构，然后再定位视觉塔。"
    )

img_encoder.to(device)
img_encoder.eval()

# ---- 用 dummy 探测 embedding 维度 ----
with torch.no_grad():
    dummy = torch.zeros(1, 3, image_size, image_size).to(device)   # [1,3,448,448]

    try:
        out = img_encoder(pixel_values=dummy)   # 优先用 keyword
    except TypeError:
        out = img_encoder(dummy)               # 有的模型只收 positional

    if hasattr(out, "image_embeds"):
        feats = out.image_embeds                    # [1, D]
    elif hasattr(out, "pooler_output"):
        feats = out.pooler_output                   # [1, D]
    elif hasattr(out, "last_hidden_state"):
        feats = out.last_hidden_state.mean(dim=1)   # [1, D]
    elif isinstance(out, torch.Tensor):
        feats = out
    else:
        print("Unknown output type:", type(out))
        print(out)
        raise RuntimeError(
            "无法从 img_encoder 的输出中找到特征，请 print(out) 再调整逻辑。"
        )

embed_dim = feats.shape[-1]
print("image embed dim:", embed_dim)


# ---- 封装分类模型：视觉塔 + 线性 head ----
class MedSigVisionClassifier(nn.Module):
    def __init__(self, img_encoder, embed_dim, num_classes):
        super().__init__()
        self.encoder = img_encoder
        self.head = nn.Linear(embed_dim, num_classes)

    def forward(self, pixel_values):
        # pixel_values: [B,3,448,448]
        try:
            out = self.encoder(pixel_values=pixel_values)
        except TypeError:
            out = self.encoder(pixel_values)

        if hasattr(out, "image_embeds"):
            feats = out.image_embeds
        elif hasattr(out, "pooler_output"):
            feats = out.pooler_output
        elif hasattr(out, "last_hidden_state"):
            feats = out.last_hidden_state.mean(dim=1)
        elif isinstance(out, torch.Tensor):
            feats = out
        else:
            raise RuntimeError("encoder 输出里找不到特征，需根据实际结构单独处理。")

        # L2 归一化（保持和 CLIP 系一致的风格）
        feats = feats / (feats.norm(dim=-1, keepdim=True) + 1e-6)
        logits = self.head(feats)    # [B,num_classes]
        return logits


# ==== 构建模型 + DataParallel ====
model = MedSigVisionClassifier(img_encoder, embed_dim, 4)

if torch.cuda.device_count() > 1:
    print("使用", torch.cuda.device_count(), "张 GPU 进行 DataParallel")
    model = nn.DataParallel(model)   # 在多卡上自动切 batch

model = model.to(device)

# 关键：取出真正的模型（DataParallel 包了一层壳）
core = model.module if isinstance(model, nn.DataParallel) else model

# 先冻结视觉塔，只训 head 当 baseline
for p in core.encoder.parameters():
    p.requires_grad = False
for p in core.head.parameters():
    p.requires_grad = True

# 统计参数量用 core（真正的模型）
total_params = sum(p.numel() for p in core.parameters())
trainable_params = sum(p.numel() for p in core.parameters() if p.requires_grad)
print(f"总参数量: {total_params:,}")
print(f"当前可训练参数量(仅 head): {trainable_params:,}")


Loading base model from local dir: /root/人工智能基础期末作业/medsig_model
raw_model class: <class 'transformers.models.siglip.modeling_siglip.SiglipModel'>
Use raw_model.vision_model as image encoder.
image embed dim: 1152
使用 2 张 GPU 进行 DataParallel
总参数量: 428,570,052
当前可训练参数量(仅 head): 4,612


In [27]:
# %% DataLoader：胸片增强版（更稳、更贴近 X-ray）
import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# ---------- 1) 尽量用 MedSig 的 processor 里的 mean/std（更匹配预训练） ----------
try:
    processor = AutoProcessor.from_pretrained(local_model_dir, local_files_only=True)
    img_proc = getattr(processor, "image_processor", processor)  # 兼容不同 processor 结构
    mean = list(getattr(img_proc, "image_mean", [0.5, 0.5, 0.5]))
    std  = list(getattr(img_proc, "image_std",  [0.5, 0.5, 0.5]))
    print("Use processor mean/std:", mean, std)
except Exception as e:
    mean = [0.5, 0.5, 0.5]
    std  = [0.5, 0.5, 0.5]
    print("WARN: cannot load processor mean/std, fallback to 0.5. err =", e)

# ---------- 2) 胸片增强：轻几何 + 轻强度 + 少量噪声/模糊 ----------
# ⚠️ 胸片很多任务不建议默认左右翻转（会改变左右肺解剖信息）
use_hflip = False  # 若你确定标签与左右无关（例如“是否肺炎”），可改 True

resize_pad = int(image_size * 1.10)  # 先略放大再裁回，避免硬拉伸

train_tfms_list = [
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize(resize_pad, interpolation=transforms.InterpolationMode.BILINEAR),
]

if use_hflip:
    train_tfms_list.append(transforms.RandomHorizontalFlip(p=0.5))

train_tfms_list += [
    transforms.RandomAffine(
        degrees=5,                 # 小角度旋转
        translate=(0.03, 0.03),    # 小平移
        scale=(0.95, 1.05),        # 小缩放
        interpolation=transforms.InterpolationMode.BILINEAR,
        fill=0
    ),
    transforms.CenterCrop(image_size),  # 用 CenterCrop 比 RandomResizedCrop 更不容易裁掉病灶
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.10, contrast=0.10)],
        p=0.8
    ),
    transforms.RandomAutocontrast(p=0.2),  # 轻微增强对比度（对胸片常有用）
    transforms.RandomApply(
        [transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 1.0))],
        p=0.10
    ),
    transforms.ToTensor(),
    transforms.RandomApply(
        [transforms.Lambda(lambda x: (x + torch.randn_like(x) * 0.02).clamp(0.0, 1.0))],
        p=0.15
    ),
    transforms.Normalize(mean=mean, std=std),
]

train_transform = transforms.Compose(train_tfms_list)

# val/test：不做随机增强，只做统一预处理
val_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize(resize_pad, interpolation=transforms.InterpolationMode.BILINEAR),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# ---------- 3) Dataset / DataLoader ----------
train_dataset = ImageFolder(train_dir, transform=train_transform)
val_dataset   = ImageFolder(val_dir,   transform=val_transform)

print("classes:", train_dataset.classes)
print("train size:", len(train_dataset), "val size:", len(val_dataset))

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)


WARN: cannot load processor mean/std, fallback to 0.5. err = 
SiglipTokenizer requires the SentencePiece library but it was not found in your environment. Check out the instructions on the
installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
that match your environment. Please note that you may need to restart your runtime after installation.

classes: ['class 0', 'class 1', 'class 2', 'class 3']
train size: 5841 val size: 1462


In [28]:
# %% 分阶段解冻 + 分层学习率 + warmup cosine（MedSigLIP 推荐）
import math
import torch
import torch.nn as nn
from torch.optim import AdamW
from tqdm.notebook import tqdm
from transformers import get_cosine_schedule_with_warmup

# 处理 DataParallel 外壳
core = model.module if isinstance(model, nn.DataParallel) else model

# ====== 训练配置（你可以先按这个跑）======
stage1_epochs = 2                 # 只训 head
stage2_epochs = 8                 # 解冻最后 1/3 blocks
label_smoothing = 0.10
head_lr = 1e-3
backbone_lr = 2e-5               # 建议 1e-5 ~ 3e-5
grad_clip = 1.0
use_amp = (device == "cuda")

assert num_epochs >= stage1_epochs + stage2_epochs, "num_epochs 太小，至少要 >= stage1+stage2"

# ====== 找到 ViT blocks（尽量兼容不同结构）======
def _get_by_path(obj, path: str):
    cur = obj
    for name in path.split("."):
        if not hasattr(cur, name):
            return None
        cur = getattr(cur, name)
    return cur

def get_blocks(encoder):
    candidates = [
        "encoder.layers",                 # e.g., CLIPVisionModel.encoder.layers
        "encoder.encoder.layers",         # 你 notebook 里原来用的
        "vision_model.encoder.layers",    # 少数实现
        "transformer.layers",
        "layers",
        "blocks",
    ]
    for p in candidates:
        blk = _get_by_path(encoder, p)
        if blk is not None and hasattr(blk, "__len__"):
            return blk, p
    raise RuntimeError("找不到 encoder 的 block 列表（layers/blocks）。你可以 print(core.encoder) 看结构再补路径。")

blocks, blk_path = get_blocks(core.encoder)
n_blocks = len(blocks)
print(f"找到 blocks: {blk_path}, 数量={n_blocks}")

def set_trainable(stage: int):
    '''
    stage=1: 冻结 backbone，只训 head
    stage=2: 解冻最后 1/3 blocks + head
    stage=3: 解冻最后 2/3 blocks + head（更强但更易过拟合）
    '''
    # 1) 全冻结
    for p in core.encoder.parameters():
        p.requires_grad = False
    # 2) head 永远训
    for p in core.head.parameters():
        p.requires_grad = True

    if stage == 1:
        return

    if stage == 2:
        start = int(math.floor(n_blocks * (8/9)))+1  # 最后 1/3
    elif stage == 3:
        start = int(math.floor(n_blocks * (1/9)))-1  # 最后 2/3
    else:
        raise ValueError("stage must be 1/2/3")

    # 3) 解冻 blocks[start:]
    for b in blocks[start:]:
        for p in b.parameters():
            p.requires_grad = True

    # 4) 常见：最后的 LayerNorm / post_layernorm 也一并解冻（有则解）
    for ln_name in ["post_layernorm", "post_layer_norm", "layernorm", "layer_norm", "ln_post", "final_layer_norm", "norm"]:
        m = getattr(core.encoder, ln_name, None)
        if m is not None:
            for p in m.parameters():
                p.requires_grad = True

def count_trainable():
    tot = sum(p.numel() for p in core.parameters())
    tr  = sum(p.numel() for p in core.parameters() if p.requires_grad)
    return tot, tr

# 初始 stage1
set_trainable(stage=1)
tot, tr = count_trainable()
print(f"[stage1] total={tot:,} trainable={tr:,}")

# ====== optimizer：分层学习率（head/backbone）======
optimizer = AdamW(
    [
        {"params": core.head.parameters(), "lr": head_lr, "weight_decay": weight_decay},
        {"params": core.encoder.parameters(), "lr": backbone_lr, "weight_decay": weight_decay},
    ]
)

# ====== scheduler：warmup + cosine（按 step 调度）======
total_steps = num_epochs * len(train_loader)
warmup_steps = max(1, int(0.05 * total_steps))
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

# label smoothing（PyTorch 老版本可能不支持 label_smoothing 参数，做个兼容）
try:
    criterion = nn.CrossEntropyLoss(label_smoothing=label_smoothing)
except TypeError:
    def _ls_ce(logits, target, eps: float):
        n_class = logits.size(-1)
        log_probs = torch.log_softmax(logits, dim=-1)
        # 标准 CE
        nll = -log_probs.gather(dim=-1, index=target.unsqueeze(1)).squeeze(1)
        # 平滑项：均匀分布
        smooth = -log_probs.mean(dim=-1)
        return ((1 - eps) * nll + eps * smooth).mean()
    criterion = lambda logits, labels: _ls_ce(logits, labels, label_smoothing)

scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

@torch.no_grad()
def eval_one_epoch():
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    for imgs, labels in val_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        with torch.cuda.amp.autocast(enabled=use_amp):
            logits = model(imgs)
            loss = criterion(logits, labels)
        running_loss += loss.item() * imgs.size(0)
        pred = logits.argmax(dim=1)
        correct += (pred == labels).sum().item()
        total += labels.size(0)
    return running_loss / max(total, 1), correct / max(total, 1)

def train_one_epoch(epoch: int, global_step: int):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    pbar = tqdm(train_loader, desc=f"Train epoch {epoch}", leave=False)
    for imgs, labels in pbar:
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=use_amp):
            logits = model(imgs)
            loss = criterion(logits, labels)

        scaler.scale(loss).backward()
        if grad_clip is not None and grad_clip > 0:
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        scaler.step(optimizer)
        scaler.update()

        scheduler.step()
        global_step += 1

        running_loss += loss.item() * imgs.size(0)
        pred = logits.argmax(dim=1)
        correct += (pred == labels).sum().item()
        total += labels.size(0)

        pbar.set_postfix(loss=f"{running_loss/max(total,1):.4f}", acc=f"{correct/max(total,1):.4f}",
                         lr_head=f"{optimizer.param_groups[0]['lr']:.2e}",
                         lr_backbone=f"{optimizer.param_groups[1]['lr']:.2e}")

    return running_loss / max(total, 1), correct / max(total, 1), global_step

# ====== 主训练循环（带分阶段解冻）======
torch.cuda.empty_cache()
log_path = "trainlog_staged_unfreeze.log"
best_val_acc = 0.0
best_state_dict = None
global_step = 0

for epoch in range(1, num_epochs + 1):
    # stage 切换点：epoch 开始前切
    if epoch == stage1_epochs + 1:
        set_trainable(stage=2)
        tot, tr = count_trainable()
        print(f"[switch->stage2 @epoch{epoch}] total={tot:,} trainable={tr:,}")
    # if epoch == stage1_epochs + stage2_epochs + 1:
    #     set_trainable(stage=3)
    #     tot, tr = count_trainable()
    #     print(f"[switch->stage3 @epoch{epoch}] total={tot:,} trainable={tr:,}")

    tr_loss, tr_acc, global_step = train_one_epoch(epoch, global_step)
    va_loss, va_acc = eval_one_epoch()

    line = (f"[Epoch {epoch:02d}/{num_epochs}] "
            f"train_loss={tr_loss:.4f} train_acc={tr_acc:.4f} | "
            f"val_loss={va_loss:.4f} val_acc={va_acc:.4f}")
    print(line)
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(line + "\n")

    if va_acc > best_val_acc:
        best_val_acc = va_acc
        best_state_dict = (model.module if isinstance(model, nn.DataParallel) else model).state_dict()

print("Best val_acc:", best_val_acc)
if best_state_dict is not None:
    save_path = f"./medsig_staged_best_acc{best_val_acc:.4f}.pth"
    torch.save(best_state_dict, save_path)
    print("Saved best model to:", save_path)


找到 blocks: encoder.layers, 数量=27
[stage1] total=428,570,052 trainable=4,612


  scaler = torch.cuda.amp.GradScaler(enabled=use_amp)


Train epoch 1:   0%|          | 0/183 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast(enabled=use_amp):
  with torch.cuda.amp.autocast(enabled=use_amp):


[Epoch 01/30] train_loss=1.3032 train_acc=0.5044 | val_loss=1.1589 val_acc=0.6737


Train epoch 2:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 02/30] train_loss=1.0089 train_acc=0.7552 | val_loss=0.8675 val_acc=0.8242
[switch->stage2 @epoch3] total=428,570,052 trainable=30,485,924


Train epoch 3:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 03/30] train_loss=0.7007 train_acc=0.8422 | val_loss=0.6046 val_acc=0.8721


Train epoch 4:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 04/30] train_loss=0.5944 train_acc=0.8649 | val_loss=0.5855 val_acc=0.8741


Train epoch 5:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 05/30] train_loss=0.5779 train_acc=0.8749 | val_loss=0.5739 val_acc=0.8865


Train epoch 6:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 06/30] train_loss=0.5661 train_acc=0.8791 | val_loss=0.5639 val_acc=0.8912


Train epoch 7:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 07/30] train_loss=0.5584 train_acc=0.8817 | val_loss=0.5632 val_acc=0.8871


Train epoch 8:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 08/30] train_loss=0.5511 train_acc=0.8853 | val_loss=0.5517 val_acc=0.8933


Train epoch 9:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 09/30] train_loss=0.5441 train_acc=0.8851 | val_loss=0.5500 val_acc=0.8871


Train epoch 10:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 10/30] train_loss=0.5420 train_acc=0.8889 | val_loss=0.5448 val_acc=0.8953


Train epoch 11:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 11/30] train_loss=0.5338 train_acc=0.8951 | val_loss=0.5451 val_acc=0.8912


Train epoch 12:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 12/30] train_loss=0.5315 train_acc=0.8964 | val_loss=0.5438 val_acc=0.8912


Train epoch 13:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 13/30] train_loss=0.5250 train_acc=0.9016 | val_loss=0.5378 val_acc=0.8967


Train epoch 14:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 14/30] train_loss=0.5217 train_acc=0.8998 | val_loss=0.5386 val_acc=0.9001


Train epoch 15:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 15/30] train_loss=0.5156 train_acc=0.9082 | val_loss=0.5363 val_acc=0.8947


Train epoch 16:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 16/30] train_loss=0.5124 train_acc=0.9070 | val_loss=0.5345 val_acc=0.9001


Train epoch 17:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 17/30] train_loss=0.5087 train_acc=0.9089 | val_loss=0.5335 val_acc=0.9022


Train epoch 18:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 18/30] train_loss=0.5087 train_acc=0.9087 | val_loss=0.5343 val_acc=0.9063


Train epoch 19:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 19/30] train_loss=0.5021 train_acc=0.9130 | val_loss=0.5348 val_acc=0.9008


Train epoch 20:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 20/30] train_loss=0.5023 train_acc=0.9130 | val_loss=0.5384 val_acc=0.8995


Train epoch 21:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 21/30] train_loss=0.5012 train_acc=0.9158 | val_loss=0.5378 val_acc=0.9008


Train epoch 22:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 22/30] train_loss=0.4986 train_acc=0.9146 | val_loss=0.5344 val_acc=0.9022


Train epoch 23:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 23/30] train_loss=0.4987 train_acc=0.9188 | val_loss=0.5343 val_acc=0.9042


Train epoch 24:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 24/30] train_loss=0.5000 train_acc=0.9175 | val_loss=0.5342 val_acc=0.9036


Train epoch 25:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 25/30] train_loss=0.4972 train_acc=0.9163 | val_loss=0.5348 val_acc=0.9036


Train epoch 26:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 26/30] train_loss=0.4922 train_acc=0.9195 | val_loss=0.5348 val_acc=0.9015


Train epoch 27:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 27/30] train_loss=0.4961 train_acc=0.9192 | val_loss=0.5349 val_acc=0.9022


Train epoch 28:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 28/30] train_loss=0.4968 train_acc=0.9166 | val_loss=0.5351 val_acc=0.9029


Train epoch 29:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 29/30] train_loss=0.4941 train_acc=0.9171 | val_loss=0.5349 val_acc=0.9029


Train epoch 30:   0%|          | 0/183 [00:00<?, ?it/s]

[Epoch 30/30] train_loss=0.4958 train_acc=0.9161 | val_loss=0.5349 val_acc=0.9022
Best val_acc: 0.9062927496580028
Saved best model to: ./medsig_staged_best_acc0.9063.pth


In [29]:
# （已替换）原 cell7 的训练/保存逻辑已被 '分阶段解冻' cell 覆盖。


In [None]:
# （已替换）原 cell8 的训练/保存逻辑已被 '分阶段解冻' cell 覆盖。


In [None]:
# （已替换）原 cell9 的训练/保存逻辑已被 '分阶段解冻' cell 覆盖。


In [None]:
# （已替换）原 cell10 的训练/保存逻辑已被 '分阶段解冻' cell 覆盖。


In [None]:
# （已替换）原 cell11 的训练/保存逻辑已被 '分阶段解冻' cell 覆盖。


In [None]:
# （已替换）原 cell12 的训练/保存逻辑已被 '分阶段解冻' cell 覆盖。


In [None]:
import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. 先构建“同结构”的模型 ------------------------------------
# 这里用你自己的模型类和参数
# 比如你的是：
# model_core = MedSigVisionClassifier(img_encoder, embed_dim, num_classes)
torch.cuda.empty_cache()
model_core = MedSigVisionClassifier(img_encoder, embed_dim, 4)  # 按你之前的一样写
model_core = model_core.to(device)

# 2. 读取 pth（state_dict） ------------------------------------
state_dict_path = "medsiglip448_cls_best_acc0.9770.pth"   # 改成你自己的文件名
state_dict = torch.load(state_dict_path, map_location=device)

# 3. 把参数加载进模型 ------------------------------------------
model_core.load_state_dict(state_dict)

# 4. 如果你想多卡跑，可以再包一层 DataParallel（可选）
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model_core)
else:
    model = model_core

model.eval()
print("✅ 模型加载完毕，可以直接用 model 做推理/验证")


In [None]:
@torch.no_grad()
def eval_once(loader):
    model.eval()
    correct = 0
    total = 0
    for imgs, labels in loader:
        imgs = imgs.to(device)
        labels = labels.to(device)

        logits = model(imgs)
        preds = torch.argmax(logits, dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    print("acc =", correct / total)

# 比如在 val_loader / test_loader 上跑一下
eval_once(val_loader)
