In [1]:
import os, sys
from pathlib import Path

print("cwd =", os.getcwd())
print("sys.path[0] =", sys.path[0])
print("has medimageinsightmodel.py in cwd?", Path("medimageinsightmodel.py").exists())
print("cwd files (first 50):", [p.name for p in Path(".").iterdir()][:50])


cwd = /root/autodl-tmp/models--lion-ai--MedImageInsights/snapshots/1
sys.path[0] = /root/miniconda3/lib/python312.zip
has medimageinsightmodel.py in cwd? True
cwd files (first 50): ['.gitattributes', '.gitignore', '.idea', '.python-version', '2024.09.27', 'MedImageInsight', 'README.md', 'example.py', 'fastapi_app.py', 'flask_app.py', 'medimageinsightmodel.py', 'pyproject.toml', 'requirements.txt', 'uv.lock', '.ipynb_checkpoints', '__pycache__', 'MI_finetune.ipynb', 'best_mi2_cxr4.pt']


In [2]:
from pathlib import Path
hits = list(Path(".").rglob("medimageinsightmodel.py"))
print("hits =", len(hits))
for p in hits[:10]:
    print("->", p.resolve())


hits = 1
-> /root/autodl-tmp/models--lion-ai--MedImageInsights/snapshots/1/medimageinsightmodel.py


In [3]:
import sys
repo_dir = str(hits[0].parent.resolve())
sys.path.insert(0, repo_dir)

from medimageinsightmodel import MedImageInsight
print("import ok ✅", MedImageInsight, "from", repo_dir)




import ok ✅ <class 'medimageinsightmodel.MedImageInsight'> from /root/autodl-tmp/models--lion-ai--MedImageInsights/snapshots/1


In [4]:
from pathlib import Path

root = Path.home() / "autodl-tmp" / "models--lion-ai--MedImageInsights"
cfgs = list(root.rglob("config.yaml"))
print("found", len(cfgs))
for p in cfgs[:10]:
    print(p)


found 1
/root/autodl-tmp/models--lion-ai--MedImageInsights/snapshots/1/2024.09.27/config.yaml


In [5]:
import os
import math
import time
import random
import numpy as np
from pathlib import Path
from typing import Tuple

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# -------------------------
# 0) 可改的配置
# -------------------------
TRAIN_DIR = "/root/autodl-tmp/train"
VAL_DIR   = "/root/autodl-tmp/val"

NUM_CLASSES = 4
IMAGE_SIZE  = 512          # 你要 >400，建议先 512；显存不够改 448
BATCH_SIZE  = 256            # OOM 就改小
NUM_WORKERS = 4
EPOCHS      = 20

# 分阶段：先训 head 再训 backbone
HEAD_ONLY_EPOCHS = 0

# 学习率（常用设置：head 大，backbone 小）
LR_HEAD     = 1e-3
LR_BACKBONE = 2e-5
WEIGHT_DECAY = 0.05

# 梯度累积：等效 batch = BATCH_SIZE * GRAD_ACCUM
GRAD_ACCUM = 1             # 显存紧就设 2/4/8

# label smoothing（可选）
LABEL_SMOOTHING = 0.1

# AMP 混合精度
USE_AMP = True

# 是否开启水平翻转（胸片一般默认关）
USE_HFLIP = False


# -------------------------
# 1) 轻量胸片增强（不会乱裁）
# -------------------------
def build_transforms_for_mi2(preprocess):
    # preprocess 是 MI2 自带的 build_transforms(...) 产物（PIL->Tensor->Normalize） :contentReference[oaicite:3]{index=3}
    # 我们在它前面加“安全增强”，尽量不裁掉病灶
    aug = [
        transforms.Lambda(lambda im: im.convert("RGB") if getattr(im, "mode", None) != "RGB" else im),
        transforms.Resize(int(IMAGE_SIZE * 1.10)),
    ]
    if USE_HFLIP:
        aug.append(transforms.RandomHorizontalFlip(p=0.5))
    aug += [
        transforms.RandomAffine(
            degrees=5,
            translate=(0.03, 0.03),
            scale=(0.95, 1.05),
            interpolation=transforms.InterpolationMode.BILINEAR,
            fill=0
        ),
        transforms.CenterCrop(IMAGE_SIZE),
        transforms.RandomApply([transforms.ColorJitter(brightness=0.10, contrast=0.10)], p=0.8),
        transforms.RandomAutocontrast(p=0.2),
    ]
    train_tfm = transforms.Compose(aug + [preprocess])

    val_tfm = transforms.Compose([
        transforms.Lambda(lambda im: im.convert("RGB") if getattr(im, "mode", None) != "RGB" else im),
        transforms.Resize(int(IMAGE_SIZE * 1.10)),
        transforms.CenterCrop(IMAGE_SIZE),
        preprocess,
    ])
    return train_tfm, val_tfm


# -------------------------
# 2) 组装可训练的 MI2 分类模型
# -------------------------
class MI2Classifier(nn.Module):
    def __init__(self, mi2_backbone: nn.Module, embed_dim: int, num_classes: int):
        super().__init__()
        self.backbone = mi2_backbone
        self.head = nn.Linear(embed_dim, num_classes)

        # 定位 blocks（只做一次）
        self._blocks_name, self._blocks = _find_best_blocklist(self.backbone)
        print(f"[MI2Classifier] using blocks: {self._blocks_name} (len={len(self._blocks)})")

    def set_trainable(self, last_k: int):
        """
        last_k=0: 冻结整个 backbone（只训 head）
        last_k>0: 冻结 backbone，只解冻最后 last_k 个 block（再加 head）
        """
        freeze_all_params(self.backbone)

        if last_k > 0:
            last_k = min(last_k, len(self._blocks))
            for blk in self._blocks[-last_k:]:
                unfreeze_params(blk)

        # head 永远可训练
        unfreeze_params(self.head)

    def forward(self, x):
        # 不能用 no_grad 包住 encode_image（因为你可能只解冻了部分层）
        feats = self.backbone.encode_image(x)
        logits = self.head(feats)
        return logits



def infer_embed_dim(mi2_model, sample_tensor: torch.Tensor) -> int:
    mi2_model.eval()
    with torch.no_grad():
        feat = mi2_model.encode_image(sample_tensor)
    return int(feat.shape[-1])


# -------------------------
# 3) 训练 / 验证
# -------------------------
def train_one_epoch(model, loader, optimizer, criterion, device, scaler=None):
    model.train()
    total_loss = 0.0
    y_true, y_pred = [], []

    optimizer.zero_grad(set_to_none=True)

    for step, (xb, yb) in enumerate(loader):
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)

        with torch.cuda.amp.autocast(enabled=USE_AMP):
            logits = model(xb)
            loss = criterion(logits, yb)
            loss = loss / GRAD_ACCUM

        if USE_AMP:
            scaler.scale(loss).backward()
        else:
            loss.backward()

        if (step + 1) % GRAD_ACCUM == 0:
            if USE_AMP:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad(set_to_none=True)

        total_loss += loss.item() * GRAD_ACCUM
        pred = torch.argmax(logits, dim=1)
        y_true.append(yb.detach().cpu().numpy())
        y_pred.append(pred.detach().cpu().numpy())

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)
    acc = accuracy_score(y_true, y_pred)
    return total_loss / max(1, len(loader)), acc


@torch.no_grad()
def eval_one_epoch(model, loader, criterion, device):
    model.eval()
    total_loss = 0.0
    y_true, y_pred = [], []

    for xb, yb in loader:
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)
        with torch.cuda.amp.autocast(enabled=USE_AMP):
            logits = model(xb)
            loss = criterion(logits, yb)
        total_loss += loss.item()
        pred = torch.argmax(logits, dim=1)
        y_true.append(yb.cpu().numpy())
        y_pred.append(pred.cpu().numpy())

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)
    acc = accuracy_score(y_true, y_pred)
    return total_loss / max(1, len(loader)), acc, y_true, y_pred

In [6]:
from tqdm.auto import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

torch.backends.cudnn.benchmark = True




In [7]:
import torch
import torch.nn as nn

def _find_best_blocklist(backbone: nn.Module):
    """
    尝试在各种常见结构里找到 transformer blocks / layers (ModuleList)。
    找不到就退化：挑一个“最长的 ModuleList”当 blocks。
    """
    # 1) 常见显式路径（按经验覆盖 CLIP/ViT/Transformer encoder）
    candidates = [
        ("visual.transformer.resblocks", lambda m: m.visual.transformer.resblocks),
        ("transformer.resblocks",        lambda m: m.transformer.resblocks),
        ("vision_model.encoder.layers",  lambda m: m.vision_model.encoder.layers),
        ("vision_model.blocks",          lambda m: m.vision_model.blocks),
        ("encoder.layers",               lambda m: m.encoder.layers),
        ("blocks",                       lambda m: m.blocks),
        ("layers",                       lambda m: m.layers),
    ]
    for name, getter in candidates:
        try:
            blocks = getter(backbone)
            if isinstance(blocks, (nn.ModuleList, list, tuple)) and len(blocks) > 0:
                return name, blocks
        except Exception:
            pass

    # 2) 兜底：遍历所有 ModuleList，选最长的那个
    best = None
    best_name = None
    for n, mod in backbone.named_modules():
        if isinstance(mod, nn.ModuleList) and len(mod) > 0:
            # 粗略判断：里面大部分是 nn.Module
            if all(isinstance(x, nn.Module) for x in mod):
                if best is None or len(mod) > len(best):
                    best = mod
                    best_name = n
    if best is None:
        raise RuntimeError("找不到 backbone 的 blocks/layers(ModuleList)。请把 mi.model 的结构打印出来我来定点改。")
    return best_name, best

def freeze_all_params(module: nn.Module):
    for p in module.parameters():
        p.requires_grad = False

def unfreeze_params(module: nn.Module):
    for p in module.parameters():
        p.requires_grad = True

def count_trainable_params(module: nn.Module):
    return sum(p.numel() for p in module.parameters() if p.requires_grad)


In [8]:
from medimageinsightmodel import MedImageInsight

mi = MedImageInsight(
    model_dir="/root/autodl-tmp/models--lion-ai--MedImageInsights/snapshots/1/2024.09.27",
    vision_model_name="medimageinsigt-v1.0.0.pt",
    language_model_name="language_model.pth",
)
mi.load_model()
device = mi.device
print("MI2 device:", device)


Model loaded successfully on device: cuda
MI2 device: cuda


In [9]:
train_tfm, val_tfm = build_transforms_for_mi2(mi.preprocess)

train_ds = ImageFolder(TRAIN_DIR, transform=train_tfm)
val_ds   = ImageFolder(VAL_DIR,   transform=val_tfm)

print("classes:", train_ds.classes)
assert len(train_ds.classes) == NUM_CLASSES, "NUM_CLASSES 与文件夹类别数不一致"

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True)

# 推断 embed dim
x0, _ = train_ds[0]
x0 = x0.unsqueeze(0).to(device)
embed_dim = infer_embed_dim(mi.model, x0)
print("embed_dim =", embed_dim)

import torch.nn as nn

device = torch.device("cuda:0")  # 主卡
model = MI2Classifier(mi.model, embed_dim=embed_dim, num_classes=NUM_CLASSES).to(device)

if torch.cuda.device_count() >= 2:
    print("Using DataParallel on", torch.cuda.device_count(), "GPUs")
    model = nn.DataParallel(model, device_ids=[0, 1])  # 双卡 0,1


criterion = nn.CrossEntropyLoss(label_smoothing=LABEL_SMOOTHING)
scaler = torch.cuda.amp.GradScaler(enabled=USE_AMP)

best_acc = -1.0
best_path = "best_mi2_cxr4.pt"


classes: ['class 0', 'class 1', 'class 2', 'class 3']


  return fn(*args, **kwargs)


embed_dim = 1024
[MI2Classifier] using blocks: lang_encoder.resblocks (len=16)
Using DataParallel on 2 GPUs


  scaler = torch.cuda.amp.GradScaler(enabled=USE_AMP)


In [10]:
def train_one_epoch_tqdm(model, loader, optimizer, criterion, device, scaler=None, epoch=0):
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0

    optimizer.zero_grad(set_to_none=True)

    pbar = tqdm(loader, desc=f"Train e{epoch:02d}", leave=False)
    for step, (xb, yb) in enumerate(pbar, start=1):
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)

        with torch.cuda.amp.autocast(enabled=USE_AMP):
            logits = model(xb)
            loss = criterion(logits, yb) / GRAD_ACCUM

        if USE_AMP:
            scaler.scale(loss).backward()
        else:
            loss.backward()

        if step % GRAD_ACCUM == 0:
            if USE_AMP:
                scaler.step(optimizer); scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad(set_to_none=True)

        # 统计（用未除以 accum 的真实 loss）
        loss_item = float(loss.detach()) * GRAD_ACCUM
        total_loss += loss_item

        pred = logits.argmax(dim=1)
        correct += int((pred == yb).sum().item())
        total += int(yb.size(0))

        avg_loss = total_loss / step
        avg_acc = correct / max(1, total)

        pbar.set_postfix(loss=f"{avg_loss:.4f}", acc=f"{avg_acc:.4f}")

    return total_loss / max(1, len(loader)), correct / max(1, total)

@torch.no_grad()
def eval_one_epoch_tqdm(model, loader, criterion, device, epoch=0):
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    y_true, y_pred = [], []

    pbar = tqdm(loader, desc=f"Val   e{epoch:02d}", leave=False)
    for step, (xb, yb) in enumerate(pbar, start=1):
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)

        with torch.cuda.amp.autocast(enabled=USE_AMP):
            logits = model(xb)
            loss = criterion(logits, yb)

        loss_item = float(loss.detach())
        total_loss += loss_item

        pred = logits.argmax(dim=1)
        correct += int((pred == yb).sum().item())
        total += int(yb.size(0))

        y_true.append(yb.cpu().numpy())
        y_pred.append(pred.cpu().numpy())

        avg_loss = total_loss / step
        avg_acc = correct / max(1, total)
        pbar.set_postfix(loss=f"{avg_loss:.4f}", acc=f"{avg_acc:.4f}")

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    return total_loss / max(1, len(loader)), correct / max(1, total), y_true, y_pred


In [11]:
# epochs_bar = tqdm(range(1, EPOCHS + 1), desc="Epochs")
# torch.cuda.empty_cache()
# for epoch in epochs_bar:
#     m = model.module if isinstance(model, nn.DataParallel) else model
#     if epoch <= HEAD_ONLY_EPOCHS:
#         m.freeze_backbone(True)
#         params = [
#             {"params": m.head.parameters(), "lr": LR_HEAD, "weight_decay": WEIGHT_DECAY},
#         ]
#         stage = "head"
#     else:
#         m.freeze_backbone(False)
#         params = [
#             {"params": m.head.parameters(), "lr": LR_HEAD, "weight_decay": WEIGHT_DECAY},
#             {"params": m.backbone.parameters(), "lr": LR_BACKBONE, "weight_decay": WEIGHT_DECAY},
#         ]
#         stage = "finetune"
#     optimizer = optim.AdamW(params)

#     tr_loss, tr_acc = train_one_epoch_tqdm(model, train_loader, optimizer, criterion, device, scaler=scaler, epoch=epoch)
#     va_loss, va_acc, y_true, y_pred = eval_one_epoch_tqdm(model, val_loader, criterion, device, epoch=epoch)

#     msg = f"e{epoch:02d} [{stage}] tr_loss={tr_loss:.4f} tr_acc={tr_acc:.4f} | va_loss={va_loss:.4f} va_acc={va_acc:.4f}"
#     epochs_bar.set_postfix_str(msg)
#     print(msg)

#     if va_acc > best_acc:
#         best_acc = va_acc
#         state_dict = (model.module.state_dict()
#               if isinstance(model, nn.DataParallel)
#               else model.state_dict())
#         torch.save({
#             "epoch": epoch,
#             "best_acc": best_acc,
#             "state_dict": state_dict,
#             "classes": train_ds.classes,
#             "embed_dim": embed_dim,
#         }, best_path)
#         print("  ✅ saved:", best_path, "best_acc=", best_acc)

# print("\nBest val acc:", best_acc)
# print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))
# print(classification_report(y_true, y_pred, target_names=train_ds.classes, digits=4))


In [12]:
LAST_K_BLOCKS = 2  # ✅ 你想训练的“最后几层”，建议先 1~3 试
epochs_bar = tqdm(range(1, EPOCHS + 1), desc="Epochs")
torch.cuda.empty_cache()
for epoch in epochs_bar:
    m = model.module if isinstance(model, nn.DataParallel) else model

    if epoch <= HEAD_ONLY_EPOCHS:
        m.set_trainable(last_k=0)        # 只训 head
        stage = "head"
        params = [{"params": m.head.parameters(), "lr": LR_HEAD, "weight_decay": WEIGHT_DECAY}]
    else:
        m.set_trainable(last_k=LAST_K_BLOCKS)  # ✅ 只训最后K层 + head
        stage = f"last{LAST_K_BLOCKS}+head"
        # 只把 requires_grad=True 的 backbone 参数交给 optimizer（避免误加冻结层）
        backbone_trainable = [p for p in m.backbone.parameters() if p.requires_grad]
        params = [
            {"params": m.head.parameters(), "lr": LR_HEAD, "weight_decay": WEIGHT_DECAY},
            {"params": backbone_trainable, "lr": LR_BACKBONE, "weight_decay": WEIGHT_DECAY},
        ]

    optimizer = optim.AdamW(params)

    # 可选：打印一下确认只训了哪些参数
    if epoch in [1, HEAD_ONLY_EPOCHS, HEAD_ONLY_EPOCHS + 1]:
        print(f"[epoch {epoch}] stage={stage}  trainable(backbone)={count_trainable_params(m.backbone)/1e6:.2f}M  trainable(total)={count_trainable_params(m)/1e6:.2f}M")

    tr_loss, tr_acc = train_one_epoch_tqdm(model, train_loader, optimizer, criterion, device, scaler=scaler, epoch=epoch)
    va_loss, va_acc, y_true, y_pred = eval_one_epoch_tqdm(model, val_loader, criterion, device, epoch=epoch)

    print(f"[Epoch {epoch:02d}/{EPOCHS}] {stage} | train loss={tr_loss:.4f} acc={tr_acc:.4f} | val loss={va_loss:.4f} acc={va_acc:.4f}")

    # 保存 best（DP 情况保存 module）
    state_dict = (model.module.state_dict() if isinstance(model, nn.DataParallel) else model.state_dict())
    if va_acc > best_acc:
        best_acc = va_acc
        torch.save({"epoch": epoch, "best_acc": best_acc, "state_dict": state_dict,
                    "classes": train_ds.classes, "embed_dim": embed_dim}, best_path)
        print("  ✅ saved:", best_path, "best_acc=", best_acc)


Epochs:   0%|          | 0/20 [00:00<?, ?it/s]

[epoch 1] stage=last2+head  trainable(backbone)=25.19M  trainable(total)=25.20M


Train e01:   0%|          | 0/23 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast(enabled=USE_AMP):


Val   e01:   0%|          | 0/6 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast(enabled=USE_AMP):


[Epoch 01/20] last2+head | train loss=1.3090 acc=0.6718 | val loss=1.2251 acc=0.7141
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.7140902872777017


Train e02:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e02:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 02/20] last2+head | train loss=1.1711 acc=0.7444 | val loss=1.0969 acc=0.7620
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.761969904240766


Train e03:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e03:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 03/20] last2+head | train loss=1.0585 acc=0.7735 | val loss=0.9902 acc=0.7818
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.7818057455540356


Train e04:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e04:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 04/20] last2+head | train loss=0.9631 acc=0.7879 | val loss=0.9002 acc=0.7934
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.7934336525307798


Train e05:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e05:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 05/20] last2+head | train loss=0.8825 acc=0.8045 | val loss=0.8258 acc=0.8064
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.8064295485636115


Train e06:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e06:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 06/20] last2+head | train loss=0.8149 acc=0.8216 | val loss=0.7668 acc=0.8290
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.8290013679890561


Train e07:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e07:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 07/20] last2+head | train loss=0.7634 acc=0.8430 | val loss=0.7195 acc=0.8570
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.8570451436388509


Train e08:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e08:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 08/20] last2+head | train loss=0.7218 acc=0.8569 | val loss=0.6821 acc=0.8694
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.8693570451436389


Train e09:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e09:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 09/20] last2+head | train loss=0.6873 acc=0.8615 | val loss=0.6539 acc=0.8762
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.8761969904240766


Train e10:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e10:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 10/20] last2+head | train loss=0.6624 acc=0.8651 | val loss=0.6320 acc=0.8796
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.8796169630642955


Train e11:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e11:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 11/20] last2+head | train loss=0.6422 acc=0.8718 | val loss=0.6163 acc=0.8817
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.8816689466484268


Train e12:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e12:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 12/20] last2+head | train loss=0.6276 acc=0.8742 | val loss=0.6038 acc=0.8789


Train e13:   0%|          | 0/23 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fe1f28434c0>
Traceback (most recent call last):
  File "/root/miniconda3/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/root/miniconda3/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/root/miniconda3/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fe1f28434c0>
Traceback (most recent call last):
  File "/root/miniconda3/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/root/miniconda3/lib/python3.12/site-packages/torc

Val   e13:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 13/20] last2+head | train loss=0.6164 acc=0.8757 | val loss=0.5946 acc=0.8837
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.8837209302325582


Train e14:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e14:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 14/20] last2+head | train loss=0.6074 acc=0.8757 | val loss=0.5878 acc=0.8796


Train e15:   0%|          | 0/23 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fe1f28434c0>
Traceback (most recent call last):
  File "/root/miniconda3/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/root/miniconda3/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/root/miniconda3/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fe1f28434c0>
Traceback (most recent call last):
  File "/root/miniconda3/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/root/miniconda3/lib/python3.12/site-packages/torc

Val   e15:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 15/20] last2+head | train loss=0.6010 acc=0.8783 | val loss=0.5829 acc=0.8824


Train e16:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e16:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 16/20] last2+head | train loss=0.5961 acc=0.8783 | val loss=0.5791 acc=0.8830


Train e17:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e17:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 17/20] last2+head | train loss=0.5925 acc=0.8767 | val loss=0.5760 acc=0.8830


Train e18:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e18:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 18/20] last2+head | train loss=0.5893 acc=0.8815 | val loss=0.5742 acc=0.8810


Train e19:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e19:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 19/20] last2+head | train loss=0.5872 acc=0.8808 | val loss=0.5722 acc=0.8851
  ✅ saved: best_mi2_cxr4.pt best_acc= 0.8850889192886456


Train e20:   0%|          | 0/23 [00:00<?, ?it/s]

Val   e20:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 20/20] last2+head | train loss=0.5841 acc=0.8793 | val loss=0.5706 acc=0.8844


In [None]:
print(model)