In [1]:
import os
import random
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import shutil
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torchvision.models import resnet18, ResNet18_Weights, resnet34, ResNet34_Weights, resnet50, ResNet50_Weights
from tqdm.notebook import tqdm
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score


In [2]:
RAW_DIR = Path("D:/OneDriveFiles/OneDrive/人工智能基础期末/dataset2")
OUT_DIR = Path("D:/OneDriveFiles/OneDrive/人工智能基础期末/data_split")    # 拆分后的 train/val 会放在这里

classes = os.listdir(RAW_DIR)
classes


['.DS_Store',
 'baseline_Conv_transformer.ipynb',
 'baseline_resnet.ipynb',
 'BiomedCLIP_baseline.ipynb',
 'class 0',
 'class 1',
 'class 2',
 'class 3',
 'CLIP ViT-L14.ipynb',
 'medsig.ipynb',
 'MedSigLIP.ipynb',
 'medsiglip448_cls_best_acc0.8762.pth',
 'medsig_lora.ipynb',
 'trainlog_19.log',
 'trainlog_24.log',
 'trainlog_linear.log',
 'trainlog_resnet.log']

In [3]:
random.seed(42)

train_ratio = 0.8   # 训练集占 80%
val_ratio   = 0.2   # 验证集占 20%，train_ratio + val_ratio 应该 = 1


In [4]:
BATCH_SIZE   = 256
EPOCHS       = 10
LR           = 1e-3
WEIGHT_DECAY = 1e-4
TRAIN_RATIO  = 0.7
VAL_RATIO    = 0.15     # TEST 就是剩下 0.15

device = "cuda" if torch.cuda.is_available() else "cpu"
print("使用设备:", device)


使用设备: cuda


In [5]:
classes = [d for d in os.listdir(RAW_DIR) if (RAW_DIR / d).is_dir()]
print("发现的类别：", classes)

for phase in ["train", "val"]:
    for cls in classes:
        (OUT_DIR / phase / cls).mkdir(parents=True, exist_ok=True)


发现的类别： ['class 0', 'class 1', 'class 2', 'class 3']


In [6]:
for cls in classes:
    src_dir = RAW_DIR / cls
    files = [f for f in os.listdir(src_dir)
             if f.lower().endswith((".png", ".jpg", ".jpeg"))]

    random.shuffle(files)
    n = len(files)
    n_train = int(n * train_ratio)
    # val 集就是剩下的
    train_files = files[:n_train]
    val_files   = files[n_train:]

    print(f"{cls}: 总数 {n}, 训练 {len(train_files)}, 验证 {len(val_files)}")

    # 复制到目标文件夹（想省空间可以用 shutil.move）
    for fname in train_files:
        shutil.copy(src_dir / fname, OUT_DIR / "train" / cls / fname)
    for fname in val_files:
        shutil.copy(src_dir / fname, OUT_DIR / "val" / cls / fname)

print("划分完成，已保存到", OUT_DIR)


class 0: 总数 2530, 训练 2024, 验证 506
class 1: 总数 1345, 训练 1076, 验证 269
class 2: 总数 1626, 训练 1300, 验证 326
class 3: 总数 1802, 训练 1441, 验证 361
划分完成，已保存到 D:\OneDriveFiles\OneDrive\人工智能基础期末\data_split


In [7]:
train_tfm = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # 灰度 → 1 通道
    transforms.Resize((224, 224)),               # 统一到 224×224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

val_tfm = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

train_set = ImageFolder(str(OUT_DIR / "train"), transform=train_tfm)
val_set   = ImageFolder(str(OUT_DIR / "val"),   transform=val_tfm)

train_loader = DataLoader(train_set, batch_size=32, shuffle=True,  num_workers=4)
val_loader   = DataLoader(val_set,   batch_size=32, shuffle=False, num_workers=4)

print("类别映射：", train_set.class_to_idx)
print("训练集大小：", len(train_set))
print("验证集大小：", len(val_set))

类别映射： {'class 0': 0, 'class 1': 1, 'class 2': 2, 'class 3': 3}
训练集大小： 5841
验证集大小： 1462


In [8]:
resnet_train_tfm = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),   # 灰度转 3 通道
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],   # ImageNet 均值
        std=[0.229, 0.224, 0.225],    # ImageNet 方差
    ),
])

resnet_eval_tfm = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

train_dir = "D:/OneDriveFiles/OneDrive/人工智能基础期末/data_split/train"   # 改成你自己的路径
val_dir   = "D:/OneDriveFiles/OneDrive/人工智能基础期末/data_split/val"     # 改成你自己的路径

resnet_train_set = ImageFolder(train_dir, transform=resnet_train_tfm)
resnet_val_set   = ImageFolder(val_dir,   transform=resnet_eval_tfm)

resnet_train_loader = DataLoader(resnet_train_set, batch_size=BATCH_SIZE, shuffle=True,  num_workers=4)
resnet_val_loader   = DataLoader(resnet_val_set,   batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

classes = resnet_train_set.classes
num_classes = len(classes)
print("类别：", classes, "，数量：", num_classes)


类别： ['class 0', 'class 1', 'class 2', 'class 3'] ，数量： 4


In [9]:
class ResNetClassifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        # 加载预训练 ResNet18
        weights = ResNet18_Weights.IMAGENET1K_V1
        self.backbone = resnet18(weights=weights)

        # 不用改第一层，因为我们已经在 transform 里把图像转成 RGB 3 通道了

        # 替换最后一层全连接为 num_classes
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.backbone(x)

In [15]:
class ResNet34Classifier(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        # 加载预训练 ResNet18
        weights = ResNet34_Weights.IMAGENET1K_V1
        self.backbone = resnet34(weights=weights)

        # 不用改第一层，因为我们已经在 transform 里把图像转成 RGB 3 通道了

        # 替换最后一层全连接为 num_classes
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.backbone(x)

In [16]:
def train_one_epoch(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0.0
    all_preds, all_labels = [], []
    progress = tqdm(loader, desc="Training", leave=False)

    for x, y in progress:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * x.size(0)
        preds = logits.argmax(dim=1)
        all_preds.extend(preds.detach().cpu().tolist())
        all_labels.extend(y.detach().cpu().tolist())

    avg_loss = total_loss / len(loader.dataset)
    acc = accuracy_score(all_labels, all_preds)
    f1  = f1_score(all_labels, all_preds, average="macro")
    return avg_loss, acc, f1


@torch.no_grad()
def eval_one_epoch(model, loader, criterion, device):
    model.eval()
    total_loss = 0.0
    all_preds, all_labels = [], []
    progress = tqdm(loader, desc="Validating", leave=False)
    for x, y in progress:
        x, y = x.to(device), y.to(device)

        logits = model(x)
        loss = criterion(logits, y)

        total_loss += loss.item() * x.size(0)
        preds = logits.argmax(dim=1)
        all_preds.extend(preds.detach().cpu().tolist())
        all_labels.extend(y.detach().cpu().tolist())

    avg_loss = total_loss / len(loader.dataset)
    acc = accuracy_score(all_labels, all_preds)
    f1  = f1_score(all_labels, all_preds, average="macro")
    return avg_loss, acc, f1, all_labels, all_preds


In [17]:
# 训练Resnet18+DropMLP
resnet_model = ResNetClassifier(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet_model.parameters(), lr=1e-4, weight_decay=1e-4)

num_epochs = 15
best_val_acc = 0.0
best_state = None
history_resnet = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []}

for name, param in resnet_model.named_parameters():
    print(name, param.requires_grad)

backbone.conv1.weight True
backbone.bn1.weight True
backbone.bn1.bias True
backbone.layer1.0.conv1.weight True
backbone.layer1.0.bn1.weight True
backbone.layer1.0.bn1.bias True
backbone.layer1.0.conv2.weight True
backbone.layer1.0.bn2.weight True
backbone.layer1.0.bn2.bias True
backbone.layer1.1.conv1.weight True
backbone.layer1.1.bn1.weight True
backbone.layer1.1.bn1.bias True
backbone.layer1.1.conv2.weight True
backbone.layer1.1.bn2.weight True
backbone.layer1.1.bn2.bias True
backbone.layer2.0.conv1.weight True
backbone.layer2.0.bn1.weight True
backbone.layer2.0.bn1.bias True
backbone.layer2.0.conv2.weight True
backbone.layer2.0.bn2.weight True
backbone.layer2.0.bn2.bias True
backbone.layer2.0.downsample.0.weight True
backbone.layer2.0.downsample.1.weight True
backbone.layer2.0.downsample.1.bias True
backbone.layer2.1.conv1.weight True
backbone.layer2.1.bn1.weight True
backbone.layer2.1.bn1.bias True
backbone.layer2.1.conv2.weight True
backbone.layer2.1.bn2.weight True
backbone.layer

In [None]:
# 全量训练 ResNet18
num_classes = 4
resnet_model = ResNetClassifier(num_classes=num_classes).to(device)
resnet34_model = ResNet34Classifier(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet_model.parameters(), lr=1e-4, weight_decay=1e-4)

num_epochs = 15
best_val_acc = 0.0
best_state = None
history_resnet = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []}

for epoch in range(1, num_epochs + 1):
    tr_loss, tr_acc, tr_f1 = train_one_epoch(
        resnet34_model, resnet_train_loader, optimizer, criterion, device
    )
    val_loss, val_acc, val_f1, _, _ = eval_one_epoch(
        resnet34_model, resnet_val_loader, criterion, device
    )

    history_resnet["train_loss"].append(tr_loss)
    history_resnet["val_loss"].append(val_loss)
    history_resnet["train_acc"].append(tr_acc)
    history_resnet["val_acc"].append(val_acc)

    print(
        f"[Epoch {epoch:02d}] "
        f"Train loss={tr_loss:.4f} acc={tr_acc:.4f} f1={tr_f1:.4f} | "
        f"Val loss={val_loss:.4f} acc={val_acc:.4f} f1={val_f1:.4f}"
    )

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state = resnet34_model.state_dict()
        print("  -> 保存当前最好模型")


Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to C:\Users\pc/.cache\torch\hub\checkpoints\resnet34-b627a593.pth


100%|██████████| 83.3M/83.3M [00:30<00:00, 2.89MB/s]


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validating:   0%|          | 0/6 [00:00<?, ?it/s]

[Epoch 01] Train loss=1.5741 acc=0.2472 f1=0.1809 | Val loss=1.6622 acc=0.2264 f1=0.1309
  -> 保存当前最好模型


Training:   0%|          | 0/23 [00:00<?, ?it/s]

In [None]:
# 将最后的fc层换成MLP + Dropout
class ResNetClassifier_DropMLP(nn.Module):
    def __init__(self, num_classes, dropout_p=0.5, hidden_dim=256):
        super().__init__()
        weights = ResNet18_Weights.IMAGENET1K_V1
        self.backbone = resnet18(weights=weights)
        
        in_features = self.backbone.fc.in_features

        # 替换 fc 头为 MLP + Dropout
        self.backbone.fc = nn.Sequential(
            nn.Linear(in_features, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_p),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        return self.backbone(x)

In [None]:
# 训练Resnet18+DropMLP
resnet_model = ResNetClassifier_DropMLP(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet_model.parameters(), lr=1e-4, weight_decay=1e-4)

num_epochs = 15
best_val_acc = 0.0
best_state = None
history_resnet = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []}

for name, param in resnet_model.named_parameters():
    print(name, param.requires_grad)

In [None]:
for epoch in range(1, num_epochs + 1):
    tr_loss, tr_acc, tr_f1 = train_one_epoch(
        resnet_model, resnet_train_loader, optimizer, criterion, device
    )
    val_loss, val_acc, val_f1, _, _ = eval_one_epoch(
        resnet_model, resnet_val_loader, criterion, device
    )

    history_resnet["train_loss"].append(tr_loss)
    history_resnet["val_loss"].append(val_loss)
    history_resnet["train_acc"].append(tr_acc)
    history_resnet["val_acc"].append(val_acc)

    print(
        f"[Epoch {epoch:02d}] "
        f"Train loss={tr_loss:.4f} acc={tr_acc:.4f} f1={tr_f1:.4f} | "
        f"Val loss={val_loss:.4f} acc={val_acc:.4f} f1={val_f1:.4f}"
    )

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state = resnet_model.state_dict()
        print("  -> 保存当前最好模型")


In [None]:
class TruncatedResNet18Classifier(nn.Module):
    """
    用 ResNet18 的 conv1 ~ layer3 作为 backbone（删掉 layer4），
    再接一个自己的分类头。
    """
    def __init__(self, num_classes, pretrained=True, dropout_p=0.5, hidden_dim=256):
        super().__init__()
        weights = ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        base = resnet18(weights=weights)

        # stem + layer1~3 作为 backbone
        self.stem = nn.Sequential(
            base.conv1,
            base.bn1,
            base.relu,
            base.maxpool,
        )
        self.layer1 = base.layer1
        self.layer2 = base.layer2
        self.layer3 = base.layer3   # 不再用 layer4

        # 全局池化
        self.gap = nn.AdaptiveAvgPool2d(1)  # -> [B, 256, 1, 1]

        # 256 是 layer3 输出通道数
        in_features = 256

        # 自定义 head：一个小 MLP + Dropout
        self.classifier = nn.Sequential(
            nn.Linear(in_features, hidden_dim),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        x = self.stem(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)      # 现在的特征图更大、层数更浅
        x = self.gap(x).view(x.size(0), -1)  # [B, 256]
        logits = self.classifier(x)          # [B, num_classes]
        return logits


In [None]:
# 训练Resnet18+DropMLP
resnet_model = TruncatedResNet18Classifier(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet_model.parameters(), lr=1e-4, weight_decay=1e-4)

num_epochs = 10
best_val_acc = 0.0
best_state = None
history_resnet = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []}

for name, param in resnet_model.named_parameters():
    print(name, param.requires_grad)

In [None]:
log_path = "trainlog_resnet.log"
best_val_acc = 0.0
best_state_dict = None
torch.cuda.empty_cache()
for epoch in range(1, num_epochs + 1):
    tr_loss, tr_acc, tr_f1 = train_one_epoch(
        resnet_model, resnet_train_loader, optimizer, criterion, device
    )
    val_loss, val_acc, val_f1, _, _ = eval_one_epoch(
        resnet_model, resnet_val_loader, criterion, device
    )

    line = (
        f"[Epoch {epoch}] "
        f"train_loss={tr_loss:.4f} train_acc={tr_acc:.4f} | "
        f"val_loss={val_loss:.4f} val_acc={val_acc:.4f}"
    )

    print(line)

    with open(log_path, "a", encoding="utf-8") as f:
        f.write(line + "\n")

    history_resnet["train_loss"].append(tr_loss)
    history_resnet["val_loss"].append(val_loss)
    history_resnet["train_acc"].append(tr_acc)
    history_resnet["val_acc"].append(val_acc)

    print(
        f"[Epoch {epoch:02d}] "
        f"Train loss={tr_loss:.4f} acc={tr_acc:.4f} f1={tr_f1:.4f} | "
        f"Val loss={val_loss:.4f} acc={val_acc:.4f} f1={val_f1:.4f}"
    )

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state = resnet_model.state_dict()
        print("  -> 保存当前最好模型")
