In [52]:
import os
import random
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
import timm
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

In [53]:
class Config:
    seed = 42
    data_root = 'D:\\Leko\\medical_model\\task3\\images'  # 图像路径
    label_csv = 'D:\\Leko\\medical_model\\task3\\labels.csv'  # CSV文件路径
    img_size = 224
    batch_size = 32
    num_epochs = 30
    lr = 1e-4
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_save_path = 'best_model.pth'

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(Config.seed)

In [54]:
df = pd.read_csv(Config.label_csv)

df = df.iloc[:, [0, 2]]
df.columns = ['filename', 'label']  

print("数据预览：")
print(df.head())

print("\n各类别数量：")
print(df['label'].value_counts())


train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label'],  
    random_state=Config.seed
)

print(f"\n训练集大小: {len(train_df)}")
print(f"验证集大小: {len(val_df)}")


数据预览：
         filename  label
0  image_0001.jpg      1
1  image_0002.jpg      1
2  image_0003.jpg      1
3  image_0004.jpg      1
4  image_0005.jpg      1

各类别数量：
label
1    846
0    421
Name: count, dtype: int64

训练集大小: 1013
验证集大小: 254


In [None]:
class BladderTumorDataset(Dataset):
    def __init__(self, df, image_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.image_dir = image_dir
        self.transform = transform
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        img_name = self.df.loc[idx, 'filename']
        label = self.df.loc[idx, 'label']

        img_path = os.path.join(self.image_dir, img_name)
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV默认是BGR，需要转成RGB

        if self.transform:
            image = self.transform(image=image)['image']
        
        label = torch.tensor(label, dtype=torch.float32)

        return image, label


In [56]:
def get_transforms(train=True):
    if train:
        return A.Compose([
            A.Resize(Config.img_size, Config.img_size),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.2),
            A.Rotate(limit=10, p=0.3),
            A.Normalize(mean=(0.485, 0.456, 0.406),
                        std=(0.229, 0.224, 0.225)),
            ToTensorV2()
        ])
    else:
        return A.Compose([
            A.Resize(Config.img_size, Config.img_size),
            A.Normalize(mean=(0.485, 0.456, 0.406),
                        std=(0.229, 0.224, 0.225)),
            ToTensorV2()
        ])


In [57]:
train_dataset = BladderTumorDataset(
    df=train_df,
    image_dir=Config.data_root,
    transform=get_transforms(train=True)
)

val_dataset = BladderTumorDataset(
    df=val_df,
    image_dir=Config.data_root,
    transform=get_transforms(train=False)
)

train_loader = DataLoader(
    train_dataset,
    batch_size=Config.batch_size,
    shuffle=True,          
    num_workers=0,          
    pin_memory=True         
)

val_loader = DataLoader(
    val_dataset,
    batch_size=Config.batch_size,
    shuffle=False,          
    num_workers=0,
    pin_memory=True
)

print(f"训练样本数: {len(train_dataset)}")
print(f"验证样本数: {len(val_dataset)}")
train_labels = train_df['label'].value_counts()
val_labels = val_df['label'].value_counts()

print("\n训练集类别分布:")
for label, count in train_labels.items():
    print(f"  类别 {label}: {count} 张")

print("\n验证集类别分布:")
for label, count in val_labels.items():
    print(f"  类别 {label}: {count} 张")


训练样本数: 1013
验证样本数: 254

训练集类别分布:
  类别 1: 676 张
  类别 0: 337 张

验证集类别分布:
  类别 1: 170 张
  类别 0: 84 张


In [None]:
# 搭建模型（ConvNeXt-Tiny版）
class BladderClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        # 使用 ConvNeXt-Tiny 作为backbone
        self.backbone = timm.create_model(
            'convnext_tiny', pretrained=True, num_classes=0, global_pool='avg'
        )
        # 分类头
        self.classifier = nn.Sequential(
            nn.Linear(self.backbone.num_features, 256),
            nn.ReLU(),
            nn.Dropout(0.6),
            nn.Linear(256, 1)
        )

    
    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        return x


In [59]:
criterion = nn.BCEWithLogitsLoss()

model = BladderClassifier().to(Config.device)
optimizer = optim.Adam(model.parameters(), lr=Config.lr)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min',       # 监测指标是 val_loss
    factor=0.5,       # 每次降低到一半
    patience=3,       # 连续3个epoch没有提升就降低
    verbose=True
)



In [60]:
scaler = GradScaler()

import random

def rand_bbox(size, lam):
    """生成CutMix的bounding box区域"""
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # 随机中心点
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    # 边界计算
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

def train_one_epoch(model, train_loader, optimizer, criterion, device, cutmix_prob=0.5):
    model.train()
    running_loss = 0.0
    loop = tqdm(train_loader, desc="Training", leave=False)

    for images, labels in loop:
        images = images.to(device)
        labels = labels.to(device).unsqueeze(1)

        optimizer.zero_grad()

        r = random.random()
        if r < cutmix_prob:#做消融实验
            # --------- CutMix ----------
            lam = np.random.beta(1.0, 1.0)  
            rand_index = torch.randperm(images.size(0)).to(device)

            target_a = labels
            target_b = labels[rand_index]

            bbx1, bby1, bbx2, bby2 = rand_bbox(images.size(), lam)
            images[:, :, bbx1:bbx2, bby1:bby2] = images[rand_index, :, bbx1:bbx2, bby1:bby2]

            with autocast():
                outputs = model(images)
                loss = lam * criterion(outputs, target_a) + (1 - lam) * criterion(outputs, target_b)
        else:
            # --------- Normal ----------
            with autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    return avg_loss



def validate(model, val_loader, criterion, device, threshold=0.5):
    model.eval()
    running_loss = 0.0
    preds = []
    trues = []

    with torch.no_grad():
        loop = tqdm(val_loader, desc="Validation", leave=False)
        for images, labels in loop:
            images = images.to(device)
            labels = labels.to(device).unsqueeze(1)

            with autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)

            running_loss += loss.item()

            probs = torch.sigmoid(outputs)
            preds.append(probs.cpu())
            trues.append(labels.cpu())

    preds = torch.cat(preds).numpy()
    trues = torch.cat(trues).numpy()

    avg_loss = running_loss / len(val_loader)

    pred_labels = (preds >= threshold).astype(int)

    val_auc = roc_auc_score(trues, preds)
    val_precision = precision_score(trues, pred_labels)
    val_recall = recall_score(trues, pred_labels)
    val_f1 = f1_score(trues, pred_labels)

    tn, fp, fn, tp = confusion_matrix(trues, pred_labels).ravel()
    val_specificity = tn / (tn + fp)

    metrics = {
        'loss': avg_loss,
        'auc': val_auc,
        'precision': val_precision,
        'recall': val_recall,
        'f1': val_f1,
        'specificity': val_specificity
    }

    return metrics


  scaler = GradScaler()


In [61]:
def plot_training_curves(train_losses, val_losses, val_aucs, val_f1s):
    epochs = range(1, len(train_losses) + 1)
    
    plt.figure(figsize=(18, 5))

    plt.subplot(1, 3, 1)
    plt.plot(epochs, train_losses, label='Train Loss', marker='o')
    plt.plot(epochs, val_losses, label='Val Loss', marker='s')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Loss 曲线")
    plt.grid(True)
    plt.legend()

    plt.subplot(1, 3, 2)
    plt.plot(epochs, val_aucs, label='Val AUC', color='orange', marker='o')
    plt.xlabel("Epoch")
    plt.ylabel("AUC")
    plt.title("验证集 AUC 曲线")
    plt.grid(True)
    plt.ylim(0.5, 1.0)
    plt.legend()

    plt.subplot(1, 3, 3)
    plt.plot(epochs, val_f1s, label='Val F1 Score', color='green', marker='o')
    plt.xlabel("Epoch")
    plt.ylabel("F1 Score")
    plt.title("验证集 F1 Score 曲线")
    plt.grid(True)
    plt.ylim(0.0, 1.0)
    plt.legend()

    plt.tight_layout()
    plt.show()


In [62]:
from PIL import Image

def predict_image(model, image_path, device, threshold=0.5):
    model.eval()

    image = Image.open(image_path).convert('RGB')

    # 定义和验证集一样的预处理
    transform = A.Compose([
        A.Resize(Config.img_size, Config.img_size),
        A.Normalize(mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])

    # 应用预处理
    image = np.array(image)
    image = transform(image=image)['image']
    image = image.unsqueeze(0).to(device)  

    with torch.no_grad():
        with autocast():
            output = model(image)
            prob = torch.sigmoid(output).cpu().item()

    pred_label = 1 if prob >= threshold else 0

    print(f"推理完成：概率={prob:.4f} | 预测类别={'高等级(1)' if pred_label==1 else '低等级(0)'}")

    return prob, pred_label


In [None]:
best_auc = 0

train_losses = []
val_losses = []
val_aucs = []
val_precisions = []
val_recalls = []
val_f1s = []
val_specificities = []

for epoch in range(Config.num_epochs):
    print(f"\nEpoch [{epoch+1}/{Config.num_epochs}]")

    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, Config.device)

    val_metrics = validate(model, val_loader, criterion, Config.device)

    scheduler.step(val_metrics['loss'])

    train_losses.append(train_loss)
    val_losses.append(val_metrics['loss'])
    val_aucs.append(val_metrics['auc'])
    val_precisions.append(val_metrics['precision'])
    val_recalls.append(val_metrics['recall'])
    val_f1s.append(val_metrics['f1'])
    val_specificities.append(val_metrics['specificity'])

    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_metrics['loss']:.4f} | "
          f"Val AUC: {val_metrics['auc']:.4f} | F1: {val_metrics['f1']:.4f} | "
          f"Precision: {val_metrics['precision']:.4f} | Recall: {val_metrics['recall']:.4f} | "
          f"Specificity: {val_metrics['specificity']:.4f}")

    if val_metrics['auc'] > best_auc:
        best_auc = val_metrics['auc']
        torch.save(model.state_dict(), Config.model_save_path)
        print(f"Best model saved with AUC: {best_auc:.4f}")

plot_training_curves(train_losses, val_losses, val_aucs, val_f1s)



Epoch [1/30]


  with autocast():
  with autocast():
  with autocast():
                                                         

Train Loss: 0.6609 | Val Loss: 0.6355 | Val AUC: 0.4956 | F1: 0.8019 | Precision: 0.6693 | Recall: 1.0000 | Specificity: 0.0000
Best model saved with AUC: 0.4956

Epoch [2/30]


  with autocast():
  with autocast():
  with autocast():
                                                         

Train Loss: 0.6513 | Val Loss: 0.6446 | Val AUC: 0.5036 | F1: 0.8019 | Precision: 0.6693 | Recall: 1.0000 | Specificity: 0.0000
Best model saved with AUC: 0.5036

Epoch [3/30]


  with autocast():
  with autocast():
  with autocast():
                                                         

Train Loss: 0.6474 | Val Loss: 0.6362 | Val AUC: 0.5421 | F1: 0.8019 | Precision: 0.6693 | Recall: 1.0000 | Specificity: 0.0000
Best model saved with AUC: 0.5421

Epoch [4/30]


  with autocast():
  with autocast():
  with autocast():
                                                         

Train Loss: 0.6489 | Val Loss: 0.6352 | Val AUC: 0.5134 | F1: 0.8019 | Precision: 0.6693 | Recall: 1.0000 | Specificity: 0.0000

Epoch [5/30]


  with autocast():
  with autocast():
  with autocast():
Validation:  75%|███████▌  | 6/8 [00:43<00:14,  7.37s/it]

In [None]:
# 加载模型
model = BladderClassifier().to(Config.device)
model.load_state_dict(torch.load(Config.model_save_path))
model.eval()

# 推理一张图
test_image_path = ''  
predict_image(model, test_image_path, Config.device, threshold=0.5)

FileNotFoundError: [Errno 2] No such file or directory: 'dataset/images/img001.png'