In [None]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torchvision.models as models

from sklearn.metrics import roc_auc_score

In [None]:
# Run configuration: the values a reader is most likely to tune.
DATA_CSV = "labels.csv"   # CSV: image_path,label1,label2,... (values 0/1)
IMG_DIR = "images"        # used when the paths in the CSV are relative
NUM_CLASSES = 5           # change this to the number of your labels
BATCH_SIZE = 16
NUM_EPOCHS = 20
LR = 1e-4                 # learning rate handed to the optimizer
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_SAVE = "best_densenet121.pth"   # path the best checkpoint is written to
IMAGE_SIZE = 224          # side length (pixels) of the square model input

In [None]:
class ChestPNGDataset(Dataset):
    """Multi-label image dataset described by a CSV file.

    The first CSV column holds the image path; every remaining column is
    read as one label and cast to ``float32``.

    Args:
        csv_file: Path of the CSV file, read with ``pandas.read_csv``.
        img_dir: Directory prepended to image paths that are not absolute.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.

    Raises:
        ValueError: If any label cell is missing (NaN), or if a label column
            cannot be converted to ``float32``.
    """

    def __init__(self, csv_file, img_dir="", transform=None):
        self.df = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

        # Expect columns: image_path, label_0, label_1, ...
        self.image_paths = self.df.iloc[:, 0].values
        self.labels = self.df.iloc[:, 1:].values.astype(np.float32)

        # A blank label cell becomes NaN and would silently turn the training
        # loss into NaN; fail early with the offending row count instead.
        missing = np.isnan(self.labels)
        if missing.any():
            bad_rows = int(missing.any(axis=1).sum())
            raise ValueError(
                f"{csv_file}: {bad_rows} row(s) contain missing label values"
            )

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label_tensor)``."""
        p = self.image_paths[idx]
        if not os.path.isabs(p):
            p = os.path.join(self.img_dir, p)
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(p) as raw:
            img = raw.convert("RGB")
        # Compare against None so a callable that happens to be falsy
        # (e.g. an empty container-like transform) is still applied.
        if self.transform is not None:
            img = self.transform(img)
        label = torch.tensor(self.labels[idx])
        return img, label

In [None]:
# Transforms
# --------------------
# Per-channel statistics shared by both pipelines. NOTE(review): these look
# like the ImageNet statistics torchvision documents for its pretrained
# models - confirm they suit this dataset.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random crop resized to IMAGE_SIZE plus a random
# horizontal flip, then tensor conversion and normalisation.
_train_steps = [
    T.RandomResizedCrop(IMAGE_SIZE),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
train_transform = T.Compose(_train_steps)

# Validation pipeline: deterministic resize to IMAGE_SIZE x IMAGE_SIZE, then
# the same tensor conversion and normalisation as training.
_val_steps = [
    T.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    T.ToTensor(),
    T.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
val_transform = T.Compose(_val_steps)

In [None]:
# Model
# --------------------
def get_model(num_classes, pretrained=True):
    """Build a DenseNet-121 whose classifier emits ``num_classes`` logits.

    Args:
        num_classes: Number of output units of the replacement linear head.
        pretrained: When true, start from torchvision's pretrained weights;
            otherwise the backbone is randomly initialised.

    Returns:
        The torchvision DenseNet-121 module with its ``classifier`` replaced
        by a freshly initialised ``nn.Linear``.
    """
    weights_enum = getattr(models, "DenseNet121_Weights", None)
    if weights_enum is not None:
        # Newer torchvision deprecates `pretrained=` in favour of `weights=`.
        # IMAGENET1K_V1 is the checkpoint the legacy `pretrained=True` maps to.
        weights = weights_enum.IMAGENET1K_V1 if pretrained else None
        model = models.densenet121(weights=weights)
    else:
        # Older torchvision without the weights enum: keep the legacy call.
        model = models.densenet121(pretrained=pretrained)
    in_f = model.classifier.in_features
    model.classifier = nn.Linear(in_f, num_classes)
    return model


In [None]:
# Training / Validation
# --------------------
def train_one_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Args:
        model: Module to train; switched to training mode here.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.

    Returns:
        The per-sample average of the batch losses, weighted by batch size,
        or ``nan`` when the loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    n_seen = 0
    for imgs, labels in tqdm(loader):
        imgs = imgs.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = model(imgs)  # shape (B, num_classes)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the
        # average (assumes the criterion returns a batch mean - TODO confirm).
        batch_size = imgs.size(0)
        running_loss += loss.item() * batch_size
        n_seen += batch_size

    # Divide by the samples actually processed: len(loader.dataset) is wrong
    # when the loader skips samples (drop_last, custom sampler) and is zero
    # for an empty dataset.
    return running_loss / n_seen if n_seen else float("nan")

@torch.no_grad()
def validate(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` and compute loss and per-class AUROC.

    Args:
        model: Module to evaluate; switched to eval mode here.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.

    Returns:
        A tuple ``(mean_loss, mean_auroc, aurocs)``: the per-sample average
        loss, the mean of the defined per-class AUROCs as a Python float
        (``nan`` when none is defined), and the list of per-class AUROCs
        (``nan`` where the score could not be computed). For a loader that
        yields no samples the result is ``(nan, nan, [])``.
    """
    model.eval()
    total_loss = 0.0
    n_seen = 0
    all_targets = []
    all_outputs = []
    for imgs, labels in tqdm(loader):
        imgs = imgs.to(DEVICE)
        labels = labels.to(DEVICE)
        outputs = model(imgs)
        loss = criterion(outputs, labels)

        batch_size = imgs.size(0)
        total_loss += loss.item() * batch_size
        n_seen += batch_size

        all_targets.append(labels.cpu().numpy())
        # The model emits logits; the sigmoid turns them into scores.
        all_outputs.append(torch.sigmoid(outputs).cpu().numpy())

    # np.concatenate raises on an empty list, so handle the empty loader here.
    if n_seen == 0:
        return float("nan"), float("nan"), []

    all_targets = np.concatenate(all_targets, axis=0)
    all_outputs = np.concatenate(all_outputs, axis=0)

    # Compute per-class AUROC if possible
    aurocs = []
    for i in range(all_targets.shape[1]):
        try:
            au = roc_auc_score(all_targets[:, i], all_outputs[:, i])
        except ValueError:
            # roc_auc_score rejected this column; record it as undefined.
            au = np.nan
        aurocs.append(au)

    # Average only the defined scores; skip np.nanmean when every score is
    # NaN to avoid its "mean of empty slice" RuntimeWarning.
    defined = [a for a in aurocs if not np.isnan(a)]
    mean_auroc = float(np.mean(defined)) if defined else float("nan")

    # Normalise by the samples actually seen rather than len(loader.dataset).
    return total_loss / n_seen, mean_auroc, aurocs

In [None]:
# Main
# --------------------
def main(seed=42):
    """Split the labelled CSV 80/20, train DenseNet-121 and keep the best model.

    Writes ``train_split.csv`` and ``val_split.csv`` to the working directory
    and saves a checkpoint to ``MODEL_SAVE`` whenever the mean validation
    AUROC improves.

    Args:
        seed: Seed for the train/validation shuffle and for torch's global
            RNG. Pass ``None`` for a non-reproducible run.

    Raises:
        ValueError: If the number of label columns in ``DATA_CSV`` differs
            from ``NUM_CLASSES``.
    """
    # Read CSV and split (simple split)
    df = pd.read_csv(DATA_CSV)

    # Every column after the first is treated as a label by the dataset, so a
    # mismatch with the model head would otherwise only show up as a shape
    # error inside the loss on the first batch.
    n_label_cols = df.shape[1] - 1
    if n_label_cols != NUM_CLASSES:
        raise ValueError(
            f"{DATA_CSV} has {n_label_cols} label column(s) but "
            f"NUM_CLASSES is {NUM_CLASSES}"
        )

    # Seed so a re-run reproduces the same split and initialisation.
    if seed is not None:
        torch.manual_seed(seed)
    rng = np.random.default_rng(seed)

    # basic split: 80/20
    n = len(df)
    idx = rng.permutation(n)
    split_at = int(0.8 * n)
    train_idx = idx[:split_at]
    val_idx = idx[split_at:]

    df.iloc[train_idx].to_csv("train_split.csv", index=False)
    df.iloc[val_idx].to_csv("val_split.csv", index=False)

    train_ds = ChestPNGDataset("train_split.csv", img_dir=IMG_DIR, transform=train_transform)
    val_ds = ChestPNGDataset("val_split.csv", img_dir=IMG_DIR, transform=val_transform)

    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    model = get_model(NUM_CLASSES, pretrained=True).to(DEVICE)
    criterion = nn.BCEWithLogitsLoss()   # multi-label
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    best_auroc = 0.0
    for epoch in range(NUM_EPOCHS):
        print(f"Epoch {epoch+1}/{NUM_EPOCHS}")
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss, mean_auroc, aurocs = validate(model, val_loader, criterion)
        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val AUROC: {mean_auroc:.4f}")
        print("Per-class AUROC:", aurocs)

        # A NaN mean AUROC compares false here, so it never replaces the best.
        if mean_auroc > best_auroc:
            best_auroc = mean_auroc
            torch.save({
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
                "epoch": epoch,
                # Store a plain float: a NumPy scalar in the checkpoint is
                # rejected by torch.load(..., weights_only=True).
                "auroc": float(mean_auroc)
            }, MODEL_SAVE)
            print("Saved best model.")

if __name__ == "__main__":
    main()