In [1]:
!pip install torchxrayvision==1.2.1 torchvision
!pip install torch
!pip install scikit-learn
!pip install tqdm
!pip install pandas

Collecting torchxrayvision==1.2.1
  Downloading torchxrayvision-1.2.1-py3-none-any.whl.metadata (18 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1->torchxrayvision==1.2.1)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1->torchxrayvision==1.2.1)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1->torchxrayvision==1.2.1)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1->torchxrayvision==1.2.1)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1->torchxrayvision==1.2.1)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)


In [1]:
import os
# Show the top-level entries of the mounted CheXpert input dataset.
os.listdir("/kaggle/input/chexpert")


['valid.csv', 'valid', 'train.csv', 'train']

In [2]:
!ln -s /kaggle/input/chexpert /kaggle/working/CheXpert-v1.0-small

In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import roc_auc_score

In [3]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
from PIL import Image

# Label columns used as classification targets, in model-output order.
# NOTE: despite the name, this list holds 10 labels, not 14.
CHEXPERT14 = [
    "Enlarged Cardiomediastinum", "Cardiomegaly", "Lung Opacity",
    "Lung Lesion", "Edema", "Consolidation", "Atelectasis",
    "Pneumothorax", "Pleural Effusion", "Support Devices"
]

class CheXpertDataset(Dataset):
    """Multi-label image dataset backed by a CheXpert-style CSV file.

    Args:
        csv_path: CSV file with a ``Path`` (or ``path``) column and any subset
            of the ``CHEXPERT14`` label columns.
        img_dir: Directory that the CSV paths are joined onto.
        transform: Optional callable applied to the RGB PIL image.

    Attributes:
        df: Cleaned frame holding ``Path`` plus the label columns.
        label_cols: Label columns found in the CSV, in ``CHEXPERT14`` order.

    Label cleaning: non-numeric values, missing values and ``-1`` all become
    ``0``; every label column is stored as float32.

    Paths and labels are snapshotted once at construction, so later edits to
    ``df`` are not reflected in the items returned.

    Raises:
        KeyError: If the CSV has neither a ``Path`` nor a ``path`` column.
    """

    def __init__(self, csv_path, img_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform

        df = pd.read_csv(csv_path)
        if "Path" not in df.columns and "path" in df.columns:
            df = df.rename(columns={"path": "Path"})
        if "Path" not in df.columns:
            raise KeyError(f"'Path' column not found in {csv_path}")

        # Selecting only Path + labels also discards metadata columns
        # (Sex, Age, view information, ...), so no separate drop is needed.
        present_labels = [c for c in CHEXPERT14 if c in df.columns]
        df = df[["Path"] + present_labels].copy()

        for c in present_labels:
            numeric = pd.to_numeric(df[c], errors="coerce")  # invalid entries -> NaN
            # -1 -> 0, NaN -> 0, then cast to float32
            df[c] = numeric.replace(-1, 0).fillna(0).astype(np.float32)

        self.df = df
        self.label_cols = present_labels

        # Cache plain Python / NumPy views so __getitem__ does not have to build
        # a pandas row for every sample.
        self._paths = df["Path"].tolist()
        self._labels = df[present_labels].to_numpy(dtype=np.float32)

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self._paths)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for row ``idx``.

        ``labels`` is a float32 tensor with one entry per ``label_cols`` column.

        Raises:
            FileNotFoundError: If the image file does not exist.
        """
        # A leading "/" would make os.path.join discard img_dir.
        img_path = os.path.join(self.img_dir, self._paths[idx].lstrip("/"))

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"File not found: {img_path}")

        # convert() loads the pixel data, so the file can be closed right after.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)

        # Copy so the returned tensor does not share memory with the cache.
        labels = torch.from_numpy(self._labels[idx].copy())  # dtype=float32
        return image, labels


In [5]:
from torchvision import transforms
# Standard ImageNet channel statistics and the input resolution shared by both pipelines.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
IMAGE_SIZE = (224, 224)

CHEXPERT_INPUT_DIR = "/kaggle/input/chexpert"
IMG_ROOT = "/kaggle/working"
CHEXPERT_LINK = os.path.join(IMG_ROOT, "CheXpert-v1.0-small")

# The CSV paths are resolved as IMG_ROOT/CheXpert-v1.0-small/train/..., so that
# directory name has to exist under IMG_ROOT. Create the link here if it is
# missing, so this cell does not depend on a separate shell cell having been run.
if not os.path.lexists(CHEXPERT_LINK):
    os.symlink(CHEXPERT_INPUT_DIR, CHEXPERT_LINK)

# Training pipeline: resize, random horizontal flip, tensor conversion, normalisation.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Validation pipeline: same as training but without the random flip.
val_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

train_dataset = CheXpertDataset(
    csv_path=os.path.join(CHEXPERT_INPUT_DIR, "train.csv"),
    img_dir=IMG_ROOT,
    transform=train_transform
)
val_dataset = CheXpertDataset(
    csv_path=os.path.join(CHEXPERT_INPUT_DIR, "valid.csv"),
    img_dir=IMG_ROOT,
    transform=val_transform
)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=0)

In [10]:
# Pick the GPU when one is visible, otherwise run on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# DenseNet-121 initialised with the IMAGENET1K_V1 weights.
backbone = models.densenet121(weights='IMAGENET1K_V1')

# Replace the classification head with a 10-output linear layer.
num_ftrs = backbone.classifier.in_features
backbone.classifier = nn.Linear(num_ftrs, 10)

model = backbone.to(device)

In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. Define the model: DenseNet-121 without pretrained weights, because every
#    parameter is overwritten by the checkpoint loaded below.
model = models.densenet121(weights=None)
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 10)  # one output per label (10 labels)

# Location of the uploaded checkpoint inside the Kaggle "Input" section.
checkpoint_path = '/kaggle/input/my-classification-model/pytorch/default/1/best_densenet_chexpert.pt'

# Load the weights. weights_only=True restricts unpickling to tensors and plain
# containers, which is all a state dict needs, so a tampered checkpoint file
# cannot run arbitrary code on load.
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

print("وزن‌ها با موفقیت از بخش Input لود شدند!")


وزن‌ها با موفقیت از بخش Input لود شدند!


In [22]:
import os
import cv2
from tqdm import tqdm
# Install the library first (in case it is not installed yet)
!pip install grad-cam

# Now import the Grad-CAM helpers
import torch
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Create the folder that receives the masks produced by the XAI method.
mask_save_path = "generated_masks"
os.makedirs(mask_save_path, exist_ok=True)

model.eval()
target_layers = [model.features.norm5]

# Index of the model output to explain. With the CHEXPERT14 label order (and
# all ten label columns present in the CSV) index 1 is "Cardiomegaly";
# "Pleural Effusion" would be index 8.
target_class = 1

# Build the masks from a deterministic view of the training data.
# train_dataset applies RandomHorizontalFlip, so a mask computed from it would
# be mirrored at random relative to the image stored on disk.
import copy
cam_dataset = copy.copy(train_dataset)
cam_dataset.transform = val_transform

print("در حال تولید هیت‌مپ‌ها به عنوان ماسک...")
# The context manager releases the hooks GradCAM registers on the model once
# mask generation is done, so they do not stay attached during later use of it.
with GradCAM(model=model, target_layers=target_layers) as cam:
    targets = [ClassifierOutputTarget(target_class)]
    for i in tqdm(range(len(cam_dataset))):
        img_tensor, _ = cam_dataset[i]
        input_tensor = img_tensor.unsqueeze(0).to(device)

        # Compute the Grad-CAM map for the single image in the batch.
        grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :]

        # Save the mask as an 8-bit grayscale image; file i belongs to sample i.
        mask_to_save = (grayscale_cam * 255).astype(np.uint8)
        cv2.imwrite(os.path.join(mask_save_path, f"mask_{i}.png"), mask_to_save)

Collecting grad-cam
  Downloading grad-cam-1.5.5.tar.gz (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m61.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ttach (from grad-cam)
  Downloading ttach-0.0.3-py3-none-any.whl.metadata (5.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.7.1->grad-cam)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.7.1->grad-cam)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.7.1->grad-cam)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collect

  0%|          | 0/223414 [00:00<?, ?it/s]


FileNotFoundError: File not found: /kaggle/working/CheXpert-v1.0-small/train/patient00001/study1/view1_frontal.jpg

In [None]:
from torch.utils.data import Dataset
from PIL import Image

class XAIGuidedDataset(Dataset):
    """Pairs each image of ``original_dataset`` with its saved heat-map mask.

    Masks are looked up by name: ``mask_<i>.png`` in ``mask_folder`` belongs to
    ``original_dataset[i]``. Only files with exactly that name pattern and an
    index below ``len(original_dataset)`` are used, so stray files or a
    partially generated mask folder do not break indexing.

    Args:
        original_dataset: Indexable dataset returning ``(image, labels)``.
        mask_folder: Directory containing the ``mask_<i>.png`` files.
        transform: Optional callable ``transform(image, mask) -> (image, mask)``
            applied jointly, so that any augmentation stays identical for both.

    Attributes:
        mask_indices: Indices into ``original_dataset`` that have a mask,
            in ascending numeric order.
        mask_filenames: Mask file names in the same order as ``mask_indices``.
    """

    def __init__(self, original_dataset, mask_folder, transform=None):
        self.original_dataset = original_dataset
        self.mask_folder = mask_folder
        self.transform = transform

        n_images = len(original_dataset)
        found = []
        for fname in os.listdir(mask_folder):
            stem, ext = os.path.splitext(fname)
            if ext != ".png" or not stem.startswith("mask_"):
                continue
            try:
                index = int(stem[len("mask_"):])
            except ValueError:
                continue
            # Round-trip check rejects names such as "mask_007.png" or "mask_-1.png".
            if fname == f"mask_{index}.png" and 0 <= index < n_images:
                found.append(index)

        # Keep the lists ordered numerically (a plain string sort would put 10 before 2).
        self.mask_indices = sorted(found)
        self.mask_filenames = [f"mask_{i}.png" for i in self.mask_indices]

    def __len__(self):
        """Number of images that have a usable mask file."""
        return len(self.mask_indices)

    def __getitem__(self, idx):
        """Return ``(image, mask)``; ``mask`` is float32 in [0, 1] with shape [1, H, W]."""
        # 1. Fetch the image from the wrapped dataset (classification labels are dropped).
        image, _ = self.original_dataset[self.mask_indices[idx]]

        # 2. Load the stored heat-map as a grayscale image.
        mask_path = os.path.join(self.mask_folder, self.mask_filenames[idx])
        with Image.open(mask_path) as mask_img:
            mask_array = np.array(mask_img.convert('L'))

        # Convert to a tensor scaled to [0, 1] and add the channel axis: [1, H, W].
        mask = torch.from_numpy(mask_array).float() / 255.0
        mask = mask.unsqueeze(0)

        if self.transform:
            # Augmentation has to be applied to image and mask together.
            image, mask = self.transform(image, mask)

        return image, mask

# Build the segmentation dataset from an un-augmented copy of the training data.
# train_dataset applies RandomHorizontalFlip to the image only, which would
# leave the image mirrored at random relative to its stored mask.
import copy
seg_source_dataset = copy.copy(train_dataset)
seg_source_dataset.transform = val_transform

seg_dataset = XAIGuidedDataset(seg_source_dataset, mask_save_path)
seg_loader = torch.utils.data.DataLoader(seg_dataset, batch_size=16, shuffle=True)

In [1]:
# Optimisation hyper-parameters.
LEARNING_RATE = 1e-4
COSINE_T_MAX = 10

# Binary cross-entropy on raw logits, one independent term per label.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
# Cosine learning-rate schedule, stepped once per epoch by the training loop.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=COSINE_T_MAX)

NameError: name 'nn' is not defined

In [None]:
# Imported once for the whole cell instead of on every epoch.
from sklearn.metrics import roc_auc_score
import numpy as np


def _macro_auc(labels, preds):
    """Mean ROC AUC over the label columns that contain both classes.

    Columns of ``labels`` with a single distinct value are reported and
    skipped. Returns ``np.nan`` when no column can be scored.
    """
    aucs = []
    for i in range(labels.shape[1]):
        y_true = labels[:, i]
        y_pred = preds[:, i]

        if len(np.unique(y_true)) < 2:
            print(f"Label {i} has only one class in validation set — skipping AUC.")
            continue

        aucs.append(roc_auc_score(y_true, y_pred))

    if len(aucs) > 0:
        return np.mean(aucs)

    print("All labels had only one class — AUC cannot be computed.")
    return np.nan


best_auc = 0.0
EPOCHS = 3

for epoch in range(EPOCHS):

    # ---------------- TRAINING ----------------
    model.train()
    running_loss = 0.0

    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses.
    avg_train_loss = running_loss / len(train_loader)
    print(f"\nEpoch {epoch+1}: Train Loss = {avg_train_loss:.4f}")

    # ---------------- VALIDATION ----------------
    model.eval()
    val_loss = 0
    preds_all, labels_all = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(val_loader, desc="Validation"):
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)

            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # The model emits logits; sigmoid turns them into per-label probabilities.
            preds = torch.sigmoid(outputs).cpu().numpy()
            preds_all.append(preds)
            labels_all.append(labels.cpu().numpy())

    # Combine batch results
    avg_val_loss = val_loss / len(val_loader)
    preds_all = np.concatenate(preds_all, axis=0)
    labels_all = np.concatenate(labels_all, axis=0)

    # ---------------- COMPUTE AUC ----------------
    val_auc = _macro_auc(labels_all, preds_all)

    print(f"Validation: Loss = {avg_val_loss:.4f} | AUC = {val_auc:.4f}")

    # ---------------- SAVE MODEL ----------------
    # Save model for each epoch
    model_path = f"model_epoch_{epoch+1}_auc_{val_auc:.4f}.pt"
    torch.save(model.state_dict(), model_path)
    print(f" Model saved: {model_path}")

    # Save best model. A NaN AUC never compares greater, so it never replaces the best.
    if val_auc > best_auc:
        best_auc = val_auc
        torch.save(model.state_dict(), "best_densenet_chexpert.pt")
        print(f" Best model updated — AUC improved to {best_auc:.4f}")

    # Learning rate scheduler
    scheduler.step()

In [None]:
import os, shutil

# Copy every *.pt checkpoint from the working directory into /kaggle/outputs.
source_dir = "/kaggle/working"
target_dir = "/kaggle/outputs"

os.makedirs(target_dir, exist_ok=True)

checkpoint_names = [name for name in os.listdir(source_dir) if name.endswith(".pt")]
for name in checkpoint_names:
    shutil.copy(os.path.join(source_dir, name), os.path.join(target_dir, name))
    print(f"{name}")

print("\n All models have been successfully copied to /kaggle/outputs. You can download them from the Output → Files page.")

In [None]:
# List the files in /kaggle/outputs with human-readable sizes.
!ls -lh /kaggle/outputs

In [None]:
import torch
import numpy as np
from tqdm import tqdm
from sklearn.metrics import roc_auc_score

# Reload the best checkpoint written by the training loop and score it on the
# validation loader.
ckpt_path = "best_densenet_chexpert.pt"
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs, and avoids running arbitrary pickled code.
state = torch.load(ckpt_path, map_location=device, weights_only=True)
model.load_state_dict(state)
model.to(device)
model.eval()

val_loss = 0.0
preds_all, labels_all = [], []

with torch.no_grad():
    for imgs, labels in tqdm(val_loader, desc="Validation"):
        imgs = imgs.to(device)
        labels = labels.to(device)

        logits = model(imgs)
        loss = criterion(logits, labels)
        val_loss += loss.item()

        # Sigmoid converts logits into per-label probabilities.
        probs = torch.sigmoid(logits).cpu().numpy()
        preds_all.append(probs)
        labels_all.append(labels.cpu().numpy())

# Mean of the per-batch losses.
avg_val_loss = val_loss / len(val_loader)
preds_all = np.concatenate(preds_all, axis=0)   # [N, C]
labels_all = np.concatenate(labels_all, axis=0) # [N, C]

valid_aucs = []
skipped = []

num_classes = labels_all.shape[1]
for i in range(num_classes):
    y_true = labels_all[:, i]
    y_pred = preds_all[:, i]
    # ROC AUC is undefined when a label column contains a single class.
    if len(np.unique(y_true)) < 2:
        skipped.append(i)
        continue
    auc_i = roc_auc_score(y_true, y_pred)
    valid_aucs.append(auc_i)

val_auc = np.mean(valid_aucs) if len(valid_aucs) > 0 else np.nan

print("========== Validation Summary ==========")
print(f"Loss: {avg_val_loss:.4f}")
print(f"Macro AUC (valid cols): {val_auc:.4f}" if not np.isnan(val_auc) else "Macro AUC: NaN")
if skipped:
    print(f"Skipped label indices (no 0/1 mix in val): {skipped}")
print(f"Used {len(valid_aucs)}/{num_classes} labels for AUC.")


In [None]:
# Label names in the same order as the columns of labels_all / preds_all.
label_names = train_dataset.label_cols

# Maps each label to its ROC AUC, or to a placeholder string when the
# validation column holds a single class and the AUC is undefined.
per_label_auc = {}

for col, name in enumerate(label_names):
    truth = labels_all[:, col]
    scores = preds_all[:, col]

    has_both_classes = len(np.unique(truth)) >= 2
    if has_both_classes:
        per_label_auc[name] = roc_auc_score(truth, scores)
    else:
        per_label_auc[name] = "N/A (only one class present)"

print("\n========== PER-LABEL AUC ==========\n")
for name in per_label_auc:
    value = per_label_auc[name]
    print(f"{name:30} : {value}")

# Macro average over the labels that produced a numeric AUC.
valid_aucs = [value for value in per_label_auc.values() if isinstance(value, float)]
print("\nMacro AUC:", np.mean(valid_aucs))