In [None]:
!pip install torchxrayvision==1.2.1 torchvision
!pip install torch
!pip install scikit-learn
!pip install tqdm
!pip install pandas
!pip install grad-cam
# ۱. نصب نسخه‌هایی که با هم سازگار هستند
!pip install --upgrade "numpy<1.24.0" "scipy<1.14.0"

# ۲. ریستارت کردن محیط (اجباری)
# بعد از اجرای کد بالا، از منوی بالای صفحه Kaggle گزینه:
# Run -> Restart Session
# رو بزن تا تغییرات اعمال بشه.

In [1]:
import os
os.listdir("/kaggle/input/chexpert")


['valid.csv', 'valid', 'train.csv', 'train']

In [3]:
# اول اگر لینک یا پوشه‌ای با این اسم هست رو پاک کن
!rm -rf /kaggle/working/CheXpert-v1.0-small

# حالا دوباره لینک رو بساز
!ln -s /kaggle/input/chexpert /kaggle/working/CheXpert-v1.0-small

In [4]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import roc_auc_score

In [5]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
from PIL import Image

CHEXPERT14 = [
    "Enlarged Cardiomediastinum", "Cardiomegaly", "Lung Opacity",
    "Lung Lesion", "Edema", "Consolidation", "Atelectasis",
    "Pneumothorax", "Pleural Effusion", "Support Devices"
]

class CheXpertDataset(Dataset):
    def __init__(self, csv_path, img_dir, transform=None):
        self.df = pd.read_csv(csv_path)
        self.img_dir = img_dir
        self.transform = transform

        if "Path" not in self.df.columns and "path" in self.df.columns:
            self.df.rename(columns={"path": "Path"}, inplace=True)

        drop_cols = ["Sex", "Age", "Frontal/Lateral", "AP/PA", "View", "Unnamed: 0"]
        self.df = self.df.drop(columns=[c for c in drop_cols if c in self.df.columns], errors="ignore")

        present_labels = [c for c in CHEXPERT14 if c in self.df.columns]
        self.df = self.df[["Path"] + present_labels].copy()
        self.label_cols = present_labels

        for c in self.label_cols:
            self.df[c] = pd.to_numeric(self.df[c], errors="coerce")  # 非法转成 NaN
            self.df[c] = self.df[c].replace(-1, 0)                   # -1 -> 0
            self.df[c] = self.df[c].fillna(0).astype(np.float32)     # NaN -> 0, 并转 float32


    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row["Path"].lstrip("/"))

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"File not found: {img_path}")

        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        labels_np = row[self.label_cols].to_numpy(dtype=np.float32, copy=True)
        labels = torch.from_numpy(labels_np)  # dtype=float32
        return image, labels


In [6]:
from torchvision import transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

train_dataset = CheXpertDataset(
    csv_path="/kaggle/input/chexpert/train.csv",
    img_dir="/kaggle/working",
    transform=train_transform
)
val_dataset = CheXpertDataset(
    csv_path="/kaggle/input/chexpert/valid.csv",
    img_dir="/kaggle/working",
    transform=val_transform
)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=0)

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ۱. مدل رو تعریف کن
model = models.densenet121(pretrained=False) # یا هر مدلی که ساختی
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 10) # چون ۱۰ تا لیبل داری

# آدرسی که کپی کردی رو دقیقاً اینجا بذار
checkpoint_path = '/kaggle/input/my-classification-model2/pytorch/default/1/best_densenet_chexpert.pt'

# لود کردن وزن‌ها
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.to(device)
model.eval()

print("وزن‌ها با موفقیت از بخش Input لود شدند!")



وزن‌ها با موفقیت از بخش Input لود شدند!


In [8]:
import os, shutil

os.makedirs("/kaggle/outputs", exist_ok=True)

for file in os.listdir("/kaggle/working"):
    if file.endswith(".pt"):
        src = os.path.join("/kaggle/working", file)
        dst = os.path.join("/kaggle/outputs", file)
        shutil.copy(src, dst)
        print(f"{file}")

print("\n All models have been successfully copied to /kaggle/outputs. You can download them from the Output → Files page.")


 All models have been successfully copied to /kaggle/outputs. You can download them from the Output → Files page.


In [12]:
import os
import cv2
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
from pytorch_grad_cam import GradCAM

# ۱. تنظیمات پوشه‌ها
base_path = '/kaggle/working/CheXpert-v1.0-small'
output_folder = '/kaggle/working/my_custom_dataset'
masks_path = os.path.join(output_folder, 'masks')
os.makedirs(masks_path, exist_ok=True)

print("در حال جستجوی سریع برای پیدا کردن ۱۰۰۰ عکس...", flush=True)

# ۲. پیدا کردن سریع ۱۰۰۰ عکس (بدون اسکن کردن کل دایرکتوری)
all_image_info = []
found_count = 0
for root, _, files in os.walk(base_path):
    for file in files:
        if file.endswith(".jpg"):
            full_path = os.path.join(root, file)
            rel_path = os.path.relpath(full_path, base_path)
            all_image_info.append((full_path, rel_path))
            found_count += 1
        if found_count >= 1000: break # به محض رسیدن به ۱۰۰۰ تا استاپ کن
    if found_count >= 1000: break

print(f"یافت شد! شروع تولید ماسک برای {len(all_image_info)} عکس...", flush=True)

# ۳. تنظیم Grad-CAM
target_layers = [model.features.norm5]
cam = GradCAM(model=model, target_layers=target_layers)

dataset_records = []

# ۴. حلقه تولید با tqdm
for img_path, rel_path in tqdm(all_image_info):
    try:
        rgb_img = cv2.imread(img_path)
        if rgb_img is None: continue
        
        rgb_img_resized = cv2.resize(rgb_img, (224, 224))
        img_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])(rgb_img_resized).unsqueeze(0).to(device)

        # تولید ماسک
        mask = cam(input_tensor=img_tensor, targets=None)[0, :]
        mask_uint8 = (mask * 255).astype(np.uint8)

        # نام‌گذاری و ذخیره
        mask_filename = rel_path.replace(os.sep, '_') 
        mask_save_path = os.path.join(masks_path, mask_filename)
        cv2.imwrite(mask_save_path, mask_uint8)

        dataset_records.append({
            'original_image_path': img_path,
            'mask_path': mask_save_path
        })
    except Exception as e:
        continue

# ۵. ذخیره فایل CSV
df = pd.DataFrame(dataset_records)
df.to_csv(os.path.join(output_folder, 'dataset_info.csv'), index=False)

print(f"\nتموم شد! دیتاست در {output_folder} آماده استفاده است.", flush=True)

در حال جستجوی سریع برای پیدا کردن ۱۰۰۰ عکس...
یافت شد! شروع تولید ماسک برای 1000 عکس...


100%|██████████| 1000/1000 [01:09<00:00, 14.35it/s]


تموم شد! دیتاست در /kaggle/working/my_custom_dataset آماده استفاده است.





In [None]:
!ls -lh /kaggle/outputs

In [None]:
import torch
import numpy as np
from tqdm import tqdm
from sklearn.metrics import roc_auc_score

ckpt_path = "best_densenet_chexpert.pt"  
state = torch.load(ckpt_path, map_location=device)
model.load_state_dict(state)
model.to(device)
model.eval()

val_loss = 0.0
preds_all, labels_all = [], []

with torch.no_grad():
    for imgs, labels in tqdm(val_loader, desc="Validation"):
        imgs = imgs.to(device)
        labels = labels.to(device)

        logits = model(imgs)                      
        loss = criterion(logits, labels)         
        val_loss += loss.item()

        probs = torch.sigmoid(logits).cpu().numpy()   
        preds_all.append(probs)
        labels_all.append(labels.cpu().numpy())

avg_val_loss = val_loss / len(val_loader)
preds_all = np.concatenate(preds_all, axis=0)   # [N, C]
labels_all = np.concatenate(labels_all, axis=0) # [N, C]

valid_aucs = []
skipped = []

num_classes = labels_all.shape[1]
for i in range(num_classes):
    y_true = labels_all[:, i]
    y_pred = preds_all[:, i]
    if len(np.unique(y_true)) < 2:
        skipped.append(i)
        continue
    auc_i = roc_auc_score(y_true, y_pred)
    valid_aucs.append(auc_i)

val_auc = np.mean(valid_aucs) if len(valid_aucs) > 0 else np.nan

print("========== Validation Summary ==========")
print(f"Loss: {avg_val_loss:.4f}")
print(f"Macro AUC (valid cols): {val_auc:.4f}" if not np.isnan(val_auc) else "Macro AUC: NaN")
if skipped:
    print(f"Skipped label indices (no 0/1 mix in val): {skipped}")
print(f"Used {len(valid_aucs)}/{num_classes} labels for AUC.")


In [None]:
label_names = train_dataset.label_cols   # the 10 CheXLocalize labels

per_label_auc = {}

for i, label in enumerate(label_names):
    y_true = labels_all[:, i]
    y_pred = preds_all[:, i]

    # Skip labels with only one class in validation (all 0 or all 1)
    if len(np.unique(y_true)) < 2:
        per_label_auc[label] = "N/A (only one class present)"
        continue

    auc_i = roc_auc_score(y_true, y_pred)
    per_label_auc[label] = auc_i

print("\n========== PER-LABEL AUC ==========\n")
for label, auc in per_label_auc.items():
    print(f"{label:30} : {auc}")

# Mean AUC over valid labels
valid_aucs = [v for v in per_label_auc.values() if isinstance(v, float)]
print("\nMacro AUC:", np.mean(valid_aucs))