In [4]:
import os
import random
import torch
from torch.utils.data import Dataset, random_split, DataLoader, Subset
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from PIL import Image
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Run on the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Number of samples per batch handed to the DataLoader.
BATCH_SIZE = 16
# NOTE(review): LR and EPOCHS are not referenced in the visible part of this
# file; presumably training hyper-parameters kept from the training notebook.
LR = 0.001
EPOCHS = 10

# Maps an image file name of the form "<part1>_<part2>.<ext>" to the folder
# that holds it. Each entry is (part1_bound, part2_bound, folder_name) and acts
# as an inclusive upper bound: an image belongs to the FIRST folder whose
# (part1_bound, part2_bound) pair is >= its own (part1, part2) pair, so the
# entries must stay sorted in ascending order.
folder_ranges = [
    (1335, 6, 'images_001'),
    (3923, 13, 'images_002'),
    (6585, 6, 'images_003'),
    (9232, 3, 'images_004'),
    (11558, 7, 'images_005'),
    (13774, 25, 'images_006'),
    (16051, 9, 'images_007'),
    (18387, 34, 'images_008'),
    (20945, 49, 'images_009'),
    (24717, 0, 'images_010'),
    (28173, 2, 'images_011'),
    (30805, 0, 'images_012')
]

# Root directory of the image folders; an image path is built as
# <source_base_dir>/<folder_name>/images/<file name>.
source_base_dir = '/kaggle/input/data/'
# NOTE(review): not referenced in the visible part of this file; presumably the
# writable output directory — confirm before removing.
main_dest_dir = '/kaggle/working/'

class ChestXRayDataset(Dataset):
    """Binary "Nodule" / "no Nodule" image dataset driven by a CSV index.

    Each CSV row contributes one sample: the first column is the image file
    name and the second column a ``|``-separated list of findings. The label
    is ``1.0`` when ``'Nodule'`` is among the findings and ``0.0`` otherwise.
    The image's folder is looked up in the module-level ``folder_ranges``
    table; rows that fall beyond the last range are skipped.

    Attributes are plain parallel lists (``image_paths`` / ``labels``) so that
    ``tackle_idxs`` and ``tackle_train`` can narrow the dataset in place to a
    split.

    Args:
        csv_path: Location of the CSV index. Defaults to the Kaggle path.
        base_dir: Root directory containing the image folders. ``None`` means
            "use the module-level ``source_base_dir``".
    """

    def __init__(self, csv_path='/kaggle/input/data/Data_Entry_2017.csv', base_dir=None):
        self.image_paths = []
        self.labels = []
        # Channel statistics commonly used with ImageNet-pretrained backbones.
        self.normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

        # Until tackle_train() is called both classes share the same
        # deterministic pipeline.
        self.transform_positive = self._build_transform(augment=False)
        self.transform_negative = self._build_transform(augment=False)

        if base_dir is None:
            base_dir = source_base_dir

        with open(csv_path, 'r', encoding='utf-8') as f:
            next(f, None)  # skip the header row (tolerates an empty file)
            for line in f:
                line = line.strip()
                if not line:
                    # A blank (e.g. trailing) line carries no sample.
                    continue
                items = line.split(',')
                image_filename = items[0].strip('"')
                pathologies = items[1].strip('"').split('|')
                label = 1.0 if 'Nodule' in pathologies else 0.0

                source_folder = self._find_source_folder(image_filename)
                if not source_folder:
                    # Image lies beyond the last known folder range.
                    continue

                self.image_paths.append(
                    os.path.join(base_dir, source_folder, 'images', image_filename)
                )
                self.labels.append(label)

    @staticmethod
    def _find_source_folder(image_filename, ranges=None):
        """Return the folder name for ``<part1>_<part2>.<ext>``, or ``None``.

        ``ranges`` defaults to the module-level ``folder_ranges``; the first
        entry whose inclusive upper bound covers the file wins.
        """
        if ranges is None:
            ranges = folder_ranges
        base_name = os.path.splitext(image_filename)[0]
        part1_str, part2_str = base_name.split('_')
        key = (int(part1_str), int(part2_str))
        for bound1, bound2, folder_name in ranges:
            # Tuple comparison == "part1 < bound1, or equal and part2 <= bound2".
            if key <= (bound1, bound2):
                return folder_name
        return None

    def _build_transform(self, augment):
        """Build the preprocessing pipeline, optionally with random augmentation."""
        # NOTE(review): Resize(224) scales the shorter edge only; batching
        # relies on the source images being square — confirm for new data.
        steps = [transforms.Resize(224)]
        if augment:
            steps.extend([
                transforms.RandomHorizontalFlip(p=0.6),
                transforms.RandomRotation(degrees=15),
                transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
            ])
        steps.extend([transforms.ToTensor(), self.normalize])
        return transforms.Compose(steps)

    def _collect(self, idxs, positive_copies=1):
        """Return ``(path, label)`` pairs for ``idxs``; positives repeated ``positive_copies`` times."""
        pairs = []
        for i in idxs:
            pair = (self.image_paths[i], self.labels[i])
            copies = positive_copies if pair[1] == 1.0 else 1
            pairs.extend([pair] * copies)
        return pairs

    def _assign_shuffled(self, pairs):
        """Shuffle ``pairs`` in place and make them the dataset's samples."""
        random.shuffle(pairs)
        # Built with comprehensions (not zip(*pairs)) so an empty selection
        # yields two empty lists instead of an unpacking error.
        self.image_paths = [path for path, _ in pairs]
        self.labels = [label for _, label in pairs]

    def __getitem__(self, index: int):
        """Load sample ``index`` and return ``(image_tensor, label_tensor)``.

        The image is opened as RGB and run through the positive or negative
        pipeline depending on its label; the label is a float32 scalar tensor.
        """
        img_path = self.image_paths[index]
        label = self.labels[index]
        image = Image.open(img_path).convert('RGB')

        if label == 1.0:
            image = self.transform_positive(image)
        else:
            image = self.transform_negative(image)

        return image, torch.tensor(label, dtype=torch.float32)

    def __len__(self) -> int:
        """Return the number of samples currently held."""
        return len(self.image_paths)

    def tackle_idxs(self, idxs):
        """Restrict the dataset, in place, to the samples at ``idxs`` (shuffled).

        Intended for evaluation splits: no oversampling, transforms untouched.
        An empty ``idxs`` leaves an empty dataset.
        """
        self._assign_shuffled(self._collect(idxs))

    def tackle_train(self, idxs, need_to_add=None):
        """Restrict the dataset to ``idxs`` for training, oversampling positives.

        Every positive sample ends up present ``need_to_add`` times in total
        (at least once), positives get a randomly augmenting pipeline, and the
        resulting sample list is shuffled.

        Args:
            idxs: Indices (into the current sample lists) to keep.
            need_to_add: Total number of copies per positive sample. When
                omitted, the module-level ``need_to_add`` is used, which is
                what this method implicitly relied on before.

        Raises:
            NameError: If ``need_to_add`` is neither passed nor defined at
                module level.
        """
        if need_to_add is None:
            need_to_add = globals().get('need_to_add')
            if need_to_add is None:
                raise NameError(
                    "name 'need_to_add' is not defined; pass need_to_add=... to tackle_train()"
                )

        pairs = self._collect(idxs, positive_copies=max(need_to_add, 1))
        self.transform_positive = self._build_transform(augment=True)
        self._assign_shuffled(pairs)

class DenseNet121(nn.Module):
    """DenseNet-121 backbone with a small single-logit classification head.

    The torchvision model is created with the ``IMAGENET1K_V1`` weights, its
    classifier is replaced by Dropout -> Linear(256) -> ReLU -> Dropout ->
    Linear(1), and only the later feature stages plus the new head are left
    trainable; everything before ``features.denseblock2`` is frozen.
    """

    def __init__(self):
        super().__init__()
        backbone = torchvision.models.densenet121(weights="IMAGENET1K_V1")

        # Swap the stock classifier for a two-layer head emitting one logit.
        in_features = backbone.classifier.in_features
        backbone.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(in_features, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 1),
        )

        # Parameters whose name starts with one of these prefixes stay
        # trainable; every other parameter is frozen.
        trainable_prefixes = (
            "features.denseblock2",
            "features.transition2",
            "features.denseblock3",
            "features.transition3",
            "features.denseblock4",
            "features.norm5",
            "classifier",
        )
        for param_name, param in backbone.named_parameters():
            param.requires_grad = param_name.startswith(trainable_prefixes)

        self.densenet = backbone

    def forward(self, x):
        """Return the wrapped network's output for the batch ``x``."""
        return self.densenet(x)
        
# ---- Full-dataset evaluation of a previously trained checkpoint ----
# Builds the complete dataset, restores the trained weights, scores every
# image, reports threshold-0.5 metrics plus ROC AUC, and saves the ROC curve.
dataset_all = ChestXRayDataset()
loader_all = DataLoader(dataset_all, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

model = DenseNet121().to(device)
# NOTE(review): torch.load unpickles the file. For checkpoints that do not come
# from a trusted source, pass weights_only=True on PyTorch versions that
# support it.
checkpoint = torch.load("/kaggle/input/densenet121adam/pytorch/densenet121_final_model_new1-2.pth/1/DenseNet121_final_model_new1 (2).pth", map_location=device)
model.load_state_dict(checkpoint)
model.eval()  # disable dropout / use running batch-norm statistics

all_labels = []
all_preds = []
all_probs = []

with torch.no_grad():
    for imgs, labels in loader_all:
        # Only the images are needed on the device; labels are consumed on the
        # host, so they are not copied to the device and back.
        imgs = imgs.to(device)
        logits = model(imgs).view(-1)
        probs = torch.sigmoid(logits)
        preds = (probs >= 0.5).float()
        # The per-batch debug dump of labels/probs/preds was removed: it
        # printed three tensors for every batch of the full dataset.
        all_labels.extend(labels.tolist())
        all_probs.extend(probs.cpu().tolist())
        all_preds.extend(preds.cpu().tolist())

# Hard-decision metrics use the 0.5-thresholded predictions; the ROC metrics
# use the raw sigmoid probabilities.
accuracy  = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, zero_division=0)
recall    = recall_score(all_labels, all_preds, zero_division=0)
f1        = f1_score(all_labels, all_preds, zero_division=0)
auc       = roc_auc_score(all_labels, all_probs)
fpr, tpr, _ = roc_curve(all_labels, all_probs)

print("===== Full-Dataset Evaluation =====")
print(f"Samples: {len(dataset_all)}")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print(f"ROC AUC  : {auc:.4f}")

# ROC curve with the chance diagonal for reference; written to the working
# directory and closed so no figure is left open.
plt.figure()
plt.plot(fpr, tpr, label=f'ROC AUC = {auc:.4f}')
plt.plot([0,1], [0,1], '--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve — Full Dataset')
plt.legend(loc='lower right')
plt.savefig('DenseNet121_full_dataset_roc.png')
plt.close()