<a href="https://colab.research.google.com/github/Sandeep-4469/Chexpert_solution/blob/main/DSA_Single_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# Authenticate with Kaggle before any dataset/model download is attempted.
# NOTE(review): login() presumably asks for credentials interactively, so this
# cell may block an unattended "Run All" - confirm for your environment.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Download the CheXpert dataset and a previously uploaded Atelectasis model.
# Both calls return a value that is stored in a *_path variable.
# NOTE(review): neither *_path variable is referenced by the cells visible
# below, which use hard-coded /kaggle/input/... paths instead. Outside Kaggle
# those hard-coded paths presumably do not exist - confirm and, if needed,
# derive the data directories from `sandeep4469_chexpert_path`.
sandeep4469_chexpert_path = kagglehub.dataset_download('sandeep4469/chexpert')
sandeep4469_atelectasis_best_fine_tuned_model_pytorch_default_1_path = kagglehub.model_download('sandeep4469/atelectasis_best_fine_tuned_model/PyTorch/default/1')

print('Data source import complete.')


In [None]:
import os
import shutil

# Source directories: each is expected to contain one sub-folder per label
# (the disease names listed below plus 'No_Finding').
train_dir = '/kaggle/input/chexpert/Dataset/train'
valid_dir = '/kaggle/input/chexpert/Dataset/valid'
# Root under which one '<disease>_' folder per disease is created.
output_dir = '/kaggle/working'

# Diseases for which a separate binary (disease vs. no finding) dataset is built.
diseases = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Pleural_Effusion', 'Pneumonia']

def _copy_image_files(src_dir, dst_dir):
    """Copy every regular file directly inside ``src_dir`` into ``dst_dir``.

    Sub-directories and other non-file entries are skipped (``shutil.copy``
    would raise on them). A missing ``src_dir`` is treated as empty.

    Returns:
        int: number of files copied.
    """
    if not os.path.isdir(src_dir):
        return 0

    copied = 0
    for file_name in os.listdir(src_dir):
        src = os.path.join(src_dir, file_name)
        if not os.path.isfile(src):
            continue
        shutil.copy(src, os.path.join(dst_dir, file_name))
        copied += 1
    return copied


def create_new_structure(output_dir, diseases, source_train_dir=None, source_valid_dir=None):
    """Build one binary-classification folder tree per disease.

    For every ``disease`` the following layout is created under ``output_dir``
    and filled with copies of the source images::

        <disease>_/train/<disease>/     <- <source_train_dir>/<disease>
        <disease>_/train/no_finding/    <- <source_train_dir>/No_Finding
        <disease>_/valid/<disease>/     <- <source_valid_dir>/<disease>
        <disease>_/valid/no_finding/    <- <source_valid_dir>/No_Finding

    The target folders are always created; a source folder that does not exist
    simply contributes no files. Note that the 'No_Finding' images are copied
    once per disease.

    Args:
        output_dir: Root directory that receives the ``<disease>_`` folders.
        diseases: Iterable of disease folder names.
        source_train_dir: Directory holding the per-label training folders.
            Defaults to the module-level ``train_dir``.
        source_valid_dir: Directory holding the per-label validation folders.
            Defaults to the module-level ``valid_dir``.
    """
    # Fall back to the notebook-level configuration so existing two-argument
    # calls keep working unchanged.
    if source_train_dir is None:
        source_train_dir = train_dir
    if source_valid_dir is None:
        source_valid_dir = valid_dir

    split_sources = (('train', source_train_dir), ('valid', source_valid_dir))

    for disease in diseases:
        disease_folder = os.path.join(output_dir, f"{disease}_")
        # (source label folder, target class folder) pairs for this disease.
        class_pairs = ((disease, f'{disease}'), ('No_Finding', 'no_finding'))

        for split_name, source_root in split_sources:
            for source_label, target_class in class_pairs:
                target_dir = os.path.join(disease_folder, split_name, target_class)
                os.makedirs(target_dir, exist_ok=True)
                _copy_image_files(os.path.join(source_root, source_label), target_dir)

# Build the per-disease folder trees under `output_dir` from the source data.
create_new_structure(output_dir, diseases)

# Printed unconditionally once the call above returns without raising.
print("New folder structure created successfully.")


New folder structure created successfully.


In [None]:
import copy
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from tqdm import tqdm

# Paths to directories and other configurations
# NOTE: `output_dir` is where the checkpoints are written. The re-organised
# images are read from /kaggle/working/<disease>_/ (see the loop below), not
# from this directory.
output_dir = '/kaggle/working/chexpert_reorganized'
os.makedirs(output_dir, exist_ok=True)

batch_size = 32
epochs = 10
learning_rate = 1e-3

# Define data augmentation and preprocessing
train_transforms = transforms.Compose([
    transforms.RandomRotation(20),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

valid_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# List of diseases
diseases = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Pleural_Effusion', 'Pneumonia']
fine_tuned_models = []

# The device does not depend on the disease, so pick it once.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _run_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_batch_loss, accuracy_percent)``.

    When ``optimizer`` is given the model is put in training mode and its
    weights are updated after every batch; otherwise the model is put in eval
    mode and gradients are disabled.

    Args:
        model: Network producing one sigmoid probability per sample.
        loader: DataLoader yielding ``(inputs, labels)`` batches.
        criterion: Loss taking ``(outputs, float_targets)``.
        device: Device the batches are moved to.
        desc: Label shown on the progress bar.
        optimizer: Optimizer to step, or ``None`` for evaluation.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(training), tqdm(loader, desc=desc, ncols=100) as t:
        # Count batches ourselves: tqdm's `t.n` is only refreshed lazily, which
        # made the progress-bar loss disagree with the printed epoch loss.
        for batch_idx, (inputs, labels) in enumerate(t, start=1):
            inputs = inputs.to(device)
            targets = labels.to(device).unsqueeze(1).float()

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, targets)

            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            preds = (outputs > 0.5).float()
            correct += (preds == targets).sum().item()
            total += targets.size(0)

            t.set_postfix(loss=loss_sum / batch_idx, accuracy=100 * correct / total)

    # max(..., 1) only protects against an empty loader.
    return loss_sum / max(len(loader), 1), 100 * correct / max(total, 1)


# Loop through each disease
for disease in diseases:
    disease_train_dir = f'/kaggle/working/{disease}_/train'
    disease_valid_dir = f'/kaggle/working/{disease}_/valid'

    if not os.path.exists(disease_train_dir) or not os.path.exists(disease_valid_dir):
        print(f"Data for {disease} not found in the expected directory structure.")
        continue

    # Create train and validation datasets and dataloaders.
    # ImageFolder assigns class indices by sorted folder name; the capitalised
    # disease folder sorts before 'no_finding', so the labels are
    # disease -> 0 and no_finding -> 1. The sigmoid output trained below is
    # therefore P(no_finding), not P(disease).
    train_dataset = ImageFolder(disease_train_dir, transform=train_transforms)
    valid_dataset = ImageFolder(disease_valid_dir, transform=valid_transforms)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    # Model setup with DenseNet and additional layers
    base_model = models.densenet121(pretrained=True)
    # Freeze everything except the last 5 *parameter tensors* (not 5 layers).
    for param in list(base_model.parameters())[:-5]:
        param.requires_grad = False

    # Replace the classifier with a small trainable head ending in a sigmoid.
    num_features = base_model.classifier.in_features
    base_model.classifier = nn.Sequential(
        nn.Linear(num_features, 512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, 1),
        nn.Sigmoid()
    )

    base_model.to(device)

    # Define loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(base_model.parameters(), lr=learning_rate)

    # Start below any reachable accuracy so the first epoch is always recorded.
    best_val_accuracy = -1.0
    best_model = None

    # Training loop
    for epoch in range(epochs):
        epoch_loss, epoch_accuracy = _run_epoch(
            base_model, train_loader, criterion, device,
            desc=f"Training Epoch {epoch + 1}/{epochs}", optimizer=optimizer,
        )
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

        # Validation
        val_loss, val_accuracy = _run_epoch(
            base_model, valid_loader, criterion, device,
            desc=f"Validating Epoch {epoch + 1}/{epochs}",
        )
        print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

        # Update the best model if validation accuracy improves.
        # state_dict() returns references to the live tensors, so it must be
        # deep-copied; otherwise later epochs overwrite the "best" weights and
        # the final-epoch model is what gets saved.
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_model = copy.deepcopy(base_model.state_dict())

    if best_model is None:
        # Only reachable when no epoch ran (epochs == 0).
        print(f"No epochs were run for {disease}; no model saved.")
        continue

    # Save the best model for the current disease
    model_path = os.path.join(output_dir, f"{disease}_best_fine_tuned_model.pth")
    torch.save(best_model, model_path)
    print(f"Best fine-tuned model for {disease} saved successfully at {model_path}.")

    # Store the best model in the list
    fine_tuned_models.append(best_model)

print("All best models have been fine-tuned and saved.")


Training Epoch 1/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.29it/s, accuracy=68.9, loss=0.585]


Epoch [1/10], Loss: 0.5854, Accuracy: 68.91%


Validating Epoch 1/10: 100%|███████████████| 4/4 [00:00<00:00,  6.67it/s, accuracy=85.6, loss=0.404]


Validation Loss: 0.4040, Validation Accuracy: 85.59%


Training Epoch 2/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.24it/s, accuracy=71.1, loss=0.565]


Epoch [2/10], Loss: 0.5652, Accuracy: 71.09%


Validating Epoch 2/10: 100%|███████████████| 4/4 [00:00<00:00,  6.63it/s, accuracy=79.7, loss=0.427]


Validation Loss: 0.4269, Validation Accuracy: 79.66%


Training Epoch 3/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.26it/s, accuracy=71.5, loss=0.559]


Epoch [3/10], Loss: 0.5591, Accuracy: 71.52%


Validating Epoch 3/10: 100%|███████████████| 4/4 [00:00<00:00,  6.56it/s, accuracy=83.1, loss=0.436]


Validation Loss: 0.4363, Validation Accuracy: 83.05%


Training Epoch 4/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.28it/s, accuracy=71.8, loss=0.556]


Epoch [4/10], Loss: 0.5560, Accuracy: 71.77%


Validating Epoch 4/10: 100%|███████████████| 4/4 [00:00<00:00,  5.15it/s, accuracy=83.9, loss=0.394]


Validation Loss: 0.3936, Validation Accuracy: 83.90%


Training Epoch 5/10: 100%|████████████| 1250/1250 [01:34<00:00, 13.26it/s, accuracy=72.1, loss=0.55]


Epoch [5/10], Loss: 0.5502, Accuracy: 72.08%


Validating Epoch 5/10: 100%|███████████████| 4/4 [00:00<00:00,  6.45it/s, accuracy=82.2, loss=0.389]


Validation Loss: 0.3888, Validation Accuracy: 82.20%


Training Epoch 6/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.24it/s, accuracy=72.1, loss=0.549]


Epoch [6/10], Loss: 0.5493, Accuracy: 72.12%


Validating Epoch 6/10: 100%|███████████████| 4/4 [00:00<00:00,  6.25it/s, accuracy=79.7, loss=0.462]


Validation Loss: 0.4623, Validation Accuracy: 79.66%


Training Epoch 7/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.25it/s, accuracy=72.3, loss=0.545]


Epoch [7/10], Loss: 0.5452, Accuracy: 72.31%


Validating Epoch 7/10: 100%|███████████████| 4/4 [00:00<00:00,  6.54it/s, accuracy=82.2, loss=0.368]


Validation Loss: 0.3682, Validation Accuracy: 82.20%


Training Epoch 8/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.22it/s, accuracy=72.6, loss=0.547]


Epoch [8/10], Loss: 0.5466, Accuracy: 72.57%


Validating Epoch 8/10: 100%|███████████████| 4/4 [00:00<00:00,  6.37it/s, accuracy=83.9, loss=0.427]


Validation Loss: 0.4274, Validation Accuracy: 83.90%


Training Epoch 9/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.25it/s, accuracy=72.6, loss=0.544]


Epoch [9/10], Loss: 0.5439, Accuracy: 72.64%


Validating Epoch 9/10: 100%|███████████████| 4/4 [00:00<00:00,  6.73it/s, accuracy=81.4, loss=0.429]


Validation Loss: 0.4293, Validation Accuracy: 81.36%


Training Epoch 10/10: 100%|████████████| 1250/1250 [01:34<00:00, 13.28it/s, accuracy=73, loss=0.541]


Epoch [10/10], Loss: 0.5401, Accuracy: 72.99%


Validating Epoch 10/10: 100%|██████████████| 4/4 [00:00<00:00,  6.14it/s, accuracy=79.7, loss=0.395]


Validation Loss: 0.3954, Validation Accuracy: 79.66%
Best fine-tuned model for Atelectasis saved successfully at /kaggle/working/chexpert_reorganized/Atelectasis_best_fine_tuned_model.pth.


Training Epoch 1/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.27it/s, accuracy=72.4, loss=0.548]


Epoch [1/10], Loss: 0.5478, Accuracy: 72.44%


Validating Epoch 1/10: 100%|███████████████| 4/4 [00:00<00:00,  6.85it/s, accuracy=81.1, loss=0.434]


Validation Loss: 0.4338, Validation Accuracy: 81.13%


Training Epoch 2/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.26it/s, accuracy=74.1, loss=0.524]


Epoch [2/10], Loss: 0.5244, Accuracy: 74.11%


Validating Epoch 2/10: 100%|███████████████| 4/4 [00:00<00:00,  7.06it/s, accuracy=76.4, loss=0.461]


Validation Loss: 0.4609, Validation Accuracy: 76.42%


Training Epoch 3/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.25it/s, accuracy=75.2, loss=0.514]


Epoch [3/10], Loss: 0.5140, Accuracy: 75.17%


Validating Epoch 3/10: 100%|███████████████| 4/4 [00:00<00:00,  7.11it/s, accuracy=74.5, loss=0.485]


Validation Loss: 0.4852, Validation Accuracy: 74.53%


Training Epoch 4/10: 100%|████████████| 1250/1250 [01:34<00:00, 13.27it/s, accuracy=75.4, loss=0.51]


Epoch [4/10], Loss: 0.5100, Accuracy: 75.38%


Validating Epoch 4/10: 100%|███████████████| 4/4 [00:00<00:00,  6.84it/s, accuracy=75.5, loss=0.457]


Validation Loss: 0.4570, Validation Accuracy: 75.47%


Training Epoch 5/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.25it/s, accuracy=75.7, loss=0.507]


Epoch [5/10], Loss: 0.5071, Accuracy: 75.66%


Validating Epoch 5/10: 100%|███████████████| 4/4 [00:00<00:00,  7.47it/s, accuracy=74.5, loss=0.506]


Validation Loss: 0.5059, Validation Accuracy: 74.53%


Training Epoch 6/10: 100%|███████████| 1250/1250 [01:33<00:00, 13.41it/s, accuracy=75.7, loss=0.506]


Epoch [6/10], Loss: 0.5064, Accuracy: 75.65%


Validating Epoch 6/10: 100%|█████████████████| 4/4 [00:00<00:00,  7.02it/s, accuracy=84, loss=0.415]


Validation Loss: 0.4150, Validation Accuracy: 83.96%


Training Epoch 7/10: 100%|███████████| 1250/1250 [01:33<00:00, 13.37it/s, accuracy=76.1, loss=0.501]


Epoch [7/10], Loss: 0.5011, Accuracy: 76.14%


Validating Epoch 7/10: 100%|███████████████| 4/4 [00:00<00:00,  6.82it/s, accuracy=75.5, loss=0.486]


Validation Loss: 0.4859, Validation Accuracy: 75.47%


Training Epoch 8/10: 100%|███████████| 1250/1250 [01:33<00:00, 13.40it/s, accuracy=76.4, loss=0.497]


Epoch [8/10], Loss: 0.4972, Accuracy: 76.41%


Validating Epoch 8/10: 100%|███████████████| 4/4 [00:00<00:00,  6.89it/s, accuracy=80.2, loss=0.418]


Validation Loss: 0.4181, Validation Accuracy: 80.19%


Training Epoch 9/10: 100%|███████████| 1250/1250 [01:33<00:00, 13.38it/s, accuracy=76.6, loss=0.497]


Epoch [9/10], Loss: 0.4968, Accuracy: 76.59%


Validating Epoch 9/10: 100%|███████████████| 4/4 [00:00<00:00,  5.96it/s, accuracy=75.5, loss=0.476]


Validation Loss: 0.4763, Validation Accuracy: 75.47%


Training Epoch 10/10: 100%|██████████| 1250/1250 [01:34<00:00, 13.21it/s, accuracy=76.8, loss=0.494]


Epoch [10/10], Loss: 0.4942, Accuracy: 76.76%


Validating Epoch 10/10: 100%|██████████████| 4/4 [00:00<00:00,  7.18it/s, accuracy=81.1, loss=0.431]


Validation Loss: 0.4310, Validation Accuracy: 81.13%
Best fine-tuned model for Cardiomegaly saved successfully at /kaggle/working/chexpert_reorganized/Cardiomegaly_best_fine_tuned_model.pth.


Training Epoch 1/10: 100%|███████████| 1087/1087 [01:22<00:00, 13.21it/s, accuracy=74.1, loss=0.526]


Epoch [1/10], Loss: 0.5253, Accuracy: 74.14%


Validating Epoch 1/10: 100%|███████████████| 3/3 [00:00<00:00,  6.47it/s, accuracy=94.4, loss=0.444]


Validation Loss: 0.2961, Validation Accuracy: 94.37%


Training Epoch 2/10: 100%|█████████████| 1087/1087 [01:21<00:00, 13.28it/s, accuracy=76, loss=0.502]


Epoch [2/10], Loss: 0.5019, Accuracy: 76.03%


Validating Epoch 2/10: 100%|████████████████| 3/3 [00:00<00:00,  6.53it/s, accuracy=94.4, loss=0.44]


Validation Loss: 0.2936, Validation Accuracy: 94.37%


Training Epoch 3/10: 100%|███████████| 1087/1087 [01:21<00:00, 13.33it/s, accuracy=76.6, loss=0.493]


Epoch [3/10], Loss: 0.4925, Accuracy: 76.63%


Validating Epoch 3/10: 100%|███████████████| 3/3 [00:00<00:00,  6.41it/s, accuracy=91.5, loss=0.366]


Validation Loss: 0.2438, Validation Accuracy: 91.55%


Training Epoch 4/10: 100%|███████████| 1087/1087 [01:21<00:00, 13.29it/s, accuracy=76.9, loss=0.491]


Epoch [4/10], Loss: 0.4901, Accuracy: 76.93%


Validating Epoch 4/10: 100%|███████████████| 3/3 [00:00<00:00,  6.63it/s, accuracy=91.5, loss=0.409]


Validation Loss: 0.2725, Validation Accuracy: 91.55%


Training Epoch 5/10: 100%|███████████| 1087/1087 [01:21<00:00, 13.29it/s, accuracy=77.1, loss=0.483]


Epoch [5/10], Loss: 0.4833, Accuracy: 77.10%


Validating Epoch 5/10: 100%|███████████████| 3/3 [00:00<00:00,  6.26it/s, accuracy=88.7, loss=0.403]


Validation Loss: 0.2686, Validation Accuracy: 88.73%


Training Epoch 6/10: 100%|████████████| 1087/1087 [01:21<00:00, 13.28it/s, accuracy=77.8, loss=0.48]


Epoch [6/10], Loss: 0.4791, Accuracy: 77.75%


Validating Epoch 6/10: 100%|███████████████| 3/3 [00:00<00:00,  6.37it/s, accuracy=88.7, loss=0.382]


Validation Loss: 0.2548, Validation Accuracy: 88.73%


Training Epoch 7/10: 100%|███████████| 1087/1087 [01:21<00:00, 13.29it/s, accuracy=77.7, loss=0.478]


Epoch [7/10], Loss: 0.4778, Accuracy: 77.67%


Validating Epoch 7/10: 100%|███████████████| 3/3 [00:00<00:00,  6.40it/s, accuracy=91.5, loss=0.364]


Validation Loss: 0.2427, Validation Accuracy: 91.55%


Training Epoch 8/10: 100%|█████████████| 1087/1087 [01:22<00:00, 13.23it/s, accuracy=78, loss=0.476]


Epoch [8/10], Loss: 0.4758, Accuracy: 77.97%


Validating Epoch 8/10: 100%|███████████████| 3/3 [00:00<00:00,  6.39it/s, accuracy=94.4, loss=0.359]


Validation Loss: 0.2395, Validation Accuracy: 94.37%


Training Epoch 9/10: 100%|█████████████| 1087/1087 [01:21<00:00, 13.31it/s, accuracy=78, loss=0.471]


Epoch [9/10], Loss: 0.4710, Accuracy: 78.04%


Validating Epoch 9/10: 100%|███████████████| 3/3 [00:00<00:00,  6.18it/s, accuracy=90.1, loss=0.402]


Validation Loss: 0.2681, Validation Accuracy: 90.14%


Training Epoch 10/10: 100%|██████████| 1087/1087 [01:22<00:00, 13.24it/s, accuracy=77.9, loss=0.474]


Epoch [10/10], Loss: 0.4745, Accuracy: 77.89%


Validating Epoch 10/10: 100%|██████████████| 3/3 [00:00<00:00,  6.73it/s, accuracy=87.3, loss=0.351]


Validation Loss: 0.2341, Validation Accuracy: 87.32%
Best fine-tuned model for Consolidation saved successfully at /kaggle/working/chexpert_reorganized/Consolidation_best_fine_tuned_model.pth.


Training Epoch 1/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.28it/s, accuracy=77.6, loss=0.474]


Epoch [1/10], Loss: 0.4733, Accuracy: 77.61%


Validating Epoch 1/10: 100%|███████████████| 3/3 [00:00<00:00,  6.21it/s, accuracy=86.7, loss=0.476]


Validation Loss: 0.3176, Validation Accuracy: 86.75%


Training Epoch 2/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.22it/s, accuracy=79.4, loss=0.446]


Epoch [2/10], Loss: 0.4455, Accuracy: 79.41%


Validating Epoch 2/10: 100%|█████████████████| 3/3 [00:00<00:00,  5.72it/s, accuracy=88, loss=0.464]


Validation Loss: 0.3091, Validation Accuracy: 87.95%


Training Epoch 3/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.22it/s, accuracy=80.1, loss=0.435]


Epoch [3/10], Loss: 0.4351, Accuracy: 80.09%


Validating Epoch 3/10: 100%|█████████████████| 3/3 [00:00<00:00,  5.66it/s, accuracy=88, loss=0.381]


Validation Loss: 0.2542, Validation Accuracy: 87.95%


Training Epoch 4/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.20it/s, accuracy=80.2, loss=0.434]


Epoch [4/10], Loss: 0.4337, Accuracy: 80.23%


Validating Epoch 4/10: 100%|███████████████| 3/3 [00:00<00:00,  6.38it/s, accuracy=86.7, loss=0.469]


Validation Loss: 0.3124, Validation Accuracy: 86.75%


Training Epoch 5/10: 100%|███████████| 1250/1250 [01:35<00:00, 13.15it/s, accuracy=80.6, loss=0.428]


Epoch [5/10], Loss: 0.4281, Accuracy: 80.58%


Validating Epoch 5/10: 100%|████████████████| 3/3 [00:00<00:00,  5.95it/s, accuracy=86.7, loss=0.45]


Validation Loss: 0.3002, Validation Accuracy: 86.75%


Training Epoch 6/10: 100%|█████████████| 1250/1250 [01:34<00:00, 13.16it/s, accuracy=81, loss=0.422]


Epoch [6/10], Loss: 0.4224, Accuracy: 80.99%


Validating Epoch 6/10: 100%|███████████████| 3/3 [00:00<00:00,  5.98it/s, accuracy=86.7, loss=0.478]


Validation Loss: 0.3190, Validation Accuracy: 86.75%


Training Epoch 7/10: 100%|██████████████| 1250/1250 [01:34<00:00, 13.26it/s, accuracy=81, loss=0.42]


Epoch [7/10], Loss: 0.4202, Accuracy: 81.04%


Validating Epoch 7/10: 100%|█████████████████| 3/3 [00:00<00:00,  5.26it/s, accuracy=88, loss=0.463]


Validation Loss: 0.3084, Validation Accuracy: 87.95%


Training Epoch 8/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.23it/s, accuracy=81.4, loss=0.414]


Epoch [8/10], Loss: 0.4141, Accuracy: 81.38%


Validating Epoch 8/10: 100%|█████████████████| 3/3 [00:00<00:00,  5.83it/s, accuracy=88, loss=0.406]


Validation Loss: 0.2708, Validation Accuracy: 87.95%


Training Epoch 9/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.19it/s, accuracy=81.4, loss=0.415]


Epoch [9/10], Loss: 0.4152, Accuracy: 81.41%


Validating Epoch 9/10: 100%|███████████████| 3/3 [00:00<00:00,  5.43it/s, accuracy=86.7, loss=0.406]


Validation Loss: 0.2705, Validation Accuracy: 86.75%


Training Epoch 10/10: 100%|██████████| 1250/1250 [01:34<00:00, 13.19it/s, accuracy=81.4, loss=0.415]


Epoch [10/10], Loss: 0.4149, Accuracy: 81.44%


Validating Epoch 10/10: 100%|██████████████| 3/3 [00:00<00:00,  5.67it/s, accuracy=92.8, loss=0.385]


Validation Loss: 0.2570, Validation Accuracy: 92.77%
Best fine-tuned model for Edema saved successfully at /kaggle/working/chexpert_reorganized/Edema_best_fine_tuned_model.pth.


Training Epoch 1/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.17it/s, accuracy=73.3, loss=0.535]


Epoch [1/10], Loss: 0.5350, Accuracy: 73.34%


Validating Epoch 1/10: 100%|███████████████| 4/4 [00:00<00:00,  6.81it/s, accuracy=77.1, loss=0.418]


Validation Loss: 0.4178, Validation Accuracy: 77.14%


Training Epoch 2/10: 100%|████████████| 1250/1250 [01:34<00:00, 13.23it/s, accuracy=75.4, loss=0.51]


Epoch [2/10], Loss: 0.5100, Accuracy: 75.37%


Validating Epoch 2/10: 100%|████████████████| 4/4 [00:00<00:00,  6.83it/s, accuracy=82.9, loss=0.42]


Validation Loss: 0.4201, Validation Accuracy: 82.86%


Training Epoch 3/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.22it/s, accuracy=76.2, loss=0.498]


Epoch [3/10], Loss: 0.4981, Accuracy: 76.19%


Validating Epoch 3/10: 100%|█████████████████| 4/4 [00:00<00:00,  7.30it/s, accuracy=81, loss=0.394]


Validation Loss: 0.3938, Validation Accuracy: 80.95%


Training Epoch 4/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.24it/s, accuracy=76.7, loss=0.491]


Epoch [4/10], Loss: 0.4914, Accuracy: 76.71%


Validating Epoch 4/10: 100%|███████████████| 4/4 [00:00<00:00,  7.20it/s, accuracy=76.2, loss=0.404]


Validation Loss: 0.4038, Validation Accuracy: 76.19%


Training Epoch 5/10: 100%|█████████████| 1250/1250 [01:34<00:00, 13.25it/s, accuracy=77, loss=0.489]


Epoch [5/10], Loss: 0.4891, Accuracy: 76.99%


Validating Epoch 5/10: 100%|███████████████| 4/4 [00:00<00:00,  6.87it/s, accuracy=85.7, loss=0.398]


Validation Loss: 0.3981, Validation Accuracy: 85.71%


Training Epoch 6/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.21it/s, accuracy=77.3, loss=0.484]


Epoch [6/10], Loss: 0.4843, Accuracy: 77.28%


Validating Epoch 6/10: 100%|███████████████| 4/4 [00:00<00:00,  6.75it/s, accuracy=84.8, loss=0.373]


Validation Loss: 0.3728, Validation Accuracy: 84.76%


Training Epoch 7/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.22it/s, accuracy=77.5, loss=0.482]


Epoch [7/10], Loss: 0.4820, Accuracy: 77.53%


Validating Epoch 7/10: 100%|███████████████| 4/4 [00:00<00:00,  7.01it/s, accuracy=83.8, loss=0.378]


Validation Loss: 0.3782, Validation Accuracy: 83.81%


Training Epoch 8/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.22it/s, accuracy=77.7, loss=0.477]


Epoch [8/10], Loss: 0.4764, Accuracy: 77.72%


Validating Epoch 8/10: 100%|███████████████| 4/4 [00:00<00:00,  6.74it/s, accuracy=81.9, loss=0.394]


Validation Loss: 0.3937, Validation Accuracy: 81.90%


Training Epoch 9/10: 100%|███████████| 1250/1250 [01:34<00:00, 13.25it/s, accuracy=77.8, loss=0.475]


Epoch [9/10], Loss: 0.4747, Accuracy: 77.84%


Validating Epoch 9/10: 100%|█████████████████| 4/4 [00:00<00:00,  6.90it/s, accuracy=83.8, loss=0.4]


Validation Loss: 0.4005, Validation Accuracy: 83.81%


Training Epoch 10/10: 100%|██████████| 1250/1250 [01:33<00:00, 13.30it/s, accuracy=77.9, loss=0.475]


Epoch [10/10], Loss: 0.4742, Accuracy: 77.90%


Validating Epoch 10/10: 100%|███████████████| 4/4 [00:00<00:00,  7.45it/s, accuracy=82.9, loss=0.41]


Validation Loss: 0.4104, Validation Accuracy: 82.86%
Best fine-tuned model for Pleural_Effusion saved successfully at /kaggle/working/chexpert_reorganized/Pleural_Effusion_best_fine_tuned_model.pth.


Training Epoch 1/10: 100%|█████████████| 814/814 [01:01<00:00, 13.18it/s, accuracy=78.7, loss=0.483]


Epoch [1/10], Loss: 0.4833, Accuracy: 78.71%


Validating Epoch 1/10: 100%|███████████████| 2/2 [00:00<00:00,  5.06it/s, accuracy=93.5, loss=0.245]


Validation Loss: 0.2455, Validation Accuracy: 93.48%


Training Epoch 2/10: 100%|█████████████| 814/814 [01:01<00:00, 13.18it/s, accuracy=79.4, loss=0.464]


Epoch [2/10], Loss: 0.4641, Accuracy: 79.45%


Validating Epoch 2/10: 100%|███████████████| 2/2 [00:00<00:00,  4.98it/s, accuracy=93.5, loss=0.218]


Validation Loss: 0.2181, Validation Accuracy: 93.48%


Training Epoch 3/10: 100%|█████████████| 814/814 [01:01<00:00, 13.25it/s, accuracy=79.7, loss=0.461]


Epoch [3/10], Loss: 0.4600, Accuracy: 79.75%


Validating Epoch 3/10: 100%|███████████████| 2/2 [00:00<00:00,  4.59it/s, accuracy=93.5, loss=0.187]


Validation Loss: 0.1871, Validation Accuracy: 93.48%


Training Epoch 4/10: 100%|█████████████| 814/814 [01:01<00:00, 13.28it/s, accuracy=79.9, loss=0.459]


Epoch [4/10], Loss: 0.4586, Accuracy: 79.89%


Validating Epoch 4/10: 100%|███████████████| 2/2 [00:00<00:00,  5.21it/s, accuracy=91.3, loss=0.239]


Validation Loss: 0.2390, Validation Accuracy: 91.30%


Training Epoch 5/10: 100%|█████████████| 814/814 [01:01<00:00, 13.13it/s, accuracy=79.7, loss=0.456]


Epoch [5/10], Loss: 0.4556, Accuracy: 79.72%


Validating Epoch 5/10: 100%|███████████████| 2/2 [00:00<00:00,  5.25it/s, accuracy=89.1, loss=0.216]


Validation Loss: 0.2162, Validation Accuracy: 89.13%


Training Epoch 6/10: 100%|█████████████| 814/814 [01:01<00:00, 13.16it/s, accuracy=79.9, loss=0.453]


Epoch [6/10], Loss: 0.4533, Accuracy: 79.95%


Validating Epoch 6/10: 100%|███████████████| 2/2 [00:00<00:00,  5.06it/s, accuracy=91.3, loss=0.204]


Validation Loss: 0.2043, Validation Accuracy: 91.30%


Training Epoch 7/10: 100%|█████████████| 814/814 [01:01<00:00, 13.15it/s, accuracy=79.9, loss=0.451]


Epoch [7/10], Loss: 0.4509, Accuracy: 79.88%


Validating Epoch 7/10: 100%|███████████████| 2/2 [00:00<00:00,  5.04it/s, accuracy=95.7, loss=0.213]


Validation Loss: 0.2134, Validation Accuracy: 95.65%


Training Epoch 8/10: 100%|█████████████| 814/814 [01:01<00:00, 13.17it/s, accuracy=80.1, loss=0.449]


Epoch [8/10], Loss: 0.4493, Accuracy: 80.08%


Validating Epoch 8/10: 100%|███████████████| 2/2 [00:00<00:00,  5.11it/s, accuracy=95.7, loss=0.192]


Validation Loss: 0.1921, Validation Accuracy: 95.65%


Training Epoch 9/10: 100%|███████████████| 814/814 [01:01<00:00, 13.20it/s, accuracy=80, loss=0.453]


Epoch [9/10], Loss: 0.4528, Accuracy: 80.04%


Validating Epoch 9/10: 100%|███████████████| 2/2 [00:00<00:00,  5.10it/s, accuracy=91.3, loss=0.309]


Validation Loss: 0.3092, Validation Accuracy: 91.30%


Training Epoch 10/10: 100%|████████████| 814/814 [01:01<00:00, 13.17it/s, accuracy=80.3, loss=0.446]


Epoch [10/10], Loss: 0.4460, Accuracy: 80.34%


Validating Epoch 10/10: 100%|██████████████| 2/2 [00:00<00:00,  5.14it/s, accuracy=95.7, loss=0.178]

Validation Loss: 0.1782, Validation Accuracy: 95.65%
Best fine-tuned model for Pneumonia saved successfully at /kaggle/working/chexpert_reorganized/Pneumonia_best_fine_tuned_model.pth.
All best models have been fine-tuned and saved.





In [None]:
import os
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Path to saved models and validation images
output_dir = '/kaggle/working/chexpert_reorganized'  # Directory where models are saved
# Every entry points at the validation images of that disease. (Atelectasis
# previously pointed at the *training* 'No_Finding' folder, unlike the rest.)
disease_valid_dirs = {
    'Atelectasis': '/kaggle/input/chexpert/Dataset/valid/Atelectasis',
    'Cardiomegaly': '/kaggle/input/chexpert/Dataset/valid/Cardiomegaly',
    'Consolidation': '/kaggle/input/chexpert/Dataset/valid/Consolidation',
    'Edema': '/kaggle/input/chexpert/Dataset/valid/Edema',
    'Pleural_Effusion': '/kaggle/input/chexpert/Dataset/valid/Pleural_Effusion',
    'Pneumonia': '/kaggle/input/chexpert/Dataset/valid/Pneumonia'
}

# Transform for validation images
valid_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Build the network once. ImageNet weights are not requested because every
# parameter is overwritten by the checkpoint loaded below, and no layers need
# freezing for inference (eval mode + no_grad).
base_model = models.densenet121(pretrained=False)
num_features = base_model.classifier.in_features
base_model.classifier = nn.Sequential(
    nn.Linear(num_features, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, 1),
    nn.Sigmoid()
)
base_model.to(device)

# Loop through each disease
for disease, valid_dir in disease_valid_dirs.items():
    # Checkpoint written by the training cell for this disease.
    model_path = os.path.join(output_dir, f"{disease}_best_fine_tuned_model.pth")

    # The checkpoint is a plain state_dict, so the restricted unpickler is enough.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    base_model.load_state_dict(state_dict)
    base_model.eval()

    # List files in the validation directory, filtering out directories.
    # Sorted so that "first image" is the same on every run.
    if os.path.isdir(valid_dir):
        image_files = sorted(
            f for f in os.listdir(valid_dir) if os.path.isfile(os.path.join(valid_dir, f))
        )
    else:
        image_files = []
    if not image_files:
        print(f"No images found in {valid_dir}")
        continue

    # Only the first image is scored.
    image_file = image_files[0]
    image_path = os.path.join(valid_dir, image_file)
    with Image.open(image_path) as raw_image:
        # Add batch dimension and move to device
        image = valid_transforms(raw_image.convert('RGB')).unsqueeze(0).to(device)

    # Make prediction
    with torch.no_grad():
        no_finding_probability = base_model(image).item()

    # The models were trained on ImageFolder labels where the disease folder is
    # class 0 and 'no_finding' is class 1, so the sigmoid output is
    # P(no_finding). Report the complementary disease probability.
    probability = 1.0 - no_finding_probability
    prediction = "Positive" if probability > 0.5 else "Negative"

    print(f"{disease} - First image: {image_file} - Prediction: {prediction} - Probability: {probability:.4f}")


  base_model.load_state_dict(torch.load(model_path, map_location=device))


Atelectasis - First image: patient14030_study5_view1_frontal.jpg - Prediction: Positive - Probability: 0.6232
Cardiomegaly - First image: patient64624_study1_view1_frontal.jpg - Prediction: Negative - Probability: 0.2684
Consolidation - First image: patient64624_study1_view1_frontal.jpg - Prediction: Negative - Probability: 0.3180
Edema - First image: patient64563_study1_view1_frontal.jpg - Prediction: Negative - Probability: 0.4270
Pleural_Effusion - First image: patient64644_study1_view1_frontal.jpg - Prediction: Negative - Probability: 0.0834
Pneumonia - First image: patient64692_study1_view1_frontal.jpg - Prediction: Negative - Probability: 0.3825


In [None]:
# Path of the last checkpoint written by the training cell. The bare path that
# used to be here is not valid Python and raised a SyntaxError on "Run All".
'/kaggle/working/chexpert_reorganized/Pneumonia_best_fine_tuned_model.pth'

In [None]:
import os

# Point output_dir at the reorganized working folder and show what it currently holds
output_dir = '/kaggle/working/chexpert_reorganized'
saved_entries = os.listdir(output_dir)
print("Files in output_dir:", saved_entries)

Files in output_dir: ['Consolidation_best_fine_tuned_model.pth', 'Pleural_Effusion_best_fine_tuned_model.pth', 'Atelectasis_best_fine_tuned_model.pth', 'Pneumonia_best_fine_tuned_model.pth', 'Cardiomegaly_best_fine_tuned_model.pth', 'Edema_best_fine_tuned_model.pth']


In [None]:
# Show the class names of `train_dataset`, in the order the dataset stores them.
# NOTE(review): `train_dataset` is not defined in this cell; it must still be alive
# from an earlier training cell (presumably the last disease processed) — confirm.
train_dataset.classes

['Pneumonia', 'no_finding']

In [None]:
import os
import zipfile

# Folder that holds the saved model checkpoints
output_dir = '/kaggle/working/chexpert_reorganized'

# Archive that will be (re)created
zip_file_path = '/kaggle/working/fine_tuned_models.zip'

# Add every *.pth file found directly inside output_dir to the archive,
# stored flat under its bare file name (no directory prefix).
with zipfile.ZipFile(zip_file_path, 'w') as zipf:
    checkpoint_names = (entry for entry in os.listdir(output_dir) if entry.endswith('.pth'))
    for checkpoint_name in checkpoint_names:
        zipf.write(os.path.join(output_dir, checkpoint_name), arcname=checkpoint_name)

print(f"All .pth files have been zipped and saved to {zip_file_path}")


All .pth files have been zipped and saved to /kaggle/working/fine_tuned_models.zip


In [None]:
import torch
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import os

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Image transformations applied to every validation image
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Evaluate each per-disease checkpoint on that disease's validation split and
# collect one metrics row per disease.
# NOTE(review): `diseases` and `output_dir` come from earlier cells; `output_dir`
# must be the folder that contains the "<disease>_/valid" directories — confirm
# it has not been re-pointed by a later cell before running this one.
results = []
for disease in diseases:
    # Validation images for this disease (one sub-folder per class)
    valid_path = os.path.join(output_dir, f"{disease}_", "valid")

    # Use ImageFolder to load the dataset
    valid_dataset = ImageFolder(valid_path, transform=transform)
    valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

    # Index -> class-name mapping actually assigned by ImageFolder. The printed
    # mapping puts the disease folder at index 0 and 'no_finding' at index 1,
    # so the disease index is looked up instead of being assumed to be 1.
    class_mapping = {v: k for k, v in valid_dataset.class_to_idx.items()}
    print(f"{disease} class mapping: {class_mapping}")
    disease_idx = valid_dataset.class_to_idx[disease]

    # Rebuild the architecture the checkpoint was saved from.
    # weights=None: load_state_dict below overwrites every parameter, so
    # downloading ImageNet weights first would be wasted work.
    model_path = f"/kaggle/input/atelectasis_best_fine_tuned_model/pytorch/default/1/{disease}_best_fine_tuned_model.pth"
    base_model = models.densenet121(weights=None)

    num_features = base_model.classifier.in_features
    base_model.classifier = torch.nn.Sequential(
        torch.nn.Linear(num_features, 512),
        torch.nn.ReLU(),
        torch.nn.Dropout(0.3),
        torch.nn.Linear(512, 1),
        torch.nn.Sigmoid()
    )

    # weights_only=True restricts unpickling to tensors/containers, which is all
    # a state_dict needs, and avoids executing arbitrary pickled code.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    base_model.load_state_dict(state_dict)
    base_model.to(device)
    base_model.eval()  # inference only; no requires_grad bookkeeping is needed

    # Collect true and predicted class indices over the validation set.
    # The sigmoid output is thresholded at 0.5 and compared directly against the
    # ImageFolder label, i.e. a prediction of 1 means class index 1.
    # NOTE(review): assumes the checkpoints were trained with this same label
    # encoding — confirm against the training cell.
    y_true = []
    y_pred = []
    with torch.no_grad():
        for images, labels in valid_loader:
            outputs = base_model(images.to(device))
            preds = (outputs > 0.5).long().squeeze(1)
            y_true.extend(labels.tolist())
            y_pred.extend(preds.cpu().tolist())

    # Calculate metrics. pos_label is the disease's own index so that
    # precision/recall/F1 describe detection of the disease; the scikit-learn
    # default (pos_label=1) would score the 'no_finding' class instead.
    acc = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, pos_label=disease_idx, zero_division=0)
    recall = recall_score(y_true, y_pred, pos_label=disease_idx, zero_division=0)
    f1 = f1_score(y_true, y_pred, pos_label=disease_idx, zero_division=0)

    # Append results
    results.append({
        "Disease": disease,
        "Accuracy": acc,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1
    })

# Create a DataFrame with results
results_df = pd.DataFrame(results)
print(results_df)


Atelectasis class mapping: {0: 'Atelectasis', 1: 'no_finding'}


Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 149MB/s] 
  base_model.load_state_dict(torch.load(model_path, map_location=device))


Cardiomegaly class mapping: {0: 'Cardiomegaly', 1: 'no_finding'}


  base_model.load_state_dict(torch.load(model_path, map_location=device))


Consolidation class mapping: {0: 'Consolidation', 1: 'no_finding'}


  base_model.load_state_dict(torch.load(model_path, map_location=device))


Edema class mapping: {0: 'Edema', 1: 'no_finding'}


  base_model.load_state_dict(torch.load(model_path, map_location=device))


Pleural_Effusion class mapping: {0: 'Pleural_Effusion', 1: 'no_finding'}


  base_model.load_state_dict(torch.load(model_path, map_location=device))


Pneumonia class mapping: {0: 'Pneumonia', 1: 'no_finding'}


  base_model.load_state_dict(torch.load(model_path, map_location=device))


            Disease  Accuracy  Precision    Recall  F1 Score
0       Atelectasis  0.796610   0.634615  0.868421  0.733333
1      Cardiomegaly  0.811321   0.680000  0.894737  0.772727
2     Consolidation  0.873239   0.808511  1.000000  0.894118
3             Edema  0.927711   0.863636  1.000000  0.926829
4  Pleural_Effusion  0.828571   0.738095  0.815789  0.775000
5         Pneumonia  0.956522   0.950000  1.000000  0.974359
