In [3]:
import os
import pandas as pd
import config
from utils.dataset import chest_xray_with_mask_datasplit, ChestXrayDatasetWithMask
from utils.model import ChestXrayDenseNet121WithMask
from utils.train import train, validate, compute_pos_weight
import torch

In [4]:
# Build the path of each split's label CSV inside the dataset directory.
train_path = os.path.join(config.DATASET_DIR, 'miccai2023_nih-cxr-lt_labels_train.csv')
val_path = os.path.join(config.DATASET_DIR, 'miccai2023_nih-cxr-lt_labels_val.csv')
test_path = os.path.join(config.DATASET_DIR, 'miccai2023_nih-cxr-lt_labels_test.csv')

# Read the three label tables, in train / val / test order.
df_train, df_val, df_test = (
    pd.read_csv(csv_path) for csv_path in (train_path, val_path, test_path)
)

# Stack the three tables into one frame; ignore_index=True renumbers the rows
# 0..N-1 so the per-file indices do not collide.
full_df = pd.concat([df_train, df_val, df_test], ignore_index=True)

In [5]:
from torch.utils.data import DataLoader

# GPU this notebook prefers to run on. A fixed index is only valid on hosts
# that expose at least PREFERRED_GPU_INDEX + 1 devices, so fall back to the
# first GPU (or the CPU) instead of building a device that fails on first use.
PREFERRED_GPU_INDEX = 7
if torch.cuda.is_available():
    gpu_index = PREFERRED_GPU_INDEX if PREFERRED_GPU_INDEX < torch.cuda.device_count() else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")

# Directory holding the image files; shared by the dataset and the split helper.
IMAGE_DIR = os.path.join(config.DATASET_DIR, 'cxr', 'images')
BATCH_SIZE = 32

# Dataset over the combined label table.
full_dataset = ChestXrayDatasetWithMask(dataframe=full_df, img_dir=IMAGE_DIR, device=device)

# The project helper returns the train / val / test datasets, in that order.
train_dataset, val_dataset, test_dataset = chest_xray_with_mask_datasplit(
    full_df, full_dataset, dataset_dir=IMAGE_DIR, device=device
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [6]:
# Initialize model
# NOTE(review): 19 presumably equals the number of label columns left after
# dropping 'id', 'No Finding' and 'subj_id' — confirm against the label table.
NUM_CLASSES = 19
model = ChestXrayDenseNet121WithMask(num_classes=NUM_CLASSES)

In [7]:
def compute_pos_weight_from_df(full_df, label_cols):
    """Return the per-class negative-to-positive ratio as a float32 tensor.

    Args:
        full_df: DataFrame containing one column per entry in ``label_cols``;
            each column is summed to obtain that class's positive count.
        label_cols: list of column names to compute a weight for.

    Returns:
        A 1-D ``torch.float32`` tensor with one entry per label column, equal
        to ``(rows - positives) / (positives + 1e-6)``.
    """
    epsilon = 1e-6  # keeps the division finite for a class with no positives

    total_rows = len(full_df)
    positives = full_df[label_cols].sum().values  # one count per label column
    negatives = total_rows - positives

    ratios = negatives / (positives + epsilon)
    return torch.tensor(ratios, dtype=torch.float32)

# Drop the two id columns and 'No Finding'; every remaining column is passed
# on as a label column.
# NOTE(review): the weights are computed over the combined train+val+test
# table, not only the rows used for training — confirm this is intended.
classes_df = full_df.drop(columns=['id', 'No Finding', 'subj_id'])
pos_weight = compute_pos_weight_from_df(classes_df, classes_df.columns.tolist())

In [8]:
# cuDNN settings: non-deterministic algorithms allowed, kernel autotuning on.
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

# Train
train(
    model,
    train_loader,
    val_loader,
    device,
    epochs=10,
    lr=1e-4,
    save_path=config.MODEL_FOLDER,
    file_name="PulmoScanX_v1.1",
    pos_weight=pos_weight,
)

Epoch 1/10: 100%|██████████| 2453/2453 [52:40<00:00,  1.29s/it, loss=0.707]



Epoch 1 Training Loss: 1.1138
Validation Loss: 1.0210
F1: 0.1799 | AUC: 0.7396 | Accuracy: 0.2136



Epoch 2/10: 100%|██████████| 2453/2453 [52:52<00:00,  1.29s/it, loss=0.893]



Epoch 2 Training Loss: 1.0007
Validation Loss: 0.9941
F1: 0.1975 | AUC: 0.7526 | Accuracy: 0.2159



Epoch 3/10: 100%|██████████| 2453/2453 [52:30<00:00,  1.28s/it, loss=0.649]



Epoch 3 Training Loss: 0.9575
Validation Loss: 0.9975
F1: 0.2269 | AUC: 0.7661 | Accuracy: 0.2310



Epoch 4/10: 100%|██████████| 2453/2453 [53:37<00:00,  1.31s/it, loss=0.682]



Epoch 4 Training Loss: 0.9193
Validation Loss: 1.0104
F1: 0.2267 | AUC: 0.7591 | Accuracy: 0.2612



Epoch 5/10: 100%|██████████| 2453/2453 [53:08<00:00,  1.30s/it, loss=0.711]



Epoch 5 Training Loss: 0.8852
Validation Loss: 1.0252
F1: 0.2209 | AUC: 0.7588 | Accuracy: 0.2507



Epoch 6/10: 100%|██████████| 2453/2453 [52:48<00:00,  1.29s/it, loss=0.752]



Epoch 6 Training Loss: 0.7966
Validation Loss: 0.9733
F1: 0.2411 | AUC: 0.7733 | Accuracy: 0.2653



Epoch 7/10: 100%|██████████| 2453/2453 [53:15<00:00,  1.30s/it, loss=0.526]



Epoch 7 Training Loss: 0.7515
Validation Loss: 1.0343
F1: 0.2331 | AUC: 0.7714 | Accuracy: 0.2538



Epoch 8/10: 100%|██████████| 2453/2453 [53:12<00:00,  1.30s/it, loss=0.402]



Epoch 8 Training Loss: 0.7311
Validation Loss: 1.0407
F1: 0.2452 | AUC: 0.7704 | Accuracy: 0.2291



Epoch 9/10: 100%|██████████| 2453/2453 [53:38<00:00,  1.31s/it, loss=0.647]



Epoch 9 Training Loss: 0.7005
Validation Loss: 1.0469
F1: 0.2513 | AUC: 0.7715 | Accuracy: 0.2079



Epoch 10/10: 100%|██████████| 2453/2453 [53:27<00:00,  1.31s/it, loss=0.407]



Epoch 10 Training Loss: 0.6280
Validation Loss: 1.0627
F1: 0.2519 | AUC: 0.7716 | Accuracy: 0.2392



In [9]:
from utils.evaluate import evaluate, evaluate_per_class

# Score the in-memory model on the test loader, labelling results with the
# label-column names.
# NOTE(review): `model` holds whatever weights train() left in memory —
# verify whether that is the final epoch or the best saved checkpoint.
class_names = list(classes_df.columns)
evals_per_class = evaluate_per_class(model, test_loader, device, class_names)




Per-Class F1 Scores:
                 Pathology  F1 Score
                  Effusion  0.506024
    Subcutaneous Emphysema  0.436364
              Infiltration  0.410205
                 Emphysema  0.370482
              Pneumothorax  0.366145
               Atelectasis  0.353092
         Pneumomediastinum  0.331551
                      Mass  0.331541
                    Hernia  0.285714
              Cardiomegaly  0.284472
                    Nodule  0.234330
                     Edema  0.201820
             Consolidation  0.192145
          Pneumoperitoneum  0.181132
        Pleural Thickening  0.160781
            Tortuous Aorta  0.144279
                  Fibrosis  0.128770
Calcification of the Aorta  0.089494
                 Pneumonia  0.082143
