In [1]:
import os
import pandas as pd
import config
from utils.dataset import chest_xray_datasplit, ChestXrayDataset, ChestXrayDatasetWithMask
from utils.model import ChestXrayDenseNet121
from utils.train import train, validate, compute_pos_weight

In [2]:
test_path = os.path.join(config.DATASET_DIR, 'miccai2023_nih-cxr-lt_labels_test.csv')
train_path = os.path.join(config.DATASET_DIR, 'miccai2023_nih-cxr-lt_labels_train.csv')
val_path = os.path.join(config.DATASET_DIR, 'miccai2023_nih-cxr-lt_labels_val.csv')

# Load all CSVs
df_train = pd.read_csv(train_path)
df_val = pd.read_csv(val_path)
df_test = pd.read_csv(test_path)

# Combine them
full_df = pd.concat([df_train, df_val, df_test], ignore_index=True)

In [3]:
classes_df = full_df.drop(columns=['id', 'No Finding', 'subj_id'])

In [4]:
classes_df.sum()

Atelectasis                   11559
Cardiomegaly                   2776
Consolidation                  4667
Edema                          2303
Effusion                      13317
Emphysema                      2516
Fibrosis                       1686
Hernia                          227
Infiltration                  19894
Mass                           5782
Nodule                         6331
Pleural Thickening             3385
Pneumonia                      1431
Pneumothorax                   5302
Pneumoperitoneum                316
Pneumomediastinum               253
Subcutaneous Emphysema         1991
Tortuous Aorta                  742
Calcification of the Aorta      455
dtype: int64

In [None]:
from torch.utils.data import DataLoader
full_dataset = ChestXrayDatasetWithMask(dataframe=full_df, img_dir=os.path.join(config.DATASET_DIR, 'cxr', 'images'), mask_model=)

train_dataset, val_dataset, test_dataset = chest_xray_datasplit(full_df, full_dataset)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [6]:
# Initialize model
model = ChestXrayDenseNet121(num_classes=19)

In [7]:
import torch
def compute_pos_weight_from_df(full_df, label_cols):
    num_samples = len(full_df)

    # Sum positives per class
    pos_counts = full_df[label_cols].sum().values  # shape [num_classes]
    neg_counts = num_samples - pos_counts

    pos_weight = neg_counts / (pos_counts + 1e-6)  # avoid div by zero
    return torch.tensor(pos_weight, dtype=torch.float32)

pos_weight = compute_pos_weight_from_df(classes_df, list(classes_df.columns))

In [8]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False

# Train
train(model, train_loader, val_loader, device, epochs=20, lr=1e-4, save_path=config.MODEL_FOLDER, file_name="PulmoScanX_v2.1", pos_weight=pos_weight)

Epoch 1/20: 100%|██████████| 2453/2453 [29:32<00:00,  1.38it/s, loss=1.32]  



Epoch 1 Training Loss: 1.1065
Validation Loss: 0.9946
F1: 0.1838 | AUC: 0.7425 | Accuracy: 0.2066



Epoch 2/20: 100%|██████████| 2453/2453 [27:43<00:00,  1.47it/s, loss=0.441]



Epoch 2 Training Loss: 0.9935
Validation Loss: 0.9653
F1: 0.1920 | AUC: 0.7557 | Accuracy: 0.1853



Epoch 3/20: 100%|██████████| 2453/2453 [27:16<00:00,  1.50it/s, loss=0.703]



Epoch 3 Training Loss: 0.9576
Validation Loss: 1.0538
F1: 0.2258 | AUC: 0.7550 | Accuracy: 0.2841



Epoch 4/20: 100%|██████████| 2453/2453 [27:34<00:00,  1.48it/s, loss=1.19] 



Epoch 4 Training Loss: 0.9220
Validation Loss: 1.0054
F1: 0.2157 | AUC: 0.7648 | Accuracy: 0.1983



Epoch 5/20: 100%|██████████| 2453/2453 [28:00<00:00,  1.46it/s, loss=3.88] 



Epoch 5 Training Loss: 0.8841
Validation Loss: 1.0429
F1: 0.2490 | AUC: 0.7602 | Accuracy: 0.2685



Epoch 6/20: 100%|██████████| 2453/2453 [28:07<00:00,  1.45it/s, loss=1.16] 



Epoch 6 Training Loss: 0.8009
Validation Loss: 0.9633
F1: 0.2338 | AUC: 0.7770 | Accuracy: 0.2475



Epoch 7/20: 100%|██████████| 2453/2453 [27:52<00:00,  1.47it/s, loss=0.749]



Epoch 7 Training Loss: 0.7474
Validation Loss: 1.0307
F1: 0.2406 | AUC: 0.7688 | Accuracy: 0.2114



Epoch 8/20: 100%|██████████| 2453/2453 [28:12<00:00,  1.45it/s, loss=0.705]



Epoch 8 Training Loss: 0.7180
Validation Loss: 1.0361
F1: 0.2467 | AUC: 0.7698 | Accuracy: 0.2567



Epoch 9/20: 100%|██████████| 2453/2453 [28:01<00:00,  1.46it/s, loss=0.673]



Epoch 9 Training Loss: 0.6929
Validation Loss: 1.1443
F1: 0.2624 | AUC: 0.7620 | Accuracy: 0.2632



Epoch 10/20: 100%|██████████| 2453/2453 [28:02<00:00,  1.46it/s, loss=0.54] 



Epoch 10 Training Loss: 0.6232
Validation Loss: 1.1220
F1: 0.2632 | AUC: 0.7645 | Accuracy: 0.2367



Epoch 11/20: 100%|██████████| 2453/2453 [27:59<00:00,  1.46it/s, loss=0.406]



Epoch 11 Training Loss: 0.5949
Validation Loss: 1.2157
F1: 0.2689 | AUC: 0.7588 | Accuracy: 0.2583



Epoch 12/20: 100%|██████████| 2453/2453 [28:01<00:00,  1.46it/s, loss=0.499]



Epoch 12 Training Loss: 0.5712
Validation Loss: 1.2130
F1: 0.2641 | AUC: 0.7612 | Accuracy: 0.2372



Epoch 13/20: 100%|██████████| 2453/2453 [28:15<00:00,  1.45it/s, loss=0.911]



Epoch 13 Training Loss: 0.5326
Validation Loss: 1.2923
F1: 0.2819 | AUC: 0.7524 | Accuracy: 0.2567



RuntimeError: [enforce fail at inline_container.cc:659] . unexpected pos 128 vs 0

In [9]:
from utils.evaluate import evaluate, evaluate_per_class
evals_per_class = evaluate_per_class(model, test_loader, device, list(classes_df.columns))


Per-Class F1 Scores:
                 Pathology  F1 Score
                  Effusion  0.505556
    Subcutaneous Emphysema  0.468314
                 Emphysema  0.413209
              Infiltration  0.405411
                    Hernia  0.400000
              Pneumothorax  0.379723
               Atelectasis  0.374114
         Pneumomediastinum  0.366864
                      Mass  0.335202
              Cardiomegaly  0.290840
                    Nodule  0.243829
                     Edema  0.217028
          Pneumoperitoneum  0.205405
             Consolidation  0.195868
        Pleural Thickening  0.188668
            Tortuous Aorta  0.165581
                  Fibrosis  0.149390
                 Pneumonia  0.091616
Calcification of the Aorta  0.085603


In [None]:
list(classes_df.columns)

['Atelectasis',
 'Cardiomegaly',
 'Consolidation',
 'Edema',
 'Effusion',
 'Emphysema',
 'Fibrosis',
 'Hernia',
 'Infiltration',
 'Mass',
 'Nodule',
 'Pleural Thickening',
 'Pneumonia',
 'Pneumothorax',
 'Pneumoperitoneum',
 'Pneumomediastinum',
 'Subcutaneous Emphysema',
 'Tortuous Aorta',
 'Calcification of the Aorta']