In [None]:
import os
import pandas as pd
import config
from utils.dataset import ChestXrayDataset 
from utils.transform import train_transform, val_test_transform
from utils.model import ChestXrayDenseNet121
from utils.train import train, validate

In [None]:
test_path = os.path.join(config.DATASET_DIR, 'miccai2023_nih-cxr-lt_labels_test.csv')
train_path = os.path.join(config.DATASET_DIR, 'miccai2023_nih-cxr-lt_labels_train.csv')
val_path = os.path.join(config.DATASET_DIR, 'miccai2023_nih-cxr-lt_labels_val.csv')

# Load all CSVs
df_train = pd.read_csv(train_path)
df_val = pd.read_csv(val_path)
df_test = pd.read_csv(test_path)

# Combine them
full_df = pd.concat([df_train, df_val, df_test], ignore_index=True)

In [12]:
classes_df = full_df.drop(columns=['id', 'No Finding', 'subj_id'])

In [14]:
classes_df.sum()

Atelectasis                   11559
Cardiomegaly                   2776
Consolidation                  4667
Edema                          2303
Effusion                      13317
Emphysema                      2516
Fibrosis                       1686
Hernia                          227
Infiltration                  19894
Mass                           5782
Nodule                         6331
Pleural Thickening             3385
Pneumonia                      1431
Pneumothorax                   5302
Pneumoperitoneum                316
Pneumomediastinum               253
Subcutaneous Emphysema         1991
Tortuous Aorta                  742
Calcification of the Aorta      455
dtype: int64

In [None]:
from torch.utils.data import random_split, DataLoader, Subset
import torch

full_dataset = ChestXrayDataset(dataframe=full_df, img_dir=os.path.join(config.DATASET_DIR, 'cxr', 'images'), transform=None)


total_size = len(full_dataset)
train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

train_subset, val_subset, test_subset = random_split(
    full_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42)
)

train_dataset = Subset(
    ChestXrayDataset(dataframe=full_df, img_dir=os.path.join(config.DATASET_DIR, 'cxr', 'images'), transform=train_transform),
    train_subset.indices
)

val_dataset = Subset(
    ChestXrayDataset(dataframe=full_df, img_dir=os.path.join(config.DATASET_DIR, 'cxr', 'images'), transform=val_test_transform),
    val_subset.indices
)

test_dataset = Subset(
    ChestXrayDataset(dataframe=full_df, img_dir=os.path.join(config.DATASET_DIR, 'cxr', 'images'), transform=val_test_transform),
    test_subset.indices
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [4]:
# Initialize model
model = ChestXrayDenseNet121(num_classes=19)

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False

# Train
train(model, train_loader, val_loader, device, epochs=10, lr=1e-4)

Epoch 1/10: 100%|██████████| 2453/2453 [27:28<00:00,  1.49it/s, loss=0.0934]



Epoch 1 Training Loss: 0.1314
Validation Loss: 0.1190



Epoch 2/10: 100%|██████████| 2453/2453 [27:33<00:00,  1.48it/s, loss=0.0904]



Epoch 2 Training Loss: 0.1178
Validation Loss: 0.1170



Epoch 3/10: 100%|██████████| 2453/2453 [28:02<00:00,  1.46it/s, loss=0.0938]



Epoch 3 Training Loss: 0.1150
Validation Loss: 0.1150



Epoch 4/10: 100%|██████████| 2453/2453 [27:39<00:00,  1.48it/s, loss=0.0819]



Epoch 4 Training Loss: 0.1127
Validation Loss: 0.1147



Epoch 5/10: 100%|██████████| 2453/2453 [26:46<00:00,  1.53it/s, loss=0.103] 



Epoch 5 Training Loss: 0.1108
Validation Loss: 0.1148



Epoch 6/10: 100%|██████████| 2453/2453 [30:02<00:00,  1.36it/s, loss=0.1]   



Epoch 6 Training Loss: 0.1088
Validation Loss: 0.1138



Epoch 7/10: 100%|██████████| 2453/2453 [38:33<00:00,  1.06it/s, loss=0.144] 



Epoch 7 Training Loss: 0.1072
Validation Loss: 0.1140



Epoch 8/10: 100%|██████████| 2453/2453 [31:09<00:00,  1.31it/s, loss=0.0655]



Epoch 8 Training Loss: 0.1053
Validation Loss: 0.1139



Epoch 9/10: 100%|██████████| 2453/2453 [29:54<00:00,  1.37it/s, loss=0.179] 



Epoch 9 Training Loss: 0.1031
Validation Loss: 0.1150



Epoch 10/10: 100%|██████████| 2453/2453 [30:03<00:00,  1.36it/s, loss=0.0924]



Epoch 10 Training Loss: 0.1009
Validation Loss: 0.1159



In [9]:
import config
MODEL_FOLDER = os.path.join(config.PROJECT_ROOT, 'models')
if not os.path.exists(MODEL_FOLDER):
    os.makedirs(MODEL_FOLDER)
filepath = os.path.join(MODEL_FOLDER, "best_model.pth")
torch.save(model.state_dict(), filepath)

In [16]:
from utils.evaluate import evaluate

eval = evaluate(model, test_loader, device)


Evaluation Results:
Avg Loss   : 0.1145
F1 Score   : 0.2101
Precision  : 0.3837
Recall     : 0.1593
Accuracy   : 0.5281


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
