In [1]:
import os

import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from torch.utils.data import DataLoader
from torchvision import transforms, models
from tqdm import tqdm

from ChestXRayDataset import ChestXRayDataset

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Load test Data
df_test = pd.read_csv('../data/data_entries/miccai2023_nih-cxr-lt_labels_test.csv')

In [4]:
# Image directories
image_dir_test = '../data/test_images'

In [5]:
def get_valid_image_ids(df, image_dir):
    """Return the `id` values of `df` that name a file present in `image_dir`.

    The result is the `id` Series restricted to matching rows, so it keeps
    the original index labels of `df`.
    """
    files_on_disk = set(os.listdir(image_dir))
    has_file = df['id'].isin(files_on_disk)
    return df.loc[has_file, 'id']

In [6]:
# Keep only the rows whose image id has a matching file in the image folder
valid_test_ids = get_valid_image_ids(df_test, image_dir_test)

df_test_valid = df_test[df_test['id'].isin(valid_test_ids)]

In [7]:
# Remove the subject identifier column.
# `drop` returns a new frame instead of mutating in place, and
# `errors='ignore'` keeps this cell re-runnable once `subj_id` is already gone.
df_test_valid = df_test_valid.drop(columns=['subj_id'], errors='ignore')
df_test_valid

Unnamed: 0,id,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,...,Nodule,Pleural Thickening,Pneumonia,Pneumothorax,Pneumoperitoneum,Pneumomediastinum,Subcutaneous Emphysema,Tortuous Aorta,Calcification of the Aorta,No Finding
0,00000013_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,00000013_001.png,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,1,0,0,0
2,00000013_002.png,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,1,0,0,0
3,00000013_003.png,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,00000013_004.png,0,0,0,0,1,1,0,0,1,...,0,0,0,1,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21076,00030800_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
21077,00030802_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
21078,00030803_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
21079,00030804_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


### Create minority column

In [8]:
majority_classes = ['Atelectasis', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'No Finding']

In [9]:
def is_minority(row):
    """Return 1 if `row` has a 1 in any column other than 'id' and the majority classes, else 0."""
    # Columns that must not count towards the minority flag
    ignored = ['id'] + majority_classes
    flagged = any(
        row[name] == 1
        for name in row.index
        if name not in ignored
    )
    return 1 if flagged else 0

In [10]:
df_test_valid['minority'] = df_test_valid.apply(is_minority, axis=1)
df_test_valid

Unnamed: 0,id,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,...,Pleural Thickening,Pneumonia,Pneumothorax,Pneumoperitoneum,Pneumomediastinum,Subcutaneous Emphysema,Tortuous Aorta,Calcification of the Aorta,No Finding,minority
0,00000013_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,00000013_001.png,0,0,0,0,0,1,0,0,0,...,0,0,1,0,0,1,0,0,0,1
2,00000013_002.png,0,0,0,0,0,1,0,0,0,...,0,0,1,0,0,1,0,0,0,1
3,00000013_003.png,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
4,00000013_004.png,0,0,0,0,1,1,0,0,1,...,0,0,1,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21076,00030800_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
21077,00030802_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
21078,00030803_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
21079,00030804_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [11]:
df_prediction = df_test_valid.copy()
df_prediction.loc[:, df_prediction.columns != 'id'] = 0
df_prediction

Unnamed: 0,id,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,...,Pleural Thickening,Pneumonia,Pneumothorax,Pneumoperitoneum,Pneumomediastinum,Subcutaneous Emphysema,Tortuous Aorta,Calcification of the Aorta,No Finding,minority
0,00000013_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,00000013_001.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,00000013_002.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,00000013_003.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,00000013_004.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21076,00030800_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21077,00030802_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21078,00030803_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21079,00030804_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
df_prediction.columns

Index(['id', 'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema',
       'Effusion', 'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass',
       'Nodule', 'Pleural Thickening', 'Pneumonia', 'Pneumothorax',
       'Pneumoperitoneum', 'Pneumomediastinum', 'Subcutaneous Emphysema',
       'Tortuous Aorta', 'Calcification of the Aorta', 'No Finding',
       'minority'],
      dtype='object')

### Step 1: Create a mapping from each category to an integer index

In [13]:
# Extract all label columns (everything after 'id'); at this point this also
# includes the derived 'minority' column
categories = df_test_valid.columns[1:]

# Create a mapping dictionary from each category name to its column position
category_mapping = {category: idx for idx, category in enumerate(categories)}

print("Category Mapping:")
print(category_mapping)

Category Mapping:
{'Atelectasis': 0, 'Cardiomegaly': 1, 'Consolidation': 2, 'Edema': 3, 'Effusion': 4, 'Emphysema': 5, 'Fibrosis': 6, 'Hernia': 7, 'Infiltration': 8, 'Mass': 9, 'Nodule': 10, 'Pleural Thickening': 11, 'Pneumonia': 12, 'Pneumothorax': 13, 'Pneumoperitoneum': 14, 'Pneumomediastinum': 15, 'Subcutaneous Emphysema': 16, 'Tortuous Aorta': 17, 'Calcification of the Aorta': 18, 'No Finding': 19, 'minority': 20}


### Step 2: Create finding column

In [14]:
# Build the 'finding' list and its 'finding_encoded' string for one row
def create_finding(row):
    """Return (names, codes) for the categories that are set to 1 in `row`.

    `names` is the list of category names whose value equals 1, in the order
    of `categories`; `codes` is the comma-joined string of their indices from
    `category_mapping`.
    """
    present = []
    codes = []
    for name in categories:
        if row[name] == 1:
            present.append(name)
            codes.append(str(category_mapping[name]))

    # A single string is used so the combination can serve as a stratification key
    return present, ','.join(codes)

In [15]:
# Apply function to create 'finding' and 'finding_encoded' columns in test data
df_test_valid[['finding', 'finding_encoded']] = df_test_valid.apply(
    lambda row: pd.Series(create_finding(row)), axis=1
)

df_test_valid[['id', 'finding', 'finding_encoded']]

Unnamed: 0,id,finding,finding_encoded
0,00000013_000.png,[No Finding],19
1,00000013_001.png,"[Emphysema, Pneumothorax, Subcutaneous Emphyse...",5131620
2,00000013_002.png,"[Emphysema, Pneumothorax, Subcutaneous Emphyse...",5131620
3,00000013_003.png,"[Pleural Thickening, minority]",1120
4,00000013_004.png,"[Effusion, Emphysema, Infiltration, Pneumothor...",458131620
...,...,...,...
21076,00030800_000.png,[No Finding],19
21077,00030802_000.png,[No Finding],19
21078,00030803_000.png,[No Finding],19
21079,00030804_000.png,[No Finding],19


In [16]:
print(df_test_valid.iloc[1])

id                                                             00000013_001.png
Atelectasis                                                                   0
Cardiomegaly                                                                  0
Consolidation                                                                 0
Edema                                                                         0
Effusion                                                                      0
Emphysema                                                                     1
Fibrosis                                                                      0
Hernia                                                                        0
Infiltration                                                                  0
Mass                                                                          0
Nodule                                                                        0
Pleural Thickening                      

### Create dataset and dataloader

In [17]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [18]:
test_dataset = ChestXRayDataset(dataframe=df_test_valid, image_dir=image_dir_test, category_mapping=category_mapping, transform=transform)

In [19]:
batch_size = 32
dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

## Create a pipeline function for all three models

In [20]:
# Debug helper: print what each model outputs for a handful of test images
def debug_test_pipeline(model_1, model_2, model_3, test_loader, device):
    """Print raw and thresholded outputs of the three models for a few images.

    Only the first three images of the first batch yielded by `test_loader`
    are inspected; nothing is stored or returned. The thresholds used here
    (0.6 for model 1, 0.3 for models 2 and 3) are fixed debug values and are
    independent of the thresholds passed to the real pipeline.

    Args:
        model_1: model whose single output is thresholded at 0.6; a True
            prediction stops the cascade for that image.
        model_2: model whose outputs are read in the order of
            `model_2_labels`; its last output decides whether model 3 runs.
        model_3: model run only when model 2's last output exceeds 0.3.
        test_loader: iterable yielding `(images, labels, image_ids)` batches.
        device: device the image and label tensors are moved to.
    """
    # Output order of model 2; the last entry is the flag that routes to model 3
    model_2_labels = ['Atelectasis', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'minority']
    max_images = 3  # number of images of the first batch to inspect

    for images, labels, image_ids in tqdm(test_loader, desc="Testing"):

        images = images.to(device)
        labels = labels.to(device)

        for i in range(min(max_images, images.size(0))):
            image_id = image_ids[i]
            print(f"Image id: {image_id}")

            image = images[i].unsqueeze(0)  # Single image, batch dimension restored
            label = labels[i]

            print("label")
            print(label)

            # Forward pass
            with torch.no_grad():
                output = model_1(image)
                pred = (output > 0.6).cpu().numpy()[0][0]  # Apply threshold
                print(f"Model Output (before thresholding): {output}")
                print(f"Prediction (after thresholding): {pred}")
                print("")

                if not pred:  # Model 1 did not predict 'No Finding', so other labels are expected
                    output_2 = model_2(image)
                    pred_2 = (output_2 > 0.3).cpu().numpy()[0]  # Apply threshold
                    print(f"Model 2 Output (before thresholding): {output_2}")
                    print(f"Model 2 Prediction (after thresholding): {pred_2}")
                    print("")

                    # Report every model 2 label that passed the threshold.
                    # A separate loop variable is used so the ground-truth
                    # `label` tensor above is not overwritten.
                    for idx, label_name in enumerate(model_2_labels):
                        if pred_2[idx]:
                            print(f"pred_2 for {label_name} : {pred_2[idx]} ")

                    if pred_2[-1]:  # Minority found
                        output_3 = model_3(image)
                        pred_3 = (output_3 > 0.3).cpu().numpy()[0]  # Apply threshold
                        print(f"Model 3 Output (before thresholding): {output_3}")
                        print(f"Model 3 Prediction (after thresholding): {pred_3}")
                        print("")

            print("-" * 50)

        print("-" * 50)
        break  # Only the first batch is inspected


In [21]:
def hierarchical_test_pipeline1(model_1, model_2, model_3, test_loader, device, thresholds_model_2, thresholds_model_3, threshold_model_1=0.7, prediction_df=None):
    """Run the three-stage cascade over `test_loader` and record the predictions.

    Stage 1 (`model_1`) decides 'No Finding'. Images it does not flag are passed
    to `model_2`, whose outputs are compared label by label against
    `thresholds_model_2`. Images whose 'minority' output exceeds its threshold
    are additionally passed to `model_3`, whose outputs are compared against
    `thresholds_model_3`.

    Every positive decision sets the matching column to 1 for all rows of the
    prediction frame whose 'id' equals the image id. Columns are never reset to
    0 here, so the frame should be zeroed before a fresh run.

    Inference is done per batch rather than per image. The models must be in
    eval mode so that each sample's output does not depend on the rest of the
    batch; outputs may differ from single-image inference only by
    floating-point rounding.

    NOTE(review): this assumes model_1's positive class is 'No Finding' —
    confirm against the notebook that trained it.

    Args:
        model_1: model with a single output per image.
        model_2: model with one output per entry of `model_2_labels`.
        model_3: model with one output per entry of `model_3_labels`.
        test_loader: iterable yielding `(images, labels, image_ids)` batches.
        device: device the image tensors are moved to.
        thresholds_model_2: dict mapping each model 2 label to its threshold.
        thresholds_model_3: dict mapping each model 3 label to its threshold.
        threshold_model_1: threshold above which model 1 predicts 'No Finding'.
        prediction_df: frame to write into; defaults to the notebook-level
            `df_prediction` (the original behaviour).

    Returns:
        None. The prediction frame is modified in place.
    """
    # Output order of model 2; the last entry routes the image to model 3
    model_2_labels = ['Atelectasis', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'minority']
    # Output order of model 3
    model_3_labels = [
        'Cardiomegaly', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis',
        'Hernia', 'Pleural Thickening', 'Pneumonia', 'Pneumothorax',
        'Pneumoperitoneum', 'Pneumomediastinum', 'Subcutaneous Emphysema',
        'Tortuous Aorta', 'Calcification of the Aorta'
    ]

    target_df = df_prediction if prediction_df is None else prediction_df

    # Collect the ids predicted positive per label and write them once at the
    # end; a boolean-mask assignment per image would rescan the frame each time.
    positive_ids = {label: [] for label in ['No Finding'] + model_2_labels + model_3_labels}

    with torch.no_grad():
        for images, _, image_ids in tqdm(test_loader, desc="Testing"):
            images = images.to(device)

            # Stage 1: one boolean per image of the batch
            no_finding = (model_1(images) > threshold_model_1).cpu().numpy()[:, 0]

            stage_2_rows = []
            for row, flagged in enumerate(no_finding):
                if flagged:
                    positive_ids['No Finding'].append(image_ids[row])
                else:
                    stage_2_rows.append(row)
            if not stage_2_rows:
                continue

            # Stage 2: only the images not flagged as 'No Finding'
            scores_2 = model_2(images[stage_2_rows]).cpu().numpy()

            stage_3_rows = []
            for row, scores in zip(stage_2_rows, scores_2):
                for idx, label in enumerate(model_2_labels):
                    if scores[idx] > thresholds_model_2[label]:
                        positive_ids[label].append(image_ids[row])
                if scores[-1] > thresholds_model_2['minority']:
                    stage_3_rows.append(row)
            if not stage_3_rows:
                continue

            # Stage 3: only the images routed on by the 'minority' output
            scores_3 = model_3(images[stage_3_rows]).cpu().numpy()

            for row, scores in zip(stage_3_rows, scores_3):
                for idx, label in enumerate(model_3_labels):
                    if scores[idx] > thresholds_model_3[label]:
                        positive_ids[label].append(image_ids[row])

    # Single write per label
    for label, ids in positive_ids.items():
        if ids:
            target_df.loc[target_df['id'].isin(ids), label] = 1


### Load models

In [22]:
# Paths to the saved model files
binary_model_path = 'best_model_a_v3.pth'
majority_model_path = 'best_model_b_last.pth'
minority_model_path = 'best_mode_c_v2.pth'

In [24]:
def binary_model(weights='IMAGENET1K_V2'):
    """Build a ResNet-152 whose head outputs a single sigmoid probability.

    Args:
        weights: forwarded to `models.resnet152`. The default keeps the
            original behaviour (ImageNet weights). Pass `None` to skip loading
            them when a full checkpoint is loaded into the model afterwards.

    Returns:
        The ResNet-152 with its `fc` layer replaced by the head below. The
        layer order must stay as is so saved checkpoints keep matching.
    """
    model = models.resnet152(weights=weights)
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(512, 256),
        nn.BatchNorm1d(256),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(256, 1),
        nn.Sigmoid()
    )
    return model

In [25]:
# Majority model
def majority_model(weights='IMAGENET1K_V2'):
    """Build a ResNet-152 whose head outputs six sigmoid probabilities.

    Args:
        weights: forwarded to `models.resnet152`. The default keeps the
            original behaviour (ImageNet weights). Pass `None` to skip loading
            them when a full checkpoint is loaded into the model afterwards.

    Returns:
        The ResNet-152 with its `fc` layer replaced by the head below. The
        layer order must stay as is so saved checkpoints keep matching.
    """
    model = models.resnet152(weights=weights)
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, 256),
        nn.BatchNorm1d(256),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(256, 6),
        nn.Sigmoid()
    )

    return model


In [26]:
# Minority model
def minority_model(weights='IMAGENET1K_V2'):
    """Build a ResNet-101 whose head outputs fourteen sigmoid probabilities.

    Args:
        weights: forwarded to `models.resnet101`. The default keeps the
            original behaviour (ImageNet weights). Pass `None` to skip loading
            them when a full checkpoint is loaded into the model afterwards.

    Returns:
        The ResNet-101 with its `fc` layer replaced by the head below. The
        layer order must stay as is so saved checkpoints keep matching.
    """
    model = models.resnet101(weights=weights)
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, 256),
        nn.BatchNorm1d(256),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(256, 14),
        nn.Sigmoid()
    )
    return model
    

In [27]:
# Initialize models
# NOTE: the last two lines rebind `majority_model` and `minority_model` from the
# factory functions to the model instances they return. Re-running this cell
# without first re-running the cells that define the factories will therefore
# call a model instance instead of a factory.
model_binary = binary_model().to(device)
majority_model = majority_model().to(device)
minority_model = minority_model().to(device)

In [28]:
# Load weights and set to evaluation mode
model_binary.load_state_dict(torch.load(binary_model_path, weights_only=True, map_location=device))
majority_model.load_state_dict(torch.load(majority_model_path, weights_only=True, map_location=device))
minority_model.load_state_dict(torch.load(minority_model_path, weights_only=True, map_location=device))

<All keys matched successfully>

In [29]:
# Set models to evaluation mode
model_binary.eval()
majority_model.eval()
minority_model.eval()
print("Models are ready to be used!")

Models are ready to be used!


## Test all local models together

**NOTE:** The thresholds below were derived from the training data; they were not tuned on the validation or test sets.
They can be viewed in the local models a, b and c.

In [30]:
thresholds_model_2 = {
    'Atelectasis': 0.5067, 'Effusion': 0.6838, 'Infiltration': 0.5010,
    'Mass': 0.7233, 'Nodule': 0.6384, 'minority': 0.5005
}


In [31]:
thresholds_model_3 = {
    'Cardiomegaly': 0.5613,
    'Consolidation': 0.6032,
    'Edema': 0.5220,
    'Emphysema': 0.5180,
    'Fibrosis': 0.5191,
    'Hernia': 0.6901,
    'Pleural Thickening': 0.5082,
    'Pneumonia': 0.5070,
    'Pneumothorax': 0.5101,
    'Pneumoperitoneum': 0.7197,
    'Pneumomediastinum': 0.5029,
    'Subcutaneous Emphysema': 0.5423,
    'Tortuous Aorta': 0.5044,
    'Calcification of the Aorta': 0.5135,
}

In [32]:
test_labels = [
    'Atelectasis', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'minority',
    'Cardiomegaly', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis', 
    'Hernia', 'Pleural Thickening', 'Pneumonia', 'Pneumothorax', 
    'Pneumoperitoneum', 'Pneumomediastinum', 'Subcutaneous Emphysema', 
    'Tortuous Aorta', 'Calcification of the Aorta', 'No Finding'
]

In [33]:
debug_test_pipeline(model_binary, majority_model, minority_model, dataloader, device)

Testing:   0%|          | 0/659 [00:00<?, ?it/s]

Image id: 00000013_000.png
label
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0.], device='cuda:0')


Testing:   0%|          | 0/659 [00:01<?, ?it/s]

Model Output (before thresholding): tensor([[0.5758]], device='cuda:0')
Prediction (after thresholding): False

Model 2 Output (before thresholding): tensor([[2.2061e-02, 8.6422e-07, 3.8515e-03, 7.1298e-05, 4.0396e-03, 9.9817e-01]],
       device='cuda:0')
Model 2 Prediction (after thresholding): [False False False False False  True]

pred_2 for minority : True 
Model 3 Output (before thresholding): tensor([[3.2991e-03, 1.0315e-01, 4.9547e-04, 1.4180e-05, 9.9966e-01, 3.9839e-04,
         1.8836e-03, 4.7042e-05, 3.3317e-06, 5.0336e-05, 4.4033e-06, 1.1076e-05,
         6.7771e-05, 3.1960e-04]], device='cuda:0')
Model 3 Prediction (after thresholding): [False False False False  True False False False False False False False
 False False]

--------------------------------------------------
Image id: 00000013_001.png
label
tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0.,
        0., 0., 1.], device='cuda:0')
Model Output (before thresholding): tensor([[0.7049]




In [34]:
hierarchical_test_pipeline1(model_binary, majority_model, minority_model, dataloader, device, thresholds_model_2, thresholds_model_3 )

Testing: 100%|██████████| 659/659 [09:34<00:00,  1.15it/s]


In [35]:
df_prediction.columns

Index(['id', 'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema',
       'Effusion', 'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass',
       'Nodule', 'Pleural Thickening', 'Pneumonia', 'Pneumothorax',
       'Pneumoperitoneum', 'Pneumomediastinum', 'Subcutaneous Emphysema',
       'Tortuous Aorta', 'Calcification of the Aorta', 'No Finding',
       'minority'],
      dtype='object')

In [36]:
df_prediction

Unnamed: 0,id,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,...,Pleural Thickening,Pneumonia,Pneumothorax,Pneumoperitoneum,Pneumomediastinum,Subcutaneous Emphysema,Tortuous Aorta,Calcification of the Aorta,No Finding,minority
0,00000013_000.png,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
1,00000013_001.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,00000013_002.png,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,1,0,0,0,1
3,00000013_003.png,0,0,0,0,1,1,0,0,0,...,1,0,1,0,0,1,0,0,0,1
4,00000013_004.png,0,0,0,0,1,1,0,0,1,...,0,0,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21076,00030800_000.png,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
21077,00030802_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21078,00030803_000.png,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
21079,00030804_000.png,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


## 1. Number of rows equal for all columns:


In [37]:
# Drop the two helper columns ('finding', 'finding_encoded') so the frame has
# the same columns as df_prediction. Dropping by name instead of "the last two
# columns" fails loudly if the column layout ever changes.
df_test_valid_trimmed = df_test_valid.drop(columns=['finding', 'finding_encoded'])
df_test_valid_trimmed

Unnamed: 0,id,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,...,Pleural Thickening,Pneumonia,Pneumothorax,Pneumoperitoneum,Pneumomediastinum,Subcutaneous Emphysema,Tortuous Aorta,Calcification of the Aorta,No Finding,minority
0,00000013_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,00000013_001.png,0,0,0,0,0,1,0,0,0,...,0,0,1,0,0,1,0,0,0,1
2,00000013_002.png,0,0,0,0,0,1,0,0,0,...,0,0,1,0,0,1,0,0,0,1
3,00000013_003.png,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
4,00000013_004.png,0,0,0,0,1,1,0,0,1,...,0,0,1,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21076,00030800_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
21077,00030802_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
21078,00030803_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
21079,00030804_000.png,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [38]:
# Compare equality across all columns
equal_all_columns = (df_prediction == df_test_valid_trimmed).all(axis=1)
num_equal_rows_all_columns = equal_all_columns.sum()

print(f"Number of rows equal for all columns: {num_equal_rows_all_columns}")

Number of rows equal for all columns: 2703


## 2. Number of rows equal for 'No Finding'

In [40]:
model_1_label = ['No Finding']
equal_given_columns = (df_prediction[model_1_label] == df_test_valid_trimmed[model_1_label]).all(axis=1)
num_equal_rows_given_columns = equal_given_columns.sum()

print(f"Number of rows equal for given columns: {num_equal_rows_given_columns}")

Number of rows equal for given columns: 7915


## 3. Number of rows equal for 'majority classes'

In [41]:
model_2_labels = ['Atelectasis', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'minority']
equal_given_columns = (df_prediction[model_2_labels] == df_test_valid_trimmed[model_2_labels]).all(axis=1)
num_equal_rows_given_columns = equal_given_columns.sum()

print(f"Number of rows equal for given columns: {num_equal_rows_given_columns}")

Number of rows equal for given columns: 3035


## 4. Number of rows equal for 'minority classes'

In [42]:
model_3_labels = [
    'Cardiomegaly', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis', 
    'Hernia', 'Pleural Thickening', 'Pneumonia', 'Pneumothorax', 
    'Pneumoperitoneum', 'Pneumomediastinum', 'Subcutaneous Emphysema', 
    'Tortuous Aorta', 'Calcification of the Aorta'
]

equal_given_columns = (df_prediction[model_3_labels] == df_test_valid_trimmed[model_3_labels]).all(axis=1)
num_equal_rows_given_columns = equal_given_columns.sum()

print(f"Number of rows equal for given columns: {num_equal_rows_given_columns}")

Number of rows equal for given columns: 11396


## 5. Classification report

In [43]:
target_labels = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema',
       'Effusion', 'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass',
       'Nodule', 'Pleural Thickening', 'Pneumonia', 'Pneumothorax',
       'Pneumoperitoneum', 'Pneumomediastinum', 'Subcutaneous Emphysema',
       'Tortuous Aorta', 'Calcification of the Aorta', 'No Finding']

In [44]:
# Select the label columns by name so the column order is guaranteed to match
# `target_labels` in the classification report (this leaves out 'id' and 'minority')
preds = df_prediction[target_labels].values.astype(int)
labels = df_test_valid_trimmed[target_labels].values.astype(int)

In [45]:
preds[:5]

array([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]])

In [46]:
labels[:5]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0]])

In [47]:
# Generate classification report
report = classification_report(labels, preds, target_names=target_labels, zero_division=0)
print(report)

                            precision    recall  f1-score   support

               Atelectasis       0.17      0.22      0.19      2700
              Cardiomegaly       0.21      0.28      0.24       868
             Consolidation       0.10      0.13      0.11      1497
                     Edema       0.12      0.14      0.13       751
                  Effusion       0.21      0.13      0.16      3735
                 Emphysema       0.34      0.18      0.23       917
                  Fibrosis       0.09      0.08      0.08       365
                    Hernia       0.35      0.13      0.19        62
              Infiltration       0.21      0.22      0.21      5159
                      Mass       0.14      0.19      0.16      1329
                    Nodule       0.10      0.13      0.11      1305
        Pleural Thickening       0.09      0.09      0.09       902
                 Pneumonia       0.06      0.04      0.05       452
              Pneumothorax       0.29      0.15