In [1]:
!pip install torchxrayvision

Collecting torchxrayvision
  Downloading torchxrayvision-1.2.2-py3-none-any.whl.metadata (18 kB)
Downloading torchxrayvision-1.2.2-py3-none-any.whl (29.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.0/29.0 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchxrayvision
Successfully installed torchxrayvision-1.2.2


# Pre-Process Image

In [2]:
import cv2
import matplotlib.pyplot as plt

I = cv2.imread("/kaggle/input/chexpert/train/patient00007/study1/view1_frontal.jpg")
print(I.shape)

(320, 390, 3)


In [3]:
import torchxrayvision as xrv
import skimage, torch, torchvision, cv2
import numpy as np

# Prepare the image:
img = cv2.imread("/kaggle/input/chexpert/train/patient00024/study1/view1_frontal.jpg") #changed skimage to cv2 as skimage was loading greyscale image
print(img.shape)

img = xrv.datasets.normalize(img, 255) # convert 8-bit image to [-1024, 1024] range
img = img.mean(2)[None, ...] # Make single color channel

transform = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop(),xrv.datasets.XRayResizer(224)])

img = transform(img)
img = torch.from_numpy(img)

(320, 390, 3)
Setting XRayResizer engine to cv2 could increase performance.


# Load Model

18 Labels total , but here only 11 exists. So need to get rid of excess values

In [4]:
model = xrv.models.DenseNet(weights="densenet121-res224-chex")
outputs = model(img[None,...]) # or model.features(img[None,...])

Downloading weights...
If this fails you can run `wget https://github.com/mlmed/torchxrayvision/releases/download/v1/chex-densenet121-d121-tw-lr001-rot45-tr15-sc15-seed0-best.pt -O /root/.torchxrayvision/models_data/chex-densenet121-d121-tw-lr001-rot45-tr15-sc15-seed0-best.pt`
[██████████████████████████████████████████████████]


In [5]:
model.pathologies

['Atelectasis',
 'Consolidation',
 '',
 'Pneumothorax',
 'Edema',
 '',
 '',
 'Effusion',
 'Pneumonia',
 '',
 'Cardiomegaly',
 '',
 '',
 '',
 'Lung Lesion',
 'Fracture',
 'Lung Opacity',
 'Enlarged Cardiomediastinum']

In [6]:
print(outputs)

tensor([[0.7603, 0.5345, 0.5000, 0.1325, 0.8471, 0.5000, 0.5000, 0.6517, 0.5424,
         0.5000, 0.8696, 0.5000, 0.5000, 0.5000, 0.2810, 0.3818, 0.7698, 0.7517]],
       grad_fn=<IndexPutBackward0>)


In [7]:
import torch

# Assuming 'my_tensor' is your PyTorch tensor
array = outputs.detach().numpy()

array

array([[0.76030165, 0.53449607, 0.5       , 0.13253503, 0.84710014,
        0.5       , 0.5       , 0.6516502 , 0.5424422 , 0.5       ,
        0.8695963 , 0.5       , 0.5       , 0.5       , 0.2809821 ,
        0.38183057, 0.76981044, 0.7517107 ]], dtype=float32)

# Print Results

In [8]:
out = dict(zip(model.pathologies,outputs[0].detach().numpy()))
out.pop('')

out

{'Atelectasis': 0.76030165,
 'Consolidation': 0.53449607,
 'Pneumothorax': 0.13253503,
 'Edema': 0.84710014,
 'Effusion': 0.6516502,
 'Pneumonia': 0.5424422,
 'Cardiomegaly': 0.8695963,
 'Lung Lesion': 0.2809821,
 'Fracture': 0.38183057,
 'Lung Opacity': 0.76981044,
 'Enlarged Cardiomediastinum': 0.7517107}

In [9]:
import re

# Regular expression to match a number after a colon
pattern = r':\s*(\d+(\.\d+)?)'

# Extract the second element after the colon for each value in the dictionary
out_arr = [float(re.search(pattern, str(v)).group(1)) if re.search(pattern, str(v)) else float(v) for v in out.values()]

out_arr

[0.7603016495704651,
 0.5344960689544678,
 0.13253502547740936,
 0.8471001386642456,
 0.6516501903533936,
 0.5424422025680542,
 0.8695963025093079,
 0.280982106924057,
 0.3818305730819702,
 0.7698104381561279,
 0.7517107129096985]

In [10]:
out_binary = [(0 if x < 0.5 else 1) for x in out_arr]
out_binary = np.array(out_binary)

out_binary

array([1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1])

In [11]:
import pandas as pd

df = pd.DataFrame(out, index=[0])

# Transpose the DataFrame to have labels as columns and values as rows
df = df.T.reset_index()

# Rename columns
df.columns = ['Label', 'Value']

print(df)

                         Label     Value
0                  Atelectasis  0.760302
1                Consolidation  0.534496
2                 Pneumothorax  0.132535
3                        Edema  0.847100
4                     Effusion  0.651650
5                    Pneumonia  0.542442
6                 Cardiomegaly  0.869596
7                  Lung Lesion  0.280982
8                     Fracture  0.381831
9                 Lung Opacity  0.769810
10  Enlarged Cardiomediastinum  0.751711


# Chexpert Data Extraction

In [12]:
import pandas as pd

In [13]:
test = pd.read_csv('/kaggle/input/chexpert/valid.csv')

test = test.replace('CheXpert-v1.0-small/','/kaggle/input/chexpert/', regex=True)

test = test.fillna(0)
test['No Finding'] = test['No Finding'].replace({-1: 1}, inplace=False)
test['Cardiomegaly'] = test['Cardiomegaly'].replace({-1: 1}, inplace=False)
test['Enlarged Cardiomediastinum'] = test['Enlarged Cardiomediastinum'].replace({-1: 1}, inplace=False)
test['Lung Opacity'] = test['Lung Opacity'].replace({-1: 1}, inplace=False)
test['Lung Lesion'] = test['Lung Lesion'].replace({-1: 1}, inplace=False)
test['Edema'] = test['Edema'].replace({-1: 1}, inplace=False)
test['Pneumonia'] = test['Pneumonia'].replace({-1: 1}, inplace=False)
test['Pneumothorax'] = test['Pneumothorax'].replace({-1: 1}, inplace=False)
test['Atelectasis'] = test['Atelectasis'].replace({-1: 1}, inplace=False)
test['Consolidation'] = test['Consolidation'].replace({-1: 1}, inplace=False)
test['Pleural Effusion'] = test['Pleural Effusion'].replace({-1: 1}, inplace=False)
test['Pleural Other'] = test['Pleural Other'].replace({-1: 1}, inplace=False)
test['Fracture'] = test['Fracture'].replace({-1: 1}, inplace=False)
test['Support Devices'] = test['Support Devices'].replace({-1: 1}, inplace=False)

test

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,/kaggle/input/chexpert/valid/patient64541/stud...,Male,73,Frontal,AP,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,/kaggle/input/chexpert/valid/patient64542/stud...,Male,70,Frontal,PA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,/kaggle/input/chexpert/valid/patient64542/stud...,Male,70,Lateral,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,/kaggle/input/chexpert/valid/patient64543/stud...,Male,85,Frontal,AP,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,/kaggle/input/chexpert/valid/patient64544/stud...,Female,42,Frontal,AP,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,/kaggle/input/chexpert/valid/patient64736/stud...,Female,57,Frontal,AP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
230,/kaggle/input/chexpert/valid/patient64737/stud...,Male,65,Frontal,AP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
231,/kaggle/input/chexpert/valid/patient64738/stud...,Male,71,Frontal,AP,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
232,/kaggle/input/chexpert/valid/patient64739/stud...,Female,45,Frontal,AP,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
test = test.drop(["Sex","Age","Frontal/Lateral","AP/PA",'No Finding','Pleural Other','Support Devices'],axis=1)
test

Unnamed: 0,Path,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Fracture
0,/kaggle/input/chexpert/valid/patient64541/stud...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,/kaggle/input/chexpert/valid/patient64542/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,/kaggle/input/chexpert/valid/patient64542/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,/kaggle/input/chexpert/valid/patient64543/stud...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,/kaggle/input/chexpert/valid/patient64544/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
229,/kaggle/input/chexpert/valid/patient64736/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
230,/kaggle/input/chexpert/valid/patient64737/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
231,/kaggle/input/chexpert/valid/patient64738/stud...,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
232,/kaggle/input/chexpert/valid/patient64739/stud...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
test.rename(columns={'Pleural Effusion': 'Effusion'}, inplace=True)
test

Unnamed: 0,Path,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Effusion,Fracture
0,/kaggle/input/chexpert/valid/patient64541/stud...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,/kaggle/input/chexpert/valid/patient64542/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,/kaggle/input/chexpert/valid/patient64542/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,/kaggle/input/chexpert/valid/patient64543/stud...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,/kaggle/input/chexpert/valid/patient64544/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
229,/kaggle/input/chexpert/valid/patient64736/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
230,/kaggle/input/chexpert/valid/patient64737/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
231,/kaggle/input/chexpert/valid/patient64738/stud...,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
232,/kaggle/input/chexpert/valid/patient64739/stud...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
test = test[['Path','Atelectasis','Consolidation','Pneumothorax','Edema','Effusion','Pneumonia','Cardiomegaly','Lung Lesion','Fracture','Lung Opacity','Enlarged Cardiomediastinum']]

test

Unnamed: 0,Path,Atelectasis,Consolidation,Pneumothorax,Edema,Effusion,Pneumonia,Cardiomegaly,Lung Lesion,Fracture,Lung Opacity,Enlarged Cardiomediastinum
0,/kaggle/input/chexpert/valid/patient64541/stud...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0
1,/kaggle/input/chexpert/valid/patient64542/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,/kaggle/input/chexpert/valid/patient64542/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,/kaggle/input/chexpert/valid/patient64543/stud...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
4,/kaggle/input/chexpert/valid/patient64544/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
229,/kaggle/input/chexpert/valid/patient64736/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
230,/kaggle/input/chexpert/valid/patient64737/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
231,/kaggle/input/chexpert/valid/patient64738/stud...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0
232,/kaggle/input/chexpert/valid/patient64739/stud...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [17]:
label_columns = test.drop(['Path'], axis=1)
image_path = test['Path']

new_test = pd.DataFrame({
    'ImagePath': image_path,
    'Labels': label_columns.apply(lambda row: list(row[row == 1].index), axis=1)
})

new_test

Unnamed: 0,ImagePath,Labels
0,/kaggle/input/chexpert/valid/patient64541/stud...,"[Cardiomegaly, Lung Opacity, Enlarged Cardiome..."
1,/kaggle/input/chexpert/valid/patient64542/stud...,[]
2,/kaggle/input/chexpert/valid/patient64542/stud...,[]
3,/kaggle/input/chexpert/valid/patient64543/stud...,"[Edema, Lung Opacity, Enlarged Cardiomediastinum]"
4,/kaggle/input/chexpert/valid/patient64544/stud...,[]
...,...,...
229,/kaggle/input/chexpert/valid/patient64736/stud...,[]
230,/kaggle/input/chexpert/valid/patient64737/stud...,[]
231,/kaggle/input/chexpert/valid/patient64738/stud...,"[Edema, Cardiomegaly, Lung Opacity, Enlarged C..."
232,/kaggle/input/chexpert/valid/patient64739/stud...,[Enlarged Cardiomediastinum]


In [18]:
image = []

j = 0 
for i in new_test['ImagePath']:   
    img = cv2.imread(i)
    #img = cv2.resize(img, (224,224))
    image.append(img)
    j = j+1
    
labels = new_test['Labels'].apply(lambda x: x if isinstance(x, list) else [x]).tolist()

In [19]:
#X = np.array(image) / 255.0
Y = [np.array(label) for label in labels]

#print(X.shape)

In [20]:
from sklearn.metrics import confusion_matrix,  multilabel_confusion_matrix
from sklearn.preprocessing import MultiLabelBinarizer

classes = ['Atelectasis','Consolidation','Pneumothorax','Edema','Effusion','Pneumonia','Cardiomegaly','Lung Lesion','Fracture','Lung Opacity','Enlarged Cardiomediastinum']
mlb = MultiLabelBinarizer(classes=classes)
Y = mlb.fit_transform(Y)

In [21]:
print(mlb.classes_)
print(Y[3])

['Atelectasis' 'Consolidation' 'Pneumothorax' 'Edema' 'Effusion'
 'Pneumonia' 'Cardiomegaly' 'Lung Lesion' 'Fracture' 'Lung Opacity'
 'Enlarged Cardiomediastinum']
[0 0 0 1 0 0 0 0 0 1 1]


# Predictions and Metrices

In [22]:
import os
import pandas as pd
import re

classes = ['Atelectasis','Consolidation','Pneumothorax','Edema','Effusion','Pneumonia','Cardiomegaly','Lung Lesion','Fracture','Lung Opacity','Enlarged Cardiomediastinum']

# Regular expression to match a number after a colon
pattern = r':\s*(\d+(\.\d+)?)'

transform = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop(),xrv.datasets.XRayResizer(224)])

# List to store predictions and image names
predict = pd.DataFrame()

# Iterate over each image
for image_name in image: 

    img = transform(image_name)
    img = torch.from_numpy(img)
    
    # Obtain predictions for the image
    outputs = model(img[None,...])  # Assuming you have image_tensor for each image
    out = dict(zip(model.pathologies,outputs[0].detach().numpy()))
    out.pop('')
    outputs_array = [float(re.search(pattern, str(v)).group(1)) if re.search(pattern, str(v)) else float(v) for v in out.values()]
    outputs_array = np.array(outputs_array)
    #print(outputs_array.shape)
    
    # Convert the outputs_array to a DataFrame with a single row
    predict_row = pd.DataFrame([outputs_array], columns=classes)
    
    # Append the row to the DataFrame
    predict = pd.concat([predict, predict_row], ignore_index=True)
    
    

Setting XRayResizer engine to cv2 could increase performance.


# Prediction Table

In [23]:
print(predict)

     Atelectasis  Consolidation  Pneumothorax     Edema  Effusion  Pneumonia  \
0       0.856896       0.870527      0.545771  0.840264  0.919895   0.687017   
1       0.855081       0.870485      0.543774  0.843121  0.919826   0.683973   
2       0.835345       0.829712      0.564513  0.822916  0.882423   0.667245   
3       0.854888       0.870727      0.543470  0.843490  0.920004   0.683627   
4       0.867695       0.899080      0.528954  0.859729  0.943542   0.694640   
..           ...            ...           ...       ...       ...        ...   
229     0.868523       0.900997      0.527957  0.860882  0.945079   0.695115   
230     0.851707       0.860052      0.550915  0.834346  0.910902   0.682093   
231     0.839827       0.837257      0.562052  0.824666  0.889822   0.671413   
232     0.841898       0.840782      0.560600  0.825737  0.893257   0.673330   
233     0.859922       0.882855      0.537718  0.850455  0.930211   0.688830   

     Cardiomegaly  Lung Lesion  Fractur

# Binarized Prediction Table

In [24]:
predict_binary = predict.applymap(lambda x: 1 if x > 0.5 else 0)

# Print the resulting DataFrame
print(predict_binary)

     Atelectasis  Consolidation  Pneumothorax  Edema  Effusion  Pneumonia  \
0              1              1             1      1         1          1   
1              1              1             1      1         1          1   
2              1              1             1      1         1          1   
3              1              1             1      1         1          1   
4              1              1             1      1         1          1   
..           ...            ...           ...    ...       ...        ...   
229            1              1             1      1         1          1   
230            1              1             1      1         1          1   
231            1              1             1      1         1          1   
232            1              1             1      1         1          1   
233            1              1             1      1         1          1   

     Cardiomegaly  Lung Lesion  Fracture  Lung Opacity  \
0               1

# Compare labels with actual ones

In [25]:
label_columns = test.drop(['Path'], axis=1)
image_path = test['Path']
pred_columns = predict_binary[:]

predict_compare = pd.DataFrame({
    'ImagePath': image_path,
    'Labels': label_columns.apply(lambda row: list(row[row == 1].index), axis=1),
    'Predicted Labels' : pred_columns.apply(lambda row: list(row[row == 1].index), axis=1)
})

predict_compare

Unnamed: 0,ImagePath,Labels,Predicted Labels
0,/kaggle/input/chexpert/valid/patient64541/stud...,"[Cardiomegaly, Lung Opacity, Enlarged Cardiome...","[Atelectasis, Consolidation, Pneumothorax, Ede..."
1,/kaggle/input/chexpert/valid/patient64542/stud...,[],"[Atelectasis, Consolidation, Pneumothorax, Ede..."
2,/kaggle/input/chexpert/valid/patient64542/stud...,[],"[Atelectasis, Consolidation, Pneumothorax, Ede..."
3,/kaggle/input/chexpert/valid/patient64543/stud...,"[Edema, Lung Opacity, Enlarged Cardiomediastinum]","[Atelectasis, Consolidation, Pneumothorax, Ede..."
4,/kaggle/input/chexpert/valid/patient64544/stud...,[],"[Atelectasis, Consolidation, Pneumothorax, Ede..."
...,...,...,...
229,/kaggle/input/chexpert/valid/patient64736/stud...,[],"[Atelectasis, Consolidation, Pneumothorax, Ede..."
230,/kaggle/input/chexpert/valid/patient64737/stud...,[],"[Atelectasis, Consolidation, Pneumothorax, Ede..."
231,/kaggle/input/chexpert/valid/patient64738/stud...,"[Edema, Cardiomegaly, Lung Opacity, Enlarged C...","[Atelectasis, Consolidation, Pneumothorax, Ede..."
232,/kaggle/input/chexpert/valid/patient64739/stud...,[Enlarged Cardiomediastinum],"[Atelectasis, Consolidation, Pneumothorax, Ede..."


# Label-By-Label Model Evaluation

In [26]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

In [27]:
# Create a dictionary to store the metrics for each label
metrics = {'Label': [], 'Accuracy': [], 'F1 Score': [], 'Recall': [], 'Precision': []}

In [28]:
# Loop through each label
for label in predict_binary.columns:
    # Calculate metrics for the label
    accuracy = accuracy_score(test[label], predict_binary[label])
    f1 = f1_score(test[label], predict_binary[label])
    recall = recall_score(test[label], predict_binary[label])
    precision = precision_score(test[label], predict_binary[label])
    
    # Append the metrics to the dictionary
    metrics['Label'].append(label)
    metrics['Accuracy'].append(accuracy)
    metrics['F1 Score'].append(f1)
    metrics['Recall'].append(recall)
    metrics['Precision'].append(precision)

In [29]:
# Convert the dictionary to a DataFrame
metrics_table = pd.DataFrame(metrics)

print(metrics_table)

                         Label  Accuracy  F1 Score  Recall  Precision
0                  Atelectasis  0.341880  0.509554     1.0   0.341880
1                Consolidation  0.141026  0.247191     1.0   0.141026
2                 Pneumothorax  0.034188  0.066116     1.0   0.034188
3                        Edema  0.192308  0.322581     1.0   0.192308
4                     Effusion  0.286325  0.445183     1.0   0.286325
5                    Pneumonia  0.034188  0.066116     1.0   0.034188
6                 Cardiomegaly  0.290598  0.450331     1.0   0.290598
7                  Lung Lesion  0.004274  0.008511     1.0   0.004274
8                     Fracture  0.000000  0.000000     0.0   0.000000
9                 Lung Opacity  0.538462  0.700000     1.0   0.538462
10  Enlarged Cardiomediastinum  0.465812  0.635569     1.0   0.465812


# Looping to find the Best Threshold for each label 

# Using F1-score

In [30]:
thresholds = np.arange(0.3, 1, 0.02)

results = pd.DataFrame(columns=['Label', 'Best Threshold', 'Best F1 Score'])

for label in predict.columns:
    # Initialize variables to store the best threshold and F1 score
    best_threshold = 0
    best_f1_score = 0
    
    # Iterate through a range of thresholds from 0 to 1 in steps of 0.01
    for threshold in thresholds:
        # Convert probabilities to binary predictions using the threshold
        y_pred = (predict[label] >= threshold).astype(int)
        
        # Calculate the F1 score for the current threshold
        f1 = f1_score(test[label], y_pred)
        
        # Update the best threshold and F1 score if the current F1 score is higher
        if f1 > best_f1_score:
            best_f1_score = f1
            best_threshold = threshold
    
    # Append the results to the DataFrame
    results.loc[len(results)] = [label, best_threshold, best_f1_score]
    

In [31]:
print(results)

                         Label  Best Threshold  Best F1 Score
0                  Atelectasis            0.84       0.514851
1                Consolidation            0.86       0.251256
2                 Pneumothorax            0.30       0.066116
3                        Edema            0.30       0.322581
4                     Effusion            0.90       0.465455
5                    Pneumonia            0.74       0.200000
6                 Cardiomegaly            0.82       0.459854
7                  Lung Lesion            0.74       0.009615
8                     Fracture            0.00       0.000000
9                 Lung Opacity            0.90       0.708215
10  Enlarged Cardiomediastinum            0.30       0.635569


# Using Precision

In [32]:
thresholds = np.arange(0.3, 1, 0.02)

results_2 = pd.DataFrame(columns=['Label', 'Best Threshold_2', 'Best Precision'])

for label in predict.columns:
    # Initialize variables to store the best threshold and F1 score
    best_threshold_2 = 0
    best_precision = 0
    
    # Iterate through a range of thresholds from 0 to 1 in steps of 0.01
    for threshold in thresholds:
        # Convert probabilities to binary predictions using the threshold
        y_pred = (predict[label] >= threshold).astype(int)
        
        # Calculate the F1 score for the current threshold
        p = precision_score(test[label], y_pred)
        
        # Update the best threshold and F1 score if the current F1 score is higher
        if p > best_precision:
            best_precision = p
            best_threshold_2 = threshold
    
    # Append the results to the DataFrame
    results_2.loc[len(results_2)] = [label, best_threshold_2, best_precision]

In [33]:
print(results_2)

                         Label  Best Threshold_2  Best Precision
0                  Atelectasis              0.90        0.500000
1                Consolidation              0.94        0.666667
2                 Pneumothorax              0.30        0.034188
3                        Edema              0.30        0.192308
4                     Effusion              0.98        0.500000
5                    Pneumonia              0.74        0.500000
6                 Cardiomegaly              0.88        0.333333
7                  Lung Lesion              0.74        0.004831
8                     Fracture              0.00        0.000000
9                 Lung Opacity              0.90        0.550661
10  Enlarged Cardiomediastinum              0.76        0.467532


# Using Accuracy

In [34]:
thresholds = np.arange(0.3, 1, 0.02)

results_3 = pd.DataFrame(columns=['Label', 'Best Threshold', 'Best Accuracy'])

for label in predict.columns:
    # Initialize variables to store the best threshold and F1 score
    best_threshold = 0
    best_accuracy = 0
    
    # Iterate through a range of thresholds from 0 to 1 in steps of 0.01
    for threshold in thresholds:
        # Convert probabilities to binary predictions using the threshold
        y_pred = (predict[label] >= threshold).astype(int)
        
        # Calculate the F1 score for the current threshold
        acc = accuracy_score(test[label], y_pred)
        
        # Update the best threshold and F1 score if the current F1 score is higher
        if acc > best_accuracy:
            best_accuracy = acc
            best_threshold = threshold
    
    # Append the results to the DataFrame
    results_3.loc[len(results_3)] = [label, best_threshold, best_accuracy]

In [35]:
print(results_3)

                         Label  Best Threshold  Best Accuracy
0                  Atelectasis            0.90       0.658120
1                Consolidation            0.94       0.863248
2                 Pneumothorax            0.58       0.965812
3                        Edema            0.90       0.807692
4                     Effusion            0.98       0.713675
5                    Pneumonia            0.74       0.965812
6                 Cardiomegaly            0.92       0.709402
7                  Lung Lesion            0.86       0.995726
8                     Fracture            0.72       1.000000
9                 Lung Opacity            0.90       0.559829
10  Enlarged Cardiomediastinum            0.84       0.534188
