# Best Thresholds

This notebook derives the best per-label decision thresholds from the validation-set experiment results of [CheXclusion: Fairness gaps in deep chest X-ray classifiers](https://github.dev/LalehSeyyed/CheXclusion/blob/main/MIMIC/Actual_TPR.py). Each label gets its own threshold, chosen for the best performance on that label.

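In the original paper, the authors ran the experiments with several different random seeds and obtained a different result for each run. Here, the per-label thresholds from those runs are averaged.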

In [83]:
import os
import pandas as pd
import pickle


### MIMIC

In [84]:
# Root folder that is searched recursively for the MIMIC Threshold.csv files.
MIMIC_PATH = "./MIMIC/"

In [85]:
def get_threshold_files(path):
    """Collect the path of every ``Threshold.csv`` file found under ``path``.

    The directory tree rooted at ``path`` is walked recursively with
    ``os.walk``; a folder contributes one entry when it directly contains a
    file named exactly ``Threshold.csv``. Every collected path is also
    printed, one per line.

    Args:
        path: Root directory to search.

    Returns:
        list: The matching file paths, in the order ``os.walk`` visits them.
    """
    target_name = "Threshold.csv"
    found = [
        os.path.join(folder, target_name)
        for folder, _subdirs, filenames in os.walk(path)
        if target_name in filenames
    ]

    # Echo every hit so the reader can see which runs are being used.
    if found:
        print("\n".join(found))
    return found

def get_average_threshold(threshold_files):
    """Average the ``bestthr`` column of several threshold CSV files, per label.

    Each file must provide a ``label`` column and a ``bestthr`` column. Rows
    are matched across files by label, not by row position, so a file whose
    rows are ordered differently still contributes to the right labels.

    Args:
        threshold_files: Iterable of paths to CSV files.

    Returns:
        tuple: ``(mean_thresholds, label_name)`` where ``mean_thresholds`` is
        a NumPy array holding the mean ``bestthr`` of each label and
        ``label_name`` is the list of labels in the row order of the first
        file (the array follows the same order).

    Raises:
        ValueError: If ``threshold_files`` is empty, or if a file does not
            contain the same set of labels as the first file and therefore
            cannot be aligned with it.
    """
    threshold_files = list(threshold_files)
    if not threshold_files:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError(
            "threshold_files is empty: no Threshold.csv files to average"
        )

    label_name = None
    total = None
    for file_path in threshold_files:
        df = pd.read_csv(file_path)
        labels = df["label"].to_list()
        values = df["bestthr"].to_numpy(dtype=float)

        if label_name is None:
            # The first file fixes the label order of the result.
            label_name = labels
            total = values.copy()
            continue

        if labels != label_name:
            # Adding positionally would mix up labels; realign by name instead.
            same_labels = (
                len(labels) == len(label_name)
                and len(set(labels)) == len(labels)
                and set(labels) == set(label_name)
            )
            if not same_labels:
                raise ValueError(
                    f"labels in {file_path!r} do not match those in "
                    f"{threshold_files[0]!r}"
                )
            values = pd.Series(values, index=labels).loc[label_name].to_numpy()

        total = total + values

    return total / len(threshold_files), label_name

In [86]:
# Find every Threshold.csv under MIMIC_PATH (the paths are printed as they are found).
threshold_files = get_threshold_files(MIMIC_PATH)
# Average the "bestthr" column across those files; `label` holds the label names.
threshold, label = get_average_threshold(threshold_files)


./MIMIC/results19\Threshold.csv
./MIMIC/results31\Threshold.csv
./MIMIC/results38\Threshold.csv
./MIMIC/results47\Threshold.csv
./MIMIC/results77\Threshold.csv


In [87]:
# Pair each label name with its averaged threshold.
MIMIC_THRESHOLD = {name: value for name, value in zip(label, threshold)}

# Save the dictionary to a file.
with open("MIMIC_THRESHOLD.pkl", "wb") as fh:
    pickle.dump(MIMIC_THRESHOLD, fh)

# Read the dictionary back from the file.
with open("MIMIC_THRESHOLD.pkl", "rb") as fh:
    loaded_dict = pickle.load(fh)

print(loaded_dict)  # shows the reloaded {label: threshold} mapping

MIMIC_THRESHOLD

{'Airspace Opacity': 0.2067894160747527, 'Atelectasis': 0.2571824133396149, 'Cardiomegaly': 0.25030514299869533, 'Consolidation': 0.13011438995599744, 'Edema': 0.2605415195226669, 'Enlarged Cardiomediastinum': 0.08953001797199245, 'Fracture': 0.08186926990747445, 'Lung Lesion': 0.13840098977088924, 'No Finding': 0.36988158226013185, 'Pleural Effusion': 0.37177377939224243, 'Pleural Other': 0.07955108284950249, 'Pneumonia': 0.14167169332504273, 'Pneumothorax': 0.20276318490505213, 'Support Devices': 0.38156365752220156}


{'Airspace Opacity': 0.2067894160747527,
 'Atelectasis': 0.2571824133396149,
 'Cardiomegaly': 0.25030514299869533,
 'Consolidation': 0.13011438995599744,
 'Edema': 0.2605415195226669,
 'Enlarged Cardiomediastinum': 0.08953001797199245,
 'Fracture': 0.08186926990747445,
 'Lung Lesion': 0.13840098977088924,
 'No Finding': 0.36988158226013185,
 'Pleural Effusion': 0.37177377939224243,
 'Pleural Other': 0.07955108284950249,
 'Pneumonia': 0.14167169332504273,
 'Pneumothorax': 0.20276318490505213,
 'Support Devices': 0.38156365752220156}

### NIH

In [88]:
# Root folder that is searched recursively for the NIH Threshold.csv files.
NIH_PATH = "./NIH/"

# get_average_threshold is already defined in the MIMIC section; it is reused
# here rather than re-defined, because a second identical ``def`` would
# silently shadow the first one.
threshold_files = get_threshold_files(NIH_PATH)
threshold, label = get_average_threshold(threshold_files)

./NIH/results3\Threshold.csv
./NIH/results33\Threshold.csv
./NIH/results37\Threshold.csv
./NIH/results60\Threshold.csv
./NIH/results96\Threshold.csv


In [89]:
# Pair each label name with its averaged threshold.
NIH_THRESHOLD = {name: value for name, value in zip(label, threshold)}

# Save the dictionary to a file.
with open("NIH_THRESHOLD.pkl", "wb") as fh:
    pickle.dump(NIH_THRESHOLD, fh)

# Read the dictionary back from the file.
with open("NIH_THRESHOLD.pkl", "rb") as fh:
    loaded_dict = pickle.load(fh)

print(loaded_dict)  # shows the reloaded {label: threshold} mapping

NIH_THRESHOLD

{'Atelectasis': 0.21819314062595363, 'Cardiomegaly': 0.16013783663511272, 'Consolidation': 0.1485404103994369, 'Edema': 0.09559662565588946, 'Effusion': 0.30975477695465087, 'Emphysema': 0.18105267882347104, 'Fibrosis': 0.08295522779226297, 'Hernia': 0.5549078226089478, 'Infiltration': 0.2306660830974579, 'Mass': 0.2510164678096771, 'Nodule': 0.12614848017692562, 'Pleural_Thickening': 0.12851620316505424, 'Pneumonia': 0.06241033226251595, 'Pneumothorax': 0.17654986083507535}


{'Atelectasis': 0.21819314062595363,
 'Cardiomegaly': 0.16013783663511272,
 'Consolidation': 0.1485404103994369,
 'Edema': 0.09559662565588946,
 'Effusion': 0.30975477695465087,
 'Emphysema': 0.18105267882347104,
 'Fibrosis': 0.08295522779226297,
 'Hernia': 0.5549078226089478,
 'Infiltration': 0.2306660830974579,
 'Mass': 0.2510164678096771,
 'Nodule': 0.12614848017692562,
 'Pleural_Thickening': 0.12851620316505424,
 'Pneumonia': 0.06241033226251595,
 'Pneumothorax': 0.17654986083507535}

### CheXpert

In [90]:
# Root folder that is searched recursively for the CheXpert Threshold.csv files.
CXP = "./CheXpert/"

# get_average_threshold is already defined in the MIMIC section; it is reused
# here rather than re-defined, because a second identical ``def`` would
# silently shadow the first one.
threshold_files = get_threshold_files(CXP)
print(threshold_files)
threshold, label = get_average_threshold(threshold_files)

./CheXpert/results32\Threshold.csv
./CheXpert/results40\Threshold.csv
./CheXpert/results56\Threshold.csv
./CheXpert/results60\Threshold.csv
./CheXpert/results90\Threshold.csv
['./CheXpert/results32\\Threshold.csv', './CheXpert/results40\\Threshold.csv', './CheXpert/results56\\Threshold.csv', './CheXpert/results60\\Threshold.csv', './CheXpert/results90\\Threshold.csv']


In [91]:
# Pair each label name with its averaged threshold.
CXP_THRESHOLD = {name: value for name, value in zip(label, threshold)}

# Save the dictionary to a file.
with open("CXP_THRESHOLD.pkl", "wb") as fh:
    pickle.dump(CXP_THRESHOLD, fh)

# Read the dictionary back from the file.
with open("CXP_THRESHOLD.pkl", "rb") as fh:
    loaded_dict = pickle.load(fh)

print(loaded_dict)  # shows the reloaded {label: threshold} mapping
CXP_THRESHOLD

{'Atelectasis': 0.1715822160243988, 'Cardiomegaly': 0.25941015779972076, 'Consolidation': 0.12079081088304515, 'Edema': 0.29250548481941224, 'Enlarged Cardiomediastinum': 0.11519031822681422, 'Fracture': 0.1463069587945938, 'Lung Lesion': 0.1433594509959221, 'Lung Opacity': 0.3219067692756653, 'No Finding': 0.2351817488670349, 'Pleural Effusion': 0.32850325107574463, 'Pleural Other': 0.13745994269847867, 'Pneumonia': 0.08874680846929543, 'Pneumothorax': 0.28035772740840914, 'Support Devices': 0.43483529686927797}


{'Atelectasis': 0.1715822160243988,
 'Cardiomegaly': 0.25941015779972076,
 'Consolidation': 0.12079081088304515,
 'Edema': 0.29250548481941224,
 'Enlarged Cardiomediastinum': 0.11519031822681422,
 'Fracture': 0.1463069587945938,
 'Lung Lesion': 0.1433594509959221,
 'Lung Opacity': 0.3219067692756653,
 'No Finding': 0.2351817488670349,
 'Pleural Effusion': 0.32850325107574463,
 'Pleural Other': 0.13745994269847867,
 'Pneumonia': 0.08874680846929543,
 'Pneumothorax': 0.28035772740840914,
 'Support Devices': 0.43483529686927797}