In [1]:
import pandas as pd 
import os
from sklearn.metrics import roc_curve, auc, precision_score, recall_score,roc_auc_score
import numpy as np


In [2]:
def calculate_AUC(df , gt_tag, pred_tag ):
    y_true = df[gt_tag]
    y_pred_prob = df[pred_tag]

    # Calculate the AUC
    auc = roc_auc_score(y_true, y_pred_prob)
    return auc
    
def get_AUC_score(df , gt_tag, pred_tag ):
    df = df[(df[gt_tag] != -100) & (df[pred_tag] != -100)]
    df = df.dropna(subset=[gt_tag, pred_tag])    
    auc = calculate_AUC(df , gt_tag, pred_tag )
    
    if 'fake' in df.columns: 
        df_fake = df[df['fake'] == 1] 
        df_real= df[df['fake'] == 0] 
        fake_auc =  calculate_AUC(df_fake , gt_tag, pred_tag )
        real_auc =  calculate_AUC(df_real , gt_tag, pred_tag )
        return auc, fake_auc, real_auc
    
    return auc

In [3]:
test_df_ = pd.read_csv("/fast_data_e2e_1/cxr/qxr_ln_data/LN_test/combined_test_csv_w_v4_preds_30-7-24.csv")
jsrt_nlst =  test_df_[test_df_['src'] == 'nlst_jsrt']
jsrt_nlst_test =  pd.read_csv("/fast_data_e2e11/qxr_ln_trainings/testing/nlst_jsrt.csv")
swin_base_ =  pd.read_csv("/fast_data_e2e11/qxr_ln_trainings/checkpoints/results/base_training/14_aug_swin_tiny_only_real_data/model_653-epoch=64-val_loss=1.452712/model_653-epoch=64-val_loss=1.452712.csv")

In [6]:
len(swin_base_)

533340

In [4]:
# v4_auc = get_AUC_score(jsrt_nlst,  gt_tag= 'nodule' , pred_tag = 'nodule_pred')
new_model_auc = get_AUC_score(swin_base_,  gt_tag= 'nodule_target' , pred_tag = 'nodule')

In [5]:
new_model_auc

0.7466873094651636

In [8]:
swin_base_ = swin_base_.merge(jsrt_nlst[["filename", "subtlety"]], on="filename", how="left")


In [7]:
def get_cutoff_youdens_index(fpr, tpr, thresholds) -> float:
    specificity = 1 - fpr
    youden_index = tpr + specificity - 1
    max_index = np.argmax(youden_index)
    best_threshold = thresholds[max_index]
    best_youden_index = youden_index[max_index]
    return best_youden_index, best_threshold



def get_tag_wise_yodens_threshold(df , tags_to_check = ["nodule"]):
    for tag in tags_to_check:

        print(tag)
        df_tag_results = df[df[f"{tag}_target"]!=-100]
        print(len(df_tag_results))
        y_true = df_tag_results[f"{tag}_target"].values
        y_pred = df_tag_results[tag].values

        # Calculate FPR, TPR, and thresholds
        fpr, tpr, thresholds = roc_curve(y_true, y_pred)

        # Calculate AUROC
        auroc = auc(fpr, tpr)

        # Calculate Youden's Index
        youden_index, youden_index_threshold = get_cutoff_youdens_index(fpr, tpr, thresholds)

        print(f"AUROC: {auroc}")
        print(f"Youden's Index threshold: {youden_index_threshold}", f"Youden's Index : {youden_index} ")

        specificity = 1 - fpr
        for th in np.arange(0, 1.01, 0.01):
            # Find the closest threshold index
            closest_index = np.argmin(np.abs(thresholds - th))
            sens = tpr[closest_index]
            spec = specificity[closest_index]
            youden_index = sens + spec - 1
            y_pred_binary = (y_pred >= th).astype(int)
            precision = precision_score(y_true, y_pred_binary)
            npv = np.sum((y_true == 0) & (y_pred_binary == 0)) / np.sum(y_pred_binary == 0)

            print(f"Threshold: {th:.2f}, Sensitivity: {sens:.2f}, Specificity: {spec:.2f}, "
                f"Youden Index: {youden_index:.2f}, Precision: {precision:.2f}, NPV: {npv:.2f}")

In [8]:
get_tag_wise_yodens_threshold(swin_base_)

nodule
512384
AUROC: 0.7466873094651636
Youden's Index threshold: 0.32619905 Youden's Index : 0.3611249188505692 
Threshold: 0.00, Sensitivity: 1.00, Specificity: 0.00, Youden Index: 0.00, Precision: 0.03, NPV: nan


  npv = np.sum((y_true == 0) & (y_pred_binary == 0)) / np.sum(y_pred_binary == 0)


Threshold: 0.01, Sensitivity: 0.94, Specificity: 0.25, Youden Index: 0.19, Precision: 0.04, NPV: 0.99
Threshold: 0.02, Sensitivity: 0.90, Specificity: 0.34, Youden Index: 0.25, Precision: 0.04, NPV: 0.99
Threshold: 0.03, Sensitivity: 0.87, Specificity: 0.40, Youden Index: 0.27, Precision: 0.04, NPV: 0.99
Threshold: 0.04, Sensitivity: 0.85, Specificity: 0.44, Youden Index: 0.29, Precision: 0.04, NPV: 0.99
Threshold: 0.05, Sensitivity: 0.84, Specificity: 0.47, Youden Index: 0.30, Precision: 0.04, NPV: 0.99
Threshold: 0.06, Sensitivity: 0.82, Specificity: 0.49, Youden Index: 0.31, Precision: 0.05, NPV: 0.99
Threshold: 0.07, Sensitivity: 0.81, Specificity: 0.51, Youden Index: 0.32, Precision: 0.05, NPV: 0.99
Threshold: 0.08, Sensitivity: 0.80, Specificity: 0.53, Youden Index: 0.33, Precision: 0.05, NPV: 0.99
Threshold: 0.09, Sensitivity: 0.79, Specificity: 0.54, Youden Index: 0.33, Precision: 0.05, NPV: 0.99
Threshold: 0.10, Sensitivity: 0.78, Specificity: 0.56, Youden Index: 0.34, Precisi

In [11]:
def _get_subtelity_count_fns(df, gt_tag , pred_tag, tag_thresh):
    # Assuming df has columns 'true_label' and 'predicted_score'
    y_true = df[gt_tag]
    y_pred = df[pred_tag]

    # Apply the threshold to get predicted labels
    y_pred_label = (y_pred >= tag_thresh).astype(int)

    # Calculate confusion matrix components
    TP = ((y_true == 1) & (y_pred_label == 1)).sum()
    TN = ((y_true == 0) & (y_pred_label == 0)).sum()
    FP = ((y_true == 0) & (y_pred_label == 1)).sum()
    FN = ((y_true == 1) & (y_pred_label == 0)).sum()

    # Calculate sensitivity and specificity
    sensitivity = TP / (TP + FN) if (TP + FN) > 0 else 0
    specificity = TN / (TN + FP) if (TN + FP) > 0 else 0

    # Subsets of DataFrame for FNs and FPs
    df_fns = df[(y_true == 1) & (y_pred_label == 0)]
    df_fps = df[(y_true == 0) & (y_pred_label == 1)]

    return sensitivity, specificity, df_fns, df_fps

In [14]:
tag_thresh = 0.1
sensitivity, specificity, df_fns, df_fps = _get_subtelity_count_fns(swin_base_, 'nodule_target', 'nodule' , tag_thresh)
print(f"Sensitivity: {sensitivity}, Specificity: {specificity}")

Sensitivity: 0.593607305936073, Specificity: 0.7724252491694352


In [15]:
df_fns.subtlety.value_counts()

subtlety
 3      28
 1      23
 2      22
-100    11
 4       5
Name: count, dtype: int64

In [16]:
df_fps.subtlety.value_counts()

subtlety
-100    137
Name: count, dtype: int64

In [22]:
model_pred = pd.read_csv("/fast_data_e2e11/qxr_ln_trainings/checkpoints/results/base_training/7_Aug_swinv2_base_unetplusplus_corrected_dice/model_1214-epoch=92-val_loss=0.931579/model_1214-epoch=92-val_loss=0.931579.csv")
testing_csv = pd.read_csv("/fast_data_e2e11/qxr_ln_trainings/training/training_csvs/testing_data_20240805_173856_.csv")

In [23]:
testing_csv

Unnamed: 0,filename,nodule,nipple_shadow,normal,fake
0,max.dev1.55135465,0,0,1,0
1,ims.1.2.392.200036.9107.307.15120.20190215.185...,0,0,1,0
2,diaphragm_20240801_173624_zJWdY8SN,1,0,0,1
3,krsnaa.1.2.392.200036.9125.9.0.253398181.50341...,0,0,1,0
4,medanta.630a4a58-e52f7f2a-3855ea0d-f4aa4feb-a8...,1,0,0,0
...,...,...,...,...,...
31961,medall.1.3.6.1.4.1.25403.52240747607.2612.2015...,0,0,1,0
31962,pleura_20240802_201705_33ujawQO,1,0,0,1
31963,ca.phase4.unit9.2.ed2ad4fa05706cfafc6a95c678a9...,0,0,1,0
31964,dmims.1.2.840.113564.54.192.168.101.16.2014082...,0,0,1,0


In [25]:
model_pred = model_pred.merge(testing_csv[['filename','fake']], on="filename", how ="left" )

In [32]:
auc, fake_auc , real_auc = get_AUC_score(model_pred,  gt_tag= 'nodule_target' , pred_tag = 'nodule')
print(f"Overall AUC: {auc}, Fake AUC: {fake_auc}, Real AUC: {real_auc}")

Overall AUC: 0.9948711665285848, Fake AUC: 0.9999174638688915, Real AUC: 0.9859044284571565


In [21]:
model_pred.columns

Index(['filename', 'nodule', 'nodule_target', 'nipple_shadow',
       'nipple_shadow_target', 'normal', 'normal_target'],
      dtype='object')

In [None]:
model_pred.columns

Index(['filename', 'nodule', 'nodule_target', 'nipple_shadow',
       'nipple_shadow_target', 'normal', 'normal_target'],
      dtype='object')

In [2]:
import torch 
model = torch.jit.load('/home/users/shreshtha.singh/qxr_lungcancer/nbs/testing/v4_model_analysis/v4_weights/v4_nodule_cuda.ts')

In [3]:
model.keys()

AttributeError: 'RecursiveScriptModule' object has no attribute 'keys'