In [1]:
import argparse
import os, sys
import pandas as pd
sys.path.append(os.path.abspath(os.path.join('..', '..')))

# dependencies for kMT and srmUNet Training
from data.dataloaders import load_segmentation_data, load_image_data, load_radiomics_data
from data.dataset import SRMDataset
from models.segmentation.semi_supervised import kidney_segmentor, srm_segmentor
from models.classification.classifier import cnn_classifier, xgboost_classifier
from models.classification.cnn import PatNET
#from utils import save_figures_and_show
from evaluation.classification.evaluate import bootstrap_ci, get_xgb_predictions

# dependencies for bounding box coordinates
from utils import *
from data.roi import load_inference_data, load_labeled_data, load_bbox 
from data.transforms import get_bbox_transforms

  from .autonotebook import tqdm as notebook_tqdm


ImportError: attempted relative import beyond top-level package

In [2]:
import numpy as np
import ast
import torch
from torch.utils.data import Dataset
import monai.transforms as mt
from scipy.ndimage import label, find_objects
class InferDataset(Dataset):
    """
    Inference dataset that yields one cropped region per patient.

    For every index the image and the predicted mask are loaded and resampled,
    up to two bounding boxes are derived from the connected components of the
    mask, and the first box whose mask crop contains foreground is cropped,
    optionally transformed and resized to a fixed shape.

    Each item is a dict with the keys 'Patient_ID', 'img', 'pred' and 'label'.

    Parameters:
    data: column-indexed table (dict of lists or similar) with the columns
        'root' (patient id), 'img' (image path) and 'pred' (mask path).
    data_rcc: sequence of labels, indexed like the rows of `data`.
    q3_x_len, q3_y_len, q3_z_len: stored on the instance; not read by any
        method of this class.
    transform: optional callable applied separately to the cropped image and
        to the cropped mask.
    augment_neg, augment_pos, aug_pos_rate, aug_neg_rate: stored on the
        instance; not read by any method of this class.
    """

    # Mask voxels strictly above this value count as foreground.
    # load_resized_data pushes the mask through ScaleIntensityRange(-500..500 -> 0..1),
    # which maps a raw 0 to exactly 0.5, so "non-zero" cannot be used as the
    # foreground test on those tensors.
    FOREGROUND_THRESHOLD = 0.5

    def __init__(self, data, data_rcc, q3_x_len=30, q3_y_len=61, q3_z_len=45, transform=None, augment_neg=False, augment_pos=False, aug_pos_rate=None, aug_neg_rate=None):
        self.data = data  # table with patient id, image path and prediction path columns
        self.data_rcc = data_rcc
        self.q3_x_len = q3_x_len
        self.q3_y_len = q3_y_len
        self.q3_z_len = q3_z_len
        self.transform = transform
        self.augment_neg = augment_neg
        self.augment_pos = augment_pos
        self.aug_pos_rate = aug_pos_rate
        self.aug_neg_rate = aug_neg_rate

    def __len__(self):
        """Number of rows, read from the 'root' column, or 'img' when 'root' is absent."""
        column = 'root' if "root" in self.data.keys() else 'img'
        return len(self.data[column])

    def get_box_centers(self, box):
        """
        Return the integer centre of a box.

        Parameters:
        box (tuple): (x_start, y_start, z_start, x_len, y_len, z_len).

        Returns:
        tuple: (x_center, y_center, z_center), each start + len // 2.
        """
        x_start, y_start, z_start, x_len, y_len, z_len = box
        return x_start + x_len // 2, y_start + y_len // 2, z_start + z_len // 2

    def crop(self, resized_img, resized_seg, pred, box):
        """
        Crop image, mask and prediction to the same box.

        Parameters:
        resized_img, resized_seg, pred: channel-first volumes to crop.
        box (tuple): (x_start, y_start, z_start, x_len, y_len, z_len).

        Returns:
        tuple: (crop_img, crop_seg, crop_pred, crop_depth) where crop_depth
        is crop_pred.shape[3].
        """
        _, _, _, x_len, y_len, z_len = box
        x_center, y_center, z_center = self.get_box_centers(box)

        # bounding_box() names the first spatial axis "z", so the ROI is
        # handed to SpatialCrop in (z, y, x) order to address the same axes.
        cropper = mt.SpatialCrop(
            roi_center=(z_center, y_center, x_center),
            roi_size=(z_len, y_len, x_len),
        )

        crop_pred = cropper(pred)
        crop_img = cropper(resized_img)
        crop_seg = cropper(resized_seg)
        crop_depth = crop_pred.shape[3]

        return crop_img, crop_seg, crop_pred, crop_depth

    def _loading_pipeline(self, mode):
        """Build the load / reorient / resample / intensity-window pipeline for one interpolation mode."""
        return mt.Compose([
                mt.LoadImage(image_only=True, ensure_channel_first=True),
                mt.EnsureType(),
                mt.Orientation(axcodes='LPS'),
                mt.Spacing(pixdim=(2.0, 2.0, 5.0), mode=mode),
                mt.ScaleIntensityRange(a_min=-500.0, a_max=500.0, b_min=0.0, b_max=1.0, clip=True),
                mt.ToTensor()
                ])

    def load_resized_data(self, data, i):
        """
        Load the image and the mask of row `i` and bring the image onto the mask grid.

        Parameters:
        data: table with 'img' and 'pred' path columns.
        i (int): row index.

        Returns:
        tuple: (resized_img, resized_seg). The mask is read from data['pred'],
        so resized_seg is the prediction, not a ground-truth segmentation.
        """
        # Load image and prediction paths from the table
        img_path = data['img'][i]
        seg_path = data['pred'][i]

        # NOTE(review): the image is resampled with "nearest" and the mask with
        # "bilinear", which looks swapped, and the mask also goes through the
        # CT intensity window. Both are kept as they were; confirm against the
        # preprocessing used at training time before changing them.
        img_proc = self._loading_pipeline("nearest")
        seg_proc = self._loading_pipeline("bilinear")

        seg = seg_proc(seg_path)
        img = img_proc(img_path)

        # sitk : D H W    monai : W H D
        x = seg.shape[1]
        y = seg.shape[2]
        z = seg.shape[3]

        # Resize image and mask to the spatial shape of the mask
        resize_transform = mt.Resize(spatial_size=(x, y, z))
        resized_img = resize_transform(img)
        resized_seg = resize_transform(seg)

        return resized_img, resized_seg

    def resize_before_train(self, img, seg):
        """Resize an image/mask pair to the fixed spatial size (64, 64, 32)."""
        resize_transform = mt.Resize(spatial_size=(64, 64, 32))
        resized_img = resize_transform(img)
        resized_seg = resize_transform(seg)

        return resized_img, resized_seg

    def checker(self, crop_seg):
        """
        Check whether a cropped mask contains foreground.

        Parameters:
        crop_seg (array-like): The cropped segmentation mask.

        Returns:
        int: 1 if any value is strictly greater than FOREGROUND_THRESHOLD
        (0.5), 0 otherwise.
        """
        return 1 if np.any(crop_seg > self.FOREGROUND_THRESHOLD) else 0

    @staticmethod
    def _largest_boxes(boxes, keep=2):
        """Return the `keep` boxes with the largest x_len * y_len * z_len, largest first, as 6-tuples."""
        # sorted() is stable, so boxes of equal volume keep their input order.
        ranked = sorted(boxes, key=lambda b: b[3] * b[4] * b[5], reverse=True)
        return [tuple(b[:6]) for b in ranked[:keep]]

    def retrieve_kidney_boxes(self, boxes):
        """
        Keep the two largest boxes by volume.

        Parameters:
        boxes (iterable): boxes as (x_start, y_start, z_start, x_len, y_len, z_len).

        Returns:
        list: at most two boxes, largest first.
        """
        return self._largest_boxes(boxes)

    # Function to calculate bounding boxes from binary masks
    def bounding_box(self, mask):
        """
        Compute bounding boxes of the two largest connected foreground components.

        Parameters:
        mask (torch.Tensor): mask volume; size-1 dimensions are squeezed away
            and three axes are expected to remain.

        Returns:
        list: at most two boxes (x_start, y_start, z_start, x_len, y_len, z_len),
        largest box volume first. Empty when the mask has no foreground.
        """
        mask_np = mask.squeeze().cpu().numpy()  # Convert tensor to numpy array

        # Binarise with the same rule as checker(): scipy's label() treats every
        # non-zero value as a feature, which would merge the whole volume into
        # one component when the background sits at 0.5.
        foreground = mask_np > self.FOREGROUND_THRESHOLD
        labeled, _ = label(foreground)  # Label connected components

        boxes = []
        for obj in find_objects(labeled):  # one tuple of slices per component
            # First array axis is called z here, last one x.
            z_slice, y_slice, x_slice = obj[0], obj[1], obj[2]
            boxes.append((
                x_slice.start, y_slice.start, z_slice.start,
                x_slice.stop - x_slice.start,
                y_slice.stop - y_slice.start,
                z_slice.stop - z_slice.start,
            ))

        return self._largest_boxes(boxes)

    def __getitem__(self, idx):
        """
        Build the sample for row `idx`.

        Returns:
        dict: 'Patient_ID', 'img' (resized crop of the image), 'pred' (resized
        crop of the mask) and 'label' (torch.long tensor).

        Raises:
        ValueError: if the mask of this patient has no foreground component.
        """
        patient_id = self.data['root'][idx]
        target = self.data_rcc[idx]

        # load_resized_data returns exactly two volumes; its mask is read from
        # the 'pred' column, so it doubles as the prediction used for boxes.
        resized_img, resized_seg = self.load_resized_data(self.data, idx)
        pred = resized_seg

        kidney_boxes = self.bounding_box(pred)
        if not kidney_boxes:
            raise ValueError(
                f"No foreground component found in the prediction of patient {patient_id!r}"
            )

        # Boxes are ordered by volume, not by anatomical side. Use the first
        # one whose mask crop contains foreground; otherwise fall back to the
        # last candidate. With two boxes this matches the previous
        # "first box if it has foreground, else second box" rule, and it also
        # works when only one component exists.
        for box in kidney_boxes:
            crop_img, crop_seg, _, _ = self.crop(resized_img, resized_seg, pred, box)
            if self.checker(crop_seg) == 1:
                break

        if self.transform:
            # NOTE(review): image and mask are transformed in two separate
            # calls; a random transform would not be applied identically to
            # both. Confirm the transform is deterministic.
            crop_img = self.transform(crop_img)
            crop_seg = self.transform(crop_seg)

        img, seg = self.resize_before_train(crop_img, crop_seg)

        batch = {
            'Patient_ID': patient_id,
            'img': img,
            'pred': seg,
            'label': torch.tensor(target, dtype=torch.long)  # Target is a single label
        }

        return batch
        

In [2]:
# Imported explicitly so this cell does not depend on `from utils import *`
# happening to re-export DataLoader.
from torch.utils.data import DataLoader

# Single place to change the dataset location used by this cell.
DATASET_DIR = "D:\\srm_detection_pipeline\\application\\dataset"

# Hold-out patients and their labels (one label per row of labels.csv, column 'labels').
test_data = load_inference_data(DATASET_DIR + "\\holdout\\whole")
test_labels = pd.read_csv(DATASET_DIR + "\\labels.csv")['labels'].tolist()
test_dataset = SRMDataset(data=test_data, data_rcc=test_labels)
test_loader = DataLoader(test_dataset, batch_size=1)

# Compute the bounding-box records for the hold-out patients and persist them as CSV.
post_proc_transform, post_pred_transform = get_bbox_transforms()
csv_data = load_bbox(test_data, post_pred_transform)
save_dict_as_csv(csv_data, DATASET_DIR + "\\bbox.csv")

Processing Labeled Patients: 100%|██████████| 1/1 [00:00<00:00, 917.79it/s]
Loading boxes: 100%|██████████| 1/1 [00:00<00:00,  4.36it/s]


'Dictionary saved as D:\\srm_detection_pipeline\\application\\dataset\\bbox.csv.'

In [3]:
# Run save_gifs on the hold-out data with both bbox transforms; results are
# tagged 'labeled' and directed to the dataset folder.
save_gifs(
    test_data,
    post_proc_transform,
    post_pred_transform,
    output_folder="D:\\srm_detection_pipeline\\application\\dataset",
    tag='labeled',
)


KeyboardInterrupt: 

In [3]:
def get_clf_data_dict(data_loader, feature_fn=None):

    """
    Flatten a data loader into per-patient columns and add extracted features.

    Parameters:
    data_loader: iterable of batches; each batch is a mapping with the keys
        'Patient_ID', 'img', 'seg', 'pred' and 'bbox', whose values are
        iterated in parallel.
    feature_fn: callable applied to every image to produce its feature entry.
        Defaults to the notebook-level `extract_features`, which must be
        defined or imported before this function is called.

    Returns:
    dict: lists under the keys 'root', 'img', 'seg', 'pred', 'bbox' and
    'radiomics', all in loader order.

    Raises:
    NameError: if `feature_fn` is omitted and `extract_features` does not exist.
    """
    if feature_fn is None:
        # Resolved up front so a missing extractor fails before any batch is
        # loaded, instead of after the first (slow) batch.
        feature_fn = extract_features

    data_dict = {"root": [], "img": [], "seg": [], "pred": [], "bbox": [], "radiomics": []}

    for data in tqdm(data_loader, desc="Processing Patients ..."):

        root, images, segs, preds, bboxes = data['Patient_ID'], data['img'], data['seg'], data['pred'], data['bbox']
        for patient, img, sg, prd, bx in tqdm(zip(root, images, segs, preds, bboxes)):
            data_dict["root"].append(patient)
            data_dict["img"].append(img)
            data_dict["seg"].append(sg)
            data_dict["pred"].append(prd)
            data_dict["bbox"].append(bx)
            data_dict["radiomics"].append(feature_fn(img))

    return data_dict

radiomics_dict = get_clf_data_dict(test_loader)

Processing Patients ...:   0%|          | 0/1 [00:06<?, ?it/s]


NameError: name 'extract_features' is not defined

In [14]:
# Show which columns the dictionary returned by load_bbox carries.
csv_data.keys()

dict_keys(['root', 'img', 'seg', 'pred', 'bbox', 'radiomics'])

In [4]:
# Show which columns get_clf_data_dict produced.
radiomics_dict.keys()

dict_keys(['root', 'img', 'seg', 'pred', 'bbox', 'radiomics'])

In [None]:

import numpy as np

# Walk the loader once, keeping each batch's images and labels as NumPy arrays.
X_test_list, y_test_list = [], []
for batch in test_loader:
    X_test_list.append(batch["img"].numpy())
    y_test_list.append(batch["label"].numpy())

# Stack the per-batch arrays along the sample axis.
X_test_xgb = np.concatenate(X_test_list, axis=0)
y_test_xgb = np.concatenate(y_test_list, axis=0)

for caption, array in (("X_test_xgb shape:", X_test_xgb), ("y_test_xgb shape:", y_test_xgb)):
    print(caption, array.shape)


NameError: name 'extract_features' is not defined

In [4]:
# Collapse every sample into one feature row: (num_samples, ...) -> (num_samples, num_features).
# Re-running this cell is harmless because a 2-D array keeps its shape.
X_test_xgb = X_test_xgb.reshape(len(X_test_xgb), -1)

In [4]:
import pandas as pd
import xgboost as xgb

# Saved weights of the ccRCC vs non-ccRCC XGBoost classifier (absolute, machine-specific path).
xgb_model_file = "D:\\srm_detection_pipeline\\srm_detection\\models\\weights\\classification\\ccRCC_vs_non_ccRCC\\xgboost\\xgboost_model_attempt_9_more_aug_600.bin"
# Create an empty classifier, then populate it from the weights file.
best_xgb_model = xgb.XGBClassifier()
best_xgb_model.load_model(xgb_model_file)

In [5]:
from evaluation.classification.evaluate import get_xgb_predictions, bootstrap_ci, evaluate_model
from sklearn.model_selection import StratifiedKFold

# Probabilities for the complete test set.
xgb_probs = get_xgb_predictions(best_xgb_model, X_test_xgb)

# The splitter only partitions the test set into 5 stratified folds: the
# already-trained model is scored on each fold and the "train" indices are unused.
skf = StratifiedKFold(n_splits=5)
test_results = []
roc_aucs, sensitivities, specificities = [], [], []

for fold, (train_index, test_index) in enumerate(skf.split(X_test_xgb, y_test_xgb)):
    y_true = y_test_xgb[test_index]
    y_probs = get_xgb_predictions(best_xgb_model, X_test_xgb[test_index])
    y_preds = (y_probs > 0.5).astype(int)

    metrics_report = evaluate_model(y_true, y_preds, y_probs)

    # Record each metric both in its running list and in this fold's row.
    fold_row = {'Fold': fold + 1}
    for metric, history in (
        ('ROC AUC', roc_aucs),
        ('Sensitivity', sensitivities),
        ('Specificity', specificities),
    ):
        history.append(metrics_report[metric])
        fold_row[metric] = metrics_report[metric]
    test_results.append(fold_row)

# One row per fold
df_results = pd.DataFrame(test_results)
print("Per-Fold Results:")
print(df_results)


NameError: name 'X_test_xgb' is not defined

In [None]:


'''xgb_clf = xgb.XGBClassifier()
xgb_clf.load_model(args.model_path)'''
#xgb_probs = get_xgb_predictions(xgb_clf, X_test_xgb)

# Load the grade classifier; the path is relative to the kernel's working directory.
xgb_model_file = f'./models/weights/classification/grade/xgboost/xgboost_model_attempt_8.bin'
best_xgb_model = xgb.XGBClassifier()
best_xgb_model.load_model(xgb_model_file)

xgb_probs = get_xgb_predictions(best_xgb_model, X_test_xgb)

# Score the trained model on 5 stratified partitions of the test set.
skf = StratifiedKFold(n_splits=5)
test_results = []

for fold, (_, test_index) in enumerate(skf.split(X_test_xgb, y_test_xgb)):
    X_true = X_test_xgb[test_index]
    y_true = y_test_xgb[test_index]
    y_probs = get_xgb_predictions(best_xgb_model, X_true)
    y_preds = (y_probs > 0.5).astype(int)

    metrics_report = evaluate_model(y_true, y_preds, y_probs)

    # One record per fold
    test_results.append({
        'Fold': fold + 1,
        'ROC AUC': metrics_report['ROC AUC'],
        'Sensitivity': metrics_report['Sensitivity'],
        'Specificity': metrics_report['Specificity']
    })

# Per-metric series, in fold order, taken from the fold records
roc_aucs = [row['ROC AUC'] for row in test_results]
sensitivities = [row['Sensitivity'] for row in test_results]
specificities = [row['Specificity'] for row in test_results]

df_results = pd.DataFrame(test_results)

# Bootstrap confidence interval (lower, upper) of each metric across folds
roc_auc_ci_L, roc_auc_ci_U = bootstrap_ci(roc_aucs)
sensitivity_ci_L, sensitivity_ci_U = bootstrap_ci(sensitivities)
specificity_ci_L, specificity_ci_U  = bootstrap_ci(specificities)

print("Per-Fold Results:")
print(df_results)

# Mean, standard deviation and interval per metric
mean_std_column = [
    f"{np.mean(roc_aucs):.3f} ± {np.std(roc_aucs):.3f}",
    f"{np.mean(sensitivities):.3f} ± {np.std(sensitivities):.3f}",
    f"{np.mean(specificities):.3f} ± {np.std(specificities):.3f}"
]
ci_column = [
    f"{roc_auc_ci_L:.3f} - {roc_auc_ci_U:.3f}",
    f"{sensitivity_ci_L:.3f} - {sensitivity_ci_U:.3f}",
    f"{specificity_ci_L:.3f} - {specificity_ci_U:.3f}"
]
summary_stats = pd.DataFrame({
    'Metric': ['ROC AUC', 'Sensitivity', 'Specificity'],
    'Mean ± Std': mean_std_column,
    '95% CI': ci_column
})

print("\nSummary Statistics:")
print(summary_stats)


In [None]:
# Disabled snippet kept as a bare string literal, so nothing here is executed.
# NOTE(review): it refers to `args`, which is not defined in the cells shown;
# confirm whether it is still needed.
"""xgb_clf = xgb.XGBClassifier()
xgb_clf.load_model(args.model_path)
xgb_probs = get_xgb_predictions(xgb_clf, X_test_xgb)"""