In [None]:
# Mount Google Drive when running on Google Colab.
# The import is guarded so that "Restart & Run All" also works in a local
# environment, where the google.colab package is not installed.
try:
    from google.colab import drive
except ImportError:
    print("google.colab not available; skipping Google Drive mount")
else:
    drive.mount('/content/drive')

In [None]:
# Repository sync helpers (IPython shell magics).
# The commented-out commands below are one-time setup steps: clone the repository
# (optionally into the Drive workspace), change into it, and stash local changes.
#!git clone https://github.com/Hikarukurosawa123/TUPIL_Kidney.git
#!git clone https://github.com/Hikarukurosawa123/TUPIL_Kidney.git drive/MyDrive/Hikaru_Colab_Workspace/TUPIL_Kidney
#%cd drive/MyDrive/Hikaru_Colab_Workspace/TUPIL_Kidney
#!git stash

# Pull the latest changes from the remote.
# NOTE(review): this runs in the current working directory; presumably the %cd above
# must have been executed first -- confirm.
!git pull origin main     # or "master", depending on the branch name


In [5]:
import os
import math
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import KFold, StratifiedKFold
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models, regularizers
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, confusion_matrix, classification_report, roc_curve, auc, roc_auc_score
from skimage.transform import resize
from collections import defaultdict

from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from numpy import expand_dims
import scipy.ndimage
from sklearn.utils import class_weight

# Environment detection
def is_google_colab():
    """Return True when the `google.colab` package can be imported, else False."""
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    return True

def is_google_drive_mounted():
    """Report whether the Colab Google Drive mount point exists on disk."""
    drive_root = '/content/drive/MyDrive'
    return os.path.exists(drive_root)

# Set paths based on environment.
# Three cases are distinguished: Colab with Drive mounted, Colab without Drive,
# and a local machine. Each case defines the same three constants:
#   IMAGE_FOLDER       - directory containing the input images
#   CSV_FILE           - CSV with the per-patient eGFR values
#   MODEL_WEIGHTS_PATH - .h5 file loaded when transfer learning is enabled
# NOTE(review): the local branch uses absolute paths under /Users/hikaru and will
# only resolve on that machine.
if is_google_colab() and is_google_drive_mounted():
    # Google Colab with Google Drive mounted
    IMAGE_FOLDER = '/content/drive/MyDrive/Hikaru_Colab_Workspace/lanczos_shape_corrected_only_nc_resized_images'
    CSV_FILE = '/content/drive/MyDrive/Hikaru_Colab_Workspace/patient_eGFR_at_pocus_2025_Jul_polynomial_estimation.csv'
    MODEL_WEIGHTS_PATH = '/content/drive/MyDrive/Hikaru_Colab_Workspace/model_weights/RadImageNet-ResNet50_notop.h5'
    print("Running on Google Colab with Google Drive mounted")
elif is_google_colab():
    # Google Colab without Google Drive mounted
    IMAGE_FOLDER = '/content/lanczos_shape_corrected_only_nc_resized_images'
    CSV_FILE = '/content/patient_eGFR_at_pocus_2025_Jul_polynomial_estimation.csv'
    MODEL_WEIGHTS_PATH = '/content/model_weights/RadImageNet-ResNet50_notop.h5'
    print("Running on Google Colab without Google Drive mounted")
else:
    # Local environment
    IMAGE_FOLDER = '/Users/hikaru/Desktop/TUPIL/Code/TUPIL_Kidney/data/lanczos_shape_corrected_only_nc_resized_images'
    CSV_FILE = '/Users/hikaru/Desktop/TUPIL/Code/TUPIL_Kidney/csv/patient_eGFR_at_pocus_2025_Jul_polynomial_estimation.csv'
    MODEL_WEIGHTS_PATH = '/Users/hikaru/Desktop/TUPIL/Code/TUPIL_Kidney/data/model_weights/RadImageNet-ResNet50_notop.h5'
    print("Running locally")

# Echo the resolved paths so the cell output records which files were used.
print(f"IMAGE_FOLDER: {IMAGE_FOLDER}")
print(f"CSV_FILE: {CSV_FILE}")
print(f"MODEL_WEIGHTS_PATH: {MODEL_WEIGHTS_PATH}")

# Global training hyper-parameters and the seed used for splits.
# The trailing notes are the author's experiment log.
# NOTE(review): what the percentages refer to is not shown in this notebook -- confirm.
BATCH_SIZE = 16 # batch = 8 -> 10% batch = 16 --> doesnt work
EPOCHS = 160 # epoch = 40 --> 10% epoch = 50 --> 9%
SEED = 42

Running locally
IMAGE_FOLDER: /Users/hikaru/Desktop/TUPIL/Code/TUPIL_Kidney/data/lanczos_shape_corrected_only_nc_resized_images
CSV_FILE: /Users/hikaru/Desktop/TUPIL/Code/TUPIL_Kidney/csv/patient_eGFR_at_pocus_2025_Jul_polynomial_estimation.csv
MODEL_WEIGHTS_PATH: /Users/hikaru/Desktop/TUPIL/Code/TUPIL_Kidney/data/model_weights/RadImageNet-ResNet50_notop.h5


In [6]:
# Test environment detection and path setup.
# Prints the result of both environment probes, the working directory, and whether
# each configured path exists, so a misconfigured environment is visible before
# any data loading starts.
print("=== Environment Detection Test ===") 
print(f"is_google_colab(): {is_google_colab()}")
print(f"is_google_drive_mounted(): {is_google_drive_mounted()}")
print(f"Current working directory: {os.getcwd()}")
print(f"IMAGE_FOLDER exists: {os.path.exists(IMAGE_FOLDER)}")
print(f"CSV_FILE exists: {os.path.exists(CSV_FILE)}")
print(f"MODEL_WEIGHTS_PATH exists: {os.path.exists(MODEL_WEIGHTS_PATH)}")
print("=== End Test ===")


=== Environment Detection Test ===
is_google_colab(): False
is_google_drive_mounted(): False
Current working directory: /Users/hikaru/Desktop/TUPIL/Code/TUPIL_Kidney
IMAGE_FOLDER exists: True
CSV_FILE exists: True
MODEL_WEIGHTS_PATH exists: True
=== End Test ===


## 1. Data Loading & Preprocessing

In [None]:
class Patient:
    """Plain container bundling everything known about one patient for training.

    Attributes:
        patient_id: identifier of the patient.
        egfr: the value passed in as the patient's target; stored unchanged.
        image_paths: sized collection of image paths belonging to the patient.
    """

    def __init__(self, patient_id, egfr, image_paths):
        self.patient_id, self.egfr, self.image_paths = patient_id, egfr, image_paths

    def __repr__(self):
        # Compact summary: id, target value and how many images are attached.
        return f"Patient({self.patient_id}, eGFR={self.egfr}, # of images={len(self.image_paths)})"

# Creates Patient objects from the image folder and the raw eGFR CSV
def label_img_classification_by_patient(image_folder, csv_file):
    """Group images by patient and attach a binary eGFR label to each patient.

    The CSV must contain the columns 'Patient ID' and 'eGFR (abs/closest)'.
    The patient ID of an image is parsed from the second '_'-separated token of
    its filename.

    Args:
        image_folder: directory whose entries are scanned (in sorted order).
        csv_file: path to the CSV with one eGFR value per patient.

    Returns:
        list of Patient objects, one per patient ID that has at least one image
        and a usable eGFR value. `Patient.egfr` holds the binary label:
        1 if eGFR >= 60, otherwise 0.

    Raises:
        ValueError: if a patient ID that has images occurs more than once in the CSV.

    Files whose name cannot be parsed, patients missing from the CSV, and patients
    whose eGFR value is missing are reported with a printed message and skipped.
    """
    # Build renamed / re-indexed frames instead of mutating in place.
    eGFR_data = pd.read_csv(csv_file).rename(
        columns={'Patient ID': 'patient_id', 'eGFR (abs/closest)': 'eGFR'}
    )
    eGFR_data['patient_id'] = eGFR_data['patient_id'].astype(int)
    eGFR_data = eGFR_data.set_index('patient_id')

    patient_image_map = defaultdict(list)

    for filename in sorted(os.listdir(image_folder)):
        # Only the filename parsing can legitimately fail; keep the handler narrow
        # so unrelated errors are not hidden.
        try:
            patient_id = int(filename.split('_')[1])  # adjust if your filename pattern changes
        except (IndexError, ValueError) as e:
            print(f"Error processing file {filename}: {e}")
            continue

        if patient_id in eGFR_data.index:
            patient_image_map[patient_id].append(os.path.join(image_folder, filename))
        else:
            print(f"Patient ID {patient_id} not found in CSV, skipping...")

    # Build Patient objects
    patient_objects = []
    for patient_id, image_paths in patient_image_map.items():
        egfr = eGFR_data.loc[patient_id, 'eGFR']

        # A duplicated ID makes .loc return a Series; fail with a clear message
        # instead of the "truth value of a Series is ambiguous" error.
        if isinstance(egfr, pd.Series):
            raise ValueError(f"Patient ID {patient_id} appears more than once in {csv_file}")

        # NaN >= 60 is False, so a missing eGFR used to be labelled class 0 silently.
        if pd.isna(egfr):
            print(f"Patient ID {patient_id} has no eGFR value in CSV, skipping...")
            continue

        egfrLabel = 1 if egfr >= 60 else 0
        patient_objects.append(Patient(patient_id, egfrLabel, image_paths))

    return patient_objects

# Load one image file and prepare it for the model
def preprocess_img(img_path):
    """Read a PNG file and return it as a float32 image array.

    The file is decoded with `channels=3`, so the result has three channels even
    when the stored image is grayscale. `convert_image_dtype` converts the decoded
    image to float32, and `img_to_array` turns the tensor into an array so the
    result can be collected in plain Python lists.

    Args:
        img_path: path of the PNG file to load.
    """
    raw_bytes = tf.io.read_file(img_path)
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return tf.keras.utils.img_to_array(as_float)


# WIP: NEW DATA AUGMENT SYSTEM
I really don't like the `ImageDataGenerator`:
- It's deprecated.
- It produced random results.
- It only performs 1 operation (e.g. horizontal flip, vertical flip, or crop).
- This WIP system should instead take each image and create multiple images from it, using every operation.

In [None]:
def augment_images(original_images, labels, random_state):
    """Generate augmented images.

    For every input image four variants are produced, in this order: a horizontal
    flip, two random rotations, and one random square crop resized back to the
    image's shape. Each variant inherits the label of its source image.

    Args:
        original_images: sequence (list or array) of images indexed as
            (height, width, channels).
        labels: sequence of labels, one per image.
        random_state: seed for `np.random.default_rng`, making the augmentation
            reproducible.

    Returns:
        (images, labels) as NumPy arrays: the originals first, followed by the
        four variants of each image.
    """
    # list(...) copies the inputs, so the caller's sequences are never mutated,
    # and (unlike .copy() followed by .append()) also accepts NumPy arrays.
    source_images = list(original_images)
    source_labels = list(labels)
    augmented_images = list(source_images)
    augmented_labels = list(source_labels)

    deterministic_random_number_gen = np.random.default_rng(random_state)

    for image, label in zip(source_images, source_labels):
        # First the horizontally flipped image
        variants = [np.fliplr(image)]

        # Two random rotations. integers(-40, 40) draws from -40..39 (upper bound
        # exclusive). Uncovered corners are filled with the constant 1.
        # NOTE(review): an earlier comment described the padding as black, but for
        # images scaled to [0, 1] a fill value of 1 is white -- confirm the intent.
        for _ in range(2):
            angle = deterministic_random_number_gen.integers(-40, 40)
            variants.append(
                scipy.ndimage.rotate(image, angle, reshape=False, mode='constant', cval=1)
            )

        # Random square crop. Four numbers are drawn to keep the random stream
        # unchanged; index 1 is unused.
        height, width = image.shape[:2]
        random_array = deterministic_random_number_gen.random(size=4)
        # Side is 90-99% of the SHORTER edge so the square always fits. Using the
        # width alone gave negative offsets (and non-square crops) for landscape
        # images; for square and portrait images the result is unchanged.
        side = int((min(height, width) * 0.90) * (1 + random_array[0] * 0.10))
        x = int(random_array[2] * (width - side))
        y = int(random_array[3] * (height - side))

        image_crop = image[y:y + side, x:x + side, 0:3]
        variants.append(resize(image_crop, image.shape, anti_aliasing=True))

        augmented_images.extend(variants)
        augmented_labels.extend([label] * len(variants))

    return np.array(augmented_images), np.array(augmented_labels)

def create_dataset_wip(file_paths, labels, augment=False, batch_size=4):
    """Build a batched tf.data.Dataset from image paths, optionally augmented.

    All images are loaded eagerly with `preprocess_img` (no generator is used).

    Args:
        file_paths: iterable of image file paths.
        labels: labels aligned with `file_paths`.
        augment: when True, the originals are extended with the variants produced
            by `augment_images`, seeded with the global SEED.
        batch_size: number of samples per batch.

    Returns:
        a `tf.data.Dataset` yielding (images, labels) batches.
    """
    # Not using generator in this implementation
    all_images = [preprocess_img(file_path) for file_path in file_paths]
    all_labels = labels

    if augment:
        # The augmentation helper in this notebook is `augment_images`; the name
        # `augment_images_wip` used here before is not defined in the visible
        # notebook, so augment=True failed with a NameError.
        # Labels are passed as a list because the helper appends to a copy of them.
        all_images, all_labels = augment_images(all_images, list(all_labels), SEED)

    dataset = tf.data.Dataset.from_tensor_slices((all_images, all_labels))
    dataset = dataset.batch(batch_size)
    return dataset

def create_dataset_from_patients(patients, augment=False, batch_size=4):
    """Build a batched tf.data.Dataset with one sample per image of each patient.

    Every image of every patient is loaded with `preprocess_img` and paired with
    that patient's `egfr` attribute as its label.

    Args:
        patients: iterable of objects exposing `image_paths` and `egfr`.
        augment: when True, random augmentation is applied per batch on the fly.
        batch_size: number of samples per batch.

    Returns:
        a `tf.data.Dataset` yielding (images, labels) batches.
    """
    all_images = []
    all_labels = []
    for patient in patients:
        for file_path in patient.image_paths:
            all_images.append(preprocess_img(file_path))
            all_labels.append(patient.egfr)

    dataset = tf.data.Dataset.from_tensor_slices((all_images, all_labels)).batch(batch_size)

    if not augment:
        return dataset

    # On-the-fly augmentation: horizontal flip, rotation and zoom.
    # Earlier experiments, kept here for reference:
    #   * RandomFlip("horizontal") + RandomTranslation(height_factor=0.3, width_factor=0.3)
    #   * RandomFlip("horizontal_and_vertical") + RandomRotation(1.0)
    #     + RandomZoom(height_factor=(-0.2, 0.2), width_factor=(-0.2, 0.2))
    data_augmentation = tf.keras.Sequential([
        tf.keras.layers.RandomFlip("horizontal"),
        tf.keras.layers.RandomRotation(0.25),
        tf.keras.layers.RandomZoom(0.1),
    ])

    def _augment_batch(x, y):
        # training=True keeps the random layers active outside of model.fit
        return data_augmentation(x, training=True), y

    return dataset.map(_augment_batch)



In [None]:
# 1. Load the patients: one Patient object per patient ID, each carrying its image
#    paths and binary eGFR label (see label_img_classification_by_patient).
# Previous per-image loader call, kept for reference:
# file_paths, eGFR_labels, absolute_file_paths_indices = label_img(IMAGE_FOLDER, CSV_FILE)
patients = label_img_classification_by_patient(IMAGE_FOLDER, CSV_FILE)


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

def conv_block(x, filters, stride=1):
    """Residual block: two 3x3 conv + batch-norm stages added to a shortcut.

    The shortcut is the unmodified input unless `stride` differs from 1 or the
    input's channel count differs from `filters`; in those cases it is projected
    with a strided 1x1 convolution followed by batch normalisation.

    Args:
        x: input tensor.
        filters: number of output channels of the block.
        stride: stride of the first convolution (and of the projection).

    Returns:
        the output tensor after the residual addition and the final ReLU.
    """
    conv_options = dict(use_bias=False, kernel_initializer="he_normal")
    residual = x

    # Main path: conv -> BN -> ReLU -> conv -> BN
    main = layers.Conv2D(filters, kernel_size=3, strides=stride, padding="same",
                         **conv_options)(x)
    main = layers.BatchNormalization()(main)
    main = layers.ReLU()(main)
    main = layers.Conv2D(filters, kernel_size=3, strides=1, padding="same",
                         **conv_options)(main)
    main = layers.BatchNormalization()(main)

    # The identity shortcut is only valid when spatial size and channels match.
    shapes_match = stride == 1 and residual.shape[-1] == filters
    if not shapes_match:
        residual = layers.Conv2D(filters, kernel_size=1, strides=stride,
                                 **conv_options)(residual)
        residual = layers.BatchNormalization()(residual)

    merged = layers.Add()([main, residual])
    return layers.ReLU()(merged)


def build_ResNet18(input_shape=(224, 224, 3)):
    """Assemble a ResNet18-style binary classifier.

    Layout: 7x7 stride-2 stem with max-pooling, four stages of two residual
    blocks each (64, 128, 256, 512 filters; stages after the first start with a
    stride-2 block), global average pooling, three dense + dropout layers, and a
    single sigmoid output unit.

    Args:
        input_shape: shape of one input image.

    Returns:
        an uncompiled Keras model named "ResNet18".
    """
    inputs = layers.Input(shape=input_shape)

    # Stem: initial conv + maxpool
    x = layers.Conv2D(64, 7, strides=2, padding="same", use_bias=False,
                      kernel_initializer="he_normal")(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPooling2D(pool_size=3, strides=2, padding="same")(x)

    # Residual stages: (filters, stride of the first block in the stage)
    stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
    for stage_filters, first_stride in stage_specs:
        x = conv_block(x, stage_filters, stride=first_stride)
        x = conv_block(x, stage_filters, stride=1)

    # Global avg pool + classifier head: (dense units, dropout rate)
    x = layers.GlobalAveragePooling2D()(x)
    head_specs = ((4096, 0.3), (2048, 0.2), (1024, 0.2))
    for units, drop_rate in head_specs:
        x = layers.Dense(units, activation="relu")(x)
        x = layers.Dropout(drop_rate)(x)
    outputs = layers.Dense(1, activation="sigmoid")(x)

    return models.Model(inputs, outputs, name="ResNet18")



# Resnet Model

In [None]:
def build_resnet_model(with_transfer_learning):
    """Build the (uncompiled) binary classifier on top of a ResNet backbone.

    Args:
        with_transfer_learning: when truthy, the backbone is loaded from the saved
            model at MODEL_WEIGHTS_PATH; otherwise a ResNet50 without top and
            without pre-trained weights is created for 224x224x3 inputs.

    Returns:
        a Sequential model: backbone (fully trainable), global average pooling,
        three dense + dropout layers, and a single sigmoid output unit.
    """
    if not with_transfer_learning:
        base_model = tf.keras.applications.ResNet50(
            weights=None, include_top=False, input_shape=(224, 224, 3)
        )
    else:
        base_model = models.load_model(MODEL_WEIGHTS_PATH, compile=False)

    # The whole backbone is fine-tuned, not frozen.
    base_model.trainable = True

    # Head: global average pooling (used in place of Flatten), then dense/dropout pairs.
    # Ideas noted but not enabled: an Input(224, 224, 1) + Conv2D(3, (3, 3)) front-end
    # to lift grayscale to 3 channels, and kernel_regularizer=regularizers.l2(0.01).
    head_layers = [layers.GlobalAveragePooling2D()]
    for units, drop_rate in ((4096, 0.3), (2048, 0.2), (1024, 0.2)):
        head_layers.append(layers.Dense(units, activation='relu'))
        head_layers.append(layers.Dropout(drop_rate))
    head_layers.append(layers.Dense(1, activation='sigmoid'))

    model = models.Sequential([base_model, *head_layers])
    return model

# Create and compile the model outside the function.
# Builds the transfer-learning variant, compiles it with Adam (learning rate 1e-5),
# binary cross-entropy loss and accuracy as metric, then prints the layer summary.
# Note: the training cells below rebind `model` to a freshly built model per
# fold/run, so this instance mainly serves to inspect the architecture.
model = build_resnet_model(True)
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.00001), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Training With K-Fold (Patient Exclusivity)

# Helper Functions

In [None]:
def plotHistogramOfDataset(training_labels, validation_labels, n_bins):
  """Overlay histograms of the training and validation labels and show the figure.

  Args:
    training_labels: label values of the training set (drawn in blue).
    validation_labels: label values of the validation set (drawn in red).
    n_bins: number of histogram bins used for both sets.
  """
  series = (
      (training_labels, 'Training Data', 'blue'),
      (validation_labels, 'Validation Data', 'red'),
  )
  for values, legend_name, colour in series:
    plt.hist(values, bins=n_bins, alpha=0.5, label=legend_name, color=colour)

  plt.xlabel('eGFR')
  plt.ylabel('Frequency')
  plt.title(f'Histogram with {n_bins} bins')
  plt.legend()
  plt.show()

def plotTrainingHistory(history):
  """Plot training vs. validation curves for loss and for accuracy.

  Two separate figures are shown, one per metric.

  Args:
    history: object whose `history` mapping contains the keys 'loss',
      'val_loss', 'accuracy' and 'val_accuracy' (as returned by `model.fit`).
  """
  # (metric key, training legend, validation legend, y-axis label, figure title)
  figure_specs = (
      ('loss', 'Training Loss', 'Validation Loss',
       'Binary Crossentropy Loss', 'Training and Validation Loss'),
      ('accuracy', 'Training Accuracy', 'Validation Accuracy',
       'Accuracy', 'Training and Validation Accuracy'),
  )
  for metric, train_legend, val_legend, y_label, figure_title in figure_specs:
    plt.figure(figsize=(10, 6))
    plt.plot(history.history[metric], label=train_legend)
    plt.plot(history.history['val_' + metric], label=val_legend)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(figure_title)
    plt.legend()
    plt.show()

# def plotAndReturnValidationTesting(val_patients, model):
#   # Collect true vs prediction per patient
#   patient_to_true_labels = {}
#   patient_to_predicted_labels = {}


#   for patient in val_patients:
#     patient_dataset = create_dataset_from_patients([patient], augment=False, batch_size=BATCH_SIZE)
#     for images, labels in patient_dataset:
#       patient_to_true_labels[patient.patient_id] = labels[0]
#       patient_to_predicted_labels[patient.patient_id] = model.predict(images).flatten()

#   # Plot True vs Predicted eGFR
#   fig, ax = plt.subplots(figsize=(10, 6))
#   for i, patient_id in enumerate(patient_to_true_labels):
#     true_label = patient_to_true_labels[patient_id]
#     predictions = patient_to_predicted_labels[patient_id]

#     # Plot all individual predictions at the x-position of true_label
#     x_vals = np.full_like(predictions, true_label, dtype=np.float32)
#     ax.scatter(x_vals, predictions, color='blue', alpha=0.6, label='Predictions' if i == 0 else "")

#     # Add faint vertical line (error bar)
#     ax.vlines(x=true_label,
#               ymin=np.min(predictions),
#               ymax=np.max(predictions),
#               color='gray',
#               alpha=0.3,
#               linewidth=2)

#   # Line of best fit
#   true_labels = list(patient_to_true_labels.values())
#   predicted_labels = list(patient_to_predicted_labels.values())
#   x_vals = np.unique(true_labels)
#   fit_fn = np.poly1d(np.polyfit(true_labels, predicted_labels, 1))
#   plt.plot(x_vals, fit_fn(x_vals), color='blue', linewidth=2, label="Fit Line")

#   # Identity line
#   plt.plot([min(true_labels), max(true_labels)],
#           [min(true_labels), max(true_labels)],
#           'r--', label="Perfect Prediction")

#   ax.set_xlabel("True Label")
#   ax.set_ylabel("Predicted Values")
#   ax.set_title("True vs Predicted eGFR")
#   ax.grid(True)
#   ax.legend()
#   plt.show()

#   return patient_to_true_labels, patient_to_predicted_labels
def plotAndReturnValidationTesting(val_patients, model):
    """Predict every image of the given patients, plot the ROC curve, return the results.

    Each patient is evaluated separately through `create_dataset_from_patients`
    (no augmentation, batches of BATCH_SIZE). The ROC curve and AUC are computed
    over all individual image predictions.

    Args:
        val_patients: iterable of patients exposing `patient_id`, `egfr` and
            `image_paths`.
        model: object with a `predict(images)` method returning one probability
            per image.

    Returns:
        (patient_to_true_labels, patient_to_predicted_probs_list):
        patient_id -> true label, and patient_id -> list with the predicted
        probability of every image of that patient.
    """
    patient_to_true_labels = {}
    patient_to_predicted_probs_list = {}

    all_true_labels_individual = []
    all_predicted_probs_individual = []

    for patient in val_patients:
        patient_dataset = create_dataset_from_patients([patient], augment=False, batch_size=BATCH_SIZE)

        # A patient with more than BATCH_SIZE images spans several batches.
        # Accumulate across batches; assigning per batch kept only the last batch.
        patient_probs = []
        for images, labels in patient_dataset:
            predictions = model.predict(images).flatten()

            # All images of one patient share the same label.
            patient_to_true_labels[patient.patient_id] = labels[0].numpy()
            patient_probs.extend(predictions.tolist())

            # Collect individual predictions for metrics
            all_true_labels_individual.extend(labels.numpy().flatten())
            all_predicted_probs_individual.extend(predictions)

        # Patients that produced no batch get no entry.
        if patient_probs:
            patient_to_predicted_probs_list[patient.patient_id] = patient_probs

    # Plot ROC Curve over all individual image predictions
    fpr, tpr, _ = roc_curve(all_true_labels_individual, all_predicted_probs_individual)
    auc_score = roc_auc_score(all_true_labels_individual, all_predicted_probs_individual)

    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {auc_score:.4f})", color='blue')
    plt.plot([0, 1], [0, 1], 'r--', label="Random Guess")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curve")
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.show()

    return patient_to_true_labels, patient_to_predicted_probs_list


# Hyper-parameter tuning configs

In [None]:

def resnet_no_tranfer_learning():
  """Build and compile the ResNet classifier without transfer learning.

  Compiled with Adam (learning rate 1e-4), binary cross-entropy loss, and the
  metrics accuracy and AUC (reported under the name 'auc').
  """
  classifier = build_resnet_model(False)
  classifier.compile(
      optimizer=tf.optimizers.Adam(learning_rate=0.0001),
      loss='binary_crossentropy',
      metrics=['accuracy', tf.keras.metrics.AUC(name='auc')],
  )
  return classifier

def resnet_with_tranfer_learning():
  """Build and compile the ResNet classifier with transfer learning.

  Compiled with Adam (learning rate 1e-4), binary cross-entropy loss, and the
  metrics accuracy and AUC (reported under the name 'auc').
  """
  classifier = build_resnet_model(True)
  classifier.compile(
      optimizer=tf.optimizers.Adam(learning_rate=0.0001),
      loss='binary_crossentropy',
      metrics=['accuracy', tf.keras.metrics.AUC(name='auc')],
  )
  return classifier

# Registry of model configurations to compare: config name -> zero-argument factory
# returning a freshly built, compiled model. The training cells iterate over this
# dict and call the factory once per fold/run.
hyper_parameter_tuning_configs = {
    
    'resnet_no_tranfer_learning': resnet_no_tranfer_learning,
    'resnet_with_tranfer_learning': resnet_with_tranfer_learning,
}


# Training with K-Fold (New Method)

Latest k-fold

In [None]:
# Stratified k-fold cross-validation with early stopping and best-weight restoration.
# The test set stays the same for every config and fold; only the training and
# validation sets change. Used for hyper-parameter tuning.

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.utils import class_weight
from sklearn.metrics import roc_auc_score, classification_report
import numpy as np
import tensorflow as tf


def _flatten_patient_predictions(patient_to_true_labels, patient_to_predicted_probs):
    """Expand per-patient results into parallel per-image lists (true labels, probabilities)."""
    true_labels, predicted_probs = [], []
    for patient_id in patient_to_predicted_probs:
        for prob in patient_to_predicted_probs[patient_id]:
            true_labels.append(patient_to_true_labels[patient_id])
            predicted_probs.append(prob)
    return true_labels, predicted_probs


# ---------------------------------------------------
# Step 1: Load patients and split into train/val vs test
# ---------------------------------------------------
all_patients = label_img_classification_by_patient(IMAGE_FOLDER, CSV_FILE)

# NOTE(review): this hold-out split is not stratified (no `stratify=` argument),
# unlike the k-fold split below -- confirm whether that is intended.
train_and_val_patients, test_patients = train_test_split(
    all_patients, test_size=0.1, random_state=SEED
)

# The test patients never change, so the test dataset is built once instead of once
# per fold. It is not consumed in this cell (test evaluation goes through
# plotAndReturnValidationTesting); the name is kept defined as before.
test_dataset = create_dataset_from_patients(test_patients, augment=False, batch_size=BATCH_SIZE)

# ---------------------------------------------------
# Step 2: Hyperparameter tuning with stratified k-fold CV
# ---------------------------------------------------
for config_name, model_creator in hyper_parameter_tuning_configs.items():
    print(f"\n==============================")
    print(f"Training with Config: {config_name}")
    print(f"==============================")

    all_patient_egfr = [patient.egfr for patient in train_and_val_patients]

    val_fold_aucs = []
    test_fold_aucs = []

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

    for fold, (train_idx, val_idx) in enumerate(skf.split(train_and_val_patients, all_patient_egfr)):
        print(f"\n----- Fold {fold + 1} -----")

        train_patients = [train_and_val_patients[i] for i in train_idx]
        val_patients = [train_and_val_patients[i] for i in val_idx]

        # Plot distribution
        plotHistogramOfDataset([p.egfr for p in train_patients], [p.egfr for p in val_patients], 2)

        # Create datasets
        train_dataset_fold = create_dataset_from_patients(train_patients, augment=True, batch_size=BATCH_SIZE)
        val_dataset = create_dataset_from_patients(val_patients, augment=False, batch_size=BATCH_SIZE)

        # Compute class weights (patient-level labels of the training fold)
        train_label_array = np.array([p.egfr for p in train_patients]).astype(int)
        present_classes = np.unique(train_label_array)
        weights = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=present_classes,
            y=train_label_array
        )
        # Key the weights by the actual class label, not by position: the two only
        # coincide when the classes present are exactly 0..k-1.
        class_weights_dict = {int(label): float(weight) for label, weight in zip(present_classes, weights)}

        # Build model
        model = model_creator()

        # Early stopping on validation AUC; the best weights are restored at the end
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_auc', mode='max', patience=50, restore_best_weights=True
        )

        # Train
        history = model.fit(
            train_dataset_fold,
            validation_data=val_dataset,
            epochs=EPOCHS,
            callbacks=[early_stopping],
            class_weight=class_weights_dict,
            verbose=1
        )

        plotTrainingHistory(history)

        # -------------------------
        # Validation performance
        # -------------------------
        patient_to_true_labels, patient_to_predicted_probs = plotAndReturnValidationTesting(val_patients, model)
        true_labels, predicted_probs = _flatten_patient_predictions(patient_to_true_labels, patient_to_predicted_probs)

        predicted_labels = [1 if p >= 0.5 else 0 for p in predicted_probs]
        val_auc = roc_auc_score(true_labels, predicted_probs)
        print(f"Validation AUC (Fold {fold + 1}): {val_auc:.4f}")
        print(classification_report(true_labels, predicted_labels, digits=4))
        val_fold_aucs.append(val_auc)

        # Save best model for fold
        model.save_weights(f"best_model_fold_{fold+1}_{config_name}.weights.h5")

        # -------------------------
        # Test performance (same fold model)
        # -------------------------
        patient_to_true_labels, patient_to_predicted_probs = plotAndReturnValidationTesting(test_patients, model)
        true_labels, predicted_probs = _flatten_patient_predictions(patient_to_true_labels, patient_to_predicted_probs)

        predicted_labels = [1 if p >= 0.5 else 0 for p in predicted_probs]
        test_auc = roc_auc_score(true_labels, predicted_probs)
        print(f"Test AUC (Fold {fold + 1}): {test_auc:.4f}")
        print(classification_report(true_labels, predicted_labels, digits=4))
        test_fold_aucs.append(test_auc)

    # ---------------------------------------------------
    # Final summary for this config
    # ---------------------------------------------------
    print(f"\n==== Final Results for Config: {config_name} ====")
    print(f"Mean Validation AUC: {np.mean(val_fold_aucs):.4f} ± {np.std(val_fold_aucs):.4f}")
    print(f"Mean Test AUC:       {np.mean(test_fold_aucs):.4f} ± {np.std(test_fold_aucs):.4f}")
    print("=================================================\n")



In [None]:
# ===================================================
# Plotting utility
# ===================================================
def plot_classification_results(patient_to_true_labels, 
                                patient_to_predicted_probs, 
                                dataset_name="Dataset",
                                patient_to_egfr=None):
    """Plot true class (x-axis) vs eGFR (y-axis), mark misclassifications in red.

    A patient's predicted class is obtained by averaging its image probabilities
    and thresholding the mean at 0.5.

    Args:
        patient_to_true_labels: mapping key -> true class (0 or 1).
        patient_to_predicted_probs: mapping with the same keys -> list of
            predicted probabilities (one per image).
        dataset_name: text appended to the figure title.
        patient_to_egfr: optional mapping with the same keys -> eGFR value used on
            the y-axis. Needed when the keys are plain patient IDs (as returned by
            plotAndReturnValidationTesting), because an ID carries no eGFR value.
            When omitted, each key must expose an `egfr` attribute.

    Raises:
        AttributeError: if no eGFR value can be determined for a key.
    """
    true_labels, predicted_labels, egfr_values = [], [], []

    for patient, true_label in patient_to_true_labels.items():
        probs = patient_to_predicted_probs[patient]
        avg_prob = np.mean(probs)  # average if multiple images per patient
        pred_label = 1 if avg_prob >= 0.5 else 0

        # Resolve the y-value: explicit mapping first, then an attribute on the key.
        # Reading `patient.egfr` unconditionally failed for integer patient IDs.
        if patient_to_egfr is not None:
            egfr = patient_to_egfr[patient]
        elif hasattr(patient, "egfr"):
            egfr = patient.egfr
        else:
            raise AttributeError(
                f"No eGFR value available for key {patient!r}: pass patient_to_egfr "
                "when the result dictionaries are keyed by patient ID"
            )

        true_labels.append(true_label)
        predicted_labels.append(pred_label)
        egfr_values.append(egfr)

    true_labels = np.array(true_labels)
    predicted_labels = np.array(predicted_labels)
    egfr_values = np.array(egfr_values)

    plt.figure(figsize=(8,6))
    for cls in [0, 1]:
        idx = np.where(true_labels == cls)[0]
        correct_idx = idx[true_labels[idx] == predicted_labels[idx]]
        wrong_idx   = idx[true_labels[idx] != predicted_labels[idx]]

        # Legend entries are emitted only on the first pass so each colour appears once.
        # NOTE(review): the entries read "Class 0 ..." although black/red mark
        # correct/misclassified points of both classes -- consider renaming.
        plt.scatter(np.full_like(correct_idx, cls), 
                    egfr_values[correct_idx], 
                    color="black", alpha=0.7, 
                    label=f"Class {cls} Correct" if cls==0 else None)

        plt.scatter(np.full_like(wrong_idx, cls), 
                    egfr_values[wrong_idx], 
                    color="red", alpha=0.7, 
                    label=f"Class {cls} Misclassified" if cls==0 else None)

    plt.xticks([0, 1], ["Class 0", "Class 1"])
    plt.xlabel("True Class")
    plt.ylabel("eGFR")
    plt.title(f"Classification Results: {dataset_name}")
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.5)
    plt.show()

In [None]:
# Stratified hold-out training with early stopping and best-weight restoration.
# Used for final model training and testing.
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, classification_report
import numpy as np
import tensorflow as tf

SEED   = 42    # base seed; run k uses SEED + k for both splits
EPOCHS = 100   # upper bound on epochs per run (early stopping may end sooner)
N_RUNS = 5     # number of repeated hold-out splits per config


def _flatten_patient_predictions(patient_to_true_labels, patient_to_predicted_probs):
    """Expand per-patient predictions into parallel flat lists.

    Args:
        patient_to_true_labels: mapping from patient key to its true label.
        patient_to_predicted_probs: mapping from patient key to an iterable of
            predicted probabilities stored for that patient.

    Returns:
        Tuple ``(true_labels, predicted_probs, predicted_labels)`` with one
        entry per stored probability; the true label of a patient is repeated
        for each of its probabilities and ``predicted_labels`` thresholds the
        probabilities at 0.5.
    """
    true_labels, predicted_probs = [], []
    for patient_id in patient_to_predicted_probs:
        for prob in patient_to_predicted_probs[patient_id]:
            true_labels.append(patient_to_true_labels[patient_id])
            predicted_probs.append(prob)
    predicted_labels = [1 if p >= 0.5 else 0 for p in predicted_probs]
    return true_labels, predicted_probs, predicted_labels


# ===================================================
# Load patients
# ===================================================
all_patients = label_img_classification_by_patient(IMAGE_FOLDER, CSV_FILE)
# One label per patient, used for stratification below.
# NOTE(review): stratified splitting needs discrete classes — confirm that
# p.egfr holds the class label here rather than a continuous eGFR value.
all_labels   = np.array([p.egfr for p in all_patients])

# ===================================================
# Run N_RUNS stratified hold-out splits for every config
# ===================================================

# AUCs are kept per config so the final summary covers every configuration,
# not only the one that happened to be trained last.
val_aucs_by_config  = {}
test_aucs_by_config = {}

for config_name, model_creator in hyper_parameter_tuning_configs.items():
    print("\n==============================")
    print(f"Training with Config: {config_name}")
    print("==============================")
    val_aucs = []
    test_aucs = []
    # Register the lists up front so partial results survive an interrupted run.
    val_aucs_by_config[config_name]  = val_aucs
    test_aucs_by_config[config_name] = test_aucs

    for run in range(N_RUNS):
        print("\n==============================")
        print(f" Hold-out Run {run+1}/{N_RUNS} ")
        print("==============================")

        # Outer stratified split (90% train+val, 10% test)
        trainval_patients, test_patients, trainval_labels, test_labels = train_test_split(
            all_patients,
            all_labels,
            test_size=0.1,
            random_state=SEED + run,
            stratify=all_labels
        )

        # Inner stratified split (20% of train+val held out for early stopping)
        train_patients, val_patients, train_labels, val_labels = train_test_split(
            trainval_patients,
            trainval_labels,
            test_size=0.2,
            random_state=SEED + run,
            stratify=trainval_labels
        )

        # Build datasets (augmentation on the training split only)
        train_dataset = create_dataset_from_patients(train_patients, augment=True,  batch_size=BATCH_SIZE)
        val_dataset   = create_dataset_from_patients(val_patients,   augment=False, batch_size=BATCH_SIZE)

        # Build and compile model (a fresh one for every run)
        model = model_creator()

        # Early stopping on validation AUC; the best weights are restored.
        # NOTE(review): monitoring "val_auc" presumes the model is compiled
        # with a metric named "auc" — confirm in the config factories.
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="val_auc", mode="max", patience=50, restore_best_weights=True
        )

        # Train
        history = model.fit(
            train_dataset,
            validation_data=val_dataset,
            epochs=EPOCHS,
            callbacks=[early_stopping],
            verbose=1
        )
        plotTrainingHistory(history)

        # ===================================================
        # Evaluate TRAIN set
        # ===================================================
        train_true, train_probs = plotAndReturnValidationTesting(train_patients, model)
        plot_classification_results(train_true, train_probs, dataset_name="Training Set")

        # ===================================================
        # Validation performance
        # ===================================================
        patient_to_true_labels, patient_to_predicted_probs = plotAndReturnValidationTesting(val_patients, model)

        # AUC and report are computed over every stored probability (a
        # patient's label is repeated per probability), not per-patient means.
        true_labels, predicted_probs, predicted_labels = _flatten_patient_predictions(
            patient_to_true_labels, patient_to_predicted_probs
        )
        val_auc = roc_auc_score(true_labels, predicted_probs)
        val_aucs.append(val_auc)
        print(f"Validation AUC (90/10): {val_auc:.4f}")
        print(classification_report(true_labels, predicted_labels, digits=4))
        plot_classification_results(patient_to_true_labels, patient_to_predicted_probs, dataset_name="Validation Set")

        # ===================================================
        # Evaluate on test set
        # ===================================================
        patient_to_true_labels, patient_to_predicted_probs = plotAndReturnValidationTesting(test_patients, model)

        # Same flattening as for validation: one entry per stored probability.
        true_labels, predicted_probs, predicted_labels = _flatten_patient_predictions(
            patient_to_true_labels, patient_to_predicted_probs
        )
        test_auc = roc_auc_score(true_labels, predicted_probs)
        test_aucs.append(test_auc)
        # 1-based run index, consistent with the run banner above.
        print(f"Test AUC (Run {run+1}): {test_auc:.4f}")
        print(classification_report(true_labels, predicted_labels, digits=4))
        plot_classification_results(patient_to_true_labels, patient_to_predicted_probs, dataset_name="Test Set")

# ===================================================
# Final summary across runs, reported separately for every config
# ===================================================
for summary_config, config_val_aucs in val_aucs_by_config.items():
    config_test_aucs = test_aucs_by_config[summary_config]
    print("\n==============================")
    print(f" Results for Config: {summary_config} ")
    print("==============================")
    print(f"Mean Validation AUC: {np.mean(config_val_aucs):.4f} ± {np.std(config_val_aucs):.4f}")
    print(f"Mean Test AUC: {np.mean(config_test_aucs):.4f} ± {np.std(config_test_aucs):.4f}")


# Save Model to File

In [None]:
model_name = 'resnet_radimagenet_no_masking_lr0001__1346596590793527366'
# Keep this False unless you really intend to replace an existing file: it
# protects previously saved models from being overwritten by accident.
overwrite = False

# Set model save path based on environment
if is_google_colab() and is_google_drive_mounted():
    model_save_path = '/content/drive/MyDrive/uttiya_test_lab/model_weights/' + model_name + '.keras'
elif is_google_colab():
    model_save_path = '/content/model_weights/' + model_name + '.keras'
else:
    model_save_path = '/Users/hikaru/Desktop/TUPIL/Code/TUPIL_Kidney/data/model_weights/' + model_name + '.keras'

# Make sure the target directory exists before saving; on a fresh runtime
# the weights directory may not have been created yet.
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

# Persists whatever `model` currently refers to (the most recently trained one).
model.save(model_save_path, overwrite=overwrite)

# Load model and show performance

In [None]:
# Restore the saved model from the environment-specific weights location.
if is_google_colab():
    if is_google_drive_mounted():
        # Colab with Google Drive mounted
        model_load_path = '/content/drive/MyDrive/uttiya_test_lab/model_weights/' + model_name + '.keras'
    else:
        # Colab without Drive: runtime-local storage
        model_load_path = '/content/model_weights/' + model_name + '.keras'
else:
    # Local machine
    model_load_path = '/Users/hikaru/Desktop/TUPIL/Code/TUPIL_Kidney/data/model_weights/' + model_name + '.keras'

model = tf.keras.models.load_model(model_load_path)

# Collect ground truth and model output batch by batch over the test set.
true_labels, predicted_labels = [], []

for images, labels in test_dataset:
    preds = model.predict(images)
    true_labels.extend(labels.numpy().ravel())
    predicted_labels.extend(preds.ravel())

# Scatter plot of true vs predicted values.
plt.figure(figsize=(8, 8))
plt.scatter(true_labels, predicted_labels, c='blue', alpha=0.5)

# First-degree least-squares fit through the predictions, to show the trend.
trend_x = np.unique(true_labels)
trend_line = np.poly1d(np.polyfit(true_labels, predicted_labels, 1))
plt.plot(trend_x, trend_line(trend_x))

# Dashed red identity line: where a perfect prediction would fall.
identity_span = [min(true_labels), max(true_labels)]
plt.plot(identity_span, identity_span, 'r--')

plt.title('True vs Predicted eGFR')
plt.xlabel('True eGFR')
plt.ylabel('Predicted eGFR')
# Fixed axis windows: x spans 15-115, y spans 15-110.
plt.xlim(15, 115)
plt.ylim(15, 110)
plt.show()

# Pearson correlation between the two series.
correlation = np.corrcoef(true_labels, predicted_labels)[0, 1]
print(f"Correlation between True and Predicted eGFR: {correlation:.2f}")

# Keras evaluation on the same dataset.
# NOTE(review): results[1] is printed as MAE — this presumes MAE is the first
# compiled metric of the loaded model; confirm.
results = model.evaluate(test_dataset)
print(f"Test Loss: {results[0]}, Test MAE: {results[1]}")

# Regression metrics computed from the collected predictions.
mae = mean_absolute_error(true_labels, predicted_labels)
print(f"Mean Absolute Error (MAE): {mae:.2f}")

rmse = np.sqrt(mean_squared_error(true_labels, predicted_labels))
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

r2 = r2_score(true_labels, predicted_labels)
print(f"R² Score: {r2:.2f}")

# Display Feature Maps

In [None]:
# Show the filters and feature maps of the convolutional layers.
# NOTE(review): the layer indices used below (1 for the first conv layer and
# 2/5/9/13/17 for the later blocks) look like they target a VGG16-style
# backbone stored at model.layers[1] — confirm for the loaded model.
backbone = model.layers[1]

# Feature maps are drawn on an 8x8 grid, so at most 64 channels fit per figure.
GRID_ROWS, GRID_COLS = 8, 8
MAX_MAPS = GRID_ROWS * GRID_COLS

# List every layer whose name contains 'conv' together with its kernel shape.
for layer in backbone.layers:
    if 'conv' not in layer.name:
        continue
    filters, bias = layer.get_weights()
    print(layer.name, filters.shape)

filters, bias = backbone.layers[1].get_weights()

# normalize filter values to 0-1 so we can visualize them
f_min, f_max = filters.min(), filters.max()
filters = (filters - f_min) / (f_max - f_min)

# Plot the first n_filters filters, one subplot per input channel
# (the first three channels of each filter are shown).
n_filters = 6
ix = 1
fig = plt.figure(figsize=(20, 15))
for filter_idx in range(n_filters):
    f = filters[:, :, :, filter_idx]
    for j in range(3):
        # subplot grid: one row per filter, one column per channel
        plt.subplot(n_filters, 3, ix)
        plt.imshow(f[:, :, j], cmap='gray')
        ix += 1
plt.show()

# Visualization of first block
first_block_model = Model(inputs=backbone.inputs, outputs=backbone.layers[1].output)

image = load_img(IMAGE_FOLDER + '/Patient_8_Resized_Image_1.png', target_size=(224, 224))

# convert the image to an array
image = img_to_array(image)
# expand dimensions so that it represents a single 'sample'
image = expand_dims(image, axis=0)

image = preprocess_input(image)

# calculating features_map
features = first_block_model.predict(image)

fig = plt.figure(figsize=(20, 15))
fig.suptitle("BLOCK_1", fontsize=20)  # set once instead of on every iteration
# Clamp to the grid capacity so a wider layer cannot overflow the 8x8 grid.
for channel in range(min(features.shape[3], MAX_MAPS)):
    plt.subplot(GRID_ROWS, GRID_COLS, channel + 1)
    plt.imshow(features[0, :, :, channel], cmap='gray')

plt.show()

# Visualization of other blocks
blocks = [2, 5, 9, 13, 17]
outputs = [backbone.layers[block_idx].output for block_idx in blocks]

model2 = Model(inputs=backbone.inputs, outputs=outputs)

feature_map = model2.predict(image)

for block_idx, fmap in zip(blocks, feature_map):
    fig = plt.figure(figsize=(20, 15))
    # https://stackoverflow.com/a/12444777
    fig.suptitle("BLOCK_{}".format(block_idx), fontsize=20)
    # The channel count comes from the map being plotted (not from the
    # first-block output), and the loop variable no longer shadows the
    # block index used for the title.
    for channel in range(min(fmap.shape[3], MAX_MAPS)):
        plt.subplot(GRID_ROWS, GRID_COLS, channel + 1)
        plt.imshow(fmap[0, :, :, channel], cmap='gray')

plt.show()