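# First Step of Active Learning
This notebook runs the first active-learning cycle for a U-Net model that segments bladder neck dissection images: it ranks unlabeled frames by prediction uncertainty, fine-tunes the pre-trained model on the newly annotated frames, and evaluates the updated model.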

### 0. Access Google Drive on Google Colab
Mount Google Drive to access the dataset stored in your Drive.

In [None]:
# Colab-only: mount Google Drive at /content/drive so the /content/drive/MyDrive/... paths
# used throughout this notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

---

### 1. Import Libraries

In [None]:
!pip install labelme tensorflow matplotlib numpy opencv-python scikit-learn
!pip install --upgrade tensorflow

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, concatenate
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
from tensorflow.keras.metrics import MeanIoU
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import json
from PIL import Image
import cv2
import random
from IPython.display import clear_output
import time
from sklearn.metrics import jaccard_score
from sklearn.utils import shuffle
import pandas as pd
pd.set_option('display.max_colwidth', None)

### Image and Mask Loader

This class loads surgical images and their corresponding segmentation masks
(from LabelMe JSON annotations).  

In [None]:
class ImageMaskLoader:
    """Load ``.jpg`` images and rasterize their LabelMe polygons into one-hot masks.

    Parameters
    ----------
    data_root : str
        Directory that contains the image folders.
    folders : list of str
        Sub-folders of ``data_root`` that are scanned for ``.jpg`` files.
    labels : dict
        Maps a LabelMe label name to the integer class id painted into the mask.
        ``len(labels)`` is used as the number of one-hot channels, so the ids are
        expected to be ``0 .. len(labels) - 1``.
    resize_height, resize_width : int
        Size every image and mask is resized to.
    background_label : int
        Class id given to pixels that no polygon covers.
    """

    def __init__(self, data_root, folders, labels, resize_height=256, resize_width=256, background_label=0):
        self.data_root = data_root
        self.folders = folders
        self.labels = labels
        self.resize_height = resize_height
        self.resize_width = resize_width
        self.background_label = background_label

    @staticmethod
    def _annotation_path(image_path):
        """Return the path of the JSON annotation that sits next to ``image_path``."""
        # Swap only the file extension; a plain str.replace would also rewrite any
        # ".jpg" that happens to appear in a directory name or earlier in the file name.
        return os.path.splitext(image_path)[0] + ".json"

    def _scale_points(self, points, original_width, original_height):
        """Map polygon vertices from original-image pixels to resized-mask pixels."""
        # Scale in floating point first and round last, so sub-pixel vertex
        # coordinates are not truncated before the scaling is applied.
        scaled = np.array(points, dtype=np.float64).reshape(-1, 2)
        scaled[:, 0] *= self.resize_width / original_width
        scaled[:, 1] *= self.resize_height / original_height
        # cv2.fillPoly requires int32 vertices.
        return np.rint(scaled).astype(np.int32)

    def _rasterize_shapes(self, shapes, original_width, original_height):
        """Paint every known-label polygon in ``shapes`` onto a background-filled mask."""
        mask = np.full((self.resize_height, self.resize_width), self.background_label, dtype=np.uint8)
        for shape in shapes:
            label = shape['label']
            if label not in self.labels:
                continue  # labels outside the configured class set are ignored
            points = self._scale_points(shape['points'], original_width, original_height)
            if len(points) == 0:
                continue  # nothing to draw for an empty shape
            cv2.fillPoly(mask, [points], self.labels[label])
        return mask

    def load_and_process_data(self):
        """Read every ``.jpg`` in the configured folders together with its mask.

        Returns
        -------
        images : numpy.ndarray
            float32 array of the resized RGB images, scaled to ``[0, 1]``.
        masks : numpy.ndarray
            One-hot encoded masks with ``len(self.labels)`` channels.

        Images without a JSON file get an all-background mask. Files that OpenCV
        cannot decode are reported and skipped.
        """
        images = []
        masks = []

        for folder in self.folders:
            folder_path = os.path.join(self.data_root, folder)
            # Sorted so the sample order (and any seeded split made from it) is
            # reproducible; os.listdir returns entries in arbitrary order.
            for file in sorted(os.listdir(folder_path)):
                if not file.endswith(".jpg"):
                    continue

                image_path = os.path.join(folder_path, file)
                image = cv2.imread(image_path)
                if image is None:
                    # cv2.imread signals an unreadable file by returning None.
                    print(f"{file} could not be read as an image. Skipping.")
                    continue

                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                original_height, original_width = image.shape[:2]
                image_resized = cv2.resize(image, (self.resize_width, self.resize_height))

                json_path = self._annotation_path(image_path)
                if os.path.exists(json_path):
                    with open(json_path) as f:
                        data = json.load(f)
                    shapes = data['shapes']
                else:
                    print(f"{file} has no corresponding JSON file. Using background mask.")
                    shapes = []
                mask = self._rasterize_shapes(shapes, original_width, original_height)

                images.append(img_to_array(image_resized))
                masks.append(mask)

        images = np.array(images, dtype="float32") / 255.0
        masks = np.array(masks, dtype="int32")
        masks = np.expand_dims(masks, axis=-1)
        masks = to_categorical(masks, num_classes=len(self.labels))

        return images, masks

- **DATA_ROOT**: Root directory of the dataset on Google Drive.  
- **FOLDERS**: List of subfolders for training data.  
- **FOLDERS_VALIDATION**: List of subfolders for expert validation data.  
- **FOLDERS_VALIDATION_NOVICE**: List of subfolders for novice validation data.  

In [None]:
# Root directory of the labelled dataset on Google Drive (placeholder path).
DATA_ROOT = '/content/drive/MyDrive/your_path'

# Sub-folders of DATA_ROOT for each data split; fill these in before loading.
FOLDERS = []                    # training data
FOLDERS_VALIDATION = []         # expert validation data
FOLDERS_VALIDATION_NOVICE = []  # novice validation data

# Annotation label name -> integer class id written into the segmentation masks.
LABELS = dict(
    background=0,
    bladder=1,
    border=2,
    catheter=3,
    gauze=4,
    instrument=5,
    mucosa=6,
    prostate=7,
    suction=8,
    urethra=9,
)

In [None]:
# Labelled data from FOLDERS; split into train/test further below.
loader = ImageMaskLoader(DATA_ROOT, FOLDERS, LABELS)
images, masks = loader.load_and_process_data()

In [None]:
# Validation set from FOLDERS_VALIDATION (expert data per the configuration notes).
loader_validation = ImageMaskLoader(DATA_ROOT, FOLDERS_VALIDATION, LABELS)
images_v, masks_v = loader_validation.load_and_process_data()

In [None]:
# Validation set from FOLDERS_VALIDATION_NOVICE (novice data per the configuration notes).
loader_validation_novice = ImageMaskLoader(DATA_ROOT, FOLDERS_VALIDATION_NOVICE, LABELS)
images_v_n, masks_v_n = loader_validation_novice.load_and_process_data()

### Train / Test Split

In [None]:
# Hold out 20% of the labelled images as a test set; random_state fixes the shuffle.
train_images, test_images, train_masks, test_masks = train_test_split(images, masks, test_size=0.2, random_state=42)

### IoU Calculator
This class provides functions to calculate Intersection over Union (IoU).

In [None]:
class IoUCalculator:
    """Intersection-over-Union helpers for multi-class segmentation.

    Parameters
    ----------
    num_classes : int
        Number of segmentation classes; class ids are ``0 .. num_classes - 1``.
    """

    def __init__(self, num_classes):
        self.num_classes = num_classes

    def calculate_iou(self, y_true, y_pred):
        """Return the Keras ``MeanIoU`` of two class-id arrays over ``num_classes`` classes."""
        iou_metric = MeanIoU(num_classes=self.num_classes)
        iou_metric.update_state(y_true, y_pred)
        return iou_metric.result().numpy()

    def calculate_class_iou(self, test_masks, y_pred_argmax, class_index):
        """Return the IoU of a single class.

        Parameters
        ----------
        test_masks : array-like
            One-hot ground truth; the class id is taken as the argmax of the last axis.
        y_pred_argmax : array-like
            Predicted class ids, same shape as ``test_masks`` without its last axis.
        class_index : int
            Class whose IoU is computed.

        Returns
        -------
        float
            ``|true AND pred| / |true OR pred|`` for pixels of ``class_index``, or
            ``nan`` when the class occurs in neither array.
        """
        # Computed directly rather than through MeanIoU: feeding a binary
        # "class vs. rest" mask to MeanIoU averages the IoU of the class with the
        # IoU of its complement, which inflates the score of small or missing classes.
        true_hits = np.argmax(test_masks, axis=-1) == class_index
        pred_hits = np.asarray(y_pred_argmax) == class_index
        union = np.logical_or(true_hits, pred_hits).sum()
        if union == 0:
            return float("nan")
        intersection = np.logical_and(true_hits, pred_hits).sum()
        return float(intersection / union)

    def calculate_mean_iou(self, test_masks, y_pred_argmax):
        """Return the mean of the per-class IoUs, ignoring classes whose IoU is ``nan``."""
        class_ious = [
            self.calculate_class_iou(test_masks, y_pred_argmax, cls_idx)
            for cls_idx in range(self.num_classes)
        ]
        defined = [iou for iou in class_ious if not np.isnan(iou)]
        if not defined:
            return float("nan")  # no class present at all
        return float(np.mean(defined))

### Fine Frame Image Loader

This class loads *unlabeled fine-frame images*
(e.g., 1-s interval frames extracted for active learning).

- **Inputs**
  - `fine_frame_base_dir`: Root directory for fine-frame images  
  - `fine_frame_folders`: List of subfolders containing extracted frames  

- **Process**
  - Iterates through all `.jpg` files in the specified folders  
  - Stores relative file paths (`folder/filename`) in `self.file_names` for later reference  

In [None]:
class FineFrameImageLoader:
    """Read unlabeled ``.jpg`` frames from a set of folders.

    After ``load_images`` runs, ``file_names`` holds the ``folder/filename`` path of
    every returned image, in the same order as the returned array.
    """

    def __init__(self, fine_frame_base_dir, fine_frame_folders, resize_height=256, resize_width=256):
        self.fine_frame_base_dir = fine_frame_base_dir
        self.fine_frame_folders = fine_frame_folders
        self.resize_height = resize_height
        self.resize_width = resize_width
        self.file_names = []

    def load_images(self):
        """Load, convert to RGB and resize every readable frame.

        Returns a float32 array of the frames scaled to ``[0, 1]``. Files that
        OpenCV cannot read are skipped.
        """
        target_size = (self.resize_width, self.resize_height)  # cv2.resize takes (width, height)
        frames = []
        self.file_names = []

        for subdir in self.fine_frame_folders:
            subdir_path = os.path.join(self.fine_frame_base_dir, subdir)
            jpg_names = [name for name in sorted(os.listdir(subdir_path)) if name.endswith(".jpg")]
            for name in jpg_names:
                bgr = cv2.imread(os.path.join(subdir_path, name))
                if bgr is None:
                    continue
                rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
                frames.append(cv2.resize(rgb, target_size))
                # Record the path relative to the base directory for later lookup.
                self.file_names.append(os.path.join(subdir, name))

        return np.array(frames, dtype="float32") / 255.0

### Fine-frame Data Configuration

- **DATA_ROOT_FINE**: Root directory containing unlabeled fine-frame images.  
- **FINE_FOLDERS**: List of subfolders to be used (e.g., `["RARP1_fine_frame", ...]`).  
These frames (sampled at 1-s intervals) are used for active learning uncertainty estimation.

In [None]:
# Root directory of the unlabeled fine-frame images (placeholder path).
DATA_ROOT_FINE = '/content/drive/MyDrive/your_path/fine_frame'
# Sub-folders of DATA_ROOT_FINE to load; fill in before running.
FINE_FOLDERS = []

image_loader = FineFrameImageLoader(DATA_ROOT_FINE, FINE_FOLDERS)
unlabeled_images = image_loader.load_images()

# Lookup table: 'Index' is the position of a frame in unlabeled_images,
# 'File Name' is its folder/filename path relative to DATA_ROOT_FINE.
df = pd.DataFrame({
    'Index': range(len(unlabeled_images)),
    'File Name': image_loader.file_names
})

### Uncertainty Calculation

- **calculate_uncertainty**  
  - *Image-wise uncertainty* (one score per image)  
  - Computes the entropy of the class probability distribution at each pixel and averages it over all pixels of the image

- **calculate_pixelwise_uncertainty**  
  - *Pixel-wise uncertainty* (one score per pixel)  
  - Computes the entropy of the class probability distribution at each pixel location and returns the full entropy map  

In [None]:
def calculate_uncertainty(predictions):
    """Return one uncertainty score per image: the mean per-pixel entropy.

    The entropy is taken over the last axis of ``predictions`` and then averaged
    over axes 1 and 2. A small constant keeps ``log`` finite at zero probability.
    """
    log_probs = np.log(predictions + 1e-10)
    per_pixel_entropy = -np.sum(predictions * log_probs, axis=-1)
    return np.mean(per_pixel_entropy, axis=(1, 2))

def calculate_pixelwise_uncertainty(predictions):
    """Return the entropy over the last axis of ``predictions`` at every location.

    The output has the shape of ``predictions`` without its last axis. A small
    constant keeps ``log`` finite at zero probability.
    """
    log_probs = np.log(predictions + 1e-10)
    weighted_log_probs = predictions * log_probs
    return -np.sum(weighted_log_probs, axis=-1)

### Load Pre-trained Model

In [None]:
# Base model saved in HDF5 format (placeholder path); used to score the unlabeled frames.
model_path = '/content/drive/MyDrive/your_path/base_model.h5'
model = load_model(model_path)

## Prediction

In [None]:
# Model outputs for every unlabeled frame; the uncertainty scores in the following cells are computed from these.
predictions = model.predict(unlabeled_images)

### Select Most Uncertain Samples

- **Uncertainty scores** are calculated for each image and stored in the DataFrame.  
- **Ranking**: Images are sorted by their uncertainty values.  
- **Selection**: The top-50 most uncertain samples are extracted for annotation or review (active learning).  

In [None]:
# Number of frames to hand over for annotation.
top_n = 50

# Attach one image-level uncertainty score to every unlabeled frame.
uncertainties = calculate_uncertainty(predictions)
df['Uncertainty'] = uncertainties

# argsort orders positions from least to most uncertain: keep the last `top_n`
# and reverse them so the most uncertain frame comes first.
most_uncertain_indices = df['Uncertainty'].argsort().iloc[-top_n:].iloc[::-1]

top_uncertain_df = df.iloc[most_uncertain_indices].reset_index(drop=True)

print(top_uncertain_df[['File Name', 'Uncertainty']])

In [None]:
# Persist the selected frames (index, file name, uncertainty) so they can be annotated outside the notebook.
output_csv_path = '/content/drive/MyDrive/your_path/fine_frame/AL_1_uncertain_top50.csv'
top_uncertain_df.to_csv(output_csv_path, index=False)

### Uncertainty Scoring (for Active Learning)
Compute pixel-wise entropy maps and aggregate them into image-wise uncertainty scores.

In [None]:
# Per-pixel entropy for every unlabeled frame (used for the heatmaps below).
entropy_maps = calculate_pixelwise_uncertainty(predictions)

# Collapse each entropy map to a single image-level score by averaging over axes 1 and 2.
mean_entropies = entropy_maps.mean(axis=(1, 2))

### Visualization of Selected Uncertain Samples
Visualize original image, predicted mask, entropy heatmap, and metadata
for qualitative inspection.

In [None]:

# Palette and helper for rendering class-id masks as RGB images. Neither was
# defined anywhere earlier in the notebook, so this cell failed on a fresh kernel.
# 'tab10' supplies ten distinct colours, one per class id in LABELS.
colors = (plt.get_cmap('tab10')(np.arange(len(LABELS)))[:, :3] * 255).astype(np.uint8)


def apply_colormap_to_mask(mask, colors):
    """Turn an (H, W) array of class ids into an (H, W, 3) RGB image.

    ``colors[k]`` is the RGB triple used for class id ``k``.
    """
    return np.asarray(colors)[mask]


for i, row in top_uncertain_df.iterrows():
    frame_index = int(row['Index'])  # position of this frame in predictions / entropy_maps
    fig, axes = plt.subplots(1, 6, figsize=(20, 4))
    for ax in axes:
        ax.axis('off')

    # 'Original Image' -- file names are stored relative to DATA_ROOT_FINE.
    original_image_path = os.path.join(DATA_ROOT_FINE, row['File Name'])
    original_image = cv2.imread(original_image_path)
    if original_image is not None:
        axes[0].imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
        axes[0].set_title('Original Image')
    else:
        print(f"Image not found: {original_image_path}")

    # 'Predicted Mask'
    predicted_mask = np.argmax(predictions[frame_index], axis=-1)
    colored_mask = apply_colormap_to_mask(predicted_mask, colors)
    axes[1].imshow(colored_mask)
    axes[1].set_title('Predicted Mask')

    # 'Entropy Heatmap'
    entropy_map = entropy_maps[frame_index]
    axes[2].imshow(entropy_map, cmap='hot')
    axes[2].set_title('Entropy Heatmap')

    # 'Mean Entropy'
    axes[3].text(0.5, 0.5, f"Mean Entropy:\n{mean_entropies[frame_index]:.4f}", fontsize=12, ha='center', va='center')
    axes[3].set_title('Mean Entropy')

    # 'File Name'
    axes[4].text(0.5, 0.5, row['File Name'], fontsize=10, ha='center', va='center')
    axes[4].set_title('File Name')

    # 'Index'
    axes[5].text(0.5, 0.5, str(row['Index']), fontsize=10, ha='center', va='center')
    axes[5].set_title('Index')

    plt.show()
    plt.close(fig)  # release the figure; this loop creates one per selected frame

### Load Annotated Images for Active Learning

This block loads the newly annotated images from the first active learning cycle.  
These samples were selected based on uncertainty, annotated manually, and are now added
to the training dataset for further model updates.

**Note:**  
Before running this cell, please execute all previous blocks to:
1. Compute image-wise uncertainty and select the most uncertain samples.  
2. Annotate the selected frames manually (outside this notebook).  


In [None]:
# Folder holding the frames annotated after the first uncertainty-based selection (placeholder path).
new_image_dir = '/content/drive/MyDrive/your_path/fine_frame/AL_labeled_fine_frames'
new_image_folders = ['AL_1_labeled_fine_frames']

# Same loader and label map as the base dataset, so the masks use identical class ids.
new_train = ImageMaskLoader(new_image_dir, new_image_folders, LABELS)

new_train_images, new_train_masks = new_train.load_and_process_data()

### Training Configuration

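- **Learning Rate Scheduler**: keeps the learning rate unchanged for the first 10 epochs, then multiplies it by 0.9999 at every subsequent epoch.  
- **Early Stopping**: monitors `val_loss` with a patience of 10 epochs and restores the best weights.  
- **Optimizer**: Adam with an initial learning rate of 1e-6.  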

In [None]:
def scheduler(epoch, lr):
    """Learning-rate schedule: hold ``lr`` for epochs 0-9, then decay it by 0.9999 per epoch.

    ``epoch`` is the epoch index and ``lr`` the current learning rate; the value
    returned is the learning rate to use next.
    """
    hold_epochs = 10
    decay_factor = 0.9999
    return lr if epoch < hold_epochs else lr * decay_factor

# Optimizer: Adam with a very small step size.
initial_learning_rate = 1e-6
optimizer = Adam(learning_rate=initial_learning_rate)

# Callbacks: per-epoch learning-rate schedule, and early stopping on the
# validation loss that restores the best weights seen.
lr_scheduler = LearningRateScheduler(scheduler)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

### Compile and Retrain Model with Newly Annotated Data

In [None]:
# Start again from the saved base model so this cell can be re-run from a clean state.
model_path = '/content/drive/MyDrive/your_path/base_model.h5'
model = load_model(model_path)

# Build a fresh optimizer for the freshly loaded model. Reusing the instance
# created in the configuration cell would carry its state over from a previous
# run of this cell (or fail outright on Keras versions that bind an optimizer
# to the variables it was first used with).
optimizer = Adam(learning_rate=initial_learning_rate)
model.compile(optimizer=optimizer, loss=tf.keras.losses.LogCosh(), metrics=['accuracy'])

# Fine-tune on the newly annotated frames only; 20% of them are held out for validation.
history = model.fit(
    new_train_images,
    new_train_masks,
    batch_size=8,
    epochs=50,
    validation_split=0.2,
    callbacks=[lr_scheduler, early_stopping]
)

# Loss curves for the fine-tuning run.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

### Active Learning Model Saving

In [None]:
# Save the fine-tuned model under a new name so the base model file is left untouched.
new_model_path = '/content/drive/MyDrive/your_path/base_model_AL_1.h5'
model.save(new_model_path)

### Model Evaluation (IoU Calculation)
1) **Test set**
2) **Expert validation set**
3) **Novice validation set**

In [None]:
# Evaluation on the held-out test split.
# The label map is named LABELS; `labels` was never defined and raised a NameError.
num_classes = len(LABELS)
iou_calculator = IoUCalculator(num_classes)

# Predicted class id per pixel = argmax over the model's per-class outputs.
y_pred = model.predict(test_images)
y_pred_argmax = np.argmax(y_pred, axis=-1)

# Per-class IoU; class ids are looked up in LABELS instead of being hard-coded.
iou_bladder = iou_calculator.calculate_class_iou(test_masks, y_pred_argmax, class_index=LABELS["bladder"])
iou_prostate = iou_calculator.calculate_class_iou(test_masks, y_pred_argmax, class_index=LABELS["prostate"])
iou_border = iou_calculator.calculate_class_iou(test_masks, y_pred_argmax, class_index=LABELS["border"])
iou_catheter = iou_calculator.calculate_class_iou(test_masks, y_pred_argmax, class_index=LABELS["catheter"])
iou_gauze = iou_calculator.calculate_class_iou(test_masks, y_pred_argmax, class_index=LABELS["gauze"])
iou_instrument = iou_calculator.calculate_class_iou(test_masks, y_pred_argmax, class_index=LABELS["instrument"])
iou_mucosa = iou_calculator.calculate_class_iou(test_masks, y_pred_argmax, class_index=LABELS["mucosa"])
iou_suction = iou_calculator.calculate_class_iou(test_masks, y_pred_argmax, class_index=LABELS["suction"])
iou_urethra = iou_calculator.calculate_class_iou(test_masks, y_pred_argmax, class_index=LABELS["urethra"])
iou_background = iou_calculator.calculate_class_iou(test_masks, y_pred_argmax, class_index=LABELS["background"])
mean_iou = iou_calculator.calculate_mean_iou(test_masks, y_pred_argmax)

print(f"Mean IoU: {mean_iou}")
print(f"IoU for Bladder: {iou_bladder}")
print(f"IoU for Prostate: {iou_prostate}")
print(f"IoU for Border: {iou_border}")
print(f"IoU for Catheter: {iou_catheter}")
print(f"IoU for Gauze: {iou_gauze}")
print(f"IoU for Instrument: {iou_instrument}")
print(f"IoU for Mucosa: {iou_mucosa}")
print(f"IoU for Suction: {iou_suction}")
print(f"IoU for Urethra: {iou_urethra}")
print(f"IoU for Background: {iou_background}")

In [None]:
# Evaluation on the expert validation set.
# The label map is named LABELS; `labels` was never defined and raised a NameError.
num_classes = len(LABELS)
iou_calculator = IoUCalculator(num_classes)

# Predicted class id per pixel = argmax over the model's per-class outputs.
y_pred_v = model.predict(images_v)
y_pred_v_argmax = np.argmax(y_pred_v, axis=-1)

# Per-class IoU; class ids are looked up in LABELS instead of being hard-coded.
iou_bladder_v = iou_calculator.calculate_class_iou(masks_v, y_pred_v_argmax, class_index=LABELS["bladder"])
iou_prostate_v = iou_calculator.calculate_class_iou(masks_v, y_pred_v_argmax, class_index=LABELS["prostate"])
iou_border_v = iou_calculator.calculate_class_iou(masks_v, y_pred_v_argmax, class_index=LABELS["border"])
iou_catheter_v = iou_calculator.calculate_class_iou(masks_v, y_pred_v_argmax, class_index=LABELS["catheter"])
iou_gauze_v = iou_calculator.calculate_class_iou(masks_v, y_pred_v_argmax, class_index=LABELS["gauze"])
iou_instrument_v = iou_calculator.calculate_class_iou(masks_v, y_pred_v_argmax, class_index=LABELS["instrument"])
iou_mucosa_v = iou_calculator.calculate_class_iou(masks_v, y_pred_v_argmax, class_index=LABELS["mucosa"])
iou_suction_v = iou_calculator.calculate_class_iou(masks_v, y_pred_v_argmax, class_index=LABELS["suction"])
iou_urethra_v = iou_calculator.calculate_class_iou(masks_v, y_pred_v_argmax, class_index=LABELS["urethra"])
iou_background_v = iou_calculator.calculate_class_iou(masks_v, y_pred_v_argmax, class_index=LABELS["background"])
mean_iou_v = iou_calculator.calculate_mean_iou(masks_v, y_pred_v_argmax)

print(f"Mean IoU: {mean_iou_v}")
print(f"IoU for Bladder: {iou_bladder_v}")
print(f"IoU for Prostate: {iou_prostate_v}")
print(f"IoU for Border: {iou_border_v}")
print(f"IoU for Catheter: {iou_catheter_v}")
print(f"IoU for Gauze: {iou_gauze_v}")
print(f"IoU for Instrument: {iou_instrument_v}")
print(f"IoU for Mucosa: {iou_mucosa_v}")
print(f"IoU for Suction: {iou_suction_v}")
print(f"IoU for Urethra: {iou_urethra_v}")
print(f"IoU for Background: {iou_background_v}")

In [None]:
# Evaluation on the novice validation set.
# The label map is named LABELS; `labels` was never defined and raised a NameError.
num_classes = len(LABELS)
iou_calculator = IoUCalculator(num_classes)

# Predicted class id per pixel = argmax over the model's per-class outputs.
y_pred_v_n = model.predict(images_v_n)
y_pred_v_n_argmax = np.argmax(y_pred_v_n, axis=-1)

# Per-class IoU; class ids are looked up in LABELS instead of being hard-coded.
iou_bladder_v_n = iou_calculator.calculate_class_iou(masks_v_n, y_pred_v_n_argmax, class_index=LABELS["bladder"])
iou_prostate_v_n = iou_calculator.calculate_class_iou(masks_v_n, y_pred_v_n_argmax, class_index=LABELS["prostate"])
iou_border_v_n = iou_calculator.calculate_class_iou(masks_v_n, y_pred_v_n_argmax, class_index=LABELS["border"])
iou_catheter_v_n = iou_calculator.calculate_class_iou(masks_v_n, y_pred_v_n_argmax, class_index=LABELS["catheter"])
iou_gauze_v_n = iou_calculator.calculate_class_iou(masks_v_n, y_pred_v_n_argmax, class_index=LABELS["gauze"])
iou_instrument_v_n = iou_calculator.calculate_class_iou(masks_v_n, y_pred_v_n_argmax, class_index=LABELS["instrument"])
iou_mucosa_v_n = iou_calculator.calculate_class_iou(masks_v_n, y_pred_v_n_argmax, class_index=LABELS["mucosa"])
iou_suction_v_n = iou_calculator.calculate_class_iou(masks_v_n, y_pred_v_n_argmax, class_index=LABELS["suction"])
iou_urethra_v_n = iou_calculator.calculate_class_iou(masks_v_n, y_pred_v_n_argmax, class_index=LABELS["urethra"])
iou_background_v_n = iou_calculator.calculate_class_iou(masks_v_n, y_pred_v_n_argmax, class_index=LABELS["background"])
# The mean was printed below but never computed, which raised a NameError.
mean_iou_v_n = iou_calculator.calculate_mean_iou(masks_v_n, y_pred_v_n_argmax)

print(f"Mean IoU (novice): {mean_iou_v_n}")
print(f"IoU for Bladder (novice): {iou_bladder_v_n}")
print(f"IoU for Prostate (novice): {iou_prostate_v_n}")
print(f"IoU for Border (novice): {iou_border_v_n}")
print(f"IoU for Catheter (novice): {iou_catheter_v_n}")
print(f"IoU for Gauze (novice): {iou_gauze_v_n}")
print(f"IoU for Instrument (novice): {iou_instrument_v_n}")
print(f"IoU for Mucosa (novice): {iou_mucosa_v_n}")
print(f"IoU for Suction (novice): {iou_suction_v_n}")
print(f"IoU for Urethra (novice): {iou_urethra_v_n}")
print(f"IoU for Background (novice): {iou_background_v_n}")