# 1. Imports and Setup
You'll begin by importing necessary libraries, including your GeometricCalibrator library and other standard packages like tensorflow, scikit-learn, or pytorch for the CNN part, and numpy or pandas for data manipulation.

In [1]:
import sys
import os
import numpy as np
import tensorflow as tf

# Add the parent directory to the sys.path so Python can find the utils module
sys.path.append(os.path.abspath('..'))

2024-11-14 14:08:45.498834: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-14 14:08:45.525561: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-14 14:08:45.533594: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-14 14:08:45.555077: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from calibrators.geometric_calibrators import GeometricCalibrator
from utils.logging_config import *

setup_logging()


# 2. Step 1: Data Retrieval
This part loads your dataset. It could be MNIST, CIFAR-10, or any dataset you're working with. Depending on the experiment, you may switch between different datasets to test the generality of your method.

In [3]:
from keras.src.datasets.mnist import load_data
from sklearn.model_selection import train_test_split

# Split and save the data for reproducible experiments (to be done once)
# Load MNIST data
(train_X_original, train_y_original), (test_X_original, test_y_original) = load_data()

# Combine train and test data for further splitting
data = np.concatenate((train_X_original, test_X_original), axis=0)
labels = np.concatenate((train_y_original, test_y_original), axis=0)

# Perform the splitting dynamically
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=0)  # 0.25 * 0.8 = 0.2



# 3. Step 2: Data Augmentation
Here, you'll apply different augmentation techniques (e.g., rotation, noise addition, or translation). Each augmentation method is treated as a different experiment. You'll want to apply these transformations to the training data before passing them through the model.

In [4]:
# Ensure the shape is correct by only adding the channel dimension once
# If you already added a channel, avoid calling expand_dims again
if X_train.ndim == 3:  # Check if the array is missing the channel dimension
    X_train = np.expand_dims(X_train, axis=-1)  # Shape becomes (batch_size, 28, 28, 1)
if X_test.ndim == 3:
    X_test = np.expand_dims(X_test, axis=-1)
if X_val.ndim == 3:
    X_val = np.expand_dims(X_val, axis=-1)

# Augmentation methods
def augment_data(images, method='rotation'):
    if method == 'rotation':
        datagen = tf.keras.preprocessing.image.ImageDataGenerator(rotation_range=90)
    elif method == 'shift':
        datagen = tf.keras.preprocessing.image.ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1)
    elif method == 'noise':
        noise = np.random.normal(loc=0.0, scale=0.1, size=images.shape)
        return np.clip(images + noise, 0., 1.)
    
    # Ensure images have the correct shape for ImageDataGenerator (rank 4)
    if images.ndim != 4:
        raise ValueError(f"Input to `ImageDataGenerator` should be rank 4. Got shape: {images.shape}")

    datagen.fit(images)
    
    # Use .flow() to create an iterator and retrieve augmented images
    augmented_images = datagen.flow(images, batch_size=len(images), shuffle=False)
    
    # Convert iterator output to Numpy array
    return next(augmented_images)  # Retrieve the first batch of augmented images

# Apply the selected augmentation method
augmented_train_images = augment_data(X_train, method='rotation')

# 4. Step 3: Build Stability Space using CNN + Pooling
You now use the augmented data and pass it through a convolutional neural network with pooling to create the stability space before performing geometric separation.

In [5]:
import os

# Define file path to save the trained model
model_path = 'saved_models/cnn_model.keras'

# Define CNN model with input shape of (28, 28, 1) for grayscale images
def build_cnn_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),  # Adjusted input shape
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')  # 10 classes for MNIST
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Check if model already exists, load it if found, otherwise train a new model
if os.path.exists(model_path):
    print("Loading pre-trained model...")
    cnn_model = tf.keras.models.load_model(model_path)
else:
    print("Training new model...")
    cnn_model = build_cnn_model()
    
    # Train the CNN model
    cnn_model.fit(augmented_train_images, y_train, epochs=10, validation_data=(X_val, y_val))
    
    # Save the trained model
    os.makedirs(os.path.dirname(model_path), exist_ok=True)  # Create directory if not exists
    cnn_model.save(model_path)
    print(f"Model saved at {model_path}")

# Use the trained model to predict probabilities on validation and test data
features_val = cnn_model.predict(X_val)
features_test = cnn_model.predict(X_test)

Loading pre-trained model...
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step


# 5. Step 4: Apply Geometric Calibration
Once you have the features, pass them through your GeometricCalibrator to compute the stability space and perform uncertainty calibration.

In [6]:
import bisect
import concurrent
from itertools import repeat
# import scann
# from annoy import AnnoyIndex
# import nmslib
# import hnswlib
import torch
import numpy as np
from sklearn.decomposition import PCA
import tensorflow as tf
from math import sqrt

import faiss
import numpy as np
from scipy.optimize import optimize, minimize
import h5py
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression
from sklearn.neighbors import NearestNeighbors
from scipy.optimize import curve_fit
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import json
import scipy.stats
from tqdm import tqdm
import torchvision.transforms as transforms
import shutil
import time
import logging
from utils.logging_config import setup_logging
# geometric_calibrators.py
import numpy as np
import logging
from sklearn.isotonic import IsotonicRegression
from tqdm import tqdm

from calibrators.base_calibrator import BaseCalibrator
from utils.utils import StabilitySpace, Compression, calc_balanced_acc
from utils.logging_config import setup_logging
from sklearn.metrics import balanced_accuracy_score


In [7]:
from sklearn.neighbors import NearestNeighbors
import faiss
import numpy as np
import concurrent.futures

class StabilitySpace:
    """
    Class to compute stability and geometric values for the input X using various similarity search libraries.
    """

    def __init__(self, X_train, y_train, compression=None, library='knn', metric='minkowski', num_labels=None):
        """
        Initialize the stability space by compressing the input data (optional) and setting up nearest neighbor models.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.info(f"Initializing StabilitySpace with {library} library and {metric} metric.")

        self.metric = metric
        self.library = library
        self.num_labels = num_labels or len(set(y_train))
        self.compression = compression

        if self.compression:
            X_train, y_train = self.compression(X_train, y_train)

        self.X_train = X_train
        self.y_train = y_train

        # Initialize nearest neighbor models based on library
        if library == 'faiss':
            self._initialize_faiss()
        elif library == 'knn':
            self._initialize_knn()
        elif library == 'separation':
            self.logger.info("Using separation-based stability calculation.")
        else:
            raise ValueError(f"Unsupported library: {library}")
        
    def _initialize_faiss(self):
        """
        Initialize FAISS indices for each label.
        """
        self.same_nbrs = {}
        self.other_nbrs = {}
    
        # Ensure X_train is 2D for FAISS (flatten images if needed)
        if len(self.X_train.shape) > 2:
            self.logger.debug("Flattening X_train for FAISS compatibility.")
            self.X_train = self.X_train.reshape(self.X_train.shape[0], -1).astype('float32')
    
        dim = self.X_train.shape[1]
        for label in range(self.num_labels):
            idx_same = np.where(self.y_train == label)[0]
            idx_other = np.where(self.y_train != label)[0]
    
            # Build FAISS indices
            self.same_nbrs[label] = faiss.IndexFlatL2(dim)
            self.same_nbrs[label].add(self.X_train[idx_same])
    
            self.other_nbrs[label] = faiss.IndexFlatL2(dim)
            self.other_nbrs[label].add(self.X_train[idx_other])

    def _initialize_knn(self):
        """
        Initialize NearestNeighbors models for each label, flattening input if necessary.
        """
        self.same_nbrs = []
        self.other_nbrs = []
    
        # Flatten X_train if it has more than 2 dimensions (e.g., images)
        if len(self.X_train.shape) > 2:
            self.logger.debug("Flattening X_train for KNN compatibility.")
            self.X_train = self.X_train.reshape(self.X_train.shape[0], -1).astype('float32')
    
        for label in range(self.num_labels):
            idx_same = np.where(self.y_train == label)[0]
            idx_other = np.where(self.y_train != label)[0]
    
            # Initialize NearestNeighbors for KNN library
            same_nn = NearestNeighbors(n_neighbors=1, metric=self.metric).fit(self.X_train[idx_same])
            other_nn = NearestNeighbors(n_neighbors=1, metric=self.metric).fit(self.X_train[idx_other])
    
            self.same_nbrs.append(same_nn)
            self.other_nbrs.append(other_nn)

    def _stability_faiss(self, valX, val_y_pred):
        """
        Calculate stability using FAISS with a progress indicator.
        """
        self.logger.info("Calculating stability using FAISS.")
        stability = np.zeros(len(valX))
        predicted_labels = np.argmax(val_y_pred, axis=1) if len(val_y_pred.shape) > 1 else val_y_pred
    
        for i in tqdm(range(len(valX)), desc="Calculating Stability (FAISS)", unit="sample"):
            x = valX[i].reshape(1, -1).astype('float32')
            pred_label = int(predicted_labels[i])
    
            try:
                _, dist_same = self.same_nbrs[pred_label].search(x, 1)
                _, dist_other = self.other_nbrs[pred_label].search(x, 1)
                stability[i] = (dist_other[0][0] - dist_same[0][0]) / 2
            except Exception as e:
                self.logger.error(f"Error in FAISS stability calculation for sample {i}: {e}")
                stability[i] = np.nan
    
        return stability
    
    def _stability_knn(self, valX, val_y_pred):
        """
        Calculate stability using KNN with a progress indicator.
        """
        self.logger.info("Calculating stability using KNN.")
        stability = np.zeros(len(valX))
        predicted_labels = np.argmax(val_y_pred, axis=1) if len(val_y_pred.shape) > 1 else val_y_pred
    
        for i in tqdm(range(len(valX)), desc="Calculating Stability (KNN)", unit="sample"):
            x = valX[i].reshape(1, -1)
            pred_label = int(predicted_labels[i])
    
            try:
                dist_same, _ = self.same_nbrs[pred_label].kneighbors(x)
                dist_other, _ = self.other_nbrs[pred_label].kneighbors(x)
                stability[i] = (dist_other[0][0] - dist_same[0][0]) / 2
            except Exception as e:
                self.logger.error(f"Error in KNN stability calculation for sample {i}: {e}")
                stability[i] = np.nan
    
        return stability
                
    def _stability_separation(self, testX, pred_y, norm='L2', parallel=False):
        """
        Calculate separation-based stability with progress tracking.
        """
        self.logger.info("Calculating stability using separation method.")
        
        if parallel:
            self.logger.debug("Entering parallel separation calculation with progress tracking.")
            
            # Use a progress bar to track the entire operation in parallel mode
            results = list(tqdm(self._sep_calc_parallel(testX, pred_y, norm=norm), 
                                desc="Calculating Separation (Parallel)", unit="sample"))
            
            self.logger.debug("Completed parallel separation calculation.")
            return np.array(results)
        else:
            self.logger.debug("Entering sequential separation calculation with progress tracking.")
            
            # Call _sep_calc with tqdm integrated for sequential calculations
            results = np.array(self._sep_calc(testX, pred_y, norm=norm))
            
            self.logger.debug("Completed sequential separation calculation.")
            return results

    def _sep_calc_parallel(self, testX, pred_y, norm='L2'):
        """
        Calculate the separation of all test/val examples in parallel with progress tracking.
        """
        print("Entered _sep_calc_parallel")
        self.logger.debug("Starting parallel separation calculation.")
        
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # Submit each task to the executor and track progress with tqdm
            futures = [
                executor.submit(self._sep_calc_point, x, self.X_train, self.y_train, pred, norm)
                for x, pred in zip(testX, pred_y)
            ]
            
            print(f"Submitted {len(futures)} tasks to the executor.")
            self.logger.debug(f"Submitted {len(futures)} tasks to the executor.")
            
            # Use tqdm to show progress as futures complete
            separation = []
            for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), 
                               desc="Calculating Separation (Parallel)", unit="sample"):
                try:
                    result = future.result()  # Get the result of each future
                    separation.append(result)
                except Exception as e:
                    self.logger.error(f"Error in parallel separation calculation: {e}")
            
            print("Completed gathering results from futures.")
            self.logger.debug("Completed gathering results from futures.")
            
        return separation

    def _sep_calc(self, testX, pred_y, norm):
        """
        Calculate the separation of all test/val examples without parallel processing, with progress tracking.
        """
        print("Started _sep_calc with tqdm progress bar")
        self.logger.debug("Starting sequential separation calculation with tqdm progress bar.")
        
        # Use tqdm to track progress over testX for the sequential calculation
        results = []
        for i, x in tqdm(enumerate(testX), desc="Calculating Separation (Sequential)", unit="sample", total=len(testX)):
            result = self._sep_calc_point(x, self.X_train, self.y_train, pred_y[i], norm)
            results.append(result)
            
        print("Completed _sep_calc with tqdm progress bar")
        self.logger.debug("Completed sequential separation calculation with tqdm progress bar.")
        return results

    def _sep_calc_point(self, x, X_train, y_train, y_pred, norm='L2'):
        """
        Calculate the separation for a single test instance.
        """
        self.logger.debug("Started _sep_calc_point")
        
        # Ensure `y_pred` is a scalar
        if hasattr(y_pred, "__len__") and len(y_pred) > 1:
            y_pred = np.argmax(y_pred)  # Convert probability vector to a class label if needed
    
        # Flatten `x` if it has more than one dimension
        if x.ndim > 1:
            x = x.flatten()
        
        # Determine norm type
        norm_val = {'L1': 1, 'L2': 2, 'Linf': np.inf}.get(norm, 2)
        
        # Compute distances, ensuring `train` is also flattened if needed
        same = [(np.linalg.norm(x - train.flatten(), norm_val), index) 
                for index, train in enumerate(X_train) if y_train[index] == y_pred]
        others = [(np.linalg.norm(x - train.flatten(), norm_val), index) 
                  for index, train in enumerate(X_train) if y_train[index] != y_pred]
        
        same.sort(key=lambda x: x[0])
        others.sort(key=lambda x: x[0])
        
        min_r = same[0][0] + 2 * others[0][0]
        sep_other = min_r
        for o in others:
            sep_same = np.NINF
            if o[0] > min_r:
                break
            for s in same:
                if s[0] > min(min_r, o[0]) and o[0] > same[0][0]:
                    break
                x_s = X_train[s[1]].flatten()  # Ensure x_s is also flattened
                x_o = X_train[o[1]].flatten()  # Ensure x_o is also flattened
                sep_same = max(self._two_point_sep_calc(x, x_s, x_o, norm_val), sep_same)
            sep_other = min(sep_same, sep_other)
            min_r = same[0][0] + 2 * max(0, sep_other)
        
        self.logger.debug("Completed _sep_calc_point")
        
        return sep_other

    def _two_point_sep_calc(self, x, x1, x2, norm=2):
        """
        Calculate the separation parameter for a single test point and two nearest points.
        """
        a = np.linalg.norm(x1 - x, norm)
        b = np.linalg.norm(x2 - x, norm)
        c = np.linalg.norm(x1 - x2, norm)
        return ((b ** 2 - a ** 2) / (2 * c))

    def calc_stab(self, X_val, y_val_pred, timeout=1800):
        """
        Calculate stability for the test set with a timeout and track the time taken.
        """
        if self.compression:
            X_val, _ = self.compression(X_val, None, train=False)
    
        self.logger.info(f"Calculating stability using {self.library}.")
        start_time = time.time()  # Start timing
    
        if self.library == 'faiss':
            stability = self._stability_faiss(X_val, y_val_pred)
        elif self.library == 'knn':
            stability = self._stability_knn(X_val, y_val_pred)
        elif self.library == 'separation':
            stability = self._stability_separation(X_val, y_val_pred)
        else:
            raise ValueError(f"Unsupported library: {self.library}")
    
        end_time = time.time()  # End timing
        elapsed_time = end_time - start_time
        self.logger.info(f"Time taken for {self.library} stability calculation: {elapsed_time:.2f} seconds")
    
        # Optionally log the first 200 stability scores for debugging
        self.logger.debug(f"First 200 stability scores: {stability[:200]}")
    
        return stability


In [8]:
setup_logging()
logger = logging.getLogger(__name__)


class GeometricCalibrator(BaseCalibrator):
    """
    Class serving as a wrapper for the geometric calibration method (stability/separation).
    """

    def __init__(self, model, X_train, y_train, fitting_func=None, compression_mode=None, compression_param=None,
                 metric='l2', stability_space=None, library='faiss'):
        """
        Initializes the GeometricCalibrator with a model, stability space, and calibration function.

        Args:
            model: The model to be calibrated (with `predict` and `predict_proba` methods).
            X_train: Training data (flattened images).
            y_train: Training labels.
            fitting_func: Custom fitting function (default: IsotonicRegression).
            compression_mode: Compression mode for data.
            compression_param: Parameter controlling the compression level.
            metric: Distance metric for stability/separation calculations.
            stability_space: Optional custom StabilitySpace instance. If not provided, one is initialized automatically.
            library: The library used for stability calculation (default is 'faiss').
        """
        super().__init__()
        self.model = model
        self.popt = None
        self._fitted = False

        # Determine the number of classes (unique labels in y_train)
        self.num_labels = len(np.unique(y_train))  # Fix: Initialize num_labels based on the training labels

        # Default to IsotonicRegression if no custom fitting function is provided
        self.fitting_func = fitting_func if fitting_func else IsotonicRegression(out_of_bounds="clip")

        # Use provided stability space or create a new one with the default settings
        if stability_space:
            self.stab_space = stability_space  # User provided custom StabilitySpace
            logger.info(f"{self.__class__.__name__}: Using custom StabilitySpace provided by user.")
        else:
            # Automatically initialize StabilitySpace with defaults if not provided
            self.stab_space = StabilitySpace(X_train, y_train,
                                             compression=Compression(compression_mode, compression_param),
                                             library=library, metric=metric)
            logger.info(f"{self.__class__.__name__}: Initialized StabilitySpace with default settings"
                        f" (library: {library}, metric: {metric}).")

        logger.info(f"Initialized {self.__class__.__name__} with model {self.model.__class__.__name__}"
                    f" and fitting function {self.fitting_func.__class__.__name__}.")

    def fit(self, X_val, y_val):
        """
        Fits the calibrator with the validation data using rounded stability and balanced accuracy.
    
        Args:
            X_val: Validation data (flattened images).
            y_val: Validation labels.
        """
        logger.info(f"{self.__class__.__name__}: Fitting with validation data using balanced accuracy and rounded stability.")
    
        try:
            # Step 1: Predict on validation data
            y_pred_val = self.model.predict(X_val)
            y_pred_classes = np.argmax(y_pred_val, axis=1)  # Convert predictions to class labels
    
            # Step 2: Compute stability values based on predictions
            stability_val = self.stab_space.calc_stab(X_val, y_pred_val)
            logger.info(f"Stability values (first 200): {stability_val[:200]}")  # Log a sample of the stability values
    
            # Step 3: Round the stability values for binning
            round_digits = 2  # Precision for rounding stability values
            rounded_stability = np.round(stability_val, decimals=round_digits)
            unique_stabilities = np.unique(rounded_stability)
    
            # Step 4: Calculate accuracy for each unique stability value
            stability_accuracy = {}
            i = 0
            for stab in unique_stabilities:
                indices = np.where(rounded_stability == stab)[0]  # Get indices of points with this stability
                y_true_stab = y_val[indices]  # Get true labels for these points
                y_pred_stab = y_pred_classes[indices]  # Get predicted labels for these points
    
                # Calculate balanced accuracy for this stability value
                if len(np.unique(y_true_stab)) > 1:  # Ensure we have more than one class
                    
                    acc = balanced_accuracy_score(y_true_stab, y_pred_stab)
                else:
                    acc = np.mean(y_true_stab == y_pred_stab)  # Use normal accuracy for single-class stability
    
                stability_accuracy[stab] = acc
                if i % 350 == 0:
                    logger.info(f"Stability {stab}: Accuracy = {acc}")
                    logger.info(f"True labels: {y_true_stab[:10]}")
                    logger.info(f"Predicted labels: {y_pred_stab[:10]}")
                i += 1
    
            # Step 5: Prepare calibration data: (rounded_stability, accuracy) pairs
            calibration_data = [(stab, stability_accuracy[stab]) for stab in stability_accuracy]
    
            # Step 6: Fit the provided fitting function (e.g., IsotonicRegression) on the calibration data
            stability_vals, accuracies = zip(*calibration_data)
            self.popt = self.fitting_func.fit(np.array(stability_vals).reshape(-1, 1), np.array(accuracies))
    
            self._fitted = True
            logger.info(f"{self.__class__.__name__}: Successfully fitted using stability-accuracy pairs and {self.fitting_func.__class__.__name__}.")
    
        except Exception as e:
            logger.error(f"{self.__class__.__name__}: Failed to fit with error: {e}")
            raise
            
    def calibrate(self, X_test):
        """
        Calibrates the test data based on the fitted model.
    
        Args:
            X_test: Test data (flattened images).
    
        Returns:
            np.ndarray: Calibrated probability matrix for each image and class.
        """
        if not self._fitted:
            raise ValueError("You must fit the calibrator before using it.")
    
        logger.info(f"{self.__class__.__name__}: Calibrating test data.")
    
        try:
            # Predict on the test data using the trained model (get predicted probabilities for all classes)
            y_test_pred = self.model.predict(X_test)
            y_test_labels = np.argmax(y_test_pred, axis=1)  # Get predicted class labels from probabilities
    
            # Initialize progress bar using tqdm
            num_samples = X_test.shape[0]
            num_classes = y_test_pred.shape[1]
            calibrated_probs = np.zeros((num_samples, num_classes))  # Initialize a matrix to store calibrated probabilities
    
            logger.info(f"Starting calibration for {num_samples} samples and {num_classes} classes.")
    
            # Compute stability for the predicted probabilities
            stability_test = self.stab_space.calc_stab(X_test, y_test_pred)
            logger.info(f"Stability values during calibration (first 10): {stability_test[:10]}")  # Add logging
    
            # Apply the fitted calibration function to the stability values
            calibrated_values = self.popt.predict(stability_test.reshape(-1, 1))
            logger.info(f"Calibrated values (first 10): {calibrated_values[:10]}")  # Add logging
    
            # Distribute the calibrated values across the predicted class
            for i in range(X_test.shape[0]):
                # Assign the calibrated probability to the predicted class label
                calibrated_probs[i, y_test_labels[i]] = calibrated_values[i]
                
                # Distribute remaining probability equally across other classes
                remaining_prob = (1 - calibrated_values[i]) / (self.num_labels - 1)
                for j in range(self.num_labels):
                    if j != y_test_labels[i]:
                        calibrated_probs[i, j] = remaining_prob
    
            # Ensure probabilities are in [0, 1] and sum to 1
            calibrated_probs = np.clip(calibrated_probs, 0, 1)
            calibrated_probs = calibrated_probs / calibrated_probs.sum(axis=1, keepdims=True)
    
            logger.info(f"{self.__class__.__name__}: Calibration successful.")
    
            return calibrated_probs
    
        except Exception as e:
            logger.error(f"{self.__class__.__name__}: Calibration failed with error: {e}")
            raise


In [9]:
# Initialize Geometric Calibrator with FAISS
geo_calibrator_faiss = GeometricCalibrator(model=cnn_model, X_train=augmented_train_images, y_train=y_train, library='faiss')

# Fit the calibrator using FAISS on the validation data
geo_calibrator_faiss.fit(X_val, y_val)


2024-11-14 14:09:23,844 - INFO - Initialized GeometricCalibrator with n_classes=None, bins=15, temperature=1.0
2024-11-14 14:09:23,847 - INFO - Initializing StabilitySpace with faiss library and l2 metric.


Running without compression, the shape of X needs to be square


2024-11-14 14:09:24,977 - INFO - GeometricCalibrator: Initialized StabilitySpace with default settings (library: faiss, metric: l2).
2024-11-14 14:09:24,978 - INFO - Initialized GeometricCalibrator with model Sequential and fitting function IsotonicRegression.
2024-11-14 14:09:24,979 - INFO - GeometricCalibrator: Fitting with validation data using balanced accuracy and rounded stability.


[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step


2024-11-14 14:09:27,593 - INFO - Calculating stability using faiss.
2024-11-14 14:09:27,593 - INFO - Calculating stability using FAISS.


Running without compression, the shape of X needs to be square


Calculating Stability (FAISS): 100%|██████████| 14000/14000 [03:27<00:00, 67.46sample/s]
2024-11-14 14:12:55,131 - INFO - Time taken for faiss stability calculation: 207.54 seconds
2024-11-14 14:12:55,134 - INFO - Stability values (first 200): [ 2033.  11424.5  4852.   7476.5 16121.5 14145.  11329.   7829.5  -413.5
  -108.   1484.   -557.   7866.5  4723.   1315.  15849.  13269.5 16438.5
  3525.5  8591.5  8337.5  6314.5  5945.   1653.   5394.   9764.5  3852.5
   289.   3491.   9914.  11864.  13472.5  -382.   2654.5  9461.5 14027.5
  1516.5  6097.   -163.5 15371.  13379.5  5869.   1114.   1429.   2142.
  5722.   5415.   7446.  13907.  15681.5  4553.  13210.   3064.  17126.5
  1712.5  5731.5  7558.5 15333.  17471.   9082.5    73.5    68.5 15005.
   705.  10644.5  6783.  15638.5  6809.5 16443.   -883.   6609.5  1942.
 10947.  11182.  14435.5    51.  10439.   9517.5 16027.5  9229.5 12533.5
  7588.5  6402.5 15780.5  7014.5  8222.  14702.   6296.5   734.5 16320.5
 14700.5  2961.5  -179.    11

In [10]:
# Initialize Geometric Calibrator with KNN
geo_calibrator_knn = GeometricCalibrator(model=cnn_model, X_train=augmented_train_images, y_train=y_train, library='knn')
 
# Fit the calibrator using KNN on the validation data
geo_calibrator_knn.fit(X_val, y_val)


2024-11-14 14:12:57,449 - INFO - Initialized GeometricCalibrator with n_classes=None, bins=15, temperature=1.0
2024-11-14 14:12:57,452 - INFO - Initializing StabilitySpace with knn library and l2 metric.


Running without compression, the shape of X needs to be square


2024-11-14 14:12:58,241 - INFO - GeometricCalibrator: Initialized StabilitySpace with default settings (library: knn, metric: l2).
2024-11-14 14:12:58,242 - INFO - Initialized GeometricCalibrator with model Sequential and fitting function IsotonicRegression.
2024-11-14 14:12:58,243 - INFO - GeometricCalibrator: Fitting with validation data using balanced accuracy and rounded stability.


[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step


2024-11-14 14:13:00,854 - INFO - Calculating stability using knn.
2024-11-14 14:13:00,854 - INFO - Calculating stability using KNN.


Running without compression, the shape of X needs to be square


Calculating Stability (KNN): 100%|██████████| 14000/14000 [41:07<00:00,  5.67sample/s]
2024-11-14 14:54:08,652 - INFO - Time taken for knn stability calculation: 2467.80 seconds
2024-11-14 14:54:08,661 - INFO - Stability values (first 200): [ 107.39357057  188.57932197   68.38390547  163.42312817  114.47139429
  -21.3398076   278.95239992  199.68483266  196.90528296  264.72963513
  273.53804245  193.49012894  263.8397058   214.00214948  138.65237921
  121.29105167   68.82863128  -28.36717668  -21.86161156  232.07014742
  148.84671058  -56.24796898   -8.62869495  131.91626523  198.57014463
  214.58838967  164.4815702   354.56195353  257.78062035  124.11555487
  343.81359107  120.82430579  244.72684505  302.27388476  -88.47794885
   74.11261103  149.36498041  123.92732665  112.4636237  -265.29512876
  108.62072987  364.51438533  101.53909447  123.05056476  142.5417905
   82.08709687   36.50235407   79.52961846  271.08862186  114.9427474
  537.04187754  250.6055521   343.21800307  150.757

In [None]:
# Initialize Geometric Calibrator with KNN
geo_calibrator_separation = GeometricCalibrator(model=cnn_model, X_train=augmented_train_images, y_train=y_train, library='separation')

# Fit the calibrator using KNN on the validation data
geo_calibrator_separation.fit(X_val, y_val)
print("Both FAISS and KNN calibrators have been initialized and fitted.")


2024-11-14 11:34:32,074 - INFO - Initialized GeometricCalibrator with n_classes=None, bins=15, temperature=1.0
2024-11-14 11:34:32,077 - INFO - Initializing StabilitySpace with separation library and l2 metric.
2024-11-14 11:34:32,081 - INFO - Using separation-based stability calculation.
2024-11-14 11:34:32,082 - INFO - GeometricCalibrator: Initialized StabilitySpace with default settings (library: separation, metric: l2).
2024-11-14 11:34:32,083 - INFO - Initialized GeometricCalibrator with model Sequential and fitting function IsotonicRegression.
2024-11-14 11:34:32,084 - INFO - GeometricCalibrator: Fitting with validation data using balanced accuracy and rounded stability.


Running without compression, the shape of X needs to be square
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step
Running without compression, the shape of X needs to be square

2024-11-14 11:34:34,741 - INFO - Calculating stability using separation.
2024-11-14 11:34:34,742 - INFO - Calculating stability using separation method.



Started _sep_calc with tqdm progress bar


Calculating Separation (Sequential):  19%|█▊        | 2592/14000 [2:01:59<2:43:43,  1.16sample/s]   

# 6. Step 5: Calibrate the Test Data
After fitting the calibrator, use it to predict and calibrate the probabilities for the test set.

In [11]:
# Calibrate the predictions on the test set using FAISS-based calibrator
calibrated_probs_faiss = geo_calibrator_faiss.calibrate(X_test)

# Get predicted labels by selecting the class with the highest calibrated probability for FAISS
y_test_pred_faiss = np.argmax(calibrated_probs_faiss, axis=1)

# Calculate accuracy for FAISS
accuracy_faiss = accuracy_score(y_test, y_test_pred_faiss)
print(f"Accuracy after calibration with FAISS: {accuracy_faiss}")




2024-11-14 14:54:11,067 - INFO - GeometricCalibrator: Calibrating test data.


[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step


2024-11-14 14:54:13,652 - INFO - Starting calibration for 14000 samples and 10 classes.
2024-11-14 14:54:13,653 - INFO - Calculating stability using faiss.
2024-11-14 14:54:13,654 - INFO - Calculating stability using FAISS.


Running without compression, the shape of X needs to be square


Calculating Stability (FAISS): 100%|██████████| 14000/14000 [03:25<00:00, 67.98sample/s]
2024-11-14 14:57:39,612 - INFO - Time taken for faiss stability calculation: 205.96 seconds
2024-11-14 14:57:39,615 - INFO - Stability values during calibration (first 10): [ 3298.5   450.5 16198.5 14784.   5191.5 14685.   5768.  14454.5  9764.5
  7207.5]
2024-11-14 14:57:39,621 - INFO - Calibrated values (first 10): [0.93718025 0.93718025 0.93718025 0.93718025 0.93718025 0.93718025
 0.93718025 0.93718025 0.93718025 0.93718025]
2024-11-14 14:57:39,681 - INFO - GeometricCalibrator: Calibration successful.


Accuracy after calibration with FAISS: 0.9390714285714286


In [12]:
# Calibrate the predictions on the test set using KNN-based calibrator
calibrated_probs_knn = geo_calibrator_knn.calibrate(X_test)

# Get predicted labels by selecting the class with the highest calibrated probability for KNN
y_test_pred_knn = np.argmax(calibrated_probs_knn, axis=1)

# Calculate accuracy for KNN
accuracy_knn = accuracy_score(y_test, y_test_pred_knn)
print(f"Accuracy after calibration with KNN: {accuracy_knn}")



2024-11-14 14:57:39,700 - INFO - GeometricCalibrator: Calibrating test data.


[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step


2024-11-14 14:57:42,314 - INFO - Starting calibration for 14000 samples and 10 classes.
2024-11-14 14:57:42,315 - INFO - Calculating stability using knn.
2024-11-14 14:57:42,316 - INFO - Calculating stability using KNN.


Running without compression, the shape of X needs to be square


Calculating Stability (KNN): 100%|██████████| 14000/14000 [40:45<00:00,  5.73sample/s]
2024-11-14 15:38:27,479 - INFO - Time taken for knn stability calculation: 2445.16 seconds
2024-11-14 15:38:27,484 - INFO - Stability values during calibration (first 10): [3.77562836e+02 9.41959992e+01 2.74555369e+02 5.54964798e+01
 1.69049937e+01 1.37424858e-02 2.54933662e+02 3.13699597e+02
 2.75472990e+02 6.72081116e+01]
2024-11-14 15:38:27,490 - INFO - Calibrated values (first 10): [1.         0.99375975 0.99967846 0.97894022 0.92382812 0.91666667
 0.99967846 1.         0.99967846 0.97894022]
2024-11-14 15:38:27,616 - INFO - GeometricCalibrator: Calibration successful.


Accuracy after calibration with KNN: 0.9405714285714286


In [None]:
# Calibrate the predictions on the test set using KNN-based calibrator
calibrated_probs_separation = geo_calibrator_separation.calibrate(X_test)

# Get predicted labels by selecting the class with the highest calibrated probability for KNN
y_test_pred_separation = np.argmax(calibrated_probs_separation, axis=1)

# Calculate accuracy for KNN
accuracy_separation = accuracy_score(y_test, y_test_pred_separation)
print(f"Accuracy after calibration with KNN: {accuracy_separation}")

# 7. Step 6: Calculate ECE
Evaluate the calibration by computing the Expected Calibration Error (ECE).

In [14]:
import logging

logger = logging.getLogger(__name__)
def ECE_calc(probs, y_pred, y_real, n_bins=20):
    """
    Expected Calibration Error (ECE) calculation.

    Parameters:
        probs (np.ndarray): Calibrated probabilities for each class.
        y_pred (np.ndarray): Predicted class labels.
        y_real (np.ndarray): True class labels.
        n_bins (int): Number of bins to divide probabilities.

    Returns:
        float: ECE value.
    """
    logger.info("Starting Expected Calibration Error (ECE) calculation.")

    # Select the probabilities of the predicted classes
    confidence_of_pred_class = np.max(probs, axis=1)

    # Bin the confidences
    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    bin_indices = np.digitize(confidence_of_pred_class, bin_boundaries) - 1

    logger.info(f"Bin boundaries: {bin_boundaries}")
    logger.info(f"Bin indices (first 10): {bin_indices[:10]}")
    
    total_error = 0.0
    total_weight = 0.0  # To track the weight distribution

    for i in range(n_bins):
        bin_mask = bin_indices == i
        bin_confidences = confidence_of_pred_class[bin_mask]
        bin_real = y_real[bin_mask]
        bin_pred = y_pred[bin_mask]

        if len(bin_confidences) > 0:
            bin_acc = np.mean(bin_real == bin_pred)
            bin_conf = np.mean(bin_confidences)
            bin_weight = len(bin_confidences) / len(probs)
            total_error += bin_weight * np.abs(bin_acc - bin_conf)
            total_weight += bin_weight

            logger.info(f"Bin {i}:")
            logger.info(f"  Bin size: {len(bin_confidences)}")
            logger.info(f"  Accuracy: {bin_acc}")
            logger.info(f"  Confidence: {bin_conf}")
            logger.info(f"  Bin weight: {bin_weight}")
            logger.info(f"  ECE contribution: {bin_weight * np.abs(bin_acc - bin_conf)}")
        else:
            logger.info(f"Bin {i} is empty.")

    logger.info(f"Total weight: {total_weight}")
    logger.info(f"Final ECE value: {total_error}")
    
    return total_error
# Predict on the test data using the trained model (without calibration)
y_test_pred_raw = cnn_model.predict(X_test)  # This is the predicted probabilities
y_test_pred_labels_raw = np.argmax(y_test_pred_raw, axis=1)  # This is the predicted class labels

# Calculate ECE for the model without calibration (raw probabilities)
ece_value_raw = ECE_calc(y_test_pred_raw, y_test_pred_labels_raw, y_test)  # Pass raw probabilities and labels
print(f"ECE without calibration: {ece_value_raw}")

# Calculate ECE for FAISS-based calibrated probabilities
ece_value_faiss = ECE_calc(calibrated_probs_faiss, y_test_pred_faiss, y_test)  # Pass calibrated probabilities and labels
print(f"ECE after FAISS calibration: {ece_value_faiss}")

# Debug shape of KNN-calibrated probabilities
print(f"Initial shape of calibrated_probs_knn: {calibrated_probs_knn.shape}")

# Reshape or normalize if necessary
if calibrated_probs_knn.ndim == 1:
    # Adjust for the expected number of classes
    num_classes = 2  # Adjust according to your problem's number of classes
    calibrated_probs_knn = np.column_stack([1 - calibrated_probs_knn, calibrated_probs_knn])

# Ensure probabilities sum to 1
calibrated_probs_knn = calibrated_probs_knn / calibrated_probs_knn.sum(axis=1, keepdims=True)

# Now call ECE_calc
ece_value_knn = ECE_calc(calibrated_probs_knn, y_test_pred_knn, y_test)
print(f"ECE after KNN calibration: {ece_value_knn}")



[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step


2024-11-14 15:42:43,634 - INFO - Starting Expected Calibration Error (ECE) calculation.
2024-11-14 15:42:43,637 - INFO - Bin boundaries: [0.   0.05 0.1  0.15 0.2  0.25 0.3  0.35 0.4  0.45 0.5  0.55 0.6  0.65
 0.7  0.75 0.8  0.85 0.9  0.95 1.  ]
2024-11-14 15:42:43,638 - INFO - Bin indices (first 10): [19 19 19 19 19 19 19 19 19 19]
2024-11-14 15:42:43,639 - INFO - Bin 0 is empty.
2024-11-14 15:42:43,640 - INFO - Bin 1 is empty.
2024-11-14 15:42:43,641 - INFO - Bin 2 is empty.
2024-11-14 15:42:43,642 - INFO - Bin 3 is empty.
2024-11-14 15:42:43,643 - INFO - Bin 4 is empty.
2024-11-14 15:42:43,644 - INFO - Bin 5:
2024-11-14 15:42:43,645 - INFO -   Bin size: 1
2024-11-14 15:42:43,645 - INFO -   Accuracy: 1.0
2024-11-14 15:42:43,646 - INFO -   Confidence: 0.29476022720336914
2024-11-14 15:42:43,646 - INFO -   Bin weight: 7.142857142857143e-05
2024-11-14 15:42:43,647 - INFO -   ECE contribution: 5.037426948547364e-05
2024-11-14 15:42:43,648 - INFO - Bin 6:
2024-11-14 15:42:43,648 - INFO -  

ECE without calibration: 0.03827404156114375
ECE after FAISS calibration: 0.001956149756326618
Initial shape of calibrated_probs_knn: (14000, 10)
ECE after KNN calibration: 0.005066965668822986


# 8. Step 7: Initiate Metrics and run all of them to see the results
init metrics.py and use it as the instance of the metrics calculator

In [22]:
from utils.metrics import CalibrationMetrics

probs_faiss = y_test_pred_raw
y_pred_faiss = y_test_pred_faiss
y_true = y_test
probs = y_test_pred_raw
y_pred = y_test_pred_labels_raw
calib_metrics_raw = CalibrationMetrics(probs, y_pred, y_true)
calib_metrics_faiss = CalibrationMetrics(probs_faiss, y_pred_faiss, y_true)



# 8. Step 7: Run Multiple Experiments
Wrap the entire pipeline in a loop to test different augmentation methods and repeat the experiment multiple times.

In [None]:
augmentation_methods = ['rotation', 'shift', 'noise']
results = {}

for method in augmentation_methods:
    print(f"Running experiment with {method} augmentation...")
    augmented_train_images = augment_data(X_train, method=method)
    
    # Train the CNN model
    cnn_model.fit(augmented_train_images, y_train, epochs=10)
    features_val = cnn_model.predict(X_val)
    features_test = cnn_model.predict(X_test)
    
    # Initialize and fit the geometric calibrator
    geo_calibrator = GeometricCalibrator(model=cnn_model, X_train=features_val, y_train=y_val)
    geo_calibrator.fit(X_val, y_val)
    
    # Calibrate the test set
    calibrated_probs = geo_calibrator.calibrate(X_test)
    y_test_pred = np.argmax(calibrated_probs, axis=1)
    
    # Calculate ECE
    ece_value = ECE_calc(y_test_pred, calibrated_probs, y_test)
    results[method] = ece_value
    print(f"{method} ECE: {ece_value}")

# Find the best augmentation method based on ECE
best_method = min(results, key=results.get)
print(f"Best augmentation method: {best_method} with ECE = {results[best_method]}")