# 1. Imports and Setup
You'll begin by importing necessary libraries, including your GeometricCalibrator library and other standard packages like tensorflow, scikit-learn, or pytorch for the CNN part, and numpy or pandas for data manipulation.

In [1]:
import sys
import os
import numpy as np
import tensorflow as tf

# Add the parent directory to the sys.path so Python can find the utils module
sys.path.append(os.path.abspath('..'))

2024-12-17 12:03:50.875507: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-17 12:03:50.907288: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-17 12:03:50.917125: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-17 12:03:50.940611: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [1]:
import pandas as pd
import os

# Base path to the Aggregate directory
base_path = "/cs/cs_groups/cliron_group/Calibrato/Aggregate"

# List of datasets, models and metrics to process
datasets = ["CIFAR10", "CIFAR100", "Fashion_MNIST", "GTSRB", "MNIST", "SignLanguage"]
models = ["cnn", "GB", "RF"]
metrics = ["L1", "L2", "Linf"]

# Function to process a single file
def extract_knn_time(file_path):
    try:
        df = pd.read_csv(file_path)
        knn_row = df[df['Metric'] == 'Knn']
        if not knn_row.empty:
            time_mean = knn_row['Calibration Time (s) mean'].values[0]
            time_lower = knn_row['Calibration Time (s) 95% CI Lower'].values[0]
            time_upper = knn_row['Calibration Time (s) 95% CI Upper'].values[0]
            return f"{time_mean:.2f}±{(time_upper-time_lower)/2:.2f}"
        return "N/A"
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return "Error"

# Create lists to store the data for the DataFrame
data_rows = []

# Process each dataset, model and metric combination
for dataset in datasets:
    dataset_path = os.path.join(base_path, dataset)
    
    if os.path.exists(dataset_path):
        for model in models:
            model_path = os.path.join(dataset_path, model)
            
            if os.path.exists(model_path):
                for metric in metrics:
                    file_path = os.path.join(model_path, f"aggregate_results_non_transformed_{metric}.csv")
                    
                    if os.path.exists(file_path):
                        time_result = extract_knn_time(file_path)
                    else:
                        time_result = "File not found"
                    
                    # Add row to data
                    data_rows.append({
                        'Dataset': dataset,
                        'Model': model,
                        'Metric': metric,
                        'Calibration_Time': time_result
                    })
            else:
                for metric in metrics:
                    data_rows.append({
                        'Dataset': dataset,
                        'Model': model,
                        'Metric': metric,
                        'Calibration_Time': "Model dir not found"
                    })
    else:
        print(f"Dataset directory not found: {dataset}")

# Create DataFrame from collected data
df = pd.DataFrame(data_rows)

# Pivot the DataFrame to get the desired format
pivot_df = df.pivot_table(
    index=['Dataset', 'Metric'],
    columns='Model',
    values='Calibration_Time',
    aggfunc='first'
).reset_index()

# Reorder columns
pivot_df = pivot_df[['Dataset', 'Metric', 'cnn', 'RF', 'GB']]

# Save to CSV
output_file = "knn_calibration_times.csv"
pivot_df.to_csv(output_file, index=False)
print(f"Results saved to {output_file}")

Results saved to knn_calibration_times.csv


In [1]:
import os
import glob

# Set the base directory
base_dir = "/cs/cs_groups/cliron_group/Calibrato/CIFAR100"

# Search for all "all_results.csv" files specifically under "L2" directories
search_path = os.path.join(base_dir, "**", "L2", "**", "all_results.csv")

# Search and prompt user before deletion
for file_path in glob.glob(search_path, recursive=True):
    try:
        # Prompt user for confirmation before deleting
        # confirm = input(f"Do you want to delete this file? {file_path} (yes/no): ").strip().lower()
        # if confirm in ['yes', 'y']:
        os.remove(file_path)
        print(f"Deleted: {file_path}")
        # else:
        #     print(f"Skipped: {file_path}")
    except Exception as e:
        print(f"Failed to delete {file_path}: {e}")

print("File deletion process complete.")


Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/2/pretrained_efficientnet/L2/all/all_results.csv
Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/2/GB/L2/all/all_results.csv
Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/2/RF/L2/all/all_results.csv
Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/4/GB/L2/all/all_results.csv
Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/4/RF/L2/all/all_results.csv
Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/4/pretrained_efficientnet/L2/all/all_results.csv
Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/0/pretrained_efficientnet/L2/all/all_results.csv
Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/0/RF/L2/all/all_results.csv
Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/0/GB/L2/all/all_results.csv
Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/1/pretrained_efficientnet/L2/all/all_results.csv
Deleted: /cs/cs_groups/cliron_group/Calibrato/CIFAR100/1/GB/L2/all/all_results.csv
Del

In [None]:
import os
import glob

# Set the base directory
base_dir = "/cs/cs_groups/cliron_group/Calibrato/"

# Search for all "all_results.csv" files recursively
for file_path in glob.glob(os.path.join(base_dir, "**", "all_results.csv"), recursive=True):
    try:
        # Prompt user for confirmation before deleting
        confirm = input(f"Do you want to delete this file? {file_path} (yes/no): ").strip().lower()
        if confirm in ['yes', 'y']:
            os.remove(file_path)
            print(f"Deleted: {file_path}")
        else:
            print(f"Skipped: {file_path}")
    except Exception as e:
        print(f"Failed to delete {file_path}: {e}")

print("File deletion process complete.")



In [2]:
from calibrators.geometric_calibrators import GeometricCalibrator
from utils.logging_config import *

setup_logging()


# 2. Step 1: Data Retrieval
This part loads your dataset. It could be MNIST, CIFAR-10, or any dataset you're working with. Depending on the experiment, you may switch between different datasets to test the generality of your method.

In [3]:
from keras.src.datasets.mnist import load_data
from sklearn.model_selection import train_test_split

# Split and save the data for reproducible experiments (to be done once)
# Load MNIST data
(train_X_original, train_y_original), (test_X_original, test_y_original) = load_data()

# Combine train and test data for further splitting
data = np.concatenate((train_X_original, test_X_original), axis=0)
labels = np.concatenate((train_y_original, test_y_original), axis=0)

# Perform the splitting dynamically
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=0)  # 0.25 * 0.8 = 0.2



# 4. Step 3: Build Stability Space using CNN + Pooling
You now use the augmented data and pass it through a convolutional neural network with pooling to create the stability space before performing geometric separation.

In [5]:
import os

# Define file path to save the trained model
model_path = 'saved_models/cnn_model.keras'

# Define CNN model with input shape of (28, 28, 1) for grayscale images
def build_cnn_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),  # Adjusted input shape
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')  # 10 classes for MNIST
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Check if model already exists, load it if found, otherwise train a new model
if os.path.exists(model_path):
    print("Loading pre-trained model...")
    cnn_model = tf.keras.models.load_model(model_path)
else:
    print("Training new model...")
    cnn_model = build_cnn_model()
    
    # Train the CNN model
    cnn_model.fit(augmented_train_images, y_train, epochs=10, validation_data=(X_val, y_val))
    
    # Save the trained model
    os.makedirs(os.path.dirname(model_path), exist_ok=True)  # Create directory if not exists
    cnn_model.save(model_path)
    print(f"Model saved at {model_path}")

# Use the trained model to predict probabilities on validation and test data
features_val = cnn_model.predict(X_val)
features_test = cnn_model.predict(X_test)

Loading pre-trained model...
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


# 5. Step 4: Apply Geometric Calibration
Once you have the features, pass them through your GeometricCalibrator to compute the stability space and perform uncertainty calibration.

In [6]:
import bisect
import concurrent
from itertools import repeat
# import scann
# from annoy import AnnoyIndex
# import nmslib
# import hnswlib
import torch
import numpy as np
from sklearn.decomposition import PCA
import tensorflow as tf
from math import sqrt

import faiss
import numpy as np
from scipy.optimize import optimize, minimize
import h5py
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import ElasticNet, Lasso, LinearRegression
from sklearn.neighbors import NearestNeighbors
from scipy.optimize import curve_fit
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import json
import scipy.stats
from tqdm import tqdm
import torchvision.transforms as transforms
import shutil
import time
import logging
from utils.logging_config import setup_logging
# geometric_calibrators.py
import numpy as np
import logging
from sklearn.isotonic import IsotonicRegression
from tqdm import tqdm

from calibrators.base_calibrator import BaseCalibrator
from utils.utils import StabilitySpace, Compression, calc_balanced_acc
from utils.logging_config import setup_logging
from sklearn.metrics import balanced_accuracy_score


In [7]:
from sklearn.neighbors import NearestNeighbors
import faiss
import numpy as np
import concurrent.futures

class StabilitySpace:
    """
    Class to compute stability and geometric values for the input X using various similarity search libraries.
    """

    def __init__(self, X_train, y_train, compression=None, library='knn', metric='minkowski', num_labels=None):
        """
        Initialize the stability space by compressing the input data (optional) and setting up nearest neighbor models.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.info(f"Initializing StabilitySpace with {library} library and {metric} metric.")

        self.metric = metric
        self.library = library
        self.num_labels = num_labels or len(set(y_train))
        self.compression = compression

        if self.compression:
            X_train, y_train = self.compression(X_train, y_train)

        self.X_train = X_train
        self.y_train = y_train

        # Initialize nearest neighbor models based on library
        if library == 'faiss':
            self._initialize_faiss()
        elif library == 'knn':
            self._initialize_knn()
        elif library == 'separation':
            self.logger.info("Using separation-based stability calculation.")
        else:
            raise ValueError(f"Unsupported library: {library}")
        
    def _initialize_faiss(self):
        """
        Initialize FAISS indices for each label.
        """
        self.same_nbrs = {}
        self.other_nbrs = {}
    
        # Ensure X_train is 2D for FAISS (flatten images if needed)
        if len(self.X_train.shape) > 2:
            self.logger.debug("Flattening X_train for FAISS compatibility.")
            self.X_train = self.X_train.reshape(self.X_train.shape[0], -1).astype('float32')
    
        dim = self.X_train.shape[1]
        for label in range(self.num_labels):
            idx_same = np.where(self.y_train == label)[0]
            idx_other = np.where(self.y_train != label)[0]
    
            # Build FAISS indices
            self.same_nbrs[label] = faiss.IndexFlatL2(dim)
            self.same_nbrs[label].add(self.X_train[idx_same])
    
            self.other_nbrs[label] = faiss.IndexFlatL2(dim)
            self.other_nbrs[label].add(self.X_train[idx_other])

    def _initialize_knn(self):
        """
        Initialize NearestNeighbors models for each label, flattening input if necessary.
        """
        self.same_nbrs = []
        self.other_nbrs = []
    
        # Flatten X_train if it has more than 2 dimensions (e.g., images)
        if len(self.X_train.shape) > 2:
            self.logger.debug("Flattening X_train for KNN compatibility.")
            self.X_train = self.X_train.reshape(self.X_train.shape[0], -1).astype('float32')
    
        for label in range(self.num_labels):
            idx_same = np.where(self.y_train == label)[0]
            idx_other = np.where(self.y_train != label)[0]
    
            # Initialize NearestNeighbors for KNN library
            same_nn = NearestNeighbors(n_neighbors=1, metric=self.metric).fit(self.X_train[idx_same])
            other_nn = NearestNeighbors(n_neighbors=1, metric=self.metric).fit(self.X_train[idx_other])
    
            self.same_nbrs.append(same_nn)
            self.other_nbrs.append(other_nn)

    def _stability_faiss(self, valX, val_y_pred):
        """
        Calculate stability using FAISS with a progress indicator.
        """
        self.logger.info("Calculating stability using FAISS.")
        stability = np.zeros(len(valX))
        predicted_labels = np.argmax(val_y_pred, axis=1) if len(val_y_pred.shape) > 1 else val_y_pred
    
        for i in tqdm(range(len(valX)), desc="Calculating Stability (FAISS)", unit="sample"):
            x = valX[i].reshape(1, -1).astype('float32')
            pred_label = int(predicted_labels[i])
    
            try:
                _, dist_same = self.same_nbrs[pred_label].search(x, 1)
                _, dist_other = self.other_nbrs[pred_label].search(x, 1)
                stability[i] = (dist_other[0][0] - dist_same[0][0]) / 2
            except Exception as e:
                self.logger.error(f"Error in FAISS stability calculation for sample {i}: {e}")
                stability[i] = np.nan
    
        return stability
    
    def _stability_knn(self, valX, val_y_pred):
        """
        Calculate stability using KNN with a progress indicator.
        """
        self.logger.info("Calculating stability using KNN.")
        stability = np.zeros(len(valX))
        predicted_labels = np.argmax(val_y_pred, axis=1) if len(val_y_pred.shape) > 1 else val_y_pred
    
        for i in tqdm(range(len(valX)), desc="Calculating Stability (KNN)", unit="sample"):
            x = valX[i].reshape(1, -1)
            pred_label = int(predicted_labels[i])
    
            try:
                dist_same, _ = self.same_nbrs[pred_label].kneighbors(x)
                dist_other, _ = self.other_nbrs[pred_label].kneighbors(x)
                stability[i] = (dist_other[0][0] - dist_same[0][0]) / 2
            except Exception as e:
                self.logger.error(f"Error in KNN stability calculation for sample {i}: {e}")
                stability[i] = np.nan
    
        return stability
                
    def _stability_separation(self, testX, pred_y, norm='L2', parallel=False):
        """
        Calculate separation-based stability with progress tracking.
        """
        self.logger.info("Calculating stability using separation method.")
        
        if parallel:
            self.logger.debug("Entering parallel separation calculation with progress tracking.")
            
            # Use a progress bar to track the entire operation in parallel mode
            results = list(tqdm(self._sep_calc_parallel(testX, pred_y, norm=norm), 
                                desc="Calculating Separation (Parallel)", unit="sample"))
            
            self.logger.debug("Completed parallel separation calculation.")
            return np.array(results)
        else:
            self.logger.debug("Entering sequential separation calculation with progress tracking.")
            
            # Call _sep_calc with tqdm integrated for sequential calculations
            results = np.array(self._sep_calc(testX, pred_y, norm=norm))
            
            self.logger.debug("Completed sequential separation calculation.")
            return results

    def _sep_calc_parallel(self, testX, pred_y, norm='L2'):
        """
        Calculate the separation of all test/val examples in parallel with progress tracking.
        """
        print("Entered _sep_calc_parallel")
        self.logger.debug("Starting parallel separation calculation.")
        
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # Submit each task to the executor and track progress with tqdm
            futures = [
                executor.submit(self._sep_calc_point, x, self.X_train, self.y_train, pred, norm)
                for x, pred in zip(testX, pred_y)
            ]
            
            print(f"Submitted {len(futures)} tasks to the executor.")
            self.logger.debug(f"Submitted {len(futures)} tasks to the executor.")
            
            # Use tqdm to show progress as futures complete
            separation = []
            for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), 
                               desc="Calculating Separation (Parallel)", unit="sample"):
                try:
                    result = future.result()  # Get the result of each future
                    separation.append(result)
                except Exception as e:
                    self.logger.error(f"Error in parallel separation calculation: {e}")
            
            print("Completed gathering results from futures.")
            self.logger.debug("Completed gathering results from futures.")
            
        return separation

    def _sep_calc(self, testX, pred_y, norm):
        """
        Calculate the separation of all test/val examples without parallel processing, with progress tracking.
        """
        print("Started _sep_calc with tqdm progress bar")
        self.logger.debug("Starting sequential separation calculation with tqdm progress bar.")
        
        # Use tqdm to track progress over testX for the sequential calculation
        results = []
        for i, x in tqdm(enumerate(testX), desc="Calculating Separation (Sequential)", unit="sample", total=len(testX)):
            result = self._sep_calc_point(x, self.X_train, self.y_train, pred_y[i], norm)
            results.append(result)
            
        print("Completed _sep_calc with tqdm progress bar")
        self.logger.debug("Completed sequential separation calculation with tqdm progress bar.")
        return results

    def _sep_calc_point(self, x, X_train, y_train, y_pred, norm='L2'):
        """
        Calculate the separation for a single test instance.
        """
        self.logger.debug("Started _sep_calc_point")
        
        # Ensure `y_pred` is a scalar
        if hasattr(y_pred, "__len__") and len(y_pred) > 1:
            y_pred = np.argmax(y_pred)  # Convert probability vector to a class label if needed
    
        # Flatten `x` if it has more than one dimension
        if x.ndim > 1:
            x = x.flatten()
        
        # Determine norm type
        norm_val = {'L1': 1, 'L2': 2, 'Linf': np.inf}.get(norm, 2)
        
        # Compute distances, ensuring `train` is also flattened if needed
        same = [(np.linalg.norm(x - train.flatten(), norm_val), index) 
                for index, train in enumerate(X_train) if y_train[index] == y_pred]
        others = [(np.linalg.norm(x - train.flatten(), norm_val), index) 
                  for index, train in enumerate(X_train) if y_train[index] != y_pred]
        
        same.sort(key=lambda x: x[0])
        others.sort(key=lambda x: x[0])
        
        min_r = same[0][0] + 2 * others[0][0]
        sep_other = min_r
        for o in others:
            sep_same = np.NINF
            if o[0] > min_r:
                break
            for s in same:
                if s[0] > min(min_r, o[0]) and o[0] > same[0][0]:
                    break
                x_s = X_train[s[1]].flatten()  # Ensure x_s is also flattened
                x_o = X_train[o[1]].flatten()  # Ensure x_o is also flattened
                sep_same = max(self._two_point_sep_calc(x, x_s, x_o, norm_val), sep_same)
            sep_other = min(sep_same, sep_other)
            min_r = same[0][0] + 2 * max(0, sep_other)
        
        self.logger.debug("Completed _sep_calc_point")
        
        return sep_other

    def _two_point_sep_calc(self, x, x1, x2, norm=2):
        """
        Calculate the separation parameter for a single test point and two nearest points.
        """
        a = np.linalg.norm(x1 - x, norm)
        b = np.linalg.norm(x2 - x, norm)
        c = np.linalg.norm(x1 - x2, norm)
        return ((b ** 2 - a ** 2) / (2 * c))

    def calc_stab(self, X_val, y_val_pred, timeout=1800):
        """
        Calculate stability for the test set with a timeout and track the time taken.
        """
        if self.compression:
            X_val, _ = self.compression(X_val, None, train=False)
    
        self.logger.info(f"Calculating stability using {self.library}.")
        start_time = time.time()  # Start timing
    
        if self.library == 'faiss':
            stability = self._stability_faiss(X_val, y_val_pred)
        elif self.library == 'knn':
            stability = self._stability_knn(X_val, y_val_pred)
        elif self.library == 'separation':
            stability = self._stability_separation(X_val, y_val_pred)
        else:
            raise ValueError(f"Unsupported library: {self.library}")
    
        end_time = time.time()  # End timing
        elapsed_time = end_time - start_time
        self.logger.info(f"Time taken for {self.library} stability calculation: {elapsed_time:.2f} seconds")
    
        # Optionally log the first 200 stability scores for debugging
        self.logger.debug(f"First 200 stability scores: {stability[:200]}")
    
        return stability


In [8]:
setup_logging()
logger = logging.getLogger(__name__)


class GeometricCalibrator(BaseCalibrator):
    """
    Class serving as a wrapper for the geometric calibration method (stability/separation).
    """

    def __init__(self, model, X_train, y_train, fitting_func=None, compression_mode=None, compression_param=None,
                 metric='l2', stability_space=None, library='faiss'):
        """
        Initializes the GeometricCalibrator with a model, stability space, and calibration function.

        Args:
            model: The model to be calibrated (with `predict` and `predict_proba` methods).
            X_train: Training data (flattened images).
            y_train: Training labels.
            fitting_func: Custom fitting function (default: IsotonicRegression).
            compression_mode: Compression mode for data.
            compression_param: Parameter controlling the compression level.
            metric: Distance metric for stability/separation calculations.
            stability_space: Optional custom StabilitySpace instance. If not provided, one is initialized automatically.
            library: The library used for stability calculation (default is 'faiss').
        """
        super().__init__()
        self.model = model
        self.popt = None
        self._fitted = False

        # Determine the number of classes (unique labels in y_train)
        self.num_labels = len(np.unique(y_train))  # Fix: Initialize num_labels based on the training labels

        # Default to IsotonicRegression if no custom fitting function is provided
        self.fitting_func = fitting_func if fitting_func else IsotonicRegression(out_of_bounds="clip")

        # Use provided stability space or create a new one with the default settings
        if stability_space:
            self.stab_space = stability_space  # User provided custom StabilitySpace
            logger.info(f"{self.__class__.__name__}: Using custom StabilitySpace provided by user.")
        else:
            # Automatically initialize StabilitySpace with defaults if not provided
            self.stab_space = StabilitySpace(X_train, y_train,
                                             compression=Compression(compression_mode, compression_param),
                                             library=library, metric=metric)
            logger.info(f"{self.__class__.__name__}: Initialized StabilitySpace with default settings"
                        f" (library: {library}, metric: {metric}).")

        logger.info(f"Initialized {self.__class__.__name__} with model {self.model.__class__.__name__}"
                    f" and fitting function {self.fitting_func.__class__.__name__}.")

    def fit(self, X_val, y_val):
        """
        Fits the calibrator with the validation data using rounded stability and balanced accuracy.
    
        Args:
            X_val: Validation data (flattened images).
            y_val: Validation labels.
        """
        logger.info(f"{self.__class__.__name__}: Fitting with validation data using balanced accuracy and rounded stability.")
    
        try:
            # Step 1: Predict on validation data
            y_pred_val = self.model.predict(X_val)
            y_pred_classes = np.argmax(y_pred_val, axis=1)  # Convert predictions to class labels
    
            # Step 2: Compute stability values based on predictions
            stability_val = self.stab_space.calc_stab(X_val, y_pred_val)
            logger.info(f"Stability values (first 200): {stability_val[:200]}")  # Log a sample of the stability values
    
            # Step 3: Round the stability values for binning
            round_digits = 2  # Precision for rounding stability values
            rounded_stability = np.round(stability_val, decimals=round_digits)
            unique_stabilities = np.unique(rounded_stability)
    
            # Step 4: Calculate accuracy for each unique stability value
            stability_accuracy = {}
            i = 0
            for stab in unique_stabilities:
                indices = np.where(rounded_stability == stab)[0]  # Get indices of points with this stability
                y_true_stab = y_val[indices]  # Get true labels for these points
                y_pred_stab = y_pred_classes[indices]  # Get predicted labels for these points
    
                # Calculate balanced accuracy for this stability value
                if len(np.unique(y_true_stab)) > 1:  # Ensure we have more than one class
                    
                    acc = balanced_accuracy_score(y_true_stab, y_pred_stab)
                else:
                    acc = np.mean(y_true_stab == y_pred_stab)  # Use normal accuracy for single-class stability
    
                stability_accuracy[stab] = acc
                if i % 350 == 0:
                    logger.info(f"Stability {stab}: Accuracy = {acc}")
                    logger.info(f"True labels: {y_true_stab[:10]}")
                    logger.info(f"Predicted labels: {y_pred_stab[:10]}")
                i += 1
    
            # Step 5: Prepare calibration data: (rounded_stability, accuracy) pairs
            calibration_data = [(stab, stability_accuracy[stab]) for stab in stability_accuracy]
    
            # Step 6: Fit the provided fitting function (e.g., IsotonicRegression) on the calibration data
            stability_vals, accuracies = zip(*calibration_data)
            self.popt = self.fitting_func.fit(np.array(stability_vals).reshape(-1, 1), np.array(accuracies))
    
            self._fitted = True
            logger.info(f"{self.__class__.__name__}: Successfully fitted using stability-accuracy pairs and {self.fitting_func.__class__.__name__}.")
    
        except Exception as e:
            logger.error(f"{self.__class__.__name__}: Failed to fit with error: {e}")
            raise
            
    def calibrate(self, X_test):
        """
        Calibrates the test data based on the fitted model.
    
        Args:
            X_test: Test data (flattened images).
    
        Returns:
            np.ndarray: Calibrated probability matrix for each image and class.
        """
        if not self._fitted:
            raise ValueError("You must fit the calibrator before using it.")
    
        logger.info(f"{self.__class__.__name__}: Calibrating test data.")
    
        try:
            # Predict on the test data using the trained model (get predicted probabilities for all classes)
            y_test_pred = self.model.predict(X_test)
            y_test_labels = np.argmax(y_test_pred, axis=1)  # Get predicted class labels from probabilities
    
            # Initialize progress bar using tqdm
            num_samples = X_test.shape[0]
            num_classes = y_test_pred.shape[1]
            calibrated_probs = np.zeros((num_samples, num_classes))  # Initialize a matrix to store calibrated probabilities
    
            logger.info(f"Starting calibration for {num_samples} samples and {num_classes} classes.")
    
            # Compute stability for the predicted probabilities
            stability_test = self.stab_space.calc_stab(X_test, y_test_pred)
            logger.info(f"Stability values during calibration (first 10): {stability_test[:10]}")  # Add logging
    
            # Apply the fitted calibration function to the stability values
            calibrated_values = self.popt.predict(stability_test.reshape(-1, 1))
            logger.info(f"Calibrated values (first 10): {calibrated_values[:10]}")  # Add logging
    
            # Distribute the calibrated values across the predicted class
            for i in range(X_test.shape[0]):
                # Assign the calibrated probability to the predicted class label
                calibrated_probs[i, y_test_labels[i]] = calibrated_values[i]
                
                # Distribute remaining probability equally across other classes
                remaining_prob = (1 - calibrated_values[i]) / (self.num_labels - 1)
                for j in range(self.num_labels):
                    if j != y_test_labels[i]:
                        calibrated_probs[i, j] = remaining_prob
    
            # Ensure probabilities are in [0, 1] and sum to 1
            calibrated_probs = np.clip(calibrated_probs, 0, 1)
            calibrated_probs = calibrated_probs / calibrated_probs.sum(axis=1, keepdims=True)
    
            logger.info(f"{self.__class__.__name__}: Calibration successful.")
    
            return calibrated_probs
    
        except Exception as e:
            logger.error(f"{self.__class__.__name__}: Calibration failed with error: {e}")
            raise


In [11]:
# Initialize Geometric Calibrator with FAISS
geo_calibrator_faiss = GeometricCalibrator(model=cnn_model, X_train=augmented_train_images, y_train=y_train, library='faiss')

# Fit the calibrator using FAISS on the validation data
geo_calibrator_faiss.fit(X_val, y_val)


2024-11-13 22:25:32,110 - INFO - Initialized GeometricCalibrator with n_classes=None, bins=15, temperature=1.0
2024-11-13 22:25:32,114 - INFO - Initializing StabilitySpace with faiss library and l2 metric.


Running without compression, the shape of X needs to be square


2024-11-13 22:25:33,150 - INFO - GeometricCalibrator: Initialized StabilitySpace with default settings (library: faiss, metric: l2).
2024-11-13 22:25:33,153 - INFO - Initialized GeometricCalibrator with model Sequential and fitting function IsotonicRegression.
2024-11-13 22:25:33,155 - INFO - GeometricCalibrator: Fitting with validation data using balanced accuracy and rounded stability.


[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step


2024-11-13 22:25:35,421 - INFO - Calculating stability using faiss.
2024-11-13 22:25:35,422 - INFO - Calculating stability using FAISS.


Running without compression, the shape of X needs to be square


Calculating Stability (FAISS): 100%|██████████| 14000/14000 [01:40<00:00, 139.23sample/s]
2024-11-13 22:27:15,983 - INFO - Time taken for faiss stability calculation: 100.56 seconds
2024-11-13 22:27:15,985 - INFO - Stability values (first 200): [16441.5 10662.5  6601.   7771.5  4988.5 14724.   7668.5  4431.   8289.
  4498.5  8042.5 16785.  12142.5  6794.5 10615.   -417.5  9622.   8436.
  5629.  11281.   7390.    363.  17870.5  4651.   4829.5 15575.5  2808.5
  -161.  13756.5  1284.5 15545.5 15455.   1390.5 16750.  12155.5  -345.5
  4935.5 11087.  15040.5 10060.  11479.5  -518.5 10912.5 15152.  17321.
  1726.5 16688.5  6216.5  6172.5  6759.  11830.5   872.  17286.5 17908.5
   988.5  8924.  17580.  15733.5 16880.    714.5 10869.   5301.   2640.5
  9969.5  7697.5   404.   9531.   6136.   -148.5 11483.   9773.  11011.
  9514.5  2606.  17657.5  8732.   6002.5 13409.5  1083.5 10927.  12375.
   569.   -696.5  -369.  -1518.  15080.   7888.5  5437.5  7878.   9138.
   820.  16636.   6634.   6363.

In [10]:
# Initialize Geometric Calibrator with KNN
geo_calibrator_knn = GeometricCalibrator(model=cnn_model, X_train=augmented_train_images, y_train=y_train, library='knn')
# Now I want also to calculate the amount of time each of them take, I mean, I want to be able to know how much time it took for the faiss calibration, how much time it took for the knn with fast separation and how much it took with the regular separation.
#where should I start the timing and end it? give that this is the cells that initiate the geometric calibrator:
 
# Fit the calibrator using KNN on the validation data
geo_calibrator_knn.fit(X_val, y_val)


2024-11-13 22:00:55,967 - INFO - Initialized GeometricCalibrator with n_classes=None, bins=15, temperature=1.0
2024-11-13 22:00:55,970 - INFO - Initializing StabilitySpace with knn library and l2 metric.


Running without compression, the shape of X needs to be square


2024-11-13 22:00:56,413 - INFO - GeometricCalibrator: Initialized StabilitySpace with default settings (library: knn, metric: l2).
2024-11-13 22:00:56,414 - INFO - Initialized GeometricCalibrator with model Sequential and fitting function IsotonicRegression.
2024-11-13 22:00:56,415 - INFO - GeometricCalibrator: Fitting with validation data using balanced accuracy and rounded stability.


[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


2024-11-13 22:00:57,411 - INFO - Calculating stability using knn.


Running without compression, the shape of X needs to be square


2024-11-13 22:00:57,412 - INFO - Calculating stability using KNN.
Calculating Stability (KNN): 100%|██████████| 14000/14000 [24:32<00:00,  9.51sample/s]
2024-11-13 22:25:29,789 - INFO - Time taken for knn stability calculation: 1472.38 seconds
2024-11-13 22:25:29,792 - INFO - Stability values (first 200): [ 123.55928365  110.43148942  157.16462775   51.21385319   18.6228135
  -47.41662471   81.38789617  264.77372945  295.49980365  444.97949798
  364.39447228  146.01834308  260.32161023  148.3967903   142.60028255
   37.50408328   90.78855653  147.76347758  -27.46930596  273.80414449
   85.02986899  -27.54638002   42.40935357  326.95672563   88.88054226
  277.52779781  224.85708862  399.51594799  111.59271918  272.43254196
  318.76466341  -37.92224477  208.06469556  165.79789804 -189.56145803
  161.54779942  214.18329086  118.90824656   92.71852312 -231.97718024
  100.32502285  233.7510947    94.72863653  233.44154397  156.38600351
  101.30085255  118.8546965    90.79588855  311.996309 

In [None]:
# Initialize Geometric Calibrator with KNN
geo_calibrator_separation = GeometricCalibrator(model=cnn_model, X_train=augmented_train_images, y_train=y_train, library='separation')

# Fit the calibrator using KNN on the validation data
geo_calibrator_separation.fit(X_val, y_val)
print("Both FAISS and KNN calibrators have been initialized and fitted.")


2024-11-13 22:53:52,217 - INFO - Initialized GeometricCalibrator with n_classes=None, bins=15, temperature=1.0
2024-11-13 22:53:52,220 - INFO - Initializing StabilitySpace with separation library and l2 metric.
2024-11-13 22:53:52,222 - INFO - Using separation-based stability calculation.
2024-11-13 22:53:52,222 - INFO - GeometricCalibrator: Initialized StabilitySpace with default settings (library: separation, metric: l2).
2024-11-13 22:53:52,223 - INFO - Initialized GeometricCalibrator with model Sequential and fitting function IsotonicRegression.
2024-11-13 22:53:52,223 - INFO - GeometricCalibrator: Fitting with validation data using balanced accuracy and rounded stability.


Running without compression, the shape of X needs to be square
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


2024-11-13 22:53:53,439 - INFO - Calculating stability using separation.


Running without compression, the shape of X needs to be square


2024-11-13 22:53:53,439 - INFO - Calculating stability using separation method.


Started _sep_calc with tqdm progress bar


Calculating Separation (Sequential):   2%|▏         | 347/14000 [05:19<1:53:14,  2.01sample/s] 

# 6. Step 5: Calibrate the Test Data
After fitting the calibrator, use it to predict and calibrate the probabilities for the test set.

In [18]:
# Calibrate the predictions on the test set using FAISS-based calibrator
calibrated_probs_faiss = geo_calibrator_faiss.calibrate(X_test)

# Get predicted labels by selecting the class with the highest calibrated probability for FAISS
y_test_pred_faiss = np.argmax(calibrated_probs_faiss, axis=1)

# Calculate accuracy for FAISS
accuracy_faiss = accuracy_score(y_test, y_test_pred_faiss)
print(f"Accuracy after calibration with FAISS: {accuracy_faiss}")




2024-11-13 19:13:47,390 - INFO - GeometricCalibrator: Calibrating test data.


[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step


2024-11-13 19:13:50,642 - INFO - Starting calibration for 14000 samples and 10 classes.
2024-11-13 19:13:50,644 - INFO - Calculating stability using faiss.
2024-11-13 19:13:50,646 - INFO - Calculating stability using FAISS.


Running without compression, the shape of X needs to be square


2024-11-13 19:16:53,341 - INFO - Stability values during calibration (first 10): [11680.5 15909.5 15715.    901.5  6569.5 16175.   1523.5 11379.5 11097.
 10605.5]
2024-11-13 19:16:53,347 - INFO - Calibrated values (first 10): [0.93763953 0.93763953 0.93763953 0.93646401 0.93646401 0.93763953
 0.93646401 0.93763953 0.93763953 0.93763953]
2024-11-13 19:16:53,442 - INFO - GeometricCalibrator: Calibration successful.


Accuracy after calibration with FAISS: 0.9390714285714286


In [None]:
# Calibrate the predictions on the test set using KNN-based calibrator
calibrated_probs_knn = geo_calibrator_knn.calibrate(X_test)

# Get predicted labels by selecting the class with the highest calibrated probability for KNN
y_test_pred_knn = np.argmax(calibrated_probs_knn, axis=1)

# Calculate accuracy for KNN
accuracy_knn = accuracy_score(y_test, y_test_pred_knn)
print(f"Accuracy after calibration with KNN: {accuracy_knn}")



In [None]:
# Calibrate the predictions on the test set using KNN-based calibrator
calibrated_probs_separation = geo_calibrator_separation.calibrate(X_test)

# Get predicted labels by selecting the class with the highest calibrated probability for KNN
y_test_pred_separation = np.argmax(calibrated_probs_separation, axis=1)

# Calculate accuracy for KNN
accuracy_separation = accuracy_score(y_test, y_test_pred_separation)
print(f"Accuracy after calibration with KNN: {accuracy_separation}")

# 7. Step 6: Calculate ECE
Evaluate the calibration by computing the Expected Calibration Error (ECE).

In [20]:
import logging

logger = logging.getLogger(__name__)
def ECE_calc(probs, y_pred, y_real, n_bins=20):
    """
    Expected Calibration Error (ECE) calculation.

    Parameters:
        probs (np.ndarray): Calibrated probabilities for each class.
        y_pred (np.ndarray): Predicted class labels.
        y_real (np.ndarray): True class labels.
        n_bins (int): Number of bins to divide probabilities.

    Returns:
        float: ECE value.
    """
    logger.info("Starting Expected Calibration Error (ECE) calculation.")

    # Select the probabilities of the predicted classes
    confidence_of_pred_class = np.max(probs, axis=1)

    # Bin the confidences
    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    bin_indices = np.digitize(confidence_of_pred_class, bin_boundaries) - 1

    logger.info(f"Bin boundaries: {bin_boundaries}")
    logger.info(f"Bin indices (first 10): {bin_indices[:10]}")
    
    total_error = 0.0
    total_weight = 0.0  # To track the weight distribution

    for i in range(n_bins):
        bin_mask = bin_indices == i
        bin_confidences = confidence_of_pred_class[bin_mask]
        bin_real = y_real[bin_mask]
        bin_pred = y_pred[bin_mask]

        if len(bin_confidences) > 0:
            bin_acc = np.mean(bin_real == bin_pred)
            bin_conf = np.mean(bin_confidences)
            bin_weight = len(bin_confidences) / len(probs)
            total_error += bin_weight * np.abs(bin_acc - bin_conf)
            total_weight += bin_weight

            logger.info(f"Bin {i}:")
            logger.info(f"  Bin size: {len(bin_confidences)}")
            logger.info(f"  Accuracy: {bin_acc}")
            logger.info(f"  Confidence: {bin_conf}")
            logger.info(f"  Bin weight: {bin_weight}")
            logger.info(f"  ECE contribution: {bin_weight * np.abs(bin_acc - bin_conf)}")
        else:
            logger.info(f"Bin {i} is empty.")

    logger.info(f"Total weight: {total_weight}")
    logger.info(f"Final ECE value: {total_error}")
    
    return total_error
# Predict on the test data using the trained model (without calibration)
y_test_pred_raw = cnn_model.predict(X_test)  # This is the predicted probabilities
y_test_pred_labels_raw = np.argmax(y_test_pred_raw, axis=1)  # This is the predicted class labels

# Calculate ECE for the model without calibration (raw probabilities)
ece_value_raw = ECE_calc(y_test_pred_raw, y_test_pred_labels_raw, y_test)  # Pass raw probabilities and labels
print(f"ECE without calibration: {ece_value_raw}")

# Calculate ECE for FAISS-based calibrated probabilities
ece_value_faiss = ECE_calc(calibrated_probs_faiss, y_test_pred_faiss, y_test)  # Pass calibrated probabilities and labels
print(f"ECE after FAISS calibration: {ece_value_faiss}")

print(f"For debug purpose, raw probs: {y_test_pred_raw} calibrated probs: {calibrated_probs_faiss}")
# # Calculate ECE for KNN-based calibrated probabilities
# ece_value_knn = ECE_calc(y_test_pred_knn, calibrated_probs_knn, y_test)
# print(f"ECE after KNN calibration: {ece_value_knn}")


[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step


2024-11-13 19:17:06,276 - INFO - Starting Expected Calibration Error (ECE) calculation.
2024-11-13 19:17:06,280 - INFO - Bin boundaries: [0.   0.05 0.1  0.15 0.2  0.25 0.3  0.35 0.4  0.45 0.5  0.55 0.6  0.65
 0.7  0.75 0.8  0.85 0.9  0.95 1.  ]
2024-11-13 19:17:06,282 - INFO - Bin indices (first 10): [20 20 20 19 19 19 20 19 19 19]
2024-11-13 19:17:06,284 - INFO - Bin 0 is empty.
2024-11-13 19:17:06,286 - INFO - Bin 1 is empty.
2024-11-13 19:17:06,288 - INFO - Bin 2 is empty.
2024-11-13 19:17:06,291 - INFO - Bin 3 is empty.
2024-11-13 19:17:06,292 - INFO - Bin 4 is empty.
2024-11-13 19:17:06,294 - INFO - Bin 5:
2024-11-13 19:17:06,297 - INFO -   Bin size: 1
2024-11-13 19:17:06,299 - INFO -   Accuracy: 1.0
2024-11-13 19:17:06,301 - INFO -   Confidence: 0.29476067423820496
2024-11-13 19:17:06,303 - INFO -   Bin weight: 7.142857142857143e-05
2024-11-13 19:17:06,304 - INFO -   ECE contribution: 5.0374237554413934e-05
2024-11-13 19:17:06,306 - INFO - Bin 6:
2024-11-13 19:17:06,307 - INFO - 

ECE without calibration: 0.038202684798410935
ECE after FAISS calibration: 0.002327271244170846
For debug purpose, raw probs: [[1.00000000e+00 5.24300519e-15 1.04557384e-10 ... 3.71166504e-13
  5.11456121e-11 2.98514324e-10]
 [5.04470430e-16 3.51001238e-14 6.73395693e-13 ... 4.49319781e-09
  3.75411631e-08 3.08899240e-09]
 [2.44305984e-19 1.00000000e+00 2.11173933e-17 ... 1.20417185e-13
  5.66958030e-15 1.77919310e-16]
 ...
 [1.12077680e-06 5.98499764e-06 8.02870181e-06 ... 9.99950409e-01
  2.81105947e-08 1.42379335e-08]
 [5.48735514e-15 7.35615995e-13 2.69352526e-11 ... 3.35308823e-06
  2.00429994e-07 3.11649995e-09]
 [6.10046172e-12 3.77388292e-18 1.26492340e-16 ... 2.14303534e-21
  1.97911895e-11 4.51687868e-13]] calibrated probs: [[0.93763953 0.00692894 0.00692894 ... 0.00692894 0.00692894 0.00692894]
 [0.00692894 0.00692894 0.00692894 ... 0.00692894 0.00692894 0.00692894]
 [0.00692894 0.93763953 0.00692894 ... 0.00692894 0.00692894 0.00692894]
 ...
 [0.00705955 0.00705955 0.007059

# 8. Step 7: Initiate Metrics and run all of them to see the results
init metrics.py and use it as the instance of the metrics calculator

In [22]:
from utils.metrics import CalibrationMetrics

probs_faiss = y_test_pred_raw
y_pred_faiss = y_test_pred_faiss
y_true = y_test
probs = y_test_pred_raw
y_pred = y_test_pred_labels_raw
calib_metrics_raw = CalibrationMetrics(probs, y_pred, y_true)
calib_metrics_faiss = CalibrationMetrics(probs_faiss, y_pred_faiss, y_true)



# 8. Step 7: Run Multiple Experiments
Wrap the entire pipeline in a loop to test different augmentation methods and repeat the experiment multiple times.

In [None]:
augmentation_methods = ['rotation', 'shift', 'noise']
results = {}

for method in augmentation_methods:
    print(f"Running experiment with {method} augmentation...")
    augmented_train_images = augment_data(X_train, method=method)
    
    # Train the CNN model
    cnn_model.fit(augmented_train_images, y_train, epochs=10)
    features_val = cnn_model.predict(X_val)
    features_test = cnn_model.predict(X_test)
    
    # Initialize and fit the geometric calibrator
    geo_calibrator = GeometricCalibrator(model=cnn_model, X_train=features_val, y_train=y_val)
    geo_calibrator.fit(X_val, y_val)
    
    # Calibrate the test set
    calibrated_probs = geo_calibrator.calibrate(X_test)
    y_test_pred = np.argmax(calibrated_probs, axis=1)
    
    # Calculate ECE
    ece_value = ECE_calc(y_test_pred, calibrated_probs, y_test)
    results[method] = ece_value
    print(f"{method} ECE: {ece_value}")

# Find the best augmentation method based on ECE
best_method = min(results, key=results.get)
print(f"Best augmentation method: {best_method} with ECE = {results[best_method]}")