<a href="https://colab.research.google.com/github/Krishil-Jayswal/Conformal-Prediction/blob/master/benchmark_scores.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Benchmarking

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import time

In [2]:
np.random.seed(50)

### Tube Loss

In [3]:
def train_tube_model_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, q=0.90, r=0.5, delta=0, alpha=0.1, verbose=False):

    # 1.
    y_train = y_train.reshape(-1)
    y_cal = y_cal.reshape(-1)
    y_test = y_test.reshape(-1)

    y_train_tb = np.stack((y_train, y_train), axis=1)
    y_cal_tb = np.stack((y_cal, y_cal), axis=1)
    y_test_tb = np.stack((y_test, y_test), axis=1)

    def confidence_loss(y_true, y_pred):
        y_true = y_true[:, 0]
        f1 = y_pred[:, 0]  # Lower
        f2 = y_pred[:, 1]  # Upper

        c1 = (1 - q) * (f2 - y_true)
        c2 = (1 - q) * (y_true - f1)
        c3 = q * (f1 - y_true)
        c4 = q * (y_true - f2)

        loss_part1 = tf.where(y_true > r * (f1 + f2), c1, c2)
        loss_part2 = tf.where(f1 > y_true, c3, c4)

        final_loss = tf.where(tf.logical_and(y_true <= f2, y_true >= f1), loss_part1, loss_part2) + (delta * tf.abs(f1 - f2))
        return tf.reduce_mean(final_loss)

    # Build model
    model = Sequential()
    model.add(Dense(200, input_dim=X_train.shape[1], activation='relu',
                    kernel_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.2)))
    model.add(Dense(2, activation='linear',
                    kernel_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.3),
                    bias_initializer=tf.keras.initializers.Constant(value=[-3, 3])))

    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.02,
        decay_steps=10000,
        decay_rate=0.01)

    opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    model.compile(loss=confidence_loss, optimizer=opt)

    # Train model
    start_time = time.time()
    model.fit(X_train, y_train_tb, epochs=400, batch_size=40, verbose=0)
    training_time = time.time() - start_time

    # Predict on calibration and test
    y_cal_pred = model.predict(X_cal, verbose=0)
    y_test_pred = model.predict(X_test, verbose=0)

    q_lo_cal = y_cal_pred[:, 0]
    q_hi_cal = y_cal_pred[:, 1]
    q_lo_test = y_test_pred[:, 0]
    q_hi_test = y_test_pred[:, 1]

    # Conformal adjustment
    scores = np.maximum(q_lo_cal - y_cal, y_cal - q_hi_cal)
    q_hat = np.quantile(scores, 1 - alpha, method='higher')

    lower_bound = q_lo_test - q_hat
    upper_bound = q_hi_test + q_hat

    # Evaluation
    coverage = np.mean((y_test >= lower_bound) & (y_test <= upper_bound))
    mpiw = np.mean(upper_bound - lower_bound)

    if verbose:
        print(f"Training time: {training_time:.2f} seconds")
        print(f"PICP (conformal): {coverage:.4f}")
        print(f"MPIW (conformal): {mpiw:.4f}")

    return training_time, coverage, mpiw

### CQR

In [4]:
def train_two_model_cqr_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, q_low=0.1, q_high=0.9, alpha=0.1, verbose=False):

    # 1.
    y_train = y_train.reshape(-1)
    y_test = y_test.reshape(-1)
    y_cal = y_cal.reshape(-1)

    def quantile_loss(q):
        def loss(y_true, y_pred):
            e = y_true - y_pred
            return tf.reduce_mean(tf.maximum(q * e, (q - 1) * e))
        return loss

    def build_model(input_dim):
        return Sequential([
            Dense(200, input_dim=input_dim, activation='relu'),
            Dense(1, activation='linear')
        ])

    # Separate optimizers for each model
    opt_lower = tf.keras.optimizers.Adam(learning_rate=0.01)
    opt_upper = tf.keras.optimizers.Adam(learning_rate=0.01)

    model_lo = build_model(X_train.shape[1])
    model_hi = build_model(X_train.shape[1])

    model_lo.compile(loss=quantile_loss(q_low), optimizer=opt_lower)
    model_hi.compile(loss=quantile_loss(q_high), optimizer=opt_upper)

    # 2. Train both models
    start_time = time.time()
    model_lo.fit(X_train, y_train, epochs=400, batch_size=40, verbose=0)
    model_hi.fit(X_train, y_train, epochs=400, batch_size=40, verbose=0)
    training_time = time.time() - start_time

    # 3. Predict quantiles on calibration and test sets
    def predict_quantiles(model, X):
        return model.predict(X, verbose=0).reshape(-1)

    q_lo_cal = predict_quantiles(model_lo, X_cal)
    q_hi_cal = predict_quantiles(model_hi, X_cal)
    q_lo_test = predict_quantiles(model_lo, X_test)
    q_hi_test = predict_quantiles(model_hi, X_test)

    # 4. Conformal adjustment
    scores = np.maximum(q_lo_cal - y_cal, y_cal - q_hi_cal)
    q_hat = np.quantile(scores, 1 - alpha, method='higher')

    lower_bound = q_lo_test - q_hat
    upper_bound = q_hi_test + q_hat

    # 5. Evaluation metrics
    coverage = np.mean((y_test >= lower_bound) & (y_test <= upper_bound))
    mpiw = np.mean(upper_bound - lower_bound)

    if verbose:
        print(f"Training time: {training_time:.2f} seconds")
        print(f"PICP (conformal): {coverage:.4f}")
        print(f"MPIW (conformal): {mpiw:.4f}")

    return training_time, coverage, mpiw

### 1. Concrete Dataset

In [5]:
concrete_df = pd.read_csv("https://raw.githubusercontent.com/Krishil-Jayswal/Conformal-Prediction/refs/heads/master/datasets/Concrete_Data.csv")

In [9]:
X = concrete_df.iloc[:, :-1]
y = concrete_df.iloc[:, -1:].values

In [10]:
# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split into train, cal, test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_cal, X_test, y_cal, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [11]:
print("Tubeloss Scores:")
training_time, picp, mpiw = train_tube_model_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, verbose=True)
print("CQR Scores:")
training_time, picp, mpiw = train_two_model_cqr_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, q_low=0.05, q_high=0.95, verbose=True)

Tubeloss Scores:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training time: 47.83 seconds
PICP (conformal): 0.8835
MPIW (conformal): 19.6368
CQR Scores:
Training time: 82.26 seconds
PICP (conformal): 0.8544
MPIW (conformal): 15.5231


### 2. Bike Dataset

In [27]:
df=pd.read_csv("https://raw.githubusercontent.com/Krishil-Jayswal/Conformal-Prediction/refs/heads/master/datasets/bike_train.csv")

# # seperating season as per values. this is bcoz this will enhance features.
season=pd.get_dummies(df['season'],prefix='season')
df=pd.concat([df,season],axis=1)

# # # same for weather. this is bcoz this will enhance features.
weather=pd.get_dummies(df['weather'],prefix='weather')
df=pd.concat([df,weather],axis=1)

# # # now can drop weather and season.
df.drop(['season','weather'],inplace=True,axis=1)
df.head()

df["hour"] = [t.hour for t in pd.DatetimeIndex(df.datetime)]
df["day"] = [t.dayofweek for t in pd.DatetimeIndex(df.datetime)]
df["month"] = [t.month for t in pd.DatetimeIndex(df.datetime)]
df['year'] = [t.year for t in pd.DatetimeIndex(df.datetime)]
df['year'] = df['year'].map({2011:0, 2012:1})

df.drop('datetime',axis=1,inplace=True)
df.drop(['casual','registered'],axis=1,inplace=True)
df.columns.to_series().groupby(df.dtypes).groups
X = df.drop('count', axis=1).astype(float).values
y = df['count'].values

In [28]:
# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split into train, cal, test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_cal, X_test, y_cal, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [None]:
print("Tubeloss Scores:")
training_time, picp, mpiw = train_tube_model_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, verbose=True)
print("CQR Scores:")
training_time, picp, mpiw = train_two_model_cqr_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, q_low=0.05, q_high=0.95, verbose=True)

### 3. Star Dataset

In [16]:
df = pd.read_csv("https://raw.githubusercontent.com/Krishil-Jayswal/Conformal-Prediction/refs/heads/master/datasets/STAR.csv")
df.loc[df['gender'] == 'female', 'gender'] = 0
df.loc[df['gender'] == 'male', 'gender'] = 1

df.loc[df['ethnicity'] == 'cauc', 'ethnicity'] = 0
df.loc[df['ethnicity'] == 'afam', 'ethnicity'] = 1
df.loc[df['ethnicity'] == 'asian', 'ethnicity'] = 2
df.loc[df['ethnicity'] == 'hispanic', 'ethnicity'] = 3
df.loc[df['ethnicity'] == 'amindian', 'ethnicity'] = 4
df.loc[df['ethnicity'] == 'other', 'ethnicity'] = 5

df.loc[df['stark'] == 'regular', 'stark'] = 0
df.loc[df['stark'] == 'small', 'stark'] = 1
df.loc[df['stark'] == 'regular+aide', 'stark'] = 2

df.loc[df['star1'] == 'regular', 'star1'] = 0
df.loc[df['star1'] == 'small', 'star1'] = 1
df.loc[df['star1'] == 'regular+aide', 'star1'] = 2

df.loc[df['star2'] == 'regular', 'star2'] = 0
df.loc[df['star2'] == 'small', 'star2'] = 1
df.loc[df['star2'] == 'regular+aide', 'star2'] = 2

df.loc[df['star3'] == 'regular', 'star3'] = 0
df.loc[df['star3'] == 'small', 'star3'] = 1
df.loc[df['star3'] == 'regular+aide', 'star3'] = 2

df.loc[df['lunchk'] == 'free', 'lunchk'] = 0
df.loc[df['lunchk'] == 'non-free', 'lunchk'] = 1

df.loc[df['lunch1'] == 'free', 'lunch1'] = 0
df.loc[df['lunch1'] == 'non-free', 'lunch1'] = 1

df.loc[df['lunch2'] == 'free', 'lunch2'] = 0
df.loc[df['lunch2'] == 'non-free', 'lunch2'] = 1

df.loc[df['lunch3'] == 'free', 'lunch3'] = 0
df.loc[df['lunch3'] == 'non-free', 'lunch3'] = 1

df.loc[df['schoolk'] == 'inner-city', 'schoolk'] = 0
df.loc[df['schoolk'] == 'suburban', 'schoolk'] = 1
df.loc[df['schoolk'] == 'rural', 'schoolk'] = 2
df.loc[df['schoolk'] == 'urban', 'schoolk'] = 3

df.loc[df['school1'] == 'inner-city', 'school1'] = 0
df.loc[df['school1'] == 'suburban', 'school1'] = 1
df.loc[df['school1'] == 'rural', 'school1'] = 2
df.loc[df['school1'] == 'urban', 'school1'] = 3

df.loc[df['school2'] == 'inner-city', 'school2'] = 0
df.loc[df['school2'] == 'suburban', 'school2'] = 1
df.loc[df['school2'] == 'rural', 'school2'] = 2
df.loc[df['school2'] == 'urban', 'school2'] = 3

df.loc[df['school3'] == 'inner-city', 'school3'] = 0
df.loc[df['school3'] == 'suburban', 'school3'] = 1
df.loc[df['school3'] == 'rural', 'school3'] = 2
df.loc[df['school3'] == 'urban', 'school3'] = 3

df.loc[df['degreek'] == 'bachelor', 'degreek'] = 0
df.loc[df['degreek'] == 'master', 'degreek'] = 1
df.loc[df['degreek'] == 'specialist', 'degreek'] = 2
df.loc[df['degreek'] == 'master+', 'degreek'] = 3

df.loc[df['degree1'] == 'bachelor', 'degree1'] = 0
df.loc[df['degree1'] == 'master', 'degree1'] = 1
df.loc[df['degree1'] == 'specialist', 'degree1'] = 2
df.loc[df['degree1'] == 'phd', 'degree1'] = 3

df.loc[df['degree2'] == 'bachelor', 'degree2'] = 0
df.loc[df['degree2'] == 'master', 'degree2'] = 1
df.loc[df['degree2'] == 'specialist', 'degree2'] = 2
df.loc[df['degree2'] == 'phd', 'degree2'] = 3

df.loc[df['degree3'] == 'bachelor', 'degree3'] = 0
df.loc[df['degree3'] == 'master', 'degree3'] = 1
df.loc[df['degree3'] == 'specialist', 'degree3'] = 2
df.loc[df['degree3'] == 'phd', 'degree3'] = 3

df.loc[df['ladderk'] == 'level1', 'ladderk'] = 0
df.loc[df['ladderk'] == 'level2', 'ladderk'] = 1
df.loc[df['ladderk'] == 'level3', 'ladderk'] = 2
df.loc[df['ladderk'] == 'apprentice', 'ladderk'] = 3
df.loc[df['ladderk'] == 'probation', 'ladderk'] = 4
df.loc[df['ladderk'] == 'pending', 'ladderk'] = 5
df.loc[df['ladderk'] == 'notladder', 'ladderk'] = 6


df.loc[df['ladder1'] == 'level1', 'ladder1'] = 0
df.loc[df['ladder1'] == 'level2', 'ladder1'] = 1
df.loc[df['ladder1'] == 'level3', 'ladder1'] = 2
df.loc[df['ladder1'] == 'apprentice', 'ladder1'] = 3
df.loc[df['ladder1'] == 'probation', 'ladder1'] = 4
df.loc[df['ladder1'] == 'noladder', 'ladder1'] = 5
df.loc[df['ladder1'] == 'notladder', 'ladder1'] = 6

df.loc[df['ladder2'] == 'level1', 'ladder2'] = 0
df.loc[df['ladder2'] == 'level2', 'ladder2'] = 1
df.loc[df['ladder2'] == 'level3', 'ladder2'] = 2
df.loc[df['ladder2'] == 'apprentice', 'ladder2'] = 3
df.loc[df['ladder2'] == 'probation', 'ladder2'] = 4
df.loc[df['ladder2'] == 'noladder', 'ladder2'] = 5
df.loc[df['ladder2'] == 'notladder', 'ladder2'] = 6

df.loc[df['ladder3'] == 'level1', 'ladder3'] = 0
df.loc[df['ladder3'] == 'level2', 'ladder3'] = 1
df.loc[df['ladder3'] == 'level3', 'ladder3'] = 2
df.loc[df['ladder3'] == 'apprentice', 'ladder3'] = 3
df.loc[df['ladder3'] == 'probation', 'ladder3'] = 4
df.loc[df['ladder3'] == 'noladder', 'ladder3'] = 5
df.loc[df['ladder3'] == 'notladder', 'ladder3'] = 6

df.loc[df['tethnicityk'] == 'cauc', 'tethnicityk'] = 0
df.loc[df['tethnicityk'] == 'afam', 'tethnicityk'] = 1

df.loc[df['tethnicity1'] == 'cauc', 'tethnicity1'] = 0
df.loc[df['tethnicity1'] == 'afam', 'tethnicity1'] = 1

df.loc[df['tethnicity2'] == 'cauc', 'tethnicity2'] = 0
df.loc[df['tethnicity2'] == 'afam', 'tethnicity2'] = 1

df.loc[df['tethnicity3'] == 'cauc', 'tethnicity3'] = 0
df.loc[df['tethnicity3'] == 'afam', 'tethnicity3'] = 1
df.loc[df['tethnicity3'] == 'asian', 'tethnicity3'] = 2

df = df.dropna()

grade = df["readk"] + df["read1"] + df["read2"] + df["read3"]
grade += df["mathk"] + df["math1"] + df["math2"] + df["math3"]


names = df.columns
target_names = names[8:16]
data_names = np.concatenate((names[0:8],names[17:]))
X = df.loc[:, data_names].values
y = grade.values

In [17]:
# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split into train, cal, test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_cal, X_test, y_cal, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [18]:
print("Tubeloss Scores:")
training_time, picp, mpiw = train_tube_model_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, verbose=True)
print("CQR Scores:")
training_time, picp, mpiw = train_two_model_cqr_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, q_low=0.05, q_high=0.95, verbose=True)

Tubeloss Scores:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training time: 56.13 seconds
PICP (conformal): 0.9076
MPIW (conformal): 2042.3408
CQR Scores:
Training time: 109.63 seconds
PICP (conformal): 0.9215
MPIW (conformal): 997.5884


### 4. Community Dataset

In [21]:
attrib = pd.read_csv("https://raw.githubusercontent.com/Krishil-Jayswal/Conformal-Prediction/refs/heads/master/datasets/communities_attributes.csv", delim_whitespace = True)
data = pd.read_csv("https://raw.githubusercontent.com/Krishil-Jayswal/Conformal-Prediction/refs/heads/master/datasets/communities.data", names = attrib['attributes'])
data = data.drop(columns=['state','county',
                'community','communityname',
                'fold'], axis=1)

from sklearn.impute import SimpleImputer

# Convert '?' to NaN
data = data.replace('?', np.nan).astype(float)

# Impute 'OtherPerCap' with mean
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
data[['OtherPerCap']] = imputer.fit_transform(data[['OtherPerCap']])

# Drop any remaining columns with NaN values
data = data.dropna(axis=1)

# Split into features and target
X = data.iloc[:, 0:100].values
y = data.iloc[:, 100].values


  attrib = pd.read_csv("https://raw.githubusercontent.com/Krishil-Jayswal/Conformal-Prediction/refs/heads/master/datasets/communities_attributes.csv", delim_whitespace = True)


In [22]:
# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split into train, cal, test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_cal, X_test, y_cal, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [23]:
print("Tubeloss Scores:")
training_time, picp, mpiw = train_tube_model_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, verbose=True)
print("CQR Scores:")
training_time, picp, mpiw = train_two_model_cqr_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, q_low=0.05, q_high=0.95, verbose=True)

Tubeloss Scores:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training time: 53.23 seconds
PICP (conformal): 0.8997
MPIW (conformal): 2.8906
CQR Scores:
Training time: 100.79 seconds
PICP (conformal): 0.8722
MPIW (conformal): 0.3682


### 5. Facebook Dataset

In [24]:
df = pd.read_csv("https://raw.githubusercontent.com/Krishil-Jayswal/Conformal-Prediction/refs/heads/master/datasets/facebook_v1.csv")
X = df.iloc[:,0:53].values
y = df.iloc[:,53].values

In [25]:
# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split into train, cal, test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_cal, X_test, y_cal, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [26]:
print("Tubeloss Scores:")
training_time, picp, mpiw = train_tube_model_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, verbose=True)
print("CQR Scores:")
training_time, picp, mpiw = train_two_model_cqr_conformal(X_train, y_train, X_cal, y_cal, X_test, y_test, q_low=0.05, q_high=0.95, verbose=True)

Tubeloss Scores:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training time: 547.06 seconds
PICP (conformal): 0.9034
MPIW (conformal): 186.3247
CQR Scores:
Training time: 1051.28 seconds
PICP (conformal): 0.9233
MPIW (conformal): 15.9237
