In [1]:
from sklearn.ensemble import RandomForestClassifier
from Geo_cal_utils import GeometricCalibrator,ECE_calc
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm

In [2]:
# Synthetic binary-classification data standing in for flattened square images.
# The calibrator works on images, so the feature count must be a perfect
# square; it is derived from the side length to keep that true by construction.
IMAGE_SIDE = 30  # 30 x 30 = 900 features per sample
X, y = make_classification(
    n_samples=1000,                       # number of rows
    n_features=IMAGE_SIDE * IMAGE_SIDE,   # one feature per simulated pixel
    n_informative=6,                      # number of informative features
    n_classes=2,                          # binary problem
    random_state=42,                      # random seed
)

In [3]:
# Two-stage split with a fixed seed:
#   1) hold out 20% of all rows as the test set;
#   2) hold out 25% of the remainder (i.e. 20% of all rows) as the validation set.
# With 1000 rows this yields 600 train / 200 validation / 200 test.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.25, random_state=42
)

In [4]:
# Base classifier whose predicted probabilities are calibrated in the cells below.
# random_state is pinned (same seed as the data generation and the splits) so that
# Restart & Run All rebuilds the same forest and reproduces the same accuracy and
# ECE figures; results from earlier unseeded runs may differ slightly.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

In [5]:
# Uncalibrated outputs of the fitted model on the held-out test split:
# per-class probabilities and the corresponding hard label predictions.
y_test_probs = model.predict_proba(X_test)
y_pred_test = model.predict(X_test)

# Cell output: test-set accuracy of the hard predictions.
accuracy_score(y_true=y_test, y_pred=y_pred_test)

0.825

In [6]:
# Geometric calibrator configured with the "Fast Seperation" method and
# Maxpool compression (parameter 2), built from the model and the training split.
# NOTE(review): "Seperation", comprasion_mode and comprassion_param are spelled
# exactly as this call passes them to Geo_cal_utils -- confirm against the
# library before "correcting" any of them.
GeoCalibratorFS = GeometricCalibrator(
    model,
    X_train,
    y_train,
    method="Fast Seperation",
    comprasion_mode='Maxpool',
    comprassion_param=2,
)

# Fit the calibrator on the validation split, then calibrate the test split.
GeoCalibratorFS.fit(X_val, y_val)
calibrated_prob_FS = GeoCalibratorFS.calibrate(X_test)

100%|██████████| 200/200 [00:00<00:00, 1327.61it/s]
100%|██████████| 200/200 [00:00<00:00, 1503.43it/s]


In [7]:
# Geometric calibrator configured with the "Seperation" method and no explicit
# compression arguments, built from the same model and training split.
# NOTE(review): the method string is spelled exactly as passed to Geo_cal_utils;
# confirm against the library before changing it.
GeoCalibratorS = GeometricCalibrator(
    model,
    X_train,
    y_train,
    method="Seperation",
)

# Fit the calibrator on the validation split, then calibrate the test split.
GeoCalibratorS.fit(X_val, y_val)
calibrated_prob_S = GeoCalibratorS.calibrate(X_test)

200it [00:04, 41.36it/s]
200it [00:04, 46.28it/s]


In [8]:
# After calibration: ECE_calc score on the test split for the probabilities
# produced by the "Fast Seperation" calibrator (calibrated_prob_FS).
ECE_calc(calibrated_prob_FS,y_pred_test,y_test)

0.06554554752708829

In [9]:
# After calibration: ECE_calc score on the test split for the probabilities
# produced by the "Seperation" calibrator (calibrated_prob_S).
ECE_calc(calibrated_prob_S,y_pred_test,y_test)

0.0638130663181575

In [10]:
# Before calibration (baseline): ECE_calc score on the test split for the
# model's raw predict_proba output (y_test_probs), for comparison with the
# calibrated scores.
ECE_calc(y_test_probs,y_pred_test,y_test)

0.22850000000000012