In [1]:
from sklearn.ensemble import RandomForestClassifier
from Geo_cal_utils import GeometricCalibrator,ECE_calc
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm

In [2]:
# Synthetic binary-classification data set.
# Author's note: the calibrator is meant to work on images, so the feature
# count imitates a flattened square image -- 900 is a perfect square (30 x 30).
dataset_params = dict(
    n_samples=1000,    # number of rows
    n_features=900,    # perfect square, see the note above
    n_informative=6,   # number of informative features
    n_classes=2,       # number of classes
    random_state=42,   # random seed
)
X, y = make_classification(**dataset_params)

In [3]:
# Step 1: hold out 20% of all rows as the test set.
X_dev, X_test, y_dev, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Step 2: split the remaining 80% into train / validation.
# 0.25 of 80% is 20% of the full data, giving a 60/20/20 train/val/test split.
X_train, X_val, y_train, y_val = train_test_split(
    X_dev, y_dev, test_size=0.25, random_state=42
)

In [4]:
# Base classifier whose probabilities are calibrated in the following cells.
# The forest is seeded (same seed as the data generation and the splits) so
# that Restart & Run All rebuilds the same model and the same metrics.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

In [5]:
# Uncalibrated class-probability estimates for the test split; these are the
# "No Calibration" baseline used in the ECE comparison.
y_test_probs = model.predict_proba(X_test)
# Hard label predictions for the same rows.
y_pred_test = model.predict(X_test)
# Last expression of the cell: displays the test accuracy.
accuracy_score(y_true=y_test, y_pred=y_pred_test)

0.83

In [6]:
# Fast separation calibration
# Build the calibrator from the trained model and the training split, fit it
# on the held-out validation split, then calibrate the test split.
# NOTE(review): `calibrate` presumably returns calibrated probabilities for
# X_test (the result is later passed to ECE_calc) -- confirm in Geo_cal_utils.
GeoCalibrator = GeometricCalibrator(model, X_train, y_train)
GeoCalibrator.fit(X_val, y_val)
calibrated_prob_Geo = GeoCalibrator.calibrate(X_test)

100%|██████████| 200/200 [00:00<00:00, 571.49it/s]
100%|██████████| 200/200 [00:00<00:00, 627.09it/s]


In [7]:
# Fast separation calibration - compressed variant
# Same flow as the uncompressed calibrator (construct from model + training
# split, fit on validation, calibrate test), but with 'Maxpool' compression
# and a compression parameter of 2.
# NOTE(review): the keyword is spelled `comprasion_mode` (not
# `compression_mode`) -- confirm it matches the GeometricCalibrator signature
# in Geo_cal_utils before renaming anything.
# NOTE(review): presumably compression_param=2 is the pooling window applied
# to the square image-like features -- TODO confirm.
GeoCalibrator_compressed = GeometricCalibrator(model, X_train, y_train, comprasion_mode='Maxpool',
                                               compression_param=2)
GeoCalibrator_compressed.fit(X_val, y_val)
calibrated_prob_GeoCompressed = GeoCalibrator_compressed.calibrate(X_test)

100%|██████████| 200/200 [00:00<00:00, 1019.53it/s]
100%|██████████| 200/200 [00:00<00:00, 1035.58it/s]


In [8]:
# ECE comparison on the test split: both calibrated variants and the
# uncalibrated baseline. Each entry maps a report label to the probabilities
# passed to ECE_calc together with the predictions and true labels.
# The compressed variant gets its own label; it previously shared the label
# of the uncompressed one, so the two printed lines could not be told apart.
ece_inputs = {
    'Geometric Calibration Fast separation ECE': calibrated_prob_Geo,
    'Geometric Calibration Fast separation (compressed) ECE': calibrated_prob_GeoCompressed,
    'No Calibration ECE': y_test_probs,
}
for label, probs in ece_inputs.items():
    print(f'{label}: \t{ECE_calc(probs, y_pred_test, y_test):.4f}')

Geometric Calibration Fast separation ECE: 	0.0369
Geometric Calibration Fast separation ECE: 	0.0548
No Calibration ECE: 	0.2369
