In [4]:
# Step one directory up (IPython automagic for %cd); the relative paths used by later
# cells, e.g. './WINE/winequality-red.csv', are resolved against the resulting directory.
# NOTE(review): not idempotent -- re-running this cell moves up one more level.
cd ..

C:\Users\elema\gitEli\A-Geometric-Method-for-Improved-Uncertainty-Estimation-in-Real-time\Experiments


In [5]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

from tqdm.notebook import tqdm
from calibrators import *
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from utils import ECE_calc
from sklearn.preprocessing import RobustScaler
from sklearn import preprocessing
import xgboost as xgb
from utils import stability_calc,mean_confidence_interval_str,sep_calc

  from pandas import MultiIndex, Int64Index


In [6]:
from ModelLoader import *

In [7]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and convergence warnings raised by the
# libraries used below; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [8]:
def color_max(s):
    """Return Styler CSS that highlights the cell(s) with the smallest mean.

    Despite its name, this marks the *minimum*: the table holds error values,
    where lower is better.

    Parameters
    ----------
    s : iterable
        Cells of one row/column. Result cells are strings such as
        '6.43±.43' or '6.83±.36(5.9%)'; anything else (e.g. '-', NaN) is
        treated as "no result".

    Returns
    -------
    list of str
        One CSS string per cell: 'background-color: darkgreen' for every cell
        whose mean equals the smallest mean, '' otherwise.
    """
    def _mean_of(cell):
        # Only strings longer than one character can carry a result.
        if isinstance(cell, str) and len(cell) > 1:
            try:
                return float(cell.split('±')[0])
            except ValueError:
                return np.inf
        return np.inf

    # Compare parsed numbers, not raw strings: lexicographic order would put
    # '10.00±.40' before '6.43±.43'.
    means = np.array([_mean_of(cell) for cell in s], dtype=float)
    has_value = np.isfinite(means)
    if not has_value.any():
        # Nothing to compare -> highlight nothing.
        return ['' for _ in means]
    best = means[has_value].min()
    is_best = has_value & (means == best)
    return ['background-color: darkgreen' if v else '' for v in is_best]

In [9]:
import scipy
def mean_confidence_interval(data, confidence=0.95):
    """Format the mean of `data` with its Student-t confidence half-width.

    Parameters
    ----------
    data : array-like of numbers
        Sample values.
    confidence : float, default 0.95
        Two-sided confidence level.

    Returns
    -------
    str
        'mean±half_width', both rendered with four decimals.
    """
    # Import the submodule explicitly: a bare `import scipy` does not make
    # `scipy.stats` available on older SciPy releases.
    from scipy import stats

    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), stats.sem(a)
    # Half-width = standard error * t quantile with n - 1 degrees of freedom.
    h = se * stats.t.ppf((1 + confidence) / 2., n - 1)
    return f'{m:.4f}±{h:.4f}'

In [10]:
def percentage_format(x):
    """Rescale a 'mean±margin' string from fractions to percentages.

    Both numbers are multiplied by 100 and rendered with two decimals.
    Any value that is not a string longer than one character (for instance
    the '-' placeholder) is handed back unchanged.
    """
    if not (isinstance(x, str) and len(x) > 1):
        return x
    mean_txt, margin_txt = x.split('±')
    mean_pct = float(mean_txt) * 100
    margin_pct = float(margin_txt) * 100
    return f'{mean_pct:.2f}±{margin_pct:.2f}'

In [11]:
# Short names of the base classifiers; used to build the 'WINE-<model>' result keys.
model_names = ['RF','GB','XGB']
# Calibration methods whose scores are collected. The commented-out list below is the
# full set, including the variants that are currently disabled in the experiment cell.
# methods = ['Stab','Sep','SKlearn_platt', 'SKlearn_iso', 'HB', 'Stab+HB', 'Sep+HB', 'SBC', 'Stab+SBC', 'Sep+SBC']
methods = ['Stab','SKlearn_platt', 'SKlearn_iso', 'HB', 'SBC']

In [31]:
# Experiment: ECE of several calibration methods on the red-wine quality data,
# repeated over 100 random train/validation/test splits.
# d['WINE-<model>'][<method>] collects one ECE value per split in which the method ran.
d = {f'WINE-{model}':{method:[] for method in methods} for model in model_names}
# The same estimator instances are re-fitted on every split.
# NOTE(review): no random_state is set on the estimators, so scores vary between runs.
models = [RandomForestClassifier(),GradientBoostingClassifier(),xgb.XGBClassifier(eval_metric='mlogloss',use_label_encoder=False)]

df=pd.read_csv('./WINE/winequality-red.csv')
# Drop the target and three features, then apply log(1 + x) to the remaining columns.
X = np.log1p(df.drop(['quality','free sulfur dioxide', 'pH', 'residual sugar'], axis=1))
y = df['quality']

#Preprocessing
# Map the quality grades to consecutive integer labels.
le = preprocessing.LabelEncoder()
y = le.fit_transform(y)


for random_state in tqdm(range(100)):

    # 20% test; 25% of the remaining 80% as validation -> a 60/20/20 split.
    X_train_v, X_test, y_train_v , y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)
    X_train, X_val, y_train, y_val = train_test_split(X_train_v, y_train_v, test_size=0.25, random_state=random_state)
    # The scaler is fitted on the training part only and reused for validation/test.
    scaler = RobustScaler()
    x_train = scaler.fit_transform(X_train)
    x_val = scaler.transform(X_val)
    x_test = scaler.transform(X_test)
    num_labels = len(np.unique(y_train))

    for model,model_name in zip(models,model_names):
        clf = model
        clf.fit(x_train, y_train)

        probs_test = clf.predict_proba(x_test)
        y_pred_test = clf.predict(x_test)
        probs_val = clf.predict_proba(x_val)
        y_pred_val = clf.predict(x_val)

        # Boolean mask: which validation predictions are correct.
        corrects = y_pred_val==y_val

        # Per-sample stability scores from the project helper (semantics defined in utils).
        stab_test = stability_calc(x_train, x_test, y_train, y_pred_test, num_labels, metric='l2')
        stab_val = stability_calc(x_train, x_val, y_train, y_pred_val, num_labels, metric='l2')

        # sep_val = sep_calc(x_train, x_val, y_train, y_pred_val, norm = 'L1')
        # sep_test = sep_calc(x_train, x_test, y_train, y_pred_test, norm = 'L1')

        #calibration Stage:
        ########################################################################################################
        #Stability Calibration
        stabCal = StabilityCalibrator()
        stabCal.fit(stab_val, corrects)
        ECE_Stab = stabCal.ECE(stab_test, y_pred_test, y_test)
        d[f'WINE-{model_name}']['Stab'].append(ECE_Stab)

        # #Separation Calibration
        # sepCal = SeparationCalibrator()
        # sepCal.fit(sep_val, corrects)
        # ECE_Sep = sepCal.ECE(sep_test, y_pred_test, y_test)
        # d[f'WINE-{model_name}']['Sep'].append(ECE_Sep)
        ########################################################################################################

        #SKlearn-isotonic
        # The fitted estimator is passed positionally: the keyword was renamed from
        # `base_estimator` to `estimator` in newer scikit-learn, the position was not.
        sklearn_iso = CalibratedClassifierCV(clf, cv="prefit", method='isotonic')
        sklearn_iso.fit(x_val, y_val)
        calibrated_probs_test = sklearn_iso.predict_proba(x_test)
        calibrated_y_pred_test = sklearn_iso.predict(x_test)
        ECE_iso = ECE_calc(calibrated_probs_test,calibrated_y_pred_test,y_test)
        d[f'WINE-{model_name}']['SKlearn_iso'].append(ECE_iso)

        #SKlearn-platt
        sklearn_platt = CalibratedClassifierCV(clf, cv="prefit", method='sigmoid')
        sklearn_platt.fit(x_val, y_val)
        calibrated_probs_test = sklearn_platt.predict_proba(x_test)
        calibrated_y_pred_test = sklearn_platt.predict(x_test)
        ECE_platt = ECE_calc(calibrated_probs_test,calibrated_y_pred_test,y_test)
        d[f'WINE-{model_name}']['SKlearn_platt'].append(ECE_platt)

        ########################################################################################################

        # HB
        HBcalibrator = HBCalibrator()
        # NOTE(review): labels are shifted by +1 here but not for SBC below --
        # presumably HBCalibrator expects 1-based labels; confirm against calibrators.
        HBcalibrator.fit(probs_val, y_val + 1)
        HB_test_calibrated = HBcalibrator.calibrate(probs_test)
        ECE_HB = ECE_calc(HB_test_calibrated, y_pred_test, y_test)
        d[f'WINE-{model_name}']['HB'].append(ECE_HB)

        # #stab+HB(our implementation)
        # stabHB = StabilityHistogramBinningCalibrator()
        # stabHB.fit(stab_val,corrects)
        # stabHB_test_calibrated =stabHB.calibrate(stab_test)
        # ECE_stabHB = ECE_calc(stabHB_test_calibrated, y_pred_test, y_test)
        # d[f'WINE-{model_name}']['Stab+HB'].append(ECE_stabHB)

        #Sep+HB
        # sepHB = SeparationHistogramBinningCalibrator()
        # sepHB.fit(sep_val,corrects)
        # sepHB_test_calibrated =sepHB.calibrate(sep_test)
        # ECE_sepHB = ECE_calc(sepHB_test_calibrated, y_pred_test, y_test)
        # d[f'WINE-{model_name}']['Sep+HB'].append(ECE_sepHB)

        ########################################################################################################

        #SBC - making problems when we dont have all labels of Dataset in y_val
        # NOTE(review): a failed split appends nothing, so the 'SBC' list can be shorter
        # than the other methods' lists and its summary covers fewer splits.
        try:
            #SBC
            SBCcalibrator = SBCCalibrator()
            SBCcalibrator.fit(probs_val, y_val)
            probs_calibrated = SBCcalibrator.calibrate(probs_test)
            pred_y_test_calibrated = np.argmax(probs_calibrated, axis=1)
            ECE_SBC = ECE_calc(probs_calibrated, pred_y_test_calibrated, y_test)
            d[f'WINE-{model_name}']['SBC'].append(ECE_SBC)

            #stab+SBC
#             stab_SBCtop_calibrator = stab_SBC_Calibrator()
#             stab_SBCtop_calibrator.fit(stab_val ,probs_val, corrects)
#             calibratedTOP_test = stab_SBCtop_calibrator.calibrate(stab_test)
#             ECE_stabSBC = ECE_calc(calibratedTOP_test, y_pred_test, y_test)
#             d[f'WINE-{model_name}']['Stab+SBC'].append(ECE_stabSBC)

#             #Sep+SBC
#             stab_SBCtop_calibrator = stab_SBC_Calibrator()
#             stab_SBCtop_calibrator.fit(sep_val ,probs_val, corrects)
#             calibratedTOP_test = stab_SBCtop_calibrator.calibrate(sep_test)
#             ECE_sepSBC = ECE_calc(calibratedTOP_test, y_pred_test, y_test)
#             d[f'WINE-{model_name}']['Sep+SBC'].append(ECE_sepSBC)

        # Catch Exception, not BaseException, so Ctrl-C / kernel interrupt still stops
        # the run; report the exception type because the message may be empty.
        except Exception as error:
            print(f'SBC failed for {model_name} (random_state={random_state}): {error!r}')

  8%|█▎               | 8/100 [00:26<05:07,  3.34s/it]

An exception occurred: 
An exception occurred: 


  9%|█▌               | 9/100 [00:29<05:02,  3.32s/it]

An exception occurred: 


 11%|█▊              | 11/100 [00:36<04:55,  3.32s/it]

An exception occurred: 
An exception occurred: 


 12%|█▉              | 12/100 [00:39<04:53,  3.33s/it]

An exception occurred: 


 15%|██▍             | 15/100 [00:50<04:44,  3.35s/it]

An exception occurred: 
An exception occurred: 


 16%|██▌             | 16/100 [00:53<04:35,  3.28s/it]

An exception occurred: 


 20%|███▏            | 20/100 [01:06<04:28,  3.35s/it]

An exception occurred: 
An exception occurred: 


 21%|███▎            | 21/100 [01:09<04:23,  3.33s/it]

An exception occurred: 


 28%|████▍           | 28/100 [01:33<03:58,  3.32s/it]

An exception occurred: 
An exception occurred: 


 29%|████▋           | 29/100 [01:36<03:52,  3.28s/it]

An exception occurred: 


 35%|█████▌          | 35/100 [01:56<03:39,  3.38s/it]

An exception occurred: 
An exception occurred: 


 36%|█████▊          | 36/100 [01:59<03:33,  3.34s/it]

An exception occurred: 


 49%|███████▊        | 49/100 [02:44<02:51,  3.37s/it]

An exception occurred: 
An exception occurred: 


 50%|████████        | 50/100 [02:47<02:46,  3.33s/it]

An exception occurred: 


 64%|██████████▏     | 64/100 [03:34<02:01,  3.38s/it]

An exception occurred: 
An exception occurred: 


 65%|██████████▍     | 65/100 [03:38<01:57,  3.35s/it]

An exception occurred: 


 80%|████████████▊   | 80/100 [04:27<01:03,  3.20s/it]

An exception occurred: 
An exception occurred: 


 81%|████████████▉   | 81/100 [04:30<00:57,  3.03s/it]

An exception occurred: 
An exception occurred: 
An exception occurred: 


 82%|█████████████   | 82/100 [04:32<00:51,  2.89s/it]

An exception occurred: 
An exception occurred: 
An exception occurred: 


 83%|█████████████▎  | 83/100 [04:35<00:49,  2.89s/it]

An exception occurred: 


 99%|███████████████▊| 99/100 [05:18<00:02,  2.65s/it]

An exception occurred: 
An exception occurred: 


100%|███████████████| 100/100 [05:20<00:00,  3.21s/it]

An exception occurred: 





In [32]:
# Persist the raw per-split scores. The dict is stored as a pickled 0-d object array.
# A raw string keeps the same Windows-style path while avoiding the invalid escape
# sequences '\S' and '\W' that a normal string literal would contain.
np.save(r'.\Saved_computations\Wine_cali_dict_l2.npy', d)

In [7]:
# Reload the saved scores; .item() unwraps the 0-d object array back into the dict.
# Raw string: same path, without the invalid escape sequences '\S' and '\W'.
# allow_pickle must be the boolean True (the string 'TRUE' only worked by being truthy).
# NOTE(review): unpickling executes arbitrary code -- only load files you produced yourself.
Wine_cali_dict = np.load(r'.\Saved_computations\Wine_cali_dict_l2.npy', allow_pickle=True).item()

In [8]:
# Replace, in place, every list of per-split scores by the summary string that
# mean_confidence_interval_str produces for it.
for dataset_model, per_method in Wine_cali_dict.items():
    for method_name, split_scores in per_method.items():
        summary = np.apply_along_axis(mean_confidence_interval_str, 0, split_scores)
        Wine_cali_dict[dataset_model][method_name] = summary.item()

In [9]:
# Build the results table: after transposing, rows are the 'WINE-<model>' keys and
# columns are the calibration methods.
df = pd.DataFrame(Wine_cali_dict).T

In [10]:
# Show the summary strings produced by mean_confidence_interval_str, before reformatting.
df

Unnamed: 0,Stab,SKlearn_platt,SKlearn_iso,HB,SBC
WINE-RF,0.0643±0.0043,0.0683±0.0036,0.0653±0.0035,0.0649±0.0044,0.0688±0.0039
WINE-GB,0.0690±0.0040,0.0743±0.0044,0.0663±0.0037,0.0709±0.0050,0.0706±0.0040
WINE-XGB,0.0673±0.0042,0.0734±0.0044,0.0748±0.0040,0.0738±0.0044,0.0689±0.0033


In [11]:
def non_zero_format(x):
    """Drop the leading zero of both numbers in a 'mean±margin' string.

    '0.43±0.05' becomes '.43±.05'; numbers that do not start with '0.' are
    kept as they are. Values that are not strings containing '±' (the '-'
    placeholder, NaN, None, ...) are returned unchanged instead of raising.
    """
    if not isinstance(x, str) or '±' not in x:
        return x
    a, _, b = x.partition('±')
    if a.startswith('0.'):
        a = a[1:]
    if b.startswith('0.'):
        b = b[1:]
    return f'{a}±{b}'

def percentage_format(x):
    """Rescale a 'mean±margin' string from fractions to percentages.

    Both numbers are multiplied by 100 and rendered with two decimals.
    Any value that is not a string longer than one character (for instance
    the '-' placeholder) is handed back unchanged.
    """
    if not (isinstance(x, str) and len(x) > 1):
        return x
    mean_txt, margin_txt = x.split('±')
    mean_pct = float(mean_txt) * 100
    margin_pct = float(margin_txt) * 100
    return f'{mean_pct:.2f}±{margin_pct:.2f}'

def color_max(s):
    """Return Styler CSS that highlights the cell(s) with the smallest mean.

    Despite its name, this marks the *minimum*: the table holds error values,
    where lower is better.

    Parameters
    ----------
    s : iterable
        Cells of one row/column. Result cells are strings such as
        '6.43±.43' or '6.83±.36(5.9%)'; anything else (e.g. '-', NaN) is
        treated as "no result".

    Returns
    -------
    list of str
        One CSS string per cell: 'background-color: darkgreen' for every cell
        whose mean equals the smallest mean, '' otherwise.
    """
    def _mean_of(cell):
        # Only strings longer than one character can carry a result.
        if isinstance(cell, str) and len(cell) > 1:
            try:
                return float(cell.split('±')[0])
            except ValueError:
                return np.inf
        return np.inf

    # Compare parsed numbers, not raw strings: lexicographic order would put
    # '10.00±.40' before '6.43±.43'.
    means = np.array([_mean_of(cell) for cell in s], dtype=float)
    has_value = np.isfinite(means)
    if not has_value.any():
        # Nothing to compare -> highlight nothing.
        return ['' for _ in means]
    best = means[has_value].min()
    is_best = has_value & (means == best)
    return ['background-color: darkgreen' if v else '' for v in is_best]

In [12]:
# Element-wise formatting in two passes: fractions -> percentages, then strip leading zeros.
# NOTE: re-running this cell on the already formatted table scales the values by 100 again.
df = df.applymap(percentage_format).applymap(non_zero_format)

In [13]:
# Render the table; color_max is applied to each row (axis=1) to pick the shaded cell.
df.style.apply(color_max, axis=1)

Unnamed: 0,Stab,SKlearn_platt,SKlearn_iso,HB,SBC
WINE-RF,6.43±.43,6.83±.36,6.53±.35,6.49±.44,6.88±.39
WINE-GB,6.90±.40,7.43±.44,6.63±.37,7.09±.50,7.06±.40
WINE-XGB,6.73±.42,7.34±.44,7.48±.40,7.38±.44,6.89±.33


In [14]:
# Column labels (calibration methods) and row labels ('WINE-<model>') of the results table.
cols = df.columns
rows = df.index

In [16]:
# Append to every non-baseline cell the relative difference, in percent, between that
# method's mean and the mean in the first column: (method - baseline) / method * 100.
baseline_col = cols[0]
for c in cols[1:]:
    for r in rows:
        cell = df.at[r, c]
        base_cell = df.at[r, baseline_col]
        # Skip placeholders and missing values, and cells that already carry an
        # annotation, so that re-running this cell does not append it twice.
        if not isinstance(cell, str) or cell == '-' or cell.endswith('%)'):
            continue
        if not isinstance(base_cell, str) or '±' not in base_cell:
            continue
        a = float(cell.split('±')[0])
        b = float(base_cell.split('±')[0])
        if a == 0:
            # Relative difference is undefined for a zero mean.
            continue
        ans = round((a-b)/a*100, 1)
        df.at[r,c] = cell + '(' + str(ans) + '%)'

In [17]:
# Render the annotated table; color_max is applied to each row (axis=1).
df.style.apply(color_max, axis=1)

Unnamed: 0,Stab,SKlearn_platt,SKlearn_iso,HB,SBC
WINE-RF,6.43±.43,6.83±.36(5.9%),6.53±.35(1.5%),6.49±.44(0.9%),6.88±.39(6.5%)
WINE-GB,6.90±.40,7.43±.44(7.1%),6.63±.37(-4.1%),7.09±.50(2.7%),7.06±.40(2.3%)
WINE-XGB,6.73±.42,7.34±.44(8.3%),7.48±.40(10.0%),7.38±.44(8.8%),6.89±.33(2.3%)
