In [1]:
import os
import pickle
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import sys
sys.path.append(str(Path.cwd().parent))

In [2]:
# Directory containing pickle files
directory = Path("../results/classification/quantitative")

# Optionally filter for specific substrings
pkl_files = [f for f in directory.iterdir() if f.suffix == ".pkl"]

# Efficient loader
def load_file(file_path):
    try:
        with open(file_path, "rb") as f:
            data = pickle.load(f)
            if isinstance(data, dict):
                return file_path.name, data
    except (EOFError, pickle.UnpicklingError) as e:
        print(f"Warning: Failed to load {file_path.name}: {e}")
    except Exception as e:
        print(f"Unexpected error with {file_path.name}: {e}")
    return None

# Load files using multithreading (I/O bound)
all_data = {}
with ThreadPoolExecutor() as executor:
    futures = [executor.submit(load_file, f) for f in pkl_files]
    for future in as_completed(futures):
        result = future.result()
        if result:
            filename, data = result
            all_data[filename] = data

files = list(all_data.keys())


In [3]:
records = []

for file, dictionary in all_data.items():
    records.append(dictionary)

# Create the DataFrame
df = pd.DataFrame(records)


# df = df.dropna(
#     subset=[col for col in df.columns if col not in ['file', 'dataset_name', 'method_name', 'missing_type', 'pct', 'random_state']],
#     how='all'
# )


# df.drop(columns=['file'], inplace=True)
print(df.shape)



(9136, 13)


In [4]:
file

'pc1_original_k20_rs671156.pkl'

In [5]:
# Main methods: RF-ICE, Conformity (based on three proximity types), difference in probabilities (independent of proximities)

In [6]:
df.columns

Index(['prox_method', 'conformity_k', 'random_state', 'oob_score_',
       'diff_proba_auc', 'diff_proba_auc_test', 'conformity_auc',
       'conformity_auc_test', 'ice_auc', 'ice_auc_test', 'name', 'n_features',
       'n_samples'],
      dtype='object')

In [7]:
df[['name', 'random_state', 'prox_method', 'ice_auc_test', 'oob_score_']].groupby(['name', 'random_state', 'prox_method']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ice_auc_test,oob_score_
name,random_state,prox_method,Unnamed: 3_level_1,Unnamed: 4_level_1
Bioresponse,54887,oob,0.875623,0.795429
Bioresponse,54887,original,0.875623,0.795429
Bioresponse,54887,rfgap,0.875623,0.795429
Bioresponse,110269,oob,0.871979,0.799619
Bioresponse,110269,original,0.871979,0.799619
...,...,...,...,...
wilt,671156,original,0.997294,0.981990
wilt,671156,rfgap,0.997294,0.981990
wilt,732181,oob,0.990492,0.979037
wilt,732181,original,0.990492,0.979037


In [8]:
df.keys()

Index(['prox_method', 'conformity_k', 'random_state', 'oob_score_',
       'diff_proba_auc', 'diff_proba_auc_test', 'conformity_auc',
       'conformity_auc_test', 'ice_auc', 'ice_auc_test', 'name', 'n_features',
       'n_samples'],
      dtype='object')

In [9]:
df_long = df.melt(id_vars=['name', 'random_state', 'prox_method', 'conformity_k',
                 ],
         value_vars=['ice_auc', 'diff_proba_auc', 'conformity_auc','ice_auc_test', 'diff_proba_auc_test', 'conformity_auc_test'],
         var_name='metric',
         value_name='auc'
         )

In [10]:
df_long

Unnamed: 0,name,random_state,prox_method,conformity_k,metric,auc
0,kc1,671156,rfgap,10,ice_auc,0.948925
1,cnae-9,365839,oob,20,ice_auc,0.974804
2,qsar-biodeg,671156,original,100,ice_auc,0.939296
3,spambase,259179,oob,20,ice_auc,0.974518
4,wdbc,137338,oob,100,ice_auc,0.979574
...,...,...,...,...,...,...
54811,dna,732181,rfgap,500,conformity_auc_test,0.997656
54812,churn,131933,original,200,conformity_auc_test,0.997659
54813,segment,121959,rfgap,200,conformity_auc_test,0.996474
54814,first-order-theorem-proving,137338,original,5,conformity_auc_test,0.891185


In [11]:
df_long

Unnamed: 0,name,random_state,prox_method,conformity_k,metric,auc
0,kc1,671156,rfgap,10,ice_auc,0.948925
1,cnae-9,365839,oob,20,ice_auc,0.974804
2,qsar-biodeg,671156,original,100,ice_auc,0.939296
3,spambase,259179,oob,20,ice_auc,0.974518
4,wdbc,137338,oob,100,ice_auc,0.979574
...,...,...,...,...,...,...
54811,dna,732181,rfgap,500,conformity_auc_test,0.997656
54812,churn,131933,original,200,conformity_auc_test,0.997659
54813,segment,121959,rfgap,200,conformity_auc_test,0.996474
54814,first-order-theorem-proving,137338,original,5,conformity_auc_test,0.891185


In [12]:
df_group = df_long.groupby(['prox_method', 'conformity_k', 'metric'])['auc'].agg(['mean', 'std']).reset_index()


In [13]:
df_group

Unnamed: 0,prox_method,conformity_k,metric,mean,std
0,oob,1,conformity_auc,0.933367,0.098686
1,oob,1,conformity_auc_test,0.935085,0.095599
2,oob,1,diff_proba_auc,0.925066,0.132599
3,oob,1,diff_proba_auc_test,0.928680,0.131200
4,oob,1,ice_auc,0.929255,0.110240
...,...,...,...,...,...
139,rfgap,500,conformity_auc_test,0.976824,0.030789
140,rfgap,500,diff_proba_auc,0.921422,0.097848
141,rfgap,500,diff_proba_auc_test,0.928455,0.088636
142,rfgap,500,ice_auc,0.918767,0.089627


In [14]:
df_group.pivot_table(index=['prox_method', 'conformity_k'], columns='metric', values='mean').reset_index()

metric,prox_method,conformity_k,conformity_auc,conformity_auc_test,diff_proba_auc,diff_proba_auc_test,ice_auc,ice_auc_test
0,oob,1,0.933367,0.935085,0.925066,0.92868,0.929255,0.923646
1,oob,5,0.954189,0.951043,0.925066,0.92868,0.929255,0.923646
2,oob,10,0.961109,0.956693,0.925066,0.92868,0.929255,0.923646
3,oob,20,0.965103,0.960298,0.923843,0.927479,0.928362,0.92253
4,oob,50,0.96613,0.963034,0.918564,0.922264,0.925081,0.91878
5,oob,100,0.978737,0.976025,0.933154,0.936883,0.938159,0.934127
6,oob,200,0.97886,0.976559,0.931577,0.935708,0.935515,0.931892
7,oob,500,0.985214,0.983209,0.935573,0.941055,0.931753,0.935867
8,original,1,0.932874,0.934676,0.923843,0.927479,0.928362,0.92253
9,original,5,0.953649,0.950597,0.923843,0.927479,0.928362,0.92253


In [15]:
from rfgap import RFGAP
rf = RFGAP(n_estimators=100, random_state=42, oob_score=True)

X = np.random.rand(150, 10)  # Example feature matrix
y = np.random.randint(0, 2, size=150)  # Example binary

rf.fit(X, y)

In [16]:
rf.get_oob_conformity(X, y, X[:10], y[:10])

In [17]:
rf.oob_conformity

array([0.74285714, 0.57894737, 0.43902439, 0.62162162, 0.675     ,
       0.63414634, 0.5625    , 0.70588235, 0.53488372, 0.56756757,
       0.55555556, 0.44444444, 0.26666667, 0.38235294, 0.38888889,
       0.5862069 , 0.85365854, 0.41025641, 0.62162162, 0.45833333,
       0.54054054, 0.60526316, 0.61764706, 0.61538462, 0.625     ,
       0.25641026, 0.2       , 0.56097561, 0.33333333, 0.45238095,
       0.52083333, 0.5       , 0.65853659, 0.4516129 , 0.64102564,
       0.51351351, 0.61764706, 0.34210526, 0.55555556, 0.42424242,
       0.6       , 0.79411765, 0.58064516, 0.4       , 0.43478261,
       0.58823529, 0.74285714, 0.7       , 0.325     , 0.7       ,
       0.69444444, 0.74193548, 0.26315789, 0.35714286, 0.625     ,
       0.67567568, 0.71052632, 0.4375    , 0.26666667, 0.45454545,
       0.63636364, 0.28947368, 0.5625    , 0.62222222, 0.44117647,
       0.44736842, 0.6744186 , 0.72972973, 0.71875   , 0.38095238,
       0.43902439, 0.21621622, 0.64102564, 0.23529412, 0.22857

In [18]:
rf.oob_conformity_test

array([0.91, 0.84, 0.18, 0.86, 0.87, 0.85, 0.86, 0.9 , 0.8 , 0.21])