In [None]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import brier_score_loss
from sklearn.preprocessing import StandardScaler
import scorecardpy as sc
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from nonconformist.cp import IcpClassifier
from nonconformist.nc import ClassifierNc
from nonconformist.nc import MarginErrFunc
from nonconformist.nc import InverseProbabilityErrFunc
from nonconformist.base import ClassifierAdapter
from nonconformist.nc import ClassificationErrFunc
from Helper_Functions_AL_imbalance import no_al, benchmark_ri, random_al, icp, icp_prob, icp_nnmargin, data_prep

In [None]:
# scorecardpy requires pandas version 1.3.5 or lower; display the installed version to confirm it is compatible.
pd.__version__

'1.3.5'

In [None]:
# Pre-sized lists: the integer entries are stand-ins until the dataframes are loaded
# (5 starter sets, 10 active-learning pools).
starters = list(range(5))
AL = list(range(10))

In [None]:
# Both lists are rebuilt from scratch on every run, so re-running this cell does not
# keep doubling `starters` and the cell does not depend on pre-existing placeholder lists.
start_files = [
    'start_0.csv',
    'start_1.csv',
    'start_2.csv',
    'start_3.csv',
    'LC_start.csv',
]
al_files = [
    'AL_original_0.csv',
    'AL_original_1.csv',
    'AL_original_2.csv',
    'AL_original_3.csv',
    'LC_accepted.csv',
    'AL_MC_0.csv',
    'AL_MC_1.csv',
    'AL_MC_2.csv',
    'AL_MC_3.csv',
    'LC_all.csv',
]

# The five starter sets are repeated once so that zip(starters, AL, ...) pairs starter i
# with both AL[i] and AL[i + 5]. The repeated entries are the same DataFrame objects.
starters = [pd.read_csv(path) for path in start_files] * 2

AL = [pd.read_csv(path) for path in al_files]

In [None]:
# The nonconformist library requires error functions to be classes whose `apply` method receives only the predicted
# probabilities and the true outcomes. Any other input (here: the cost threshold) therefore has to be read from a
# notebook-level variable, which requires the classes to be defined in the same notebook that sets `threshold`.

class InverseProbabilityErrFunc_cost(ClassificationErrFunc):
    """Inverse-probability error function with a label-dependent cost weight.

    The weight is taken from the notebook-level variable ``threshold``, which is
    looked up every time ``apply`` is called.
    """

    def __init__(self):
        super().__init__()

    def apply(self, prediction, y):
        """Return one weighted score per entry of ``y``.

        prediction: 2-D array indexed as ``prediction[row, class]``.
        y: array of labels, one per row of ``prediction``.
        """
        n_classes = prediction.shape[1]
        # Probability predicted for each sample's own label; a label without a
        # matching column in `prediction` is given probability 0.
        prob = np.array(
            [
                0 if label >= n_classes else prediction[row, int(label)]
                for row, label in enumerate(y)
            ],
            dtype=np.float32,
        )
        miss = 1 - prob
        # Label-0 samples are weighted by `threshold`, label-1 samples by `1 - threshold`
        # (stated intent of the original: give FP a smaller weight than FN).
        return (1 - y) * miss * (threshold) + y * miss * (1 - threshold)
    
    
    
    
class NearestNeighbourMargin_cost(ClassificationErrFunc):
    """Nearest-neighbour margin error function with a label-dependent cost weight.

    For every sample the score is the ratio of two distances measured on the
    predicted probabilities, multiplied by a weight taken from the
    notebook-level variable ``threshold`` (looked up on every ``apply`` call).
    The other-class column is addressed as ``1 - label``, so labels are
    expected to be 0 or 1.
    """

    def __init__(self):
        super(NearestNeighbourMargin_cost, self).__init__()

    def apply(self, prediction, y):
        """Return one weighted margin ratio per entry of ``y``.

        prediction: 2-D array indexed as ``prediction[row, class]``.
        y: array of labels, one per row of ``prediction``.

        A label without a matching column in ``prediction`` gets ratio 0 so the
        result always has one entry per label. Previously such a label left
        ``ratios`` shorter than ``y`` and the final multiplication failed.
        NOTE(review): confirm that 0 is the desired score for that case.
        """
        prob = np.zeros(y.size, dtype=np.float32)
        ratios = []
        for i, y_ in enumerate(y):
            if y_ >= prediction.shape[1]:
                prob[i] = 0
                # Keep `ratios` aligned with `y`.
                ratios.append(0)
            else:
                prob[i] = prediction[i, int(y_)]
                # distance to the closest predicted probability of the other class
                diff_neigh = np.min(np.absolute(prob[i] - prediction[:, int(1 - y_)]))
                # distance to the closest predicted probability of the same class
                # NOTE(review): this column contains row i itself, so the minimum is at most
                # the float32 rounding error of prob[i] (or exactly 0, which makes the ratio
                # inf/nan). Confirm whether the sample should be excluded from its own
                # neighbourhood.
                same_neigh = np.min(np.absolute(prob[i] - prediction[:, int(y_)]))
                ratio = diff_neigh / same_neigh
                ratios.append(ratio)
        ratios = np.array(ratios)
        # Label-0 samples are weighted by `threshold`, label-1 samples by `1 - threshold`
        # (stated intent of the original: give FP a smaller weight than FN).
        return ratios * (1 - y) * (threshold) + ratios * y * (1 - threshold)
    

# Cost Versions of the Conformal Prediction Strategies    
def icp_prob_cost(start, start_trans, test, test_trans_, threshold_, cost_matrix_, cost_matrix_test_):
    """Run `icp` with the cost-weighted inverse-probability error function.

    All seven arguments are forwarded to `icp` unchanged and in the same order,
    followed by a fresh `InverseProbabilityErrFunc_cost` instance. The return
    value is whatever `icp` returns.
    """
    err_func = InverseProbabilityErrFunc_cost()
    return icp(
        start, start_trans,
        test, test_trans_,
        threshold_, cost_matrix_, cost_matrix_test_,
        err_func,
    )


def icp_nnmargin_cost(start, start_trans, test, test_trans_, threshold_, cost_matrix_, cost_matrix_test_):
    """Run `icp` with the cost-weighted nearest-neighbour margin error function.

    All seven arguments are forwarded to `icp` unchanged and in the same order,
    followed by a fresh `NearestNeighbourMargin_cost` instance. The return
    value is whatever `icp` returns.
    """
    err_func = NearestNeighbourMargin_cost()
    return icp(
        start, start_trans,
        test, test_trans_,
        threshold_, cost_matrix_, cost_matrix_test_,
        err_func,
    )




In [None]:
# Strategies to compare. The experiment loops call each entry with the same seven
# positional arguments and expect an (updated start set, metrics) pair back.
strategies = [
    no_al,
    benchmark_ri,
    random_al,
    icp_prob,
    icp_prob_cost,
    icp_nnmargin,
    icp_nnmargin_cost,
]

In [None]:
# Dataset name for every (starter, AL) pair; the name is handed to data_prep in each round.
# The five names are repeated once so that the list lines up with all ten AL pools.
names = ['Small', 'German', 'Deloitte', 'Large', 'LC'] * 2

In [None]:
# AL experiment. For each dataset, run each strategy on each of the 9 test set splits and save all
# performance metrics and the threshold of every round.

metric_columns = ['AUC', 'PAUC', 'BS', 'Cost', 'Threshold']

performance_df = []
for start, test, name in zip(starters, AL, names):

    # Shuffle the DataFrame (fixed seed) and convert it to an array
    df_al_array = test.copy().sample(frac=1, random_state=888).values

    # Split the array into 9 parts of (almost) equal size
    splits = np.array_split(df_al_array, 9)

    # Convert each split back to a DataFrame with the original dtypes and store it in a list
    df_al_splits = [pd.DataFrame(split, columns=test.columns).astype(test.dtypes.to_dict()) for split in splits]

    performance_str = []

    for strategy in strategies:
        # Every strategy starts from the same initial set; it grows from round to round
        start_str = start.copy()
        round_rows = []
        for round_ in df_al_splits:
            # Prepare data. `threshold` has to stay a notebook-level name: the *_cost error
            # functions defined in this notebook read it as a global.
            start_trans, round_trans, cost_matrix, threshold, cost_matrix_round = data_prep(start_str, round_, name)
            # Conduct strategy
            start_str, metrics = strategy(start_str, start_trans, round_, round_trans, threshold, cost_matrix, cost_matrix_round)
            # Evaluate strategy: store this round's metrics. They used to be discarded, which
            # left every result frame empty.
            # NOTE(review): assumes `metrics` is either a dict keyed by column name or a flat
            # sequence ordered like `metric_columns` - confirm against the helper module.
            if isinstance(metrics, dict):
                row = dict(metrics)
            else:
                row = dict(zip(metric_columns, np.ravel(metrics)))
            # Record the threshold of this round unless the strategy already reported one
            row.setdefault('Threshold', threshold)
            round_rows.append(row)
        # One row per round; built in one go instead of growing a DataFrame row by row
        performance_round = pd.DataFrame(round_rows, columns=metric_columns)
        performance_str.append(performance_round)
    performance_df.append(performance_str)

[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning 

[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning 

[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning 

[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning 

Binning on 3138 rows and 5 columns in 00:00:17
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 6114 rows and 5 columns in 00:00:26
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 8979 rows and 5 columns in 00:00:34
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 11879 rows and 5 columns in 00:00:41
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 14751 rows and 5 columns in 00:00:52
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 17654 rows and 5 columns in 00:01:04
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 20565 rows and 5 columns in 00:01:15
[INFO] converting i

[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe v

[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe v

[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe v

Binning on 3138 rows and 5 columns in 00:00:17
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 23934 rows and 5 columns in 00:00:23
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 30882 rows and 5 columns in 00:00:25
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 36863 rows and 5 columns in 00:00:26
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 42489 rows and 5 columns in 00:00:27
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 47886 rows and 5 columns in 00:00:28
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 53006 rows and 5 columns in 00:00:29
[INFO] converting

[INFO] converting into woe values ...
[INFO] creating woe binning ...
Binning on 22431 rows and 5 columns in 00:00:27
[INFO] converting into woe values ...
[INFO] converting into woe values ...


In [None]:
# Save thresholds separately: same nesting as performance_df (dataset -> strategy).
# NOTE: this cell rebinds performance_df to frames without the 'Threshold' column, so running
# it a second time raises a KeyError when 'Threshold' is looked up again.
thresholds = [
    [frame['Threshold'] for frame in per_dataset]
    for per_dataset in performance_df
]
performance_df = [
    [frame.drop(['Threshold'], axis='columns') for frame in per_dataset]
    for per_dataset in performance_df
]

In [None]:
# Persist the nested list of per-round thresholds
with open('thresholds_imbalance.pickle', mode='wb') as handle:
    pickle.dump(thresholds, handle)

In [None]:
# Persist the nested list of per-round performance frames
with open('results_imbalance.pickle', mode='wb') as handle:
    pickle.dump(performance_df, handle)

In [None]:
# Run the same experiment, but with the first 8 rounds concatenated together, i.e. with only two
# rounds per strategy: the merged first 8 splits, followed by the 9th split.

metric_columns = ['AUC', 'PAUC', 'BS', 'Cost', 'Threshold']

performance_df = []
for start, test, name in zip(starters, AL, names):

    # Shuffle the DataFrame (fixed seed) and convert it to an array
    df_al_array = test.copy().sample(frac=1, random_state=888).values

    # Split the array into 9 parts of (almost) equal size
    splits = np.array_split(df_al_array, 9)

    # Convert each split back to a DataFrame with the original dtypes and store it in a list
    df_al_splits = [pd.DataFrame(split, columns=test.columns).astype(test.dtypes.to_dict()) for split in splits]

    # Merge the first 8 splits into one round and keep the last split as the second round
    concatenated_df = pd.concat(df_al_splits[:8], axis=0, ignore_index=True)
    df_al_splits = [concatenated_df, df_al_splits[8]]

    performance_str = []

    for strategy in strategies:
        # Every strategy starts from the same initial set; it grows from round to round
        start_str = start.copy()
        round_rows = []
        for round_ in df_al_splits:
            # Prepare data. `threshold` has to stay a notebook-level name: the *_cost error
            # functions defined in this notebook read it as a global.
            start_trans, round_trans, cost_matrix, threshold, cost_matrix_round = data_prep(start_str, round_, name)
            # Conduct strategy
            start_str, metrics = strategy(start_str, start_trans, round_, round_trans, threshold, cost_matrix, cost_matrix_round)
            # Evaluate strategy: store this round's metrics. They used to be discarded, which
            # left every result frame empty.
            # NOTE(review): assumes `metrics` is either a dict keyed by column name or a flat
            # sequence ordered like `metric_columns` - confirm against the helper module.
            if isinstance(metrics, dict):
                row = dict(metrics)
            else:
                row = dict(zip(metric_columns, np.ravel(metrics)))
            # Record the threshold of this round unless the strategy already reported one
            row.setdefault('Threshold', threshold)
            round_rows.append(row)
        # One row per round; built in one go instead of growing a DataFrame row by row
        performance_round = pd.DataFrame(round_rows, columns=metric_columns)
        performance_str.append(performance_round)
    performance_df.append(performance_str)

[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning 

[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning ...
[INFO] converting into woe values ...
[INFO] converting into woe values ...
[INFO] creating woe binning 

[INFO] converting into woe values ...


In [None]:
# Drop thresholds: keep only the metric columns of every result frame.
# NOTE: performance_df is rebound here, so running this cell a second time raises a KeyError
# because 'Threshold' is no longer present.
performance_df = [
    [frame.drop(['Threshold'], axis='columns') for frame in per_dataset]
    for per_dataset in performance_df
]

In [None]:
# Persist the nested list of performance frames of the two-round experiment
with open('results_no_rounds_imbalance.pickle', mode='wb') as handle:
    pickle.dump(performance_df, handle)