In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm

In [20]:
# Execute the two helper scripts inside the notebook namespace.
# NOTE(review): read_from_files, get_train_test_set, scaleData and the `datetime` module used
# below are never imported in this notebook, so they presumably come from these scripts — confirm.
%run ../shared_functions.py
%run ../my_shared_functions.py

# Directory holding the transformed transaction files and the last date to load.
DIR_INPUT = '../../fraud-detection-handbook/simulated-data-transformed/data/'
END_DATE = "2018-09-14"

print("Load  files")
# Load the transactions from 2018-06-11 up to END_DATE; %time reports how long the call takes.
%time transactions_df=read_from_files(DIR_INPUT, "2018-06-11", END_DATE)
print("{0} transactions loaded, containing {1} fraudulent transactions".format(len(transactions_df),
                                                                    transactions_df.TX_FRAUD.sum()))

# Label column and the feature columns; input_features is passed to scaleData below and is
# used later to select the feature values of the outlier rows.
output_feature="TX_FRAUD"
input_features=['TX_AMOUNT','TX_DURING_WEEKEND', 'TX_DURING_NIGHT', 'CUSTOMER_ID_NB_TX_1DAY_WINDOW',
       'CUSTOMER_ID_AVG_AMOUNT_1DAY_WINDOW', 'CUSTOMER_ID_NB_TX_7DAY_WINDOW',
       'CUSTOMER_ID_AVG_AMOUNT_7DAY_WINDOW', 'CUSTOMER_ID_NB_TX_30DAY_WINDOW',
       'CUSTOMER_ID_AVG_AMOUNT_30DAY_WINDOW', 'TERMINAL_ID_NB_TX_1DAY_WINDOW',
       'TERMINAL_ID_RISK_1DAY_WINDOW', 'TERMINAL_ID_NB_TX_7DAY_WINDOW',
       'TERMINAL_ID_RISK_7DAY_WINDOW', 'TERMINAL_ID_NB_TX_30DAY_WINDOW',
       'TERMINAL_ID_RISK_30DAY_WINDOW']

# Start of the training period and the window lengths handed to get_train_test_set.
# NOTE(review): the deltas are presumably expressed in days — confirm against get_train_test_set.
BEGIN_DATE = "2018-08-08"
start_date_training = datetime.datetime.strptime(BEGIN_DATE, "%Y-%m-%d")
delta_train=7
delta_delay=7
delta_test=7
delta_valid = delta_test  # not referenced again in the visible cells

# The second frame returned by get_train_test_set is treated as the validation set here.
(train_df, valid_df)=get_train_test_set(transactions_df,start_date_training,
                            delta_train=delta_train,delta_delay=delta_delay,delta_test=delta_test)

# Used further down to reshape flat attribution vectors into
# (n_outliers, SEQ_LEN, len(input_features)).
SEQ_LEN = 5

# By default, scales input data
(train_df, valid_df)=scaleData(train_df, valid_df,input_features)

Load  files
CPU times: total: 531 ms
Wall time: 533 ms
919767 transactions loaded, containing 8195 fraudulent transactions


In [22]:
# Registry of the evaluated models. Every entry records the model's name and the
# framework ("model_type") it was trained with; both values are used to build the
# paths of the saved outlier indices and attribution arrays.
# List positions: 0-5 are the handbook models, 6-19 are the tsai models.
models = [
    {'model_name': model_name, 'model_type': model_type}
    for model_type, model_names in (
        ('handbook', (
            'cnn',                          # 0
            'lstm',                         # 1
            'lstm_attention',               # 2
            'cnn_hypertuned',               # 3
            'lstm_hypertuned',              # 4
            'lstm_attention_hypertuned',    # 5
        )),
        ('tsai', (
            'lstm_fit_one_cycle',           # 6
            'fcn_fit_one_cycle',            # 7
            'gmlp_fit_one_cycle',           # 8
            'gru_fcn_fit_one_cycle',        # 9
            'gru_fit_one_cycle',            # 10
            'inceptiontime_fit_one_cycle',  # 11
            'lstm_fcn_fit_one_cycle',       # 12
            'mlstm_fcn_fit_one_cycle',      # 13
            'omniscalecnn_fit_one_cycle',   # 14
            'rescnn_fit_one_cycle',         # 15
            'resnet_fit_one_cycle',         # 16
            'tsit_fit_one_cycle',           # 17
            'tst_fit_one_cycle',            # 18
            'xcm_fit_one_cycle',            # 19
        )),
    )
    for model_name in model_names
]

In [23]:
# https://github.com/sbobek/inxai/blob/main/inxai/global_metrics.py

def stability(X, all_importances, epsilon=3, perturber=None, perturber_strategy='mean', dissimilarity='euclidean', confidence=None):
    """Per-observation explanation stability derived from a local Lipschitz-style ratio.

    For every observation ``i``, all observations ``j`` (``i`` itself included) whose
    Euclidean distance to ``i`` in feature space is strictly below ``epsilon`` are
    considered, and the ratio

        ||importances[i] - importances[j]|| / (dist(i, j) + 1) * confidence[i]

    is computed. The ``+ 1`` in the denominator keeps the ratio finite for identical
    observations. The score of ``i`` is ``1 / (largest_ratio + 1)``, so it is 1.0 when
    all neighbours have identical importances and decreases as they diverge.

    The computation is done row-by-row on NumPy arrays instead of nested
    ``DataFrame.iterrows()`` loops; the values are the same up to floating-point rounding.

    :param X: observations, one row per observation (DataFrame or 2-D array-like of numbers).
    :param all_importances: importance/attribution vectors, row ``i`` belonging to row ``i``
        of ``X``. Rows beyond ``len(X)`` are ignored; each row is flattened to a vector.
    :param epsilon: neighbourhood radius (strict ``<`` comparison on the Euclidean distance).
    :param perturber: unused; kept so the signature stays compatible with existing callers.
    :param perturber_strategy: unused; kept for signature compatibility.
    :param dissimilarity: unused; the Euclidean distance is always applied.
    :param confidence: optional per-observation weights indexed by row position;
        defaults to 1 for every observation.
    :return: list of Python floats, one stability score per row of ``X``.
    :raises IndexError: if ``all_importances`` has fewer rows than ``X``.
    """
    features = np.asarray(X, dtype=float)
    n_obs = features.shape[0]
    if n_obs == 0:
        # Nothing to score.
        return []
    features = features.reshape(n_obs, -1)

    importances = np.asarray(all_importances, dtype=float)
    if importances.shape[0] < n_obs:
        raise IndexError(
            'all_importances has {0} rows but X has {1}'.format(importances.shape[0], n_obs))
    importances = importances[:n_obs].reshape(n_obs, -1)

    if confidence is None:
        confidence = np.ones(n_obs)

    l_values = []
    for row_idx in range(n_obs):
        # Distance from the current observation to every observation (itself included).
        dists = np.linalg.norm(features - features[row_idx], axis=1)
        near = dists < epsilon

        shifts = np.linalg.norm(importances[near] - importances[row_idx], axis=1)
        ratios = shifts / (dists[near] + 1) * confidence[row_idx]

        # Only strictly positive ratios can raise the running maximum above its
        # initial value of 0; this also discards NaNs.
        positive = ratios[ratios > 0]
        max_val = positive.max() if positive.size else 0.0

        # Plain floats keep the string form of the list independent of the NumPy version.
        l_values.append(float(1 / (max_val + 1)))
    return l_values

In [53]:
# Compute, for every model, the stability of four explanation variants on that model's
# outliers: flattened / sequence-aggregated attribute scores and flattened /
# sequence-aggregated SHAP values.
stability_scores = []

for model in tqdm(models):
    model_type, model_name = model['model_type'], model['model_name']

    # Feature rows of the validation set whose positional index is listed as an outlier.
    # NOTE(review): rows come out in valid_df order; this assumes the saved attribution
    # arrays are stored in that same order (i.e. outliers_indices is sorted) — confirm.
    outliers_indices = np.load(f'generator_output/outliers_indices/{model_type}/{model_name}.npy')
    is_outlier = np.isin(np.arange(len(valid_df)), outliers_indices)
    X = valid_df.loc[is_outlier, input_features]

    # Attribute scores: one flat vector per outlier; the aggregated variant sums the
    # SEQ_LEN steps so that a single value per input feature remains.
    flattened_aas = np.load(f'generator_output/outliers_attribute_scores/{model_type}/synthetic/{model_name}.npy')
    aggregated_aas = flattened_aas.reshape(flattened_aas.shape[0], SEQ_LEN, len(input_features)).sum(axis=1)

    # SHAP values: 3-D array; the aggregated variant sums over the last axis, the
    # flattened variant swaps the last two axes before flattening each outlier.
    sequential_shap_values = np.load(f'generator_output/outliers_shap_values/deep_explainer/reduced_background_data/{model_type}/{model_name}.npy')
    aggregated_shap_values = sequential_shap_values.sum(axis=2)
    flattened_shap_values = sequential_shap_values.transpose(0, 2, 1).reshape(sequential_shap_values.shape[0], -1)

    stability_scores.append({
        'model_name': model_name,
        'model_type': model_type,
        'flat_aas_stability': stability(X, flattened_aas),
        'agg_aas_stability': stability(X, aggregated_aas),
        'flat_shap_stability': stability(X, flattened_shap_values),
        'agg_shap_stability': stability(X, aggregated_shap_values),
    })

# Build the frame straight from the records. Routing the mixed str/list rows through
# np.array() yields a ragged array, which raises ValueError on NumPy >= 1.24.
stability_scores_df = pd.DataFrame(stability_scores, columns=['model_name', 'model_type', 'flat_aas_stability', 'agg_aas_stability',
                                                              'flat_shap_stability', 'agg_shap_stability'])

100%|██████████| 20/20 [23:35<00:00, 70.75s/it]


Computing the stability scores for all 20 models took about 23 minutes (see the progress bar above).

In [54]:
# Cache the per-model stability scores on disk (index column omitted). The list-valued
# cells end up in the CSV as text and have to be parsed again after reading the file back.
stability_scores_df.to_csv('stability_scores.csv', index=False)

In [71]:
# Reload the cached scores. Every stability column holds the text of a Python list,
# so each cell is turned back into a real list.
# NOTE(review): eval executes whatever the CSV contains; acceptable only because the
# file is written by this notebook. Consider ast.literal_eval if the file may be shared.
stability_scores_df = pd.read_csv('stability_scores.csv')
for list_column in ('flat_aas_stability', 'agg_aas_stability', 'flat_shap_stability', 'agg_shap_stability'):
    parsed_lists = stability_scores_df[list_column].apply(eval).tolist()
    stability_scores_df[list_column] = parsed_lists
stability_scores_df

Unnamed: 0,model_name,model_type,flat_aas_stability,agg_aas_stability,flat_shap_stability,agg_shap_stability
0,cnn,handbook,"[0.9635748277516275, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.9545989834994387, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.9481488150551541, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.9398659731590328, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
1,lstm,handbook,"[0.964520094862425, 1.0, 0.9294356975335197, 1...","[0.9574143135091029, 1.0, 0.9427059717052554, ...","[0.8851913261428459, 1.0, 0.8859802600432258, ...","[0.8113876844316619, 1.0, 0.8130525380806803, ..."
2,lstm_attention,handbook,"[0.9625599845716911, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.9558564078507928, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.9318100112447267, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.8907423706769079, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
3,cnn_hypertuned,handbook,"[1.0, 1.0, 1.0, 1.0, 1.0, 0.9635308046613902, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 0.9450847886994633, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 0.9538728293575137, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 0.9435752284141529, ..."
4,lstm_hypertuned,handbook,"[0.9651290544317966, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.9588469931498252, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.9299283774673414, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.8980027102783341, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
5,lstm_attention_hypertuned,handbook,"[0.9644756001473717, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.9593917988015757, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.9418606856472364, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.9148829469551575, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
6,lstm_fit_one_cycle,tsai,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.96110988...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.94905926...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.76017843...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.70677464..."
7,fcn_fit_one_cycle,tsai,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9664093264979...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9518141143705...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9559736260578...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9545704307105..."
8,gmlp_fit_one_cycle,tsai,"[0.9689918254279015, 1.0, 0.8604172937130693, ...","[0.9592692475148868, 1.0, 0.8783855880266562, ...","[0.7697894116868191, 1.0, 0.8324757242795218, ...","[0.6652327732205582, 1.0, 0.7272928002416332, ..."
9,gru_fcn_fit_one_cycle,tsai,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.95542673...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.94687669...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.92711083...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.91413434..."


In [75]:
# Summarise each model's per-outlier stability lists by their mean and standard deviation.
stability_method_columns = ['flat_aas_stability', 'agg_aas_stability', 'flat_shap_stability', 'agg_shap_stability']

# One row per model: [mean, std] of every method, in the order of stability_method_columns.
stability_scores_describe_list = [
    [
        statistic
        for col in stability_method_columns
        for statistic in (np.array(row[col]).mean(), np.array(row[col]).std())
    ]
    for _, row in stability_scores_df.iterrows()
]

# Column labels matching the row layout above: <method>_mean followed by <method>_std.
stability_scores_stats_columns = np.array([
    method + suffix
    for method in stability_method_columns
    for suffix in ('_mean', '_std')
])

stability_scores_stats_df = pd.DataFrame(stability_scores_describe_list, columns=stability_scores_stats_columns)
# Put the identifying columns in front of the statistics.
stability_scores_stats_df.insert(0, 'model_name', stability_scores_df['model_name'])
stability_scores_stats_df.insert(1, 'model_type', stability_scores_df['model_type'])
stability_scores_stats_df

Unnamed: 0,model_name,model_type,flat_aas_stability_mean,flat_aas_stability_std,agg_aas_stability_mean,agg_aas_stability_std,flat_shap_stability_mean,flat_shap_stability_std,agg_shap_stability_mean,agg_shap_stability_std
0,cnn,handbook,0.978976,0.021147,0.977467,0.022957,0.956598,0.055005,0.962086,0.040407
1,lstm,handbook,0.976722,0.022189,0.975308,0.022957,0.951768,0.048146,0.927298,0.074136
2,lstm_attention,handbook,0.974324,0.021799,0.971606,0.024364,0.958567,0.036853,0.945832,0.051111
3,cnn_hypertuned,handbook,0.979204,0.022194,0.975814,0.025577,0.957091,0.04646,0.954082,0.050979
4,lstm_hypertuned,handbook,0.974894,0.022751,0.973374,0.023675,0.961826,0.035664,0.950603,0.047698
5,lstm_attention_hypertuned,handbook,0.976794,0.021385,0.974352,0.023613,0.967017,0.031482,0.95854,0.040088
6,lstm_fit_one_cycle,tsai,0.976996,0.022447,0.97506,0.024021,0.861697,0.129011,0.830833,0.159693
7,fcn_fit_one_cycle,tsai,0.979084,0.022473,0.977004,0.024819,0.980971,0.024815,0.982129,0.020262
8,gmlp_fit_one_cycle,tsai,0.973452,0.025197,0.971123,0.025702,0.864649,0.109789,0.818106,0.149683
9,gru_fcn_fit_one_cycle,tsai,0.977842,0.021225,0.975072,0.023861,0.957495,0.039199,0.955372,0.0395


In [76]:
# Save the per-model mean/std summary to disk, without the index column.
stability_scores_stats_df.to_csv('stability_scores_stats.csv', index=False)

In [2]:
# Reload the per-model mean/std summary from 'stability_scores_stats.csv'.
stability_scores_stats_df = pd.read_csv('stability_scores_stats.csv')

In [3]:
# Distribution of every numeric summary column across the models
# (count, mean, std, min, quartiles, max).
stability_scores_stats_df.describe()

Unnamed: 0,flat_aas_stability_mean,flat_aas_stability_std,agg_aas_stability_mean,agg_aas_stability_std,flat_shap_stability_mean,flat_shap_stability_std,agg_shap_stability_mean,agg_shap_stability_std
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,0.976442,0.022456,0.974553,0.024143,0.914755,0.079711,0.904011,0.089115
std,0.002612,0.001268,0.002468,0.000914,0.084998,0.078995,0.093506,0.086729
min,0.97043,0.020864,0.969195,0.022739,0.612297,0.024815,0.584429,0.020262
25%,0.974751,0.021528,0.973161,0.023567,0.880328,0.035586,0.871018,0.039941
50%,0.97706,0.022191,0.975066,0.024107,0.954792,0.049589,0.948217,0.051045
75%,0.978617,0.022792,0.976604,0.024685,0.961494,0.10959,0.958829,0.119819
max,0.979964,0.025197,0.977696,0.025769,0.980971,0.373011,0.982129,0.398584


In [5]:
# Export the describe() table; index=True keeps the statistic names (count, mean, ...)
# as the first CSV column.
stability_scores_stats_df.describe().to_csv('stability_scores_stats_describe.csv', index=True)