NDCG (Normalized Discounted Cumulative Gain): calculate Discounted Cumulative Gain for a ranking and normalize by diving by Ideal Discounted Cumulative Gain (DCG for perfect ranking order)

In [1]:
import numpy as np
from sklearn.metrics import ndcg_score
import pandas as pd

### NDCG using ground-truth ranking

In [2]:
input_features=['TX_AMOUNT','TX_DURING_WEEKEND', 'TX_DURING_NIGHT', 'CUSTOMER_ID_NB_TX_1DAY_WINDOW',
       'CUSTOMER_ID_AVG_AMOUNT_1DAY_WINDOW', 'CUSTOMER_ID_NB_TX_7DAY_WINDOW',
       'CUSTOMER_ID_AVG_AMOUNT_7DAY_WINDOW', 'CUSTOMER_ID_NB_TX_30DAY_WINDOW',
       'CUSTOMER_ID_AVG_AMOUNT_30DAY_WINDOW', 'TERMINAL_ID_NB_TX_1DAY_WINDOW',
       'TERMINAL_ID_RISK_1DAY_WINDOW', 'TERMINAL_ID_NB_TX_7DAY_WINDOW',
       'TERMINAL_ID_RISK_7DAY_WINDOW', 'TERMINAL_ID_NB_TX_30DAY_WINDOW',
       'TERMINAL_ID_RISK_30DAY_WINDOW']

SEQ_LEN = 5

models = [
    { # 0
        'model_name': 'cnn',
        'model_type': 'handbook'
    },
    { # 1
        'model_name': 'lstm',
        'model_type': 'handbook'
    },
    { # 2
        'model_name': 'lstm_attention',
        'model_type': 'handbook'
    },
    { # 3
        'model_name': 'cnn_hypertuned',
        'model_type': 'handbook'    
    },
    { # 4
        'model_name': 'lstm_hypertuned',
        'model_type': 'handbook'
    },
    { # 5
        'model_name': 'lstm_attention_hypertuned',
        'model_type': 'handbook'
    },
    { # 6
        'model_name': 'lstm_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 7
        'model_name': 'fcn_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 8
        'model_name': 'gmlp_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 9
        'model_name': 'gru_fcn_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 10
        'model_name': 'gru_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 11
        'model_name': 'inceptiontime_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 12
        'model_name': 'lstm_fcn_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 13
        'model_name': 'mlstm_fcn_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 14
        'model_name': 'omniscalecnn_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 15
        'model_name': 'rescnn_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 16
        'model_name': 'resnet_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 17
        'model_name': 'tsit_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 18
        'model_name': 'tst_fit_one_cycle',
        'model_type': 'tsai'
    },
    { # 19
        'model_name': 'xcm_fit_one_cycle',
        'model_type': 'tsai'
    }
]

In [3]:
pd.read_csv('ground_truth_feature_ranking.csv')

Unnamed: 0,FEATURE_NAME,FEATURE_CORRELATION,INPUT_FEATURE_INDEX
0,TERMINAL_ID_RISK_1DAY_WINDOW,0.331542,10
1,TERMINAL_ID_RISK_7DAY_WINDOW,0.250657,12
2,TERMINAL_ID_RISK_30DAY_WINDOW,0.133213,14
3,TX_AMOUNT,0.030936,0
4,CUSTOMER_ID_AVG_AMOUNT_1DAY_WINDOW,0.027232,4
5,CUSTOMER_ID_AVG_AMOUNT_7DAY_WINDOW,0.020854,6
6,CUSTOMER_ID_AVG_AMOUNT_30DAY_WINDOW,0.012846,8
7,TERMINAL_ID_NB_TX_30DAY_WINDOW,0.008227,13
8,TERMINAL_ID_NB_TX_7DAY_WINDOW,0.00772,11
9,TERMINAL_ID_NB_TX_1DAY_WINDOW,0.002931,9


In [5]:
ground_truth_feature_idx_ranking = pd.read_csv('ground_truth_feature_ranking.csv')['INPUT_FEATURE_INDEX'].values.tolist()
ndcg_scores = []
for model in models:
    outliers_aas = np.load(f'generator_output/outliers_attribute_scores/{model["model_type"]}/synthetic/{model["model_name"]}.npy')
    aggregated_aas = np.array([np.sum(outlier_aas, axis=0) for outlier_aas in outliers_aas.reshape(outliers_aas.shape[0], SEQ_LEN, len(input_features))])
    aas_df = pd.DataFrame({'FEATURE_NAME': input_features, 
    'AAS': np.mean(aggregated_aas, axis=0)}).sort_values(by='AAS', ascending=False)
    aas_all_ndcg_result = ndcg_score([ground_truth_feature_idx_ranking], [aas_df.index.values.tolist()])
    # TERMINAL_ID_RISK, TX_AMOUNT and CUSTOMER_ID_AVG_AMOUNT are the features which are directly impacted during fraud scenario creation
    # these features occupy top 7 positions in the ground truth feature ranking, that's why we only consider top 7 positions for "top" ndcg score calculation
    aas_top_ndcg_result = ndcg_score([ground_truth_feature_idx_ranking], [aas_df.index.values.tolist()], k=7)
    
    outliers_shap_values = np.load(f'generator_output/outliers_shap_values/deep_explainer/reduced_background_data/{model["model_type"]}/{model["model_name"]}.npy')
    aggregated_shap = np.array([np.sum(shap_values, axis=1) for shap_values in outliers_shap_values])
    shap_df = pd.DataFrame({'FEATURE_NAME': input_features, 
    'SHAP_VALUE': np.mean(aggregated_shap, axis=0)}).sort_values(by='SHAP_VALUE', ascending=False)
    shap_all_ndcg_result = ndcg_score([ground_truth_feature_idx_ranking], [shap_df.index.values.tolist()])
    shap_top_ndcg_result = ndcg_score([ground_truth_feature_idx_ranking], [shap_df.index.values.tolist()], k=7)

    ndcg_scores.append([model['model_name'], model['model_type'], aas_all_ndcg_result, aas_top_ndcg_result, shap_all_ndcg_result, shap_top_ndcg_result])
ndcg_scores_df = pd.DataFrame(np.array(ndcg_scores), columns=['model_name', 'model_type', 'aas_all_ndcg', 'aas_top7_ndcg',
                                                              'shap_all_ndcg', 'shap_top7_ndcg'])
ndcg_scores_df

Unnamed: 0,model_name,model_type,aas_all_ndcg,aas_top7_ndcg,shap_all_ndcg,shap_top7_ndcg
0,cnn,handbook,0.9233846105393264,0.8013284384907852,0.7854364774895732,0.6327168930129353
1,lstm,handbook,0.9162660942372374,0.8019585506732424,0.81469657503556,0.5601868779385555
2,lstm_attention,handbook,0.7395241333511757,0.4860460770682512,0.7937225523222607,0.6220573984882423
3,cnn_hypertuned,handbook,0.8907848826903969,0.747372244557738,0.7856050998737352,0.6214239134599044
4,lstm_hypertuned,handbook,0.9275954350684372,0.8048180516301517,0.9332902230260716,0.8009997059241046
5,lstm_attention_hypertuned,handbook,0.9346930962136272,0.7975100927847381,0.7251372470403751,0.4784199330652947
6,lstm_fit_one_cycle,tsai,0.9146726117943158,0.7642141575173071,0.8010947750314047,0.5746531935001431
7,fcn_fit_one_cycle,tsai,0.9405438133370648,0.8282479067110364,0.7166199266039969,0.4178753607948102
8,gmlp_fit_one_cycle,tsai,0.9326626225680894,0.7698829451250141,0.730810022900012,0.4188712031417272
9,gru_fcn_fit_one_cycle,tsai,0.931663715466138,0.8048180516301517,0.8941358092377202,0.7904136794540111


In [6]:
ndcg_scores_df.to_csv('ndcg_scores.csv', index=False)

In [2]:
ndcg_scores_df = pd.read_csv('ndcg_scores.csv', dtype={'aas_all_ndcg': float, 'aas_top7_ndcg': float, 'shap_all_ndcg': float,
                                                       'shap_top7_ndcg': float})

In [3]:
ndcg_scores_df[['aas_all_ndcg', 'aas_top7_ndcg', 'shap_all_ndcg', 'shap_top7_ndcg']].describe()

Unnamed: 0,aas_all_ndcg,aas_top7_ndcg,shap_all_ndcg,shap_top7_ndcg
count,20.0,20.0,20.0,20.0
mean,0.897703,0.749526,0.793193,0.584452
std,0.059519,0.102418,0.061316,0.122543
min,0.739524,0.486046,0.705013,0.355079
25%,0.863529,0.656502,0.742501,0.535795
50%,0.92549,0.795259,0.789664,0.60036
75%,0.935987,0.804818,0.816462,0.651646
max,0.964591,0.890593,0.93329,0.801


In [5]:
ndcg_scores_df[['aas_all_ndcg', 'aas_top7_ndcg', 'shap_all_ndcg', 'shap_top7_ndcg']].describe().to_csv(
    'ndcg_scores_descriptive_stats.csv', index=True
)