**PROJECT:** "Fraud prevention and detection"

**GROUP N°:** 8 - *MAPA, Connecting Ideas*

**MEMBERS:** Mariana Uro - Pablo Tamayo

**OBJECTIVE:** The project's objective is to predict fraudulent financial transactions using, among other features, the distances associated with each transaction.

**DESCRIPTION OF THE VARIABLES:**

    distance_from_home: distance between the cardholder's home and the place where the transaction took place.
    distance_from_last_transaction: distance from the place where the previous transaction took place.
    ratio_to_median_purchase_price: ratio of the transaction's purchase price to the median purchase price.
    repeat_retailer: whether the transaction was made at a retailer the customer had already used.
    used_chip: whether the transaction was made using the card's chip.
    used_pin_number: whether the transaction was made using a PIN number.
    online_order: whether the transaction was an online order.
    fraud: whether the transaction is fraudulent (target variable).

## SETUP DEL PROYECTO

**IMPORTACIÓN DE LIBRERÍAS**

In [None]:
# Numeric, scientific and data science libreries
#============================================================================================================
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor
import sidetable
from statistics import mean, stdev
from scipy.stats import gmean

#Visualization
#============================================================================================================
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno

# Sklearn imports
#============================================================================================================

# Data Preprocessing
from sklearn import preprocessing
from sklearn.preprocessing import quantile_transform
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import PowerTransformer

# Data preparation for training
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import learning_curve

# Evaluation metrics and visualization
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve, auc

# Models from Sklearn and Boosting
#----------------------------------------------------------------------------------------
#Tree Model
from sklearn.tree import DecisionTreeClassifier

#Lineal model
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier

#Neigbhbors model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestCentroid 

#Ensemble model
from sklearn. ensemble import StackingClassifier
from sklearn. ensemble import RandomForestClassifier
 
#Neural model
import sklearn.neural_network

#Boosting model
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
#----------------------------------------------------------------------------------------

#Feature Importance
from sklearn.inspection import permutation_importance

#Optimization
#============================================================================================================
import optuna

#Time
#============================================================================================================
from time import time

In [None]:
# Import everything from the utils file
#============================================================================================================
from utils import *

**IMPORTACIÓN Y ANÁLISIS DEL DATASET**

In [None]:
# Load the card-transactions dataset from the local CSV file and preview the first rows
#============================================================================================================

df_fraude_creditcard = pd.read_csv("card_transdata.csv")
df_fraude_creditcard.head()

In [None]:
# Size of the dataset as (rows, columns)
#============================================================================================================

df_fraude_creditcard.shape

In [None]:
# Check graphically whether the dataset contains missing (NaN) values.
#============================================================================================================

msno.matrix(df_fraude_creditcard)

In [None]:
# Main descriptive statistics of every column, rounded to two decimals
#============================================================================================================

df_fraude_creditcard.describe().round(2)

# Análisis de correlación de las variables

In [None]:
# Build a dataframe with the pairwise correlations of the columns using the "corr" method
#============================================================================================================

df_corr = df_fraude_creditcard.corr()
df_corr

In [None]:
# Annotated seaborn heatmap of the correlation matrix.
#=========================================================================================================================

cor = df_corr
plt.figure(figsize=(20, 15))
sns.heatmap(
    data=cor,
    annot=True,
    linewidths=0.4,
    cmap="bwr_r",
)
plt.show()


In [None]:
# Skewness of every column of the dataset.
#============================================================================================================

df_fraude_creditcard.skew()

# MACHINE LEARNING

Se entrenaron distintos modelos de clasificación, tomando como referencia el modelo Logistic Regression:

<ul>
<li>MODELO LINEAL</li> 
  <ul>
  <li>Logistic Regression
  <li>Ridge Classifier
  </ul>
<li> MODELO TREE </li>
  <ul>
  <li> Decision Tree Classifier
  </ul>
<li> MODELO ENSEMBLE</li>
  <ul>
  <li>Random Forest Classifier
  <li>XGBoost Classifier 
  <li>Cat Boost Classifier
  <li>LightBoost Classifier
  </ul>

<li>MODELO NEIGHBORS</li>
  <ul>
  <li>KNeighbors Classifier
  <li>Nearest Centroid
  </ul>
</ul>

Para la ejecución de cada modelo, se utilizó StratifiedKFold y luego se realizó la Optimización Bayesiana. Además, para analizar la performance de cada modelo se aplicó la métrica Recall, ya que tuvo los mejores scores en comparación con Precision y F1.
 

<p>Finalmente, en base a los resultados obtenidos, se seleccionaron los mejores modelos para construir un modelo ensamblado, utilizando stacking.</p>

In [None]:
# Define the X features dataframe: an independent copy of every column except the last one
# NOTE(review): assumes the target column "fraud" is the last column of the CSV — confirm
#=============================================================================================================================

X = df_fraude_creditcard.iloc[:, :-1].copy(deep=True)
X.head()

In [None]:
# Check the skewness of every feature before applying any transformation
#=============================================================================================================================

X.skew()

In [None]:
# Instantiate the transformers with their default settings
#=============================================================================================================================

nrm_transformer = Normalizer()
pow_transformer = PowerTransformer()

In [None]:
# Apply three chained transformations because the features are too skewed:
# quantile transform, then Normalizer, then PowerTransformer.
# NOTE(review): every step is fitted on the full dataset, i.e. before any train/test split.
#=============================================================================================================================

X = quantile_transform(X)
X = nrm_transformer.fit_transform(X)
X = pow_transformer.fit_transform(X)

In [None]:
# Final X dataframe: wrap the transformed array in a DataFrame and preview the first 10 rows
#=============================================================================================================================

X = pd.DataFrame(X)
X.head(10)

In [None]:
# Check the skewness of every feature again, now on the transformed data
#=============================================================================================================================

X.skew()

In [None]:
# Define the target variable: an independent copy of the "fraud" column
#=============================================================================================================================

y = df_fraude_creditcard["fraud"].copy(deep=True)
y

In [None]:
# Instantiate one default-configured estimator per candidate model
#=============================================================================================================================

# Linear models
lgr = LogisticRegression()
rdc = RidgeClassifier()

# Tree model
clf = DecisionTreeClassifier()

# Ensemble and boosting models
rfc = RandomForestClassifier()
bst = XGBClassifier()
ctb = CatBoostClassifier()
lgh = LGBMClassifier()

# Neighbors models
knn = KNeighborsClassifier()
knc = NearestCentroid()


### StratifiedKFold

In [None]:
# Rebuild the training inputs from the raw dataframe:
# features go through quantile transform -> Normalizer -> PowerTransformer; the target is "fraud"

nrm_transformer = Normalizer()
pow_transformer = PowerTransformer()

y = df_fraude_creditcard["fraud"].copy(deep=True)
X = df_fraude_creditcard.iloc[:, :-1].copy(deep=True)
X = pow_transformer.fit_transform(
    nrm_transformer.fit_transform(quantile_transform(X))
)

In [None]:
# Create the stratified 5-fold splitter (shuffled, fixed seed) and show its number of splits

skf = StratifiedKFold(n_splits=5,random_state=0, shuffle=True)
skf.get_n_splits(X, y)

In [None]:
# Show the configuration of the splitter
print(skf)


In [None]:
# Basic walk through the splits: print the train/test indices of every fold.
# After the loop, X_train/X_test/y_train/y_test hold the data of the LAST fold only.

for train_index, test_index in skf.split(X, y):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

In [None]:
# Min-max scale the input features used by the cross-validation runs below

# Feature Scaling for input features.
# NOTE(review): the scaler is fitted on all rows, not only on each training fold — confirm this is intended.
scaler = preprocessing.MinMaxScaler()
x_scaled = scaler.fit_transform(X)

# Decision Tree Classifier

In [None]:
# Per-fold recall of the Decision Tree under stratified cross-validation.
lst_metric_stratified_clf = []

for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    clf.fit(x_train_fold, y_train_fold)
    # Score on this fold's own held-out rows (scaled like the training rows),
    # not on the X_test / y_test globals left over from an earlier cell.
    lst_metric_stratified_clf.append(recall_score(y_test_fold, clf.predict(x_test_fold)))

lst_metric_stratified_clf

# Random Forest

In [None]:
# Per-fold recall of the Random Forest under stratified cross-validation.
lst_metric_stratified_rfc = []

for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    rfc.fit(x_train_fold, y_train_fold)
    # Score on this fold's own held-out rows (scaled like the training rows),
    # not on the X_test / y_test globals left over from an earlier cell.
    lst_metric_stratified_rfc.append(recall_score(y_test_fold, rfc.predict(x_test_fold)))

lst_metric_stratified_rfc

# XGBOOST

In [None]:
# Per-fold recall of XGBoost under stratified cross-validation.
lst_metric_stratified_bst = []

for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    bst.fit(x_train_fold, y_train_fold)
    # Score on this fold's own held-out rows (scaled like the training rows),
    # not on the X_test / y_test globals left over from an earlier cell.
    lst_metric_stratified_bst.append(recall_score(y_test_fold, bst.predict(x_test_fold)))

lst_metric_stratified_bst


# CatBoost

In [None]:
# Per-fold recall of CatBoost under stratified cross-validation.
lst_metric_stratified_ctb = []

for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    ctb.fit(x_train_fold, y_train_fold)
    # Score on this fold's own held-out rows (scaled like the training rows),
    # not on the X_test / y_test globals left over from an earlier cell.
    lst_metric_stratified_ctb.append(recall_score(y_test_fold, ctb.predict(x_test_fold)))

lst_metric_stratified_ctb

# LightGBM

In [None]:
# Per-fold recall of LightGBM under stratified cross-validation.
lst_metric_stratified_lgh = []

for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    lgh.fit(x_train_fold, y_train_fold)
    # Score on this fold's own held-out rows (scaled like the training rows),
    # not on the X_test / y_test globals left over from an earlier cell.
    lst_metric_stratified_lgh.append(recall_score(y_test_fold, lgh.predict(x_test_fold)))

lst_metric_stratified_lgh

 # Nearest Neighbors Classification (KNN)

In [None]:
# Per-fold recall of the K-Nearest-Neighbors classifier under stratified cross-validation.
lst_metric_stratified_knn = []

for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    knn.fit(x_train_fold, y_train_fold)
    # Score on this fold's own held-out rows (scaled like the training rows),
    # not on the X_test / y_test globals left over from an earlier cell.
    lst_metric_stratified_knn.append(recall_score(y_test_fold, knn.predict(x_test_fold)))

lst_metric_stratified_knn

# Nearest Centroid Classifier (KNC)

In [None]:
# Per-fold recall of the Nearest Centroid classifier under stratified cross-validation.
lst_metric_stratified_knc = []

for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    knc.fit(x_train_fold, y_train_fold)
    # Score on this fold's own held-out rows (scaled like the training rows),
    # not on the X_test / y_test globals left over from an earlier cell.
    lst_metric_stratified_knc.append(recall_score(y_test_fold, knc.predict(x_test_fold)))

lst_metric_stratified_knc

# Logistic regression

In [None]:
# Per-fold recall of the Logistic Regression (reference model) under stratified cross-validation.
lst_metric_stratified_lgr = []

for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    lgr.fit(x_train_fold, y_train_fold)
    # Score on this fold's own held-out rows (scaled like the training rows),
    # not on the X_test / y_test globals left over from an earlier cell.
    lst_metric_stratified_lgr.append(recall_score(y_test_fold, lgr.predict(x_test_fold)))

lst_metric_stratified_lgr

# Ridge Classifier (rdc)

In [None]:
# Per-fold recall of the Ridge Classifier under stratified cross-validation.
lst_metric_stratified_rdc = []

for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    rdc.fit(x_train_fold, y_train_fold)
    # Score on this fold's own held-out rows (scaled like the training rows),
    # not on the X_test / y_test globals left over from an earlier cell.
    lst_metric_stratified_rdc.append(recall_score(y_test_fold, rdc.predict(x_test_fold)))

lst_metric_stratified_rdc

# Analisis de Resultados

**List with name model**

In [None]:
# Display names of the models; used as column labels of the score table and as box-plot labels,
# so the order must match the order in which the per-model score lists are combined.
name_model= ["Logistic regression", "Ridge classifier","Decision Tree Classifier","Random Forest","XGBoost","CatBoost","LightBoost","KNN","KNC"]
name_model

**Dataframe with metric list**

In [None]:
# Per-fold recall table: one row per fold, one column per model.
# The lists are given in the same order as the labels in name_model.
fold_scores_by_model = [
    lst_metric_stratified_lgr,
    lst_metric_stratified_rdc,
    lst_metric_stratified_clf,
    lst_metric_stratified_rfc,
    lst_metric_stratified_bst,
    lst_metric_stratified_ctb,
    lst_metric_stratified_lgh,
    lst_metric_stratified_knn,
    lst_metric_stratified_knc,
]
df_score = pd.DataFrame(list(zip(*fold_scores_by_model)), columns=name_model)
df_score

In [None]:
# Descriptive statistics of the per-fold recall of every model
df_score.describe()

**Maximum value per model**

In [None]:
# Best (maximum) fold recall of every model, printed from highest to lowest
value_max = df_score.max()
print(value_max.sort_values(ascending=False))

**Boxplot with model**

In [None]:
# Convert the score dataframe into a NumPy array (rows = folds, columns = models)

array_df_score = df_score.to_numpy()
array_df_score

In [None]:
# Convert the scores of the reference model (Logistic Regression) into a NumPy array

array_df_score_lgr = df_score['Logistic regression'].to_numpy()
array_df_score_lgr

In [None]:
# Box plot of the per-fold recall of every model, with two reference lines:
# the mean over all models and folds, and the mean of the reference model.
plt.figure(figsize=(20, 12))
plt.title('Recall Results with Stratified-K-folds', fontsize=30)
# Pass a plain 2-D array so that each column (one model) becomes one box.
plt.boxplot(df_score.to_numpy(), labels=name_model)

plt.xticks(fontsize=25, rotation=90)
plt.yticks(fontsize=15)

# Average the whole score table and the whole reference column; indexing with [0]
# would pick only the first fold instead of the mean announced in the legend.
plt.axhline(y=df_score.to_numpy().mean(), label='Mean all model', color="blue", ls='--', lw=3)
plt.axhline(y=df_score['Logistic regression'].mean(), label='Mean reference model', color="green", ls='--', lw=3)
plt.legend(loc='upper left', fontsize=12)
plt.grid(axis='y')
plt.show()

**Conclusión:** Al observar el gráfico "Recall Results with Stratified-K-folds", calculamos la media de nuestro modelo de referencia (Logistic Regression), y se puede ver que, salvo el modelo Ridge Classifier, todos los otros modelos tienen mejor score. Es por ello que decidimos calcular también la media de todos los resultados de los modelos seleccionados. Con estos dos análisis, seleccionamos un modelo por cada familia de modelos:
    
    1. Logistic Regression
    2. XGBoost
    3. Decision Tree Classifier
    4. KNC
    
En el siguiente paso, se analizará los tiempos de estos modelos, a traves del Learning Curve.

In [None]:
# Learning curve of the XGBoost model, scored with recall over the stratified folds
# (plot_learning_curve is presumably provided by `from utils import *` — confirm)
plot_learning_curve(bst, "Test", X, y, train_sizes=np.linspace(0.2, 1.0, 5), scoring='recall', cv=skf, ylim=(0.6, 1))
plt.show()

In [None]:
# Learning curve of the Decision Tree model, scored with recall over the stratified folds
# (plot_learning_curve is presumably provided by `from utils import *` — confirm)
plot_learning_curve(clf, "Test", X, y, train_sizes=np.linspace(0.2, 1.0, 5), scoring='recall', cv=skf, ylim=(0.6, 1))
plt.show()

In [None]:
# Learning curve of the Logistic Regression (reference model), scored with recall over the stratified folds
# (plot_learning_curve is presumably provided by `from utils import *` — confirm)
plot_learning_curve(lgr, "Test", X, y, train_sizes=np.linspace(0.2, 1.0, 5), scoring='recall', cv=skf, ylim=(0.4, 1))
plt.show()

**Conclusión:** Se puede observar que el modelo KNC resultó ser el más económico en términos de tiempo de ejecución y el de mejores resultados entre los modelos seleccionados, mientras que XGBoost fue el más caro, con el segundo mejor resultado. El modelo de referencia resultó económico, pero con los resultados más bajos.

# OPTIMIZATION BAYESIANA

# Optimization Logistic Regression Model

In [None]:
# Objective function for the Logistic Regression
# ==============================================================================

def objective_lgr(trial):
    """Optuna objective: cross-validated recall of a Logistic Regression.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``penalty``, ``C`` and ``l1_ratio``.

    Returns
    -------
    float
        Geometric mean of the recall obtained on the 5 stratified folds.
    """
    # hyperparameter setting
    param = {
        'penalty': trial.suggest_categorical('penalty', ['l1', 'l2', 'elasticnet', 'none']),
        # C is an inverse regularisation strength and must be strictly positive,
        # so the search range starts just above zero.
        'C': trial.suggest_float('C', 1e-6, 1.0),
        'l1_ratio': trial.suggest_float('l1_ratio', 0.0, 1.0),
    }

    # model training and evaluation
    lgr = LogisticRegression(**param, solver='saga')
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    skf_result_lgr = []

    for train_index, test_index in skf.split(X, y):
        x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
        y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
        lgr.fit(x_train_fold, y_train_fold)
        # Score on this fold's own held-out rows, not on stale notebook globals.
        skf_result_lgr.append(recall_score(y_test_fold, lgr.predict(x_test_fold)))

    # output: evaluation score
    return gmean(skf_result_lgr)

In [None]:
# Create the study (maximize the objective) and run 50 trials for the Logistic Regression
# ==============================================================================
study_lgr = optuna.create_study(direction='maximize')
study_lgr.optimize(objective_lgr, n_trials=50)

In [None]:
# Best score and best hyper-parameters found by the Logistic Regression study
# ==============================================================================
print(f'Best Value: {study_lgr.best_value}')
print(f'Best Parameters: {study_lgr.best_params}')

In [None]:
# Optimization history of the trials of the Logistic Regression study
# ==============================================================================
optuna.visualization.plot_optimization_history(study_lgr)

In [None]:
# Hyper-parameter importances of the Logistic Regression study
# ==============================================================================
optuna.visualization.plot_param_importances(study_lgr)

In [None]:
# Relationship between the hyper-parameters of the Logistic Regression study (parallel coordinates)
# ==============================================================================
optuna.visualization.plot_parallel_coordinate(study_lgr)

In [None]:
# Objective values obtained for each hyper-parameter of the Logistic Regression study (slice plot)
# ==============================================================================
optuna.visualization.plot_slice(study_lgr)

# Optimization KNC model

In [None]:
# Objective function for the KNC Model
# ==============================================================================

def objective_knc(trial):
    """Optuna objective: cross-validated recall of a Nearest Centroid classifier.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``metric`` and ``shrink_threshold``.

    Returns
    -------
    float
        Geometric mean of the recall obtained on the 5 stratified folds.
    """
    # hyperparameter setting
    # NOTE(review): recent scikit-learn releases may only accept 'euclidean' and
    # 'manhattan' for NearestCentroid — confirm against the installed version.
    metric_trial = trial.suggest_categorical("metric", ['euclidean', 'manhattan', 'minkowski'])
    shrink_threshold_trial = trial.suggest_float('shrink_threshold', 0.0, 1.0)

    # model training and evaluation
    knc = NearestCentroid(metric=metric_trial, shrink_threshold=shrink_threshold_trial)
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    skf_result_knc = []

    for train_index, test_index in skf.split(X, y):
        x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
        y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
        knc.fit(x_train_fold, y_train_fold)
        # Score on this fold's own held-out rows, not on stale notebook globals.
        skf_result_knc.append(recall_score(y_test_fold, knc.predict(x_test_fold)))

    # output: evaluation score
    return gmean(skf_result_knc)

In [None]:
# Create the study (maximize the objective) and run 30 trials for the KNC model
# ==============================================================================
study_knc = optuna.create_study(direction='maximize')
study_knc.optimize(objective_knc, n_trials=30)

In [None]:
# Best score and best hyper-parameters found by the KNC study
# ==============================================================================
print(f'Best Value: {study_knc.best_value}')
print(f'Best Parameters: {study_knc.best_params}')

In [None]:
# Optimization history of the trials of the KNC study
# ==============================================================================
optuna.visualization.plot_optimization_history(study_knc)

In [None]:
# Hyper-parameter importances of the KNC study
# ==============================================================================
optuna.visualization.plot_param_importances(study_knc)

In [None]:
# Relationship between the hyper-parameters of the KNC study (parallel coordinates)
# ==============================================================================
optuna.visualization.plot_parallel_coordinate(study_knc)

In [None]:
# Objective values obtained for each hyper-parameter of the KNC study (slice plot)
# ==============================================================================
optuna.visualization.plot_slice(study_knc)

# Optimization Decision Tree Classifier

In [None]:
# Objective function for the Decision Tree Classifier Model
# ==============================================================================

def objective_clf(trial):
    """Optuna objective: cross-validated recall of a Decision Tree classifier.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``max_features``, ``max_depth`` and
        ``min_samples_split``.

    Returns
    -------
    float
        Geometric mean of the recall obtained on the 5 stratified folds.
    """
    # hyperparameter setting (integer ranges use the default step of 1)
    param = {
        'max_features': trial.suggest_int('max_features', 1, 7),
        'max_depth': trial.suggest_int('max_depth', 1, 128),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 11),
    }

    # model training and evaluation
    clf = DecisionTreeClassifier(**param)
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    skf_result_clf = []

    for train_index, test_index in skf.split(X, y):
        x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
        y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
        clf.fit(x_train_fold, y_train_fold)
        # Score on this fold's own held-out rows, not on stale notebook globals.
        skf_result_clf.append(recall_score(y_test_fold, clf.predict(x_test_fold)))

    # output: evaluation score
    return gmean(skf_result_clf)

In [None]:
# Create the study (maximize the objective) and run 50 trials for the Decision Tree Classifier
# ==============================================================================
study_clf = optuna.create_study(direction='maximize')
study_clf.optimize(objective_clf, n_trials=50)

In [None]:
# Best score and best hyper-parameters found by the Decision Tree study
# ==============================================================================
print(f'Best Value: {study_clf.best_value}')
print(f'Best Parameters: {study_clf.best_params}')

In [None]:
# Optimization history of the trials of the Decision Tree study
# ==============================================================================
optuna.visualization.plot_optimization_history(study_clf)

In [None]:
# Hyper-parameter importances of the Decision Tree study
# ==============================================================================
optuna.visualization.plot_param_importances(study_clf)

In [None]:
# Relationship between the hyper-parameters of the Decision Tree study (parallel coordinates)
# ==============================================================================
optuna.visualization.plot_parallel_coordinate(study_clf)

In [None]:
# Objective values obtained for each hyper-parameter of the Decision Tree study (slice plot)
# ==============================================================================
optuna.visualization.plot_slice(study_clf)

# Optimization XGBoost Model

In [None]:
# Objective function for the XGBoost Model
# ==============================================================================

def objective_bst(trial):
    """Optuna objective: cross-validated recall of an XGBoost classifier.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``n_estimators``, ``max_depth`` and
        ``colsample_bytree``.

    Returns
    -------
    float
        Geometric mean of the recall obtained on the 5 stratified folds.
    """
    # hyperparameter setting
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 300, 1000),
        'max_depth': trial.suggest_categorical('max_depth', [7, 9, 11, 13, 15, 17, 19, 21]),
        # XGBoost has no `max_features`; the fraction of columns sampled per tree
        # is controlled by `colsample_bytree`.
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 1.0),
    }

    # model training and evaluation
    bst = XGBClassifier(**param)
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
    skf_result_bst = []

    for train_index, test_index in skf.split(X, y):
        x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
        y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
        bst.fit(x_train_fold, y_train_fold)
        # Score on this fold's own held-out rows, not on stale notebook globals.
        skf_result_bst.append(recall_score(y_test_fold, bst.predict(x_test_fold)))

    # output: evaluation score
    return gmean(skf_result_bst)

In [None]:
# Create the study (maximize the objective) and run 30 trials for the XGBoost model
# ==============================================================================
study_bst = optuna.create_study(direction='maximize')
study_bst.optimize(objective_bst, n_trials=30)

In [None]:
# Best score and best hyper-parameters found by the XGBoost study
# ==============================================================================
print(f'Best Value: {study_bst.best_value}')
print(f'Best Parameters: {study_bst.best_params}')

In [None]:
# Optimization history of the trials of the XGBoost study
# ==============================================================================
optuna.visualization.plot_optimization_history(study_bst)

In [None]:
# Hyper-parameter importances of the XGBoost study
# ==============================================================================
optuna.visualization.plot_param_importances(study_bst)

In [None]:
# Relationship between the hyper-parameters of the XGBoost study (parallel coordinates)
# ==============================================================================
optuna.visualization.plot_parallel_coordinate(study_bst)

In [None]:
# Objective values obtained for each hyper-parameter of the XGBoost study (slice plot)
# ==============================================================================
optuna.visualization.plot_slice(study_bst)

 # Summary of All Runs

In [None]:
# Simple scores vs Optimized scores
# ==============================================================================

# Baseline scores: geometric mean of the per-fold recall of the default models,
# i.e. the same aggregation the Optuna objectives return.
ref_score_lgr = gmean(lst_metric_stratified_lgr)
score_knc = gmean(lst_metric_stratified_knc)
score_clf = gmean(lst_metric_stratified_clf)
score_bst = gmean(lst_metric_stratified_bst)

print(f'Reference Logistic Regression Score: {ref_score_lgr}')
print(f'KNC Score: {score_knc} | Optimized KNC Score: {study_knc.best_value}')
print(f'Decision Tree Classifier Score: {score_clf} | Optimized Decision Tree Classifier Score: {study_clf.best_value}')
print(f'XGBoost Score: {score_bst} | Optimized XGBoost Score: {study_bst.best_value}')

# Ensemble model

In [None]:
# Define the models to combine, rebuilt with the best hyper-parameters found by each study
# ==============================================================================

clf = DecisionTreeClassifier(**study_clf.best_params)
bst = XGBClassifier(**study_bst.best_params)
knc = NearestCentroid(**study_knc.best_params)

In [None]:
# Define the meta learner
# The Logistic Regression model has been chosen for being a standard in classification problems
# ==============================================================================

lgr = LogisticRegression()

In [None]:
# Store the models to combine as (name, estimator) pairs
# ==============================================================================

estimators = [("Decision Tree Classifier", clf),("XGBoost", bst), ("KNC", knc)]

In [None]:
# Define the ensemble model and evaluate it with stratified 10-fold cross-validation
# ==============================================================================

ensemble = StackingClassifier(estimators=estimators, final_estimator=lgr)

skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
y_test = []                    # true labels of every held-out fold
predictions = []               # ensemble predictions for every held-out fold
skf_result_ensembelmodel = []  # recall obtained on every fold

for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = x_scaled[train_index], x_scaled[test_index]
    y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
    ensemble.fit(x_train_fold, y_train_fold)
    fold_predictions = ensemble.predict(x_test_fold)
    predictions.append(fold_predictions)
    y_test.append(y_test_fold)
    # recall_score expects (y_true, y_pred); with the arguments swapped the
    # value computed is the precision, not the recall.
    skf_result_ensembelmodel.append(recall_score(y_test_fold, fold_predictions))

In [None]:
# Evaluate the final model: geometric mean of the recall obtained on every fold
# ==============================================================================

# The per-fold scores are stored in skf_result_ensembelmodel by the cross-validation cell.
recall_ensemble = gmean(skf_result_ensembelmodel)
print('Recall Score: %.3f' % recall_ensemble)

# Evaluate

In [None]:
# Confusion matrix for general reference, built from the first held-out fold only
# ==============================================================================

cf_matrix = confusion_matrix(y_test[0], predictions [0])

In [None]:
# Prepare the annotations of the confusion matrix: cell names and cell counts
# ==============================================================================

group_names = ['True Neg', 'False Pos', 'False neg', 'True Pos']

group_counts = [f"{value:0.0f}" for value in cf_matrix.flatten()]

In [None]:
# Add the share of every cell and assemble the 2x2 grid of annotation labels
# ==============================================================================

group_percentages = [
    f"{share:.2%}" for share in cf_matrix.flatten() / np.sum(cf_matrix)
]

labels = np.asarray([
    f"{name}\n{count}\n{share}"
    for name, count, share in zip(group_names, group_counts, group_percentages)
]).reshape(2, 2)

In [None]:
# Final confusion matrix
# ==============================================================================

plt.figure(figsize=(8, 5))

ax = sns.heatmap(cf_matrix, annot=labels, fmt='', cmap='Blues')
ax.set_title('Seaborn Confusion Matrix with labels\n\n')
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')

## Tick labels - list must be in alphabetical order
# Label both axes: columns are the predicted classes, rows the actual classes.
ax.xaxis.set_ticklabels(['False', 'True'])
ax.yaxis.set_ticklabels(['False', 'True'])

## Display the visualization of the Confusion Matrix
plt.show()

# Interpretation

In [None]:
# Index arrays of the first (train, test) split produced by the current splitter
# NOTE(review): the cells below index with train_index / test_index, not with these names — confirm which split is intended
train_idx, test_idx = next(skf.split(X,y))

In [None]:
# Feature ranking using permutation feature importance
# ==============================================================================

# Evaluate on the rows held out from the ensemble's most recent fit (the last
# cross-validation fold): importances measured on training rows reflect what the
# model memorised rather than what generalises. random_state makes the shuffles
# reproducible.
feature_ranking = permutation_importance(
    ensemble, x_scaled[test_index], y.iloc[test_index], n_repeats=100, random_state=0
)

In [None]:
# Order the features by their mean importance (ascending)
# ==============================================================================

perm_sorted_idx = feature_ranking.importances_mean.argsort()

In [None]:
# Plot the feature ranking: one horizontal box per feature with its importances over the repeats,
# labelled with the feature column names reordered by mean importance
# ==============================================================================

plt.figure(figsize=(15,8))
plt.boxplot(feature_ranking.importances[perm_sorted_idx].T, vert=False,
           labels = df_fraude_creditcard.iloc[:, :-1].columns[perm_sorted_idx])
plt.show()

The top 3 features are:
    1. ratio_to_median_purchase_price
    2. online_order
    3. repeat_retailer

En una segunda instancia del trabajo, se podría no considerar las variables "used_chip" y "used_pin_number" ya que las mismas no parecen tener mucha importancia