# Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import itertools

In [None]:
from sklearn.metrics import roc_curve,auc, roc_auc_score, classification_report
from sklearn.metrics import confusion_matrix

__________________________________

# Data

In [None]:
# Load the scored dataset; later cells read the columns isFraud, sas_pred and xgboost_pred from it.
data = pd.read_csv('data/data.csv')

In [None]:
# Preview the first rows of the freshly loaded frame.
data.head()

________________________________________________

# New Data
- Create new columns from each model's predicted scores

In [None]:
# Turn each model's score into a hard 0/1 prediction:
# 1 when the score is at least 0.5, otherwise 0.
for model_name in ('sas', 'xgboost'):
    data[model_name + '_predicted'] = np.where(data[model_name + '_pred'] >= 0.5, 1, 0)

In [None]:
# Preview the frame again to check the sas_predicted / xgboost_predicted columns created above.
data.head()

________________________________

# **Confusion Matrix**

In [None]:
#Evaluation of Model - Confusion Matrix Plot
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    The plot is drawn on the current matplotlib figure; the function does not
    create a figure and does not call ``plt.show()``.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix with true labels on the rows and predicted labels
        on the columns.
    classes : sequence of str
        Tick labels, in the same order as the rows/columns of ``cm``.
    normalize : bool, default False
        When True, every row is divided by its sum so each cell is the
        fraction of that true class. Rows that sum to zero are shown as 0
        instead of NaN.
    title : str
        Title placed above the heat map.
    cmap : matplotlib colormap
        Colormap passed to ``plt.imshow``.
    """
    cm = np.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Divide only where the row total is non-zero; a class with no true
        # samples would otherwise yield NaN cells and a NaN colour threshold.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # 'd' only accepts integers, so fall back to two decimals for any
    # non-integer matrix (normalised or passed in as floats).
    is_integer_matrix = np.issubdtype(cm.dtype, np.integer)
    fmt = 'd' if (is_integer_matrix and not normalize) else '.2f'

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

# SAS

In [None]:
# Confusion matrix of the true fraud flag against the thresholded SAS predictions.
sas_cnf_matrix = confusion_matrix(data['isFraud'], data['sas_predicted'])

# Draw it twice, each on its own figure: raw counts first, then row-normalised.
for use_normalized, figure_title in ((False, 'SAS'), (True, 'SAS - Normalized')):
    plt.figure()
    plot_confusion_matrix(sas_cnf_matrix,
                          classes=['Not Fraud', 'Fraud'],
                          normalize=use_normalized,
                          title=figure_title)

# XG Boost

In [None]:
# Confusion matrix of the true fraud flag against the thresholded XGBoost predictions.
xgb_cnf_matrix = confusion_matrix(data['isFraud'], data['xgboost_predicted'])

# One figure per variant: raw counts, then the row-normalised version.
xgb_class_names = ['Not Fraud', 'Fraud']

plt.figure()
plot_confusion_matrix(xgb_cnf_matrix, xgb_class_names, title='Xg Boost')

plt.figure()
plot_confusion_matrix(xgb_cnf_matrix, xgb_class_names, normalize=True,
                      title='Xg Boost - Normalized')

_____________________________________________

# **ROC Curve**

In [None]:
# from sklearn.metrics import roc_curve,auc, roc_auc_score, classification_report
# from sklearn.metrics import confusion_matrix

# **SAS**

In [None]:
def plot_roc_curve(FPR, TPR):
    """Draw the SAS ROC curve in orange over a dashed diagonal, then show the figure."""
    diagonal = [0, 1]
    plt.plot(FPR, TPR, label='ROC', color='orange')
    plt.plot(diagonal, diagonal, linestyle='--', color='darkblue')
    plt.title('Receiver Operating Characteristic (ROC) Curve\nSAS')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.show()


# ROC points for the SAS model, computed from its continuous scores.
FPR, TPR, thresholds = roc_curve(data['isFraud'], data['sas_pred'])
plot_roc_curve(FPR, TPR)


# **XG Boost**

In [None]:
def plot_roc_curve(fpr, tpr):
    """Draw the XGBoost ROC curve in blue over a dashed diagonal, then show the figure."""
    # Axis labels and title first; they are independent of the plotted lines.
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve \n XG Boost')

    plt.plot(fpr, tpr, label='ROC', color='blue')
    plt.plot([0, 1], [0, 1], linestyle='--', color='darkblue')

    plt.legend()
    plt.show()


# ROC points for the XGBoost model, computed from its continuous scores.
fpr, tpr, thresholds = roc_curve(data['isFraud'], data['xgboost_pred'])
plot_roc_curve(fpr, tpr)

# **ROC Layered**

In [None]:
# One wide figure that holds both models' ROC curves.
plt.figure(figsize=(10,6))

# ROC points from each model's continuous scores (upper-case: SAS, lower-case: XGBoost).
FPR, TPR, thresholds = roc_curve(data.isFraud, data.sas_pred)
fpr, tpr, thresholds = roc_curve(data.isFraud, data.xgboost_pred)

def plot_roc_curve(FPR, TPR, fpr, tpr):
    """
    Overlay the SAS and XGBoost ROC curves on the current figure and show it.

    Parameters
    ----------
    FPR, TPR : array-like
        False/true positive rates of the SAS model (drawn in orange).
    fpr, tpr : array-like
        False/true positive rates of the XGBoost model (drawn in blue).
    """
    plt.plot(fpr, tpr, color='blue', label='XG Boost')
    plt.plot(FPR, TPR, color = 'orange', label = "SAS")
    # Dashed diagonal drawn for reference.
    plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    # Title previously misspelled the model name as "XBG".
    plt.title('Receiver Operating Characteristic (ROC) Curve\n SAS & XGB')
    plt.legend(loc="lower right")
    plt.show()

plot_roc_curve(FPR, TPR, fpr, tpr)


_________________________________________

# **Results**

# SAS

In [None]:
# SAS evaluation summary: AUC, classification report and confusion matrix.
print("SAS Report\n")
# AUC is computed from the continuous SAS scores, the same input the ROC curve uses.
# Feeding it the 0/1 labels thresholded at 0.5 reduces the curve to a single
# operating point and reports a different number.
print('roc_auc_score: %0.3f'% roc_auc_score(data.isFraud, data.sas_pred))
print("----------------------------------------------------------------------")
# The classification report and confusion matrix need hard labels, so they keep sas_predicted.
print("SAS", classification_report(data.isFraud, data.sas_predicted))
print("----------------------------------------------------------------------")
print("Confusion matrix \n",confusion_matrix(data.isFraud, data.sas_predicted))

# XG Boost

In [None]:
# XGBoost evaluation summary: AUC, classification report and confusion matrix.
print("XG Boost Report\n")
# AUC is computed from the continuous XGBoost scores, the same input the ROC curve uses.
# Feeding it the 0/1 labels thresholded at 0.5 reduces the curve to a single
# operating point and reports a different number.
print('roc_auc_score: %0.3f'% roc_auc_score(data.isFraud, data.xgboost_pred))
print("----------------------------------------------------------------------")
# The classification report and confusion matrix need hard labels, so they keep xgboost_predicted.
print("XG Boost", classification_report(data.isFraud, data.xgboost_predicted))
print("----------------------------------------------------------------------")
print("Confusion matrix \n",confusion_matrix(data.isFraud, data.xgboost_predicted))

______________________________

# Density Plots

In [None]:
# Preview the frame before the next cell drops the label and identifier columns.
data.head()

In [None]:
# Remove the thresholded-label and identifier/index columns.
# NOTE: this rebinds `data`, so running the cell a second time raises KeyError.
columns_to_remove = ['sas_predicted', 'xgboost_predicted', 'TransactionID', 'ID', 'Unnamed: 0']
data = data.drop(columns=columns_to_remove)

# SAS
- Filter TP 
- Filter FN

In [None]:
# SAS true positives: actual fraud rows that SAS scored at or above 0.5.
sas_true_pos = data.loc[(data['isFraud'] == 1) & (data['sas_pred'] >= 0.5000000)]
print(sas_true_pos['isFraud'].value_counts())

In [None]:
# SAS False Negative: actual fraud rows that SAS scored below the 0.5 cut-off.
# A strict `< 0.5` is the exact complement of the `>= 0.5` true-positive rule.
# The former `<= 0.49999999999` dropped scores in (0.49999999999, 0.5) from both groups.
sas_false_neg = data[(data['sas_pred'] < 0.5) & (data['isFraud'] == 1 )]
print(sas_false_neg.isFraud.value_counts())

# XG Boost
- Filter TP
- Filter FN

In [None]:
# XGBoost true positives: actual fraud rows that XGBoost scored at or above 0.5.
xgb_hit_mask = (data['isFraud'] == 1) & (data['xgboost_pred'] >= 0.5000000)
xgb_true_pos = data[xgb_hit_mask]
print(xgb_true_pos['isFraud'].value_counts())

In [None]:
# XGBoost False Negative: actual fraud rows that XGBoost scored below the 0.5 cut-off.
# A strict `< 0.5` is the exact complement of the `>= 0.5` true-positive rule.
# The former `<= 0.49999999999` dropped scores in (0.49999999999, 0.5) from both groups.
xgb_false_neg = data[(data['xgboost_pred'] < 0.5) & (data['isFraud'] == 1 )]
print(xgb_false_neg.isFraud.value_counts())

# Plots
- XGB TP vs SAS FN
- SAS TP vs XGB FN

In [None]:
import seaborn as sns

In [None]:
# Score densities: XGBoost true positives (blue) against SAS false negatives (red).
plt.figure(figsize=(10, 6))
# `fill=True` replaces the deprecated `shade=True` keyword of seaborn.kdeplot.
XGB_TP = sns.kdeplot(xgb_true_pos['xgboost_pred'], fill=True, color="b", label='XGB TP')
SAS_FN = sns.kdeplot(sas_false_neg['sas_pred'], fill=True, color="r", label='SAS FN')
# Request the legend explicitly so the `label=` values are displayed.
plt.legend()
plt.title('XG-Boost True Positive &\n SAS False Negative')

In [None]:
# Score densities: XGBoost false negatives (blue) against SAS true positives (red).
plt.figure(figsize=(10, 6))
# `fill=True` replaces the deprecated `shade=True` keyword of seaborn.kdeplot.
XGB_FN = sns.kdeplot(xgb_false_neg['xgboost_pred'], fill=True, color="b", label='XGB FN')
SAS_TP = sns.kdeplot(sas_true_pos['sas_pred'], fill=True, color="r", label='SAS TP')
# Request the legend explicitly so the `label=` values are displayed.
plt.legend()
plt.title('XG-Boost False Negative &\n SAS True Positive')

In [None]:
# False Negatives Comparison
# Score densities of the fraud rows each model missed: XGBoost in blue, SAS in red.

# `fill=True` replaces the deprecated `shade=True` keyword of seaborn.kdeplot.
XGB_FN = sns.kdeplot(xgb_false_neg['xgboost_pred'], fill=True, color="b", label='XGB FN')
SAS_FN = sns.kdeplot(sas_false_neg['sas_pred'], fill=True, color="r", label='SAS FN')
# Request the legend explicitly so the `label=` values are displayed.
plt.legend()
plt.title('XG-Boost & SAS \nFalse Negatives')

In [None]:
# True Positives Comparison
# Score densities of the fraud rows each model caught: SAS in red, XGBoost in blue.

# `fill=True` replaces the deprecated `shade=True` keyword of seaborn.kdeplot.
SAS_TP = sns.kdeplot(sas_true_pos['sas_pred'], fill=True, color="r", label='SAS TP')
XGB_TP = sns.kdeplot(xgb_true_pos['xgboost_pred'], fill=True, color="b", label='XGB TP')
# Request the legend explicitly so the `label=` values are displayed.
plt.legend()
plt.title('XG-Boost & SAS\n True Positive')