In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, cross_val_score
from sklearn import metrics
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import resample

In [2]:
data_ss = pd.read_csv('Data/data_ss.csv',index_col=0)
data_rs = pd.read_csv('Data/data_rs.csv',index_col=0)
data_mms = pd.read_csv('Data/data_mms.csv',index_col=0)
data = pd.read_csv('Data/Part3_data.csv',index_col=0)
data_dic = {"Standard Scaler" : data_ss,"Robust Scaler": data_rs,"MinMax Scaler" : data_mms}

In [3]:
def split_data(data,test_size=0.3,random_state = 101):
    data, validation_df  = train_test_split(data,test_size=test_size,random_state=random_state)
    X_train, X_test, y_train, y_test = train_test_split(data.drop('FraudFound_P',axis = 1),data['FraudFound_P'],test_size=test_size,random_state=random_state)
    return validation_df,X_train, X_test, y_train, y_test

In [4]:
#predictions are based on probablity with threshold=0.5
def eval_result(model, X_test, y_test):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        pred = model.predict(X_test)
        print(classification_report(y_test, pred, target_names = ['Not Fraud', 'Fraud']))
        display(pd.DataFrame(confusion_matrix(y_test, pred),columns = ['Predicted Not Fraud', 'Predicted Fraud'],index = ['Not Fraud', 'Fraud']))
        
#predictions are based on probablity with a user defined threshold. if threshold= 0.7, the probility of class 0 should be 
#more than 0.7 to assign 0 to the prediction. so the desicions are biased to have more class 1 in the predictions.
def eval_result_with_threshold(model, X_test, y_test,threshold=0.5):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        probebilities = model.predict_proba(X_test)
        pred = []
        for probs in probebilities :
            if probs[0] <= threshold :
                pred.append(1)
            else :
                pred.append(0)
        print(classification_report(y_test, pred, target_names = ['Not Fraud', 'Fraud']))
        display(pd.DataFrame(confusion_matrix(y_test, pred),columns = ['Predicted Not Fraud', 'Predicted Fraud'],index = ['Not Fraud', 'Fraud']))        

### Logistic Regression

In [5]:
for name,data in data_dic.items() :
    validation_df,X_train, X_test, y_train, y_test = split_data(data)
    lgmodel = LogisticRegression(solver='lbfgs',n_jobs = -1,class_weight="balanced")
    lgmodel.fit(X_train, y_train)
    print("\n***************************************************************************************")
    print("                                       {}".format(name))
    print("***************************************************************************************\n\n")
    eval_result(lgmodel, X_test, y_test)


***************************************************************************************
                                       Standard Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.98      0.66      0.79      2264
       Fraud       0.15      0.84      0.26       162

    accuracy                           0.67      2426
   macro avg       0.57      0.75      0.52      2426
weighted avg       0.93      0.67      0.76      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,1499,765
Fraud,26,136



***************************************************************************************
                                       Robust Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.98      0.66      0.79      2264
       Fraud       0.15      0.83      0.25       162

    accuracy                           0.67      2426
   macro avg       0.57      0.74      0.52      2426
weighted avg       0.93      0.67      0.75      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,1486,778
Fraud,27,135



***************************************************************************************
                                       MinMax Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.98      0.66      0.79      2264
       Fraud       0.15      0.83      0.25       162

    accuracy                           0.67      2426
   macro avg       0.57      0.75      0.52      2426
weighted avg       0.93      0.67      0.75      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,1497,767
Fraud,27,135


### Decision Tree

In [6]:
for name,data in data_dic.items() :
    validation_df,X_train, X_test, y_train, y_test = split_data(data)
    dtc = DecisionTreeClassifier()
    dtc.fit(X_train, y_train)
    print("\n***************************************************************************************")
    print("                                       {}".format(name))
    print("***************************************************************************************\n\n")
    eval_result(dtc, X_test, y_test)


***************************************************************************************
                                       Standard Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.94      0.94      0.94      2264
       Fraud       0.20      0.20      0.20       162

    accuracy                           0.89      2426
   macro avg       0.57      0.57      0.57      2426
weighted avg       0.89      0.89      0.89      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,2133,131
Fraud,129,33



***************************************************************************************
                                       Robust Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.94      0.94      0.94      2264
       Fraud       0.20      0.20      0.20       162

    accuracy                           0.89      2426
   macro avg       0.57      0.57      0.57      2426
weighted avg       0.89      0.89      0.89      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,2135,129
Fraud,130,32



***************************************************************************************
                                       MinMax Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.94      0.95      0.94      2264
       Fraud       0.21      0.20      0.21       162

    accuracy                           0.90      2426
   macro avg       0.58      0.57      0.58      2426
weighted avg       0.89      0.90      0.90      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,2141,123
Fraud,129,33


### XGB Classifier

In [7]:
for name,data in data_dic.items() :
    validation_df,X_train, X_test, y_train, y_test = split_data(data)
    xgbr = XGBClassifier(n_jobs = -1,scale_pos_weight = 10,eval_metric = 'logloss')
    xgbr.fit(X_train, y_train)
    print("\n***************************************************************************************")
    print("                                       {}".format(name))
    print("***************************************************************************************\n\n")
    eval_result(xgbr, X_test, y_test)


***************************************************************************************
                                       Standard Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.95      0.97      0.96      2264
       Fraud       0.33      0.23      0.27       162

    accuracy                           0.92      2426
   macro avg       0.64      0.60      0.61      2426
weighted avg       0.91      0.92      0.91      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,2190,74
Fraud,125,37



***************************************************************************************
                                       Robust Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.94      0.97      0.96      2264
       Fraud       0.30      0.20      0.24       162

    accuracy                           0.92      2426
   macro avg       0.62      0.58      0.60      2426
weighted avg       0.90      0.92      0.91      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,2189,75
Fraud,130,32



***************************************************************************************
                                       MinMax Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.94      0.97      0.95      2264
       Fraud       0.29      0.20      0.24       162

    accuracy                           0.91      2426
   macro avg       0.62      0.58      0.60      2426
weighted avg       0.90      0.91      0.91      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,2185,79
Fraud,129,33


### MLP Classifier

I realized by changing the decision threshold, we can get a better F-score for MLP Classifier

In [8]:
for name,data in data_dic.items() :
    validation_df,X_train, X_test, y_train, y_test = split_data(data)
    mlpc = MLPClassifier(hidden_layer_sizes=20,learning_rate="adaptive",max_iter=1000)
    mlpc.fit(X_train, y_train)
    print("\n***************************************************************************************")
    print("                                       {}".format(name))
    print("***************************************************************************************\n\n")
    eval_result_with_threshold(mlpc, X_test, y_test,threshold=0.9)


***************************************************************************************
                                       Standard Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.96      0.87      0.91      2264
       Fraud       0.19      0.43      0.27       162

    accuracy                           0.84      2426
   macro avg       0.57      0.65      0.59      2426
weighted avg       0.90      0.84      0.87      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,1975,289
Fraud,93,69



***************************************************************************************
                                       Robust Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.96      0.81      0.88      2264
       Fraud       0.18      0.57      0.27       162

    accuracy                           0.79      2426
   macro avg       0.57      0.69      0.57      2426
weighted avg       0.91      0.79      0.84      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,1832,432
Fraud,70,92



***************************************************************************************
                                       MinMax Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.96      0.81      0.88      2264
       Fraud       0.17      0.55      0.26       162

    accuracy                           0.79      2426
   macro avg       0.57      0.68      0.57      2426
weighted avg       0.91      0.79      0.84      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,1835,429
Fraud,73,89


### Random Forest Classifier

I realized by changing the decision threshold, we can get a better F-score for Random Forest Classifier

In [13]:
for name,data in data_dic.items() :
    validation_df,X_train, X_test, y_train, y_test = split_data(data)
    rfc = RandomForestClassifier(class_weight="balanced_subsample")
    rfc.fit(X_train, y_train)
    print("\n***************************************************************************************")
    print("                                       {}".format(name))
    print("***************************************************************************************\n\n")
    eval_result_with_threshold(rfc, X_test, y_test,threshold=0.8)


***************************************************************************************
                                       Standard Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.95      0.96      0.95      2264
       Fraud       0.31      0.25      0.28       162

    accuracy                           0.91      2426
   macro avg       0.63      0.60      0.61      2426
weighted avg       0.90      0.91      0.91      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,2176,88
Fraud,122,40



***************************************************************************************
                                       Robust Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.95      0.96      0.95      2264
       Fraud       0.30      0.24      0.27       162

    accuracy                           0.91      2426
   macro avg       0.62      0.60      0.61      2426
weighted avg       0.90      0.91      0.91      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,2174,90
Fraud,123,39



***************************************************************************************
                                       MinMax Scaler
***************************************************************************************


              precision    recall  f1-score   support

   Not Fraud       0.94      0.96      0.95      2264
       Fraud       0.28      0.20      0.24       162

    accuracy                           0.91      2426
   macro avg       0.61      0.58      0.59      2426
weighted avg       0.90      0.91      0.91      2426



Unnamed: 0,Predicted Not Fraud,Predicted Fraud
Not Fraud,2179,85
Fraud,129,33


### Results

- Overall **standard scaler** showed a better average performance than the other normalization methods.
<br>
- The best F-score for **Not Fraud** category is **0.96** and the best F-score for **Fraud** category is **0.28**. It could be because of the bias in the data where we had much less fraudulent data than non fraudulent data.
<br>
- The best F-score macro avg is **0.61** for **XGBosst** and **RandomForestClassifier**. with **0.92** and **0.91** accuracy respectively.
<br>
- Logistic Regression Classifier has significantly better Recal macro Avg than the other algorithms. That means this Alg tries to predict as much as Positive cases as possible even at the cost of sacrificing the precision (have more Faluse Psotive cases)
<br>
- Based on the priority of the company if they want to have better precision or recall we can tune the hyperparameters of algorithms to make them the best posiible for either precision or recall. In this notebook I tried to focus on F-score rather than precision or recall individuslly.
