### All imports in one place

In [25]:
import math
import pickle
import warnings
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from keras.models import model_from_json
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

### Ignoring warnings

In [26]:
warnings.filterwarnings('ignore')

### Loading testing dataset and labels

In [27]:
# Read the test features and the matching labels from their CSV files.
x_test, y_test = (
    pd.read_csv(csv_path)
    for csv_path in ('corpora/x_test.csv', 'corpora/y_test.csv')
)

### Replacing the type column values with integers

In [28]:
# Encode each transaction type as an integer code.
# The result is assigned back to the column on purpose: calling
# replace(..., inplace=True) on the Series returned by x_test['type'] is a
# chained in-place update, which pandas does not propagate to the DataFrame
# when Copy-on-Write is active (and the warning about it is suppressed above).
x_test['type'] = x_test['type'].replace({
    'CASH_OUT' : 1,
    'PAYMENT'  : 2,
    'CASH_IN'  : 3,
    'TRANSFER' : 4,
    'DEBIT'    : 5
})

### Replacing the letter prefix of nameOrig and nameDest with a digit so the IDs are numeric

In [29]:
# Swap the leading character of each account ID for a digit so the IDs can be
# parsed as numbers later. Done with vectorised string operations instead of a
# per-row Python loop with individual .loc writes.

# nameOrig: the first character is always replaced with '0'.
x_test['nameOrig'] = '0' + x_test['nameOrig'].str[1:]

# nameDest: a leading 'C' becomes '0', a leading 'M' becomes '1';
# any other value is left exactly as it was.
dest_names = x_test['nameDest']
dest_prefix = dest_names.str[0].map({'C': '0', 'M': '1'})
x_test['nameDest'] = (dest_prefix + dest_names.str[1:]).where(dest_prefix.notna(), dest_names)

### Type casting all the columns to float64

In [30]:
# astype returns a new frame, so keep the result; otherwise x_test is left
# uncast and only the displayed dtypes look converted.
x_test = x_test.astype('float64')
x_test.dtypes

step              float64
type              float64
amount            float64
nameOrig          float64
oldbalanceOrg     float64
newbalanceOrig    float64
nameDest          float64
oldbalanceDest    float64
newbalanceDest    float64
isFlaggedFraud    float64
dtype: object

### Scaling every column except type and isFlaggedFraud using the StandardScaler

In [32]:
# Standardise the listed columns; 'type' and 'isFlaggedFraud' are left untouched.
# NOTE(review): the scaler is fitted on the test split itself. Presumably the
# models were trained on features scaled with training-set statistics; confirm,
# and reuse the fitted training scaler here if one was saved.
columns_to_scale = [
    'step', 'amount',
    'nameOrig', 'oldbalanceOrg', 'newbalanceOrig',
    'nameDest', 'oldbalanceDest', 'newbalanceDest',
]
standardScaler = StandardScaler()
x_test[columns_to_scale] = standardScaler.fit_transform(
    x_test[columns_to_scale]
)

### Function to generate evaluation metrics

In [33]:
def evaluation_metrics(results, y_test):
    """Print the confusion matrix and summary metrics for binary predictions.

    Parameters
    ----------
    results : sequence
        Predicted class labels (0 or 1), in the same row order as ``y_test``.
    y_test : pandas.DataFrame
        Frame with an ``isFraud`` column holding the true labels (0 or 1).

    Returns
    -------
    None
        The metrics are printed, not returned.

    Raises
    ------
    ValueError
        If ``results`` and ``y_test`` do not have the same number of rows.

    Notes
    -----
    Predictions are paired with labels by position, so ``y_test`` does not
    need a default 0..n-1 index. Any ratio whose denominator is zero (for
    example precision when nothing was predicted positive) is reported as 0
    instead of raising ``ZeroDivisionError``.
    """
    actual_labels = [int(label) for label in y_test['isFraud']]
    predicted_labels = [int(label) for label in results]
    if len(actual_labels) != len(predicted_labels):
        raise ValueError(
            'results and y_test must have the same length, got '
            + str(len(predicted_labels)) + ' and ' + str(len(actual_labels))
        )

    # matrix[actual][predicted]; kept as plain ints so it prints as a nested list.
    matrix = [[0, 0], [0, 0]]
    for actual, predicted in zip(actual_labels, predicted_labels):
        matrix[actual][predicted] += 1

    TP = matrix[1][1]
    TN = matrix[0][0]
    FP = matrix[0][1]
    FN = matrix[1][0]

    def _ratio(numerator, denominator):
        # An undefined ratio (empty denominator) is reported as 0.
        return numerator / denominator if denominator else 0.0

    total = len(predicted_labels)
    accuracy = _ratio(TP + TN, total)
    misclassfication = _ratio(FP + FN, total)
    recall = _ratio(TP, TP + FN)        # share of actual positives found
    specificity = _ratio(TN, TN + FP)   # share of actual negatives found
    precision = _ratio(TP, TP + FP)     # share of predicted positives that are right
    f_score = 2 * _ratio(recall * precision, recall + precision)

    # Percentages are floored (not rounded), except the misclassification rate.
    print('Confusion Matrix: ', matrix)
    print('Accuracy: ' + str(math.floor(accuracy * 100)) + '%')
    print('Misclassfication Rate: ' + str(round(misclassfication * 100, 2)) + '%')
    print('True Positive Rate (Recall): ' + str(math.floor(recall * 100)) + '%')
    print('True Negative Rate (Specificity): ' + str(math.floor(specificity * 100)) + '%')
    print('Precision: ' + str(math.floor(precision * 100)) + '%')
    print('F1 Score: ' + str(math.floor(f_score * 100)) + '%')
    return

### KNN Testing

In [34]:
# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# model files that come from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open('models/knn_classifier.sav', 'rb') as model_file:
    knn_classifier = pickle.load(model_file)
results = knn_classifier.predict(x_test)
evaluation_metrics(results, y_test)

Confusion Matrix:  [[6591, 4], [49, 333]]
Accuracy: 99%
Misclassfication Rate: 0.76%
True Positive Rate (Recall): 87%
True Negative Rate (Specificity): 99%
Precision: 98%
F1 Score: 92%


### SVC Testing

In [35]:
# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# model files that come from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open('models/svc_classifier.sav', 'rb') as model_file:
    svc_classifier = pickle.load(model_file)
results = svc_classifier.predict(x_test)
evaluation_metrics(results, y_test)

Confusion Matrix:  [[6595, 0], [43, 339]]
Accuracy: 99%
Misclassfication Rate: 0.62%
True Positive Rate (Recall): 88%
True Negative Rate (Specificity): 100%
Precision: 100%
F1 Score: 94%


### DT Testing

In [36]:
# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# model files that come from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open('models/dt_classifier.sav', 'rb') as model_file:
    dt_classifier = pickle.load(model_file)
results = dt_classifier.predict(x_test)
evaluation_metrics(results, y_test)

Confusion Matrix:  [[6594, 1], [30, 352]]
Accuracy: 99%
Misclassfication Rate: 0.44%
True Positive Rate (Recall): 92%
True Negative Rate (Specificity): 99%
Precision: 99%
F1 Score: 95%


### Function to load Neural Network

In [37]:
def get_nn_model():
    """Load the saved neural network and return it compiled.

    The model is built with ``model_from_json`` from the text of
    ``models/neural_network.json``, its weights are loaded from
    ``models/neural_network.h5``, and it is compiled with the
    ``binary_crossentropy`` loss and the ``adam`` optimizer.

    Returns
    -------
    The compiled model object produced by ``model_from_json``.
    """
    # Read the JSON inside a context manager so the handle is closed even if
    # reading fails, and is not held open while the model is being built.
    with open('models/neural_network.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    model.load_weights('models/neural_network.h5')
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model

### Testing Neural Network model

In [38]:
# Convert the features to a NumPy array before handing them to the network.
x_test = pd.DataFrame(x_test).to_numpy()
model = get_nn_model()
results = model.predict(x_test, batch_size=200)

# Threshold the first output of every prediction row:
# below 0.5 becomes class 0, everything else class 1.
nn_results = [0 if score_row[0] < 0.5 else 1 for score_row in results]
evaluation_metrics(nn_results, y_test)

Confusion Matrix:  [[6594, 1], [30, 352]]
Accuracy: 99%
Misclassfication Rate: 0.44%
True Positive Rate (Recall): 92%
True Negative Rate (Specificity): 99%
Precision: 99%
F1 Score: 95%
