In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the fraud dataset from a CSV file resolved relative to the notebook's working directory.
# `data` is read as a module-level global by later cells (e.g. test_model).
data = pd.read_csv('fraud_pred.csv')

In [None]:
# Preview the first rows to sanity-check the loaded columns and values.
data.head()

In [None]:
# Summary statistics (count, mean, std, min/max, quartiles) to gauge the scale and skew of each column.
data.describe()

In [None]:
def plot_dist(df, features=None):
    """Plot the distribution of each requested column of ``df`` on a grid of axes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns to plot.
    features : sequence of str, optional
        Column names to plot. When omitted or empty, every numeric column of
        ``df`` is plotted instead.

    Raises
    ------
    ValueError
        If there is no column to plot.

    Notes
    -----
    The previous implementation ignored ``features`` and iterated over a
    notebook-level global, and its fixed 2x3 grid failed for more than six
    columns. The grid now grows with the number of columns (three per row),
    which still yields a 2x3, 20x10 inch figure for six columns.
    """
    columns = list(features) if features is not None else []
    if not columns:
        columns = list(df.select_dtypes(include='number').columns)
    if not columns:
        raise ValueError('plot_dist: no columns to plot')

    n_cols = 3
    n_rows = (len(columns) + n_cols - 1) // n_cols  # ceiling division without importing math
    # squeeze=False keeps `ax` two-dimensional even for a single row, so flatten() always works.
    fig, ax = plt.subplots(n_rows, n_cols, figsize=(20, 5 * n_rows), squeeze=False)
    ax = ax.flatten()

    for axis, column in zip(ax, columns):
        # NOTE: distplot is deprecated in seaborn >= 0.11 (histplot/displot replace it);
        # it is kept here so the notebook keeps working on the seaborn version it was written for.
        sns.distplot(df[column], ax=axis)

    # Hide the unused trailing axes when the column count is not a multiple of n_cols.
    for axis in ax[len(columns):]:
        axis.set_visible(False)

In [None]:
# Columns whose distributions are plotted here; the names listed in the trailing comment are not included.
# `pred_clms` is also read as a module-level global by later cells, so this cell must run before them.
pred_clms = ['step', 'amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest'] # 'CASH_IN', 'CASH_OUT', 'DEBIT', 'PAYMENT', 'TRANSFER'
plot_dist(data, pred_clms)

In [None]:
from sklearn.model_selection import train_test_split
def split(data, target, test_size=0.2, shuffle=True, random_state=42):
    """Separate ``target`` from the remaining columns and make a train/test split.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both the feature columns and the ``target`` column.
    target : str
        Name of the label column.
    test_size, shuffle, random_state
        Forwarded unchanged to ``train_test_split``.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``.
    """
    labels = data[target]
    predictors = data.drop(columns=[target])
    parts = train_test_split(
        predictors,
        labels,
        test_size=test_size,
        shuffle=shuffle,
        random_state=random_state,
    )
    # train_test_split hands back a list; unpack so the caller receives a 4-tuple.
    train_x, test_x, train_y, test_y = parts
    return train_x, test_x, train_y, test_y

In [None]:
def execute_model(model, x_train, x_test, y_train, y_test):
    """Fit ``model`` on the training split and print its score on the test split.

    ``model`` only needs ``fit(x, y)`` and ``score(x, y)`` methods. The function
    returns nothing; the score is reported on stdout.
    """
    model.fit(x_train, y_train)
    held_out_score = model.score(x_test, y_test)
    print('Test score : ', held_out_score)

In [None]:
from sklearn.preprocessing import StandardScaler

def scale(x_train, x_test):
    """Standardise both splits using statistics learned from the training split only.

    Parameters
    ----------
    x_train, x_test : array-like
        Feature matrices for the training and test splits.

    Returns
    -------
    tuple
        ``(x_train_scaled, x_test_scaled)``.

    Notes
    -----
    The scaler is fitted on ``x_train`` and merely applied to ``x_test``.
    Re-fitting on the test split (as the previous ``fit_transform`` call did)
    scales the two splits with different means/variances and leaks test-set
    statistics into the evaluation.
    """
    scaler = StandardScaler()
    x_train_scaled = scaler.fit_transform(x_train)
    x_test_scaled = scaler.transform(x_test)  # transform only: reuse the training mean/std
    return x_train_scaled, x_test_scaled

In [None]:
from scipy.special import boxcox1p


def _evaluate_variant(model, frame, label, target, test_size):
    """Split ``frame`` and print ``model``'s test score before and after scaling."""
    x_train, x_test, y_train, y_test = split(frame, target, test_size=test_size)

    print(label)
    print("Before scaling: ")
    execute_model(model, x_train, x_test, y_train, y_test)

    x_train, x_test = scale(x_train, x_test)
    print("After scaling: ")
    execute_model(model, x_train, x_test, y_train, y_test)


def test_model(model, df=None, features=None, target='isFraud', test_size=0.3):
    """Score ``model`` on several transformed versions of the dataset.

    For each variant (raw, log, cube root, Box-Cox) the data is split, the
    model is fitted and scored on the unscaled features, and then fitted and
    scored again on standardised features. Results are printed; nothing is
    returned.

    Parameters
    ----------
    model : estimator
        Object with ``fit`` and ``score`` methods; it is re-fitted for every run.
    df : pandas.DataFrame, optional
        Dataset to evaluate on. Defaults to the notebook-level ``data``.
    features : sequence of str, optional
        Columns the transforms are applied to. Defaults to the notebook-level
        ``pred_clms``.
    target : str, optional
        Name of the label column.
    test_size : float, optional
        Fraction of rows held out for testing in every variant.
    """
    if df is None:
        df = data
    if features is None:
        features = pred_clms
    features = list(features)

    variants = [
        ("Raw Data", None),
        # log1p replaces the former masked np.log: zeros map to 0 instead of being
        # masked (which ends up as missing values once stored in the frame), and it
        # matches the "1p" convention of the Box-Cox variant below.
        ("Log Normal", np.log1p),
        # np.cbrt is the real cube root; `x ** (1/3)` yields NaN for negative values.
        ("Cube root", np.cbrt),
        ("BoxCox", lambda column: boxcox1p(column, 0.001)),
    ]

    for position, (label, transform) in enumerate(variants):
        if position:
            print()  # blank line between variants, none after the last one
        if transform is None:
            frame = df
        else:
            # Work on a copy so the caller's frame is never modified.
            frame = df.copy()
            for column in features:
                frame[column] = transform(frame[column])
        _evaluate_variant(model, frame, label, target, test_size)

In [None]:
from sklearn.model_selection import GridSearchCV

def grid_search(model, params, x_train, y_train):
    """Run a cross-validated grid search and print the winning configuration.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``GridSearchCV``.
    params : dict or list of dict
        Parameter grid handed to ``GridSearchCV``.
    x_train, y_train : array-like
        Data the search is fitted on.

    Prints the best parameter combination followed by its best score;
    returns nothing.
    """
    search = GridSearchCV(model, params)
    search.fit(x_train, y_train)
    for attribute in ('best_params_', 'best_score_'):
        print(getattr(search, attribute))

# KNN

In [None]:
# k-nearest neighbours with default hyperparameters, run through every
# transform/scaling variant that test_model evaluates.
from sklearn.neighbors import KNeighborsClassifier
test_model(KNeighborsClassifier())

# Logistic Regression

In [None]:
# Logistic regression with default hyperparameters, evaluated via test_model.
# NOTE(review): convergence warnings on the unscaled runs would be hidden by the
# warnings.filterwarnings('ignore') call at the top of the notebook — confirm the fits converge.
from sklearn.linear_model import LogisticRegression
test_model(LogisticRegression())

# Naive Bayes

In [None]:
# Gaussian naive Bayes with default hyperparameters, evaluated via test_model.
from sklearn.naive_bayes import GaussianNB
test_model(GaussianNB())

In [None]:
# Bernoulli naive Bayes with default hyperparameters, evaluated via test_model.
# NOTE(review): BernoulliNB binarizes its inputs (default threshold 0.0), so the
# continuous features are presumably reduced to 0/1 here — confirm this is intended.
from sklearn.naive_bayes import BernoulliNB
test_model(BernoulliNB())

# SVM

In [None]:
# Kernel support-vector classifier with default hyperparameters, evaluated via test_model.
# NOTE(review): test_model fits the model eight times; kernel SVC training may be slow
# on a large dataset — confirm the runtime is acceptable.
from sklearn.svm import SVC
test_model(SVC())

In [None]:
# Linear support-vector classifier with default hyperparameters, evaluated via test_model.
from sklearn.svm import LinearSVC
test_model(LinearSVC())

In [None]:
# Nu-support-vector classifier with default hyperparameters, evaluated via test_model.
# NOTE(review): the default nu may be infeasible when the classes are strongly
# imbalanced — verify that this cell actually runs on this dataset.
from sklearn.svm import NuSVC
test_model(NuSVC())