# Credit Card Fraud Detection Models Training

**Models**
* Logistic Regression
* Decision Tree
* Random Forest

In [1]:
# Import necessary libraries (standard library first, then third-party)
import os
import sys
import warnings

import numpy as np
import pandas as pd

# NOTE(review): this hides every warning for the rest of the session,
# including convergence and deprecation warnings raised during training.
warnings.filterwarnings('ignore')

# Make the parent of the current working directory importable so the
# `scripts` package resolves. Guarded so re-running this cell does not keep
# appending duplicate entries to sys.path.
PROJECT_ROOT = os.path.abspath('..')
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Import the training helpers by name (instead of `import *`) so it is clear
# where every identifier used below comes from.
from scripts.ml_model_training import (
    DecisionTreeModel,
    LogisticRegressionModel,
    RandomForestModel,
    compare_model,
    get_train_test_split,
    load_data,
)

In [2]:
# Load the credit card dataset from the CSV under ../data
filepath = '../data/creditcard.csv'
df = load_data(filepath)

# Separate the predictors from the 'Class' label column
X = df.drop(columns=['Class'])
y = df['Class']

# Partition features and labels into train and test portions
X_train, X_test, y_train, y_test = get_train_test_split(X, y)

# Display the shape of each split as the cell's output
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((227845, 30), (56962, 30), (227845,), (56962,))

## Training on Logistic Regression Model

In [3]:
# Create the logistic regression wrapper and fit it on the training split
lr_model = LogisticRegressionModel()
lr_model.fit(X_train, y_train)

# Predict on the held-out split, then evaluate those predictions against the
# true test labels; the evaluation result is kept for the model comparison
y_pred_lr = lr_model.predict(X_test)
lr_metrics = lr_model.evaluate(y_test, y_pred_lr)


Fitting 5 folds for each of 24 candidates, totalling 120 fits
Logistic Regression Evaluation
Accuracy: 0.9991
Precision: 0.8636
Recall: 0.5816
F1: 0.6951
Roc_auc: 0.7907
Confusion Matrix:
[[56855     9]
 [   41    57]]


## Training on Decision Tree Model

In [None]:
# Create the decision tree wrapper and fit it on the training split
# NOTE(review): the saved output of this cell shows a 360-fit search ending in
# KeyboardInterrupt, so `dt_metrics` only exists if this cell runs to completion.
dt_model = DecisionTreeModel()
dt_model.fit(X_train, y_train)

# Predict on the held-out split, then evaluate those predictions against the
# true test labels; the evaluation result is kept for the model comparison
y_pred_dt = dt_model.predict(X_test)
dt_metrics = dt_model.evaluate(y_test, y_pred_dt)

Fitting 5 folds for each of 72 candidates, totalling 360 fits


KeyboardInterrupt: 

: 

## Training on Random Forest Model

In [None]:
# Create the random forest wrapper and fit it on the training split
# NOTE(review): the saved output only shows the start of a 1440-fit search and
# no metrics, so this cell appears not to have finished — confirm before
# relying on `rf_metrics`.
rf_model = RandomForestModel()
rf_model.fit(X_train, y_train)

# Predict on the held-out split, then evaluate those predictions against the
# true test labels; the evaluation result is kept for the model comparison
y_pred_rf = rf_model.predict(X_test)
rf_metrics = rf_model.evaluate(y_test, y_pred_rf)

Fitting 5 folds for each of 288 candidates, totalling 1440 fits


## Model Comparison

In [None]:
# Map each model's display name to the result of its evaluate() call.
# All three training cells must have completed for these names to exist.
model_metrics = dict((
    ('Logistic Regression', lr_metrics),
    ('Decision Tree', dt_metrics),
    ('Random Forest', rf_metrics),
))

# Pass the collected metrics to the project's comparison helper
model_comparison = compare_model(model_metrics)