# Importing the Necessary Packages

In [10]:
import mlflow
import shap
import lime
from lime.lime_tabular import LimeTabularExplainer
import matplotlib.pyplot as plt
import os, sys
import warnings
warnings.filterwarnings('ignore')

# Adding the Scripts Directory to the Import Path

In [11]:
# Make the project's helper modules importable. '../scripts' is resolved
# against the current working directory, so this assumes the notebook is run
# from its own folder.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
scripts_dir = os.path.abspath('../scripts')
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

In [12]:
from preprocessing import load_data, feature_target_split, train_test_split_data
from model_training import random_forest_model, gradient_boosting_model, mlp_model, lstm_model
from mlops_utils import log_experiment_results

# Loading the Data

In [13]:
# Locate the input CSVs relative to the notebook rather than through a
# user-specific absolute path, so the notebook runs on any machine/checkout.
# Like the '../scripts' entry added to sys.path earlier, '../data' is resolved
# against the current working directory (assumes the notebook is run from the
# project's notebooks/ folder, next to data/).
DATA_DIR = os.path.abspath(os.path.join('..', 'data'))

filepath1 = os.path.join(DATA_DIR, 'scaled_creditcard_data.csv')  # credit card dataset
filepath2 = os.path.join(DATA_DIR, 'scaled_fraud_data.csv')       # fraud dataset

In [14]:
# Read both CSVs through the project's load_data helper (imported from
# preprocessing); its two return values are unpacked as the credit card
# dataset and the fraud dataset, in that order.
creditcard_data, fraud_data = load_data(filepath1, filepath2)

In [15]:
# Preview the first rows of the credit card dataset as a quick sanity check
# on the loaded columns and values.
creditcard_data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,-1.996583,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0.244964,0
1,-1.996583,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,-0.342475,0
2,-1.996562,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,1.160686,0
3,-1.996562,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0.140534,0
4,-1.996541,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,-0.073403,0


# 1. Working on Fraud Data Prediction using the Credit Card Dataset

## 1.1 Split the Credit Card Dataset into Features and Target

In [16]:
# Separate the predictors from the label; 'Class' is passed as the name of
# the target column.
# NOTE(review): the head() preview of creditcard_data shows an 'Unnamed: 0'
# column (looks like a saved row index) — confirm feature_target_split or
# load_data drops it so it is not fed to the models as a feature.
X_credit, y_credit = feature_target_split(creditcard_data, 'Class')

## 1.2 Split the Credit Card Data into Training and Test Sets

In [17]:
# Split features and target into training and test sets with the project's
# train_test_split_data helper.
# NOTE(review): the split ratio, random seed and any stratification are
# decided inside the helper and are not visible here — confirm they are fixed
# so the model comparison is reproducible.
X_train_credit, X_test_credit, y_train_credit, y_test_credit = train_test_split_data(X_credit, y_credit)

## 1.3 Start the MLFlow Experiment

In [18]:
# Make "Fraud Detection Model Comparison" the active MLflow experiment
# (MLflow creates it if it does not exist yet). Presumably the
# log_experiment_results calls below record their runs under it — verify in
# mlops_utils.
mlflow.set_experiment("Fraud Detection Model Comparison")

<Experiment: artifact_location='file:///c:/Users/Blen/OneDrive/Desktop/10Academy/FraudDetection/notebooks/mlruns/883479795960385366', creation_time=1729618962326, experiment_id='883479795960385366', last_update_time=1729618962326, lifecycle_stage='active', name='Fraud Detection Model Comparison', tags={}>

# 2. Train Models Based on the Credit Card Dataset

## 2.1 Random Forest Model on Credit Card Data

In [19]:
# Fit the random forest through the project helper, then hand its test-set
# predictions to the experiment logger together with the true labels.
print("Training Random Forest on Credit Card Data...")
rf_model_credit = random_forest_model(
    X_train_credit,
    y_train_credit,
    X_test_credit,
    y_test_credit,
)

rf_predictions_credit = rf_model_credit.predict(X_test_credit)
log_experiment_results('Random Forest Credit Card', y_test_credit, rf_predictions_credit)

Training Random Forest on Credit Card Data...


KeyboardInterrupt: 

## 2.2 Gradient Boosting Model on Credit Card Data

In [None]:
# Fit the gradient boosting model through the project helper, then hand its
# test-set predictions to the experiment logger together with the true labels.
print("Training Gradient Boosting on Credit Card Data...")
gb_model_credit = gradient_boosting_model(
    X_train_credit,
    y_train_credit,
    X_test_credit,
    y_test_credit,
)

gb_predictions_credit = gb_model_credit.predict(X_test_credit)
log_experiment_results('Gradient Boosting Credit Card', y_test_credit, gb_predictions_credit)

## 2.3 Multi-Layer Perceptron (MLP) Model on Credit Card Data

In [None]:
# Fit the multi-layer perceptron through the project helper, then hand its
# test-set predictions to the experiment logger together with the true labels.
print("Training MLP on Credit Card Data...")
mlp_model_credit = mlp_model(
    X_train_credit,
    y_train_credit,
    X_test_credit,
    y_test_credit,
)

mlp_predictions_credit = mlp_model_credit.predict(X_test_credit)
log_experiment_results('MLP Credit Card', y_test_credit, mlp_predictions_credit)

## 2.4 LSTM Model on Credit Card Data

In [None]:
import numpy as np

# Check if the data contains any non-numeric values by reporting the element
# dtype of each split. np.asarray(...).dtype is used instead of `.dtype` so
# the check works for both NumPy arrays and pandas DataFrames (the return
# type of the splitting helper is not visible in this notebook).
print("Data type of X_train_credit:", np.asarray(X_train_credit).dtype)
print("Data type of X_test_credit:", np.asarray(X_test_credit).dtype)

# Ensure the data is converted to float32 for TensorFlow.
# Fix: the converted test split used to be assigned to a misspelled name
# (X_test_credt), which left X_test_credit itself unconverted.
X_train_credit = X_train_credit.astype(np.float32)
X_test_credit = X_test_credit.astype(np.float32)

print("Training LSTM on Credit Data...")
lstm_model_credit = lstm_model(X_train_credit, y_train_credit, X_test_credit, y_test_credit)

# The trained model is queried with a 3-D input of shape
# (samples, 1, features). np.asarray makes the reshape valid even if the split
# is a DataFrame, which has no .reshape method.
n_samples, n_features = X_test_credit.shape[0], X_test_credit.shape[1]
X_test_credit_seq = np.asarray(X_test_credit).reshape((n_samples, 1, n_features))

# Threshold the model outputs at 0.5 to obtain 0/1 class labels.
lstm_predictions_credit = (lstm_model_credit.predict(X_test_credit_seq) > 0.5).astype("int32")

# Log experiment results
log_experiment_results('LSTM Credit', y_test_credit, lstm_predictions_credit)