In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM, GRU, Dropout
import mlflow
import mlflow.tensorflow

In [2]:
# Load preprocessed data
pre_credit_data = pd.read_csv('../data/preprocessed_creditcard_data.csv')
pre_fraud_data_df = pd.read_csv('../data/preprocessed_fraud_data.csv')

In [3]:
#Define functions for model building and evaluation

def build_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    return model

In [4]:
def build_rnn_model(input_shape):
    model = Sequential()
    model.add(GRU(units=64, return_sequences=True, input_shape=input_shape))
    model.add(GRU(units=32))
    model.add(Dense(1, activation='sigmoid'))
    return model

In [5]:
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=64, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(units=32))
    model.add(Dense(1, activation='sigmoid'))
    return model

In [6]:
def train_and_evaluate_model(model, X_train, y_train, X_test, y_test, model_name, experiment_name):
    # MLflow tracking
    mlflow.set_tracking_uri('http://localhost:5000')  # Set your MLflow tracking server
    mlflow.set_experiment(experiment_name)
    with mlflow.start_run():
        mlflow.log_param('model', model_name)

        # Compile the model
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        # Train the model
        history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

        # Evaluate the model
        y_pred = model.predict(X_test)
        y_pred_binary = (y_pred > 0.5).astype(int)

        accuracy = accuracy_score(y_test, y_pred_binary)
        precision = precision_score(y_test, y_pred_binary)
        recall = recall_score(y_test, y_pred_binary)
        f1 = f1_score(y_test, y_pred_binary)
        roc_auc = roc_auc_score(y_test, y_pred)

        # Log metrics to MLflow
        mlflow.log_metric('accuracy', accuracy)
        mlflow.log_metric('precision', precision)
        mlflow.log_metric('recall', recall)
        mlflow.log_metric('f1_score', f1)
        mlflow.log_metric('roc_auc', roc_auc)

        # Log model to MLflow
        mlflow.tensorflow.log_model(model, artifact_path='model')

        print(f'Model: {model_name}')
        print(f'Accuracy: {accuracy:.4f}')
        print(f'Precision: {precision:.4f}')
        print(f'Recall: {recall:.4f}')
        print(f'F1 Score: {f1:.4f}')
        print(f'ROC AUC: {roc_auc:.4f}')

In [7]:
# Data Preparation

# Credit Card Data
X_credit = pre_credit_data.drop('Class', axis=1)
y_credit = pre_credit_data['Class']
X_credit_train, X_credit_test, y_credit_train, y_credit_test = train_test_split(X_credit, y_credit, test_size=0.2, random_state=42)

In [10]:
print("Credit Card Data - LSTM")

Credit Card Data - LSTM


In [9]:
# Assuming X_credit_train and X_credit_test are 2D, reshape to 3D
X_credit_train = np.reshape(X_credit_train, (X_credit_train.shape[0], 1, X_credit_train.shape[1]))
X_credit_test = np.reshape(X_credit_test, (X_credit_test.shape[0], 1, X_credit_test.shape[1]))

# Build and train the LSTM model
lstm_model_credit = build_lstm_model(X_credit_train.shape[1:])
train_and_evaluate_model(lstm_model_credit, X_credit_train, y_credit_train, X_credit_test, y_credit_test, 'LSTM', 'Credit Card Fraud Detection')

2024/10/22 02:53:20 INFO mlflow.tracking.fluent: Experiment with name 'Credit Card Fraud Detection' does not exist. Creating a new experiment.


Epoch 1/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 6ms/step - accuracy: 0.9969 - loss: 0.0318 - val_accuracy: 0.9984 - val_loss: 0.0119
Epoch 2/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 6ms/step - accuracy: 0.9982 - loss: 0.0133 - val_accuracy: 0.9984 - val_loss: 0.0118
Epoch 3/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 5ms/step - accuracy: 0.9983 - loss: 0.0123 - val_accuracy: 0.9984 - val_loss: 0.0118
Epoch 4/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 6ms/step - accuracy: 0.9983 - loss: 0.0125 - val_accuracy: 0.9984 - val_loss: 0.0119
Epoch 5/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 6ms/step - accuracy: 0.9983 - loss: 0.0127 - val_accuracy: 0.9984 - val_loss: 0.0119
[1m1774/1774[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
2024/10/22 02:57:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run sneaky-carp-108 at: http://localhost:5000/#/experiments/393830309254537149/runs/a5d73960e3cc445ebf7a1ecdf65f3416.
2024/10/22 02:57:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/393830309254537149.


Model: LSTM
Accuracy: 0.9984
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
ROC AUC: 0.4995


In [11]:
print("Credit Card Data - RNN")
rnn_model_credit = build_rnn_model(X_credit_train.shape[1:])
train_and_evaluate_model(rnn_model_credit, X_credit_train, y_credit_train, X_credit_test, y_credit_test, 'RNN', 'Credit Card Fraud Detection')

Credit Card Data - RNN


  super().__init__(**kwargs)


Epoch 1/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 7ms/step - accuracy: 0.9971 - loss: 0.0195 - val_accuracy: 0.9984 - val_loss: 0.0118
Epoch 2/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 7ms/step - accuracy: 0.9982 - loss: 0.0135 - val_accuracy: 0.9984 - val_loss: 0.0120
Epoch 3/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 6ms/step - accuracy: 0.9982 - loss: 0.0130 - val_accuracy: 0.9984 - val_loss: 0.0119
Epoch 4/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 6ms/step - accuracy: 0.9983 - loss: 0.0126 - val_accuracy: 0.9984 - val_loss: 0.0118
Epoch 5/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 8ms/step - accuracy: 0.9983 - loss: 0.0128 - val_accuracy: 0.9984 - val_loss: 0.0118
[1m1774/1774[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
2024/10/22 03:02:41 INFO mlflow.tracking._tracking_service.client: 🏃 View run respected-grub-505 at: http://localhost:5000/#/experiments/393830309254537149/runs/e3afd85ebece49028d39d492485bd42b.
2024/10/22 03:02:41 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/393830309254537149.


Model: RNN
Accuracy: 0.9984
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
ROC AUC: 0.5003


In [15]:
def build_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=1, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=1))  # Adjust pool size to match input timestep
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [16]:
# Credit Card Data
print("Credit Card Data - CNN")
cnn_model_credit = build_cnn_model(X_credit_train.shape[1:])
train_and_evaluate_model(cnn_model_credit, X_credit_train, y_credit_train, X_credit_test, y_credit_test, 'CNN', 'Credit Card Fraud Detection')

Credit Card Data - CNN
Epoch 1/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 5ms/step - accuracy: 0.9961 - loss: 10.0486 - val_accuracy: 0.9983 - val_loss: 11.9084
Epoch 2/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 4ms/step - accuracy: 0.9964 - loss: 7.2604 - val_accuracy: 0.9983 - val_loss: 13.9744
Epoch 3/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 5ms/step - accuracy: 0.9965 - loss: 7.2621 - val_accuracy: 0.9983 - val_loss: 12.9085
Epoch 4/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 5ms/step - accuracy: 0.9967 - loss: 5.4645 - val_accuracy: 0.9984 - val_loss: 11.9979
Epoch 5/5
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 5ms/step - accuracy: 0.9968 - loss: 4.4849 - val_accuracy: 0.9982 - val_loss: 2.1769
[1m1774/1774[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step


2024/10/22 03:15:04 INFO mlflow.tracking._tracking_service.client: 🏃 View run adventurous-lark-47 at: http://localhost:5000/#/experiments/393830309254537149/runs/f09c9cd28e7b494c9560c2415a8f22fe.
2024/10/22 03:15:04 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/393830309254537149.


Model: CNN
Accuracy: 0.9982
Precision: 0.3409
Recall: 0.1667
F1 Score: 0.2239
ROC AUC: 0.5778
