# Task 2 - Model Building and Training

**Loading the datasets for modeling**

In [1]:
# Import necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os, sys
# Add the 'scripts' directory to the Python path for module imports
sys.path.append(os.path.abspath(os.path.join('..', 'scripts')))

# Set max rows and columns to display
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

# Configure logging
from logger import SetupLogger
# Assuming this class is defined in scripts/
from data_processor import DataPreprocessor  

logger = SetupLogger(log_file='../logs/notebooks.log').get_logger()

Load the datasets

In [2]:
# Initialize the DataPreprocessor with the logger and the path to the dataset
load_fraud = DataPreprocessor(filepath='../data/processed_fraud_data.csv', logger=logger)
load_credit = DataPreprocessor(filepath='../data/creditcard.csv', logger=logger)
fraud_data = load_fraud.load_dataset().set_index('user_id')
credit_data = load_credit.load_dataset()

In [3]:
fraud_data.head()

Unnamed: 0_level_0,Unnamed: 0,signup_time,purchase_time,purchase_value,device_id,age,ip_address,class,purchase_delay,hour_of_day,day_of_week,user_transaction_frequency,device_transaction_frequency,user_transaction_velocity,source_Direct,source_SEO,browser_FireFox,browser_IE,browser_Opera,browser_Safari,sex_M
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22058,0,2015-02-24 22:55:49,2015-04-18 02:47:11,-0.160204,QVPSPJUOCKZAR,0.679914,732758400.0,0,-0.136057,-1.377455,0.99102,0.0,-0.261514,-0.230128,False,True,False,False,False,False,True
333320,1,2015-06-07 20:39:50,2015-06-08 01:38:54,-1.142592,EOGFQPIZPYXFZ,2.304476,350311400.0,0,-1.571877,-1.522122,-1.501259,0.0,-0.261514,-0.229874,False,False,False,False,False,False,False
1359,2,2015-01-01 18:52:44,2015-01-01 18:52:45,-1.197169,YSSKYOSJHPPLJ,2.304476,2621474000.0,1,-1.577617,0.937208,-0.005891,0.0,3.941861,4.345476,False,True,False,False,True,False,True
150084,3,2015-04-28 21:13:25,2015-05-04 13:54:50,0.385567,ATGTXKYKUDUQN,0.911994,3840542000.0,0,-1.420213,0.213876,-1.501259,0.0,-0.261514,-0.23012,False,True,False,False,False,True,True
221365,4,2015-07-21 07:09:52,2015-09-09 18:40:53,0.112681,NAUITBZFJKHWW,1.376155,415583100.0,0,-0.182509,0.937208,-0.504347,0.0,-0.261514,-0.230128,False,False,False,False,False,True,True


In [4]:
# Explore the few rows
credit_data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,-0.5516,-0.617801,-0.99139,-0.311169,1.468177,-0.470401,0.207971,0.025791,0.403993,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,1.612727,1.065235,0.489095,-0.143772,0.635558,0.463917,-0.114805,-0.183361,-0.145783,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,0.624501,0.066084,0.717293,-0.165946,2.345865,-2.890083,1.109969,-0.121359,-2.261857,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,-0.226487,0.178228,0.507757,-0.287924,-0.631418,-1.059647,-0.684093,1.965775,-1.232622,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,-0.822843,0.538196,1.345852,-1.11967,0.175121,-0.451449,-0.237033,-0.038195,0.803487,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [5]:
# Understand the shapes
credit_data.shape, fraud_data.shape

((284807, 31), (151112, 21))

In [6]:
# Check any missing values
print(credit_data.isnull().sum())
print(fraud_data.isnull().sum())

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64
Unnamed: 0                      0
signup_time                     0
purchase_time                   0
purchase_value                  0
device_id                       0
age                             0
ip_address                      0
class                           0
purchase_delay                  0
hour_of_day                     0
day_of_week                     0
user_transaction_frequency      0
device_transaction_frequency    0
user_transaction_velocity       0
source_Direct                   0
source_SEO                      0
browser_FireFox                 0
browser_IE                      0
bro

In [13]:
# For fraud_data (after loading but before train-test split)
# Convert timestamp to datetime and extract features
fraud_data['signup_time'] = pd.to_datetime(fraud_data['signup_time'])
fraud_data['hour'] = fraud_data['signup_time'].dt.hour
fraud_data['day_of_week'] = fraud_data['signup_time'].dt.dayofweek
fraud_data = fraud_data.drop(columns=['signup_time'])  # Remove original datetime column

# Now proceed with DataPreparation and train-test split

In [None]:
# For fraud_data (after loading but before train-test split)
# Convert timestamp to datetime and extract features
fraud_data['purchase_time'] = pd.to_datetime(fraud_data['purchase_time'])
fraud_data['hour'] = fraud_data['purchase_time'].dt.hour
fraud_data['day_of_week'] = fraud_data['purchase_time'].dt.dayofweek
fraud_data = fraud_data.drop(columns=['purchase_time'])  # Remove original datetime column

# Now proceed with DataPreparation and train-test split

**Data Preparation:**

* Feature and Target Separation [‘Class’(creditcard), ‘class’(Fraud_Data)] Train-Test Split
  
For creditcard dataset (target column 'Class'):

In [7]:
from data_preparation import DataPreparation
# Assuming df_creditcard is the DataFrame for the credit card dataset
_creditcard = DataPreparation(credit_data, target_column='Class')
_creditcard.train_test_split(test_size=0.2, random_state=42)

# Retrieving the train and test sets
X_train_cc, X_test_cc, y_train_cc, y_test_cc = _creditcard.get_train_test_data()

Data split into training and testing sets successfully.


For Fraud_Data dataset (target column 'class'):

In [8]:
# Assuming df_fraud is the DataFrame for the fraud dataset
_fraud = DataPreparation(fraud_data, target_column='class')
_fraud.train_test_split(test_size=0.2, random_state=42)

# Retrieving the train and test sets
X_train_fd, X_test_fd, y_train_fd, y_test_fd = _fraud.get_train_test_data()

Data split into training and testing sets successfully.


In [9]:
from model_pipeline import ModelPipeline

In [10]:
# Credit Card Dataset
model_pipeline_cc = ModelPipeline(X_train_cc, X_test_cc, y_train_cc, y_test_cc)
best_model_cc, best_model_name_cc = model_pipeline_cc.train_and_evaluate()
model_pipeline_cc.save_best_models(best_model_cc, best_model_name_cc, 'creditcard')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Quick tuning for Logistic Regression...
Quick tuning for Decision Tree...
Quick tuning for Random Forest...
Quick tuning for Gradient Boosting...




Logistic Regression took 2.25 seconds to train


Registered model 'logistic_regression' already exists. Creating a new version of this model...
Created version '3' of model 'logistic_regression'.


Logistic Regression model trained and logged with MLflow
Decision Tree took 11.35 seconds to train


Registered model 'decision_tree' already exists. Creating a new version of this model...
Created version '3' of model 'decision_tree'.


Decision Tree model trained and logged with MLflow
Random Forest took 141.62 seconds to train


Registered model 'random_forest' already exists. Creating a new version of this model...
Created version '3' of model 'random_forest'.


Random Forest model trained and logged with MLflow
Gradient Boosting took 364.39 seconds to train


Registered model 'gradient_boosting' already exists. Creating a new version of this model...
Created version '3' of model 'gradient_boosting'.


Gradient Boosting model trained and logged with MLflow
[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 569us/step
MLP took 13.25 seconds to train


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Registered model 'mlp' already exists. Creating a new version of this model...
Created version '2' of model 'mlp'.


MLP model trained and logged with MLflow
[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
RNN took 34.21 seconds to train


Registered model 'rnn' already exists. Creating a new version of this model...
Created version '2' of model 'rnn'.


RNN model trained and logged with MLflow
[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step




LSTM took 76.34 seconds to train


Registered model 'lstm' already exists. Creating a new version of this model...
Created version '2' of model 'lstm'.


LSTM model trained and logged with MLflow
[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 842us/step




CNN took 19.35 seconds to train




CNN model trained and logged with MLflow
Random Forest best model saved.


Registered model 'cnn' already exists. Creating a new version of this model...
Created version '2' of model 'cnn'.


In [11]:
# Fraud Dataset
model_pipeline_fd = ModelPipeline(X_train_fd, X_test_fd, y_train_fd, y_test_fd)
best_model_fd, best_model_name_fd = model_pipeline_fd.train_and_evaluate()
model_pipeline_fd.save_best_models(best_model_fd, best_model_name_fd, 'fraud')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Quick tuning for Logistic Regression...


ValueError: 
All the 4 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\pipeline.py", line 654, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\pipeline.py", line 588, in _fit
    X, fitted_transformer = fit_transform_one_cached(
                            ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\joblib\memory.py", line 312, in __call__
    return self.func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\pipeline.py", line 1551, in _fit_transform_one
    res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 319, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\base.py", line 921, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 894, in fit
    return self.partial_fit(X, y, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 930, in partial_fit
    X = validate_data(
        ^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\utils\validation.py", line 2944, in validate_data
    out = check_array(X, input_name="X", **check_params)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\utils\validation.py", line 973, in check_array
    array = array.astype(new_dtype)
            ^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\generic.py", line 6643, in astype
    new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\internals\managers.py", line 430, in astype
    return self.apply(
           ^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\internals\managers.py", line 363, in apply
    applied = getattr(b, f)(**kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\internals\blocks.py", line 758, in astype
    new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\dtypes\astype.py", line 237, in astype_array_safe
    new_values = astype_array(values, dtype, copy=copy)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\dtypes\astype.py", line 182, in astype_array
    values = _astype_nansafe(values, dtype, copy=copy)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\dtypes\astype.py", line 133, in _astype_nansafe
    return arr.astype(dtype, copy=True)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: could not convert string to float: '2015-05-05 12:01:57'

--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\pipeline.py", line 654, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\pipeline.py", line 588, in _fit
    X, fitted_transformer = fit_transform_one_cached(
                            ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\joblib\memory.py", line 312, in __call__
    return self.func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\pipeline.py", line 1551, in _fit_transform_one
    res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 319, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\base.py", line 921, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 894, in fit
    return self.partial_fit(X, y, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 930, in partial_fit
    X = validate_data(
        ^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\utils\validation.py", line 2944, in validate_data
    out = check_array(X, input_name="X", **check_params)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\sklearn\utils\validation.py", line 973, in check_array
    array = array.astype(new_dtype)
            ^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\generic.py", line 6643, in astype
    new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\internals\managers.py", line 430, in astype
    return self.apply(
           ^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\internals\managers.py", line 363, in apply
    applied = getattr(b, f)(**kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\internals\blocks.py", line 758, in astype
    new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\dtypes\astype.py", line 237, in astype_array_safe
    new_values = astype_array(values, dtype, copy=copy)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\dtypes\astype.py", line 182, in astype_array
    values = _astype_nansafe(values, dtype, copy=copy)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\KMAI3\e-commerce-and-banking-fraud-detection\.venv\Lib\site-packages\pandas\core\dtypes\astype.py", line 133, in _astype_nansafe
    return arr.astype(dtype, copy=True)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: could not convert string to float: '2015-01-05 06:37:15'


**Adjust Data Reshaping for Neural Networks**
* Modify the training loop to handle tabular data for CNNs/RNNs:

In [None]:
# Inside train_and_evaluate()
if name in ['LSTM', 'CNN', 'RNN']:  # Add RNN here
    X_train_reshaped = self.X_train.values.reshape(self.X_train.shape[0], self.X_train.shape[1], 1)
    X_test_reshaped = self.X_test.values.reshape(self.X_test.shape[0], self.X_test.shape[1], 1)
    # Train with reshaped data

**MLflow Tracking Enhancements**
* Ensure all parameters and metrics are logged for new models:

In [None]:
# Inside train_and_evaluate()
if name == 'Logistic Regression':
    mlflow.log_param("C", model.get_params()['C'])
    mlflow.log_param("solver", model.get_params()['solver'])
elif name == 'Decision Tree':
    mlflow.log_param("max_depth", model.get_params()['max_depth'])

## Model Selection
* Import ModelPipeline class from model_pipeline
* Train multiple models
* hyperparameter tune
* evaluate the model
* compare the model

In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # Disable CUDA

# Import the class
from model_pipeline import ModelPipeline

**Train and Evaluate the models on the e-commerce fruad dataset**

In [None]:
# Create instance of the pipeline
model_pipeline = ModelPipeline(X_train_fd, X_test_fd, y_train_fd, y_test_fd)

# add models
model_pipeline.add_models()
# Train and evaluate, logging with MLflow
best_model, best_model_name = model_pipeline.train_and_evaluate()
# Save the best model
model_pipeline.save_best_models(best_model, best_model_name, 'fraud')