  Task 2: Model Building and Training
This notebook:
- Loads the preprocessed fraud dataset.
- Prepares the data for training.
- Trains various models using functions from `model_training.py`.
- Evaluates each model and logs experiments with MLflow.

In [None]:
import pandas as pd
from scripts.model_training import (prepare_data, train_logistic_regression, train_decision_tree,
                                    train_random_forest, train_gradient_boosting, train_mlp, evaluate_model)

 Load preprocessed fraud data

In [None]:

# Columns used for modelling; 'class' is the column later passed to
# prepare_data as the target.
selected_columns = [
    'purchase_value', 'age', 'hour_of_day', 'day_of_week',
    'transaction_count', 'time_diff', 'class',
]

# Read only the columns that are actually used. The previous version parsed
# 'purchase_time' as datetimes and then discarded it on the very next line,
# which cost parse time and memory for no benefit.
fraud_df = pd.read_csv("../data/Fraud_Data_Featured.csv", usecols=selected_columns)

# usecols does not guarantee column order, so re-select to pin the order given
# in selected_columns, then drop every row with a missing value.
fraud_df = fraud_df[selected_columns].dropna()

Prepare data, then train and evaluate each model

In [None]:


#  Split the frame into train/test features and labels, using 'class' as the target
X_train, X_test, y_train, y_test = prepare_data(fraud_df, 'class')


def _announce_and_fit(banner, trainer, features, labels):
    """Print *banner*, then return whatever ``trainer(features, labels)`` returns."""
    print(banner)
    return trainer(features, labels)


#  Fit each model on the training split, then evaluate it on the test split
lr_model = _announce_and_fit(
    "Training Logistic Regression...", train_logistic_regression, X_train, y_train
)
evaluate_model(lr_model, X_test, y_test)

dt_model = _announce_and_fit(
    "Training Decision Tree...", train_decision_tree, X_train, y_train
)
evaluate_model(dt_model, X_test, y_test)

rf_model = _announce_and_fit(
    "Training Random Forest...", train_random_forest, X_train, y_train
)
evaluate_model(rf_model, X_test, y_test)

gb_model = _announce_and_fit(
    "Training Gradient Boosting...", train_gradient_boosting, X_train, y_train
)
evaluate_model(gb_model, X_test, y_test)

mlp_model = _announce_and_fit(
    "Training MLP...", train_mlp, X_train, y_train
)
evaluate_model(mlp_model, X_test, y_test)






Save one model for deployment (e.g., Random Forest)

In [None]:
import pickle

# Serialize the fitted Random Forest to disk; the path is relative to the
# directory the notebook is run from.
model_path = "../scripts/trained_model.pkl"
with open(model_path, "wb") as model_file:
    pickle.dump(rf_model, model_file)

print("Random Forest model saved as trained_model.pkl")