# MLFlow

In [1]:
import pandas as pd
import numpy as np
import mlflow


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import classification_report, confusion_matrix
pd.options.display.float_format = '{:,.2f}'.format

In [3]:
import dagshub
dagshub.init(repo_owner='Pranay5519', repo_name='fraud_detection', mlflow=True)


In [4]:
mlflow.set_tracking_uri("https://dagshub.com/Pranay5519/fraud_detection.mlflow")

In [5]:
df = pd.read_csv(r"D:\accredian\data\cleaned_fraud.csv")
df  = df[['step', 'type', 'isFraud', 'isMerchant',
       'orig_balance_diff', 'dest_balance_diff', 'log_amount']]

In [15]:

# ------------------------------
# 2. ENCODE CATEGORICAL COLUMN
# ------------------------------

df = pd.get_dummies(df, columns=['type'], drop_first=True)

# ------------------------------
# 3. DEFINE X AND y
# ------------------------------

X = df.drop(columns=['isFraud'])
y = df['isFraud']

# ------------------------------
# 4. TRAIN‚ÄìTEST SPLIT
# ------------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

# ------------------------------
# 5. SCALE FEATURES
# ------------------------------

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------
# 6. UNDERSAMPLE (TRAINING DATA ONLY)
# ------------------------------
from imblearn.over_sampling import SMOTE

smote = SMOTE(
    sampling_strategy='auto',
    random_state=42,
    k_neighbors=5
)

X_train_bal, y_train_bal = smote.fit_resample(
    X_train_scaled,
    y_train
)

print("\nAfter SMOTE:")
print(y_train_bal.value_counts())

2026/01/16 15:32:40 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '0082918849274eefa75490de8d335162', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


üèÉ View run exultant-moose-502 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/0/runs/0082918849274eefa75490de8d335162
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/0

After SMOTE:
isFraud
0    5083526
1    5083526
Name: count, dtype: int64


In [16]:
# ------------------------------
# 7. TRAIN MODEL (PRECISION-FOCUSED)
# ------------------------------

class_weight = {0: 50, 1: 10}

lr = LogisticRegression(
    max_iter=1000,
    solver='lbfgs',
    class_weight=class_weight
)


In [17]:
# ------------------------------
# 7. START MLFLOW RUN
mlflow.sklearn.autolog()
mlflow.set_experiment("Fraud_Detection_LogisticRegression")
with mlflow.start_run(run_name="test run 3"):

    lr.fit(X_train_scaled, y_train)

    y_proba = lr.predict_proba(X_test_scaled)[:, 1]

    threshold = 0.9
    y_pred = (y_proba >= threshold).astype(int)

    # --------------------------
    # 8. METRICS
    # --------------------------

    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_proba)

    # --------------------------
    # 9. LOG PARAMETERS
    # --------------------------

    mlflow.log_param("model", "LogisticRegression")
    mlflow.log_param("class_weight", class_weight)
    mlflow.log_param("threshold", threshold)

    # --------------------------
    # 10. LOG METRICS
    # --------------------------

    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("roc_auc", roc_auc)

    # --------------------------
    # 11. LOG MODEL
    # --------------------------

    mlflow.sklearn.log_model(lr, "model")

    print("Run logged to MLflow")
    print(classification_report(y_test, y_pred, digits=4))

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Run logged to MLflow
              precision    recall  f1-score   support

           0     0.9987    1.0000    0.9994   1270881
           1     0.0000    0.0000    0.0000      1643

    accuracy                         0.9987   1272524
   macro avg     0.4994    0.5000    0.4997   1272524
weighted avg     0.9974    0.9987    0.9981   1272524



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


üèÉ View run test run 3 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/0/runs/7ecd27e423c7414aac30dcad2ab46574
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/0


# Decision Tree

In [7]:
import pandas as pd
import mlflow
import mlflow.sklearn
import optuna

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

# ------------------------------
# 1. LOAD DATA
# ------------------------------

df = pd.read_csv(r"D:\accredian\data\cleaned_fraud.csv")
df.drop(columns=["Unnamed: 0"], inplace=True)

# ------------------------------
# 2. ENCODE CATEGORICAL COLUMN
# ------------------------------

df = pd.get_dummies(df, columns=['type'], drop_first=True)

# ------------------------------
# 3. DEFINE X AND y
# ------------------------------

X = df.drop(columns=['isFraud'])
y = df['isFraud']

# ------------------------------
# 4. TRAIN‚ÄìTEST SPLIT
# ------------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

# ------------------------------
# 5. SMOTE (TRAIN ONLY)
# ------------------------------

smote = SMOTE(random_state=42, k_neighbors=5)
X_train_bal, y_train_bal = smote.fit_resample(X_train, y_train)

# ------------------------------
# 6. MLFLOW + OPTUNA SETUP
# ------------------------------

mlflow.set_tracking_uri("https://dagshub.com/Pranay5519/fraud_detection.mlflow")
mlflow.set_experiment("Fraud_DecisionTree_Optuna Exp2")

mlflow.sklearn.autolog()

# ------------------------------
# 7. OPTUNA OBJECTIVE FUNCTION
# ------------------------------

def objective(trial):

    params = {
        "max_depth": trial.suggest_int("max_depth", 15, 150),
        "min_samples_split": trial.suggest_int("min_samples_split", 30, 100),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 40),
        "criterion": trial.suggest_categorical("criterion", ["gini", "entropy"]),
        "class_weight": trial.suggest_categorical(
            "class_weight",
            [{0: 5, 1: 1}, {0: 10, 1: 1}, {0: 7, 1: 1},{0: 12, 1: 1}]
        ),
        "random_state": 42
    }

    with mlflow.start_run(nested=True):

        model = DecisionTreeClassifier(**params)
        model.fit(X_train_bal, y_train_bal)

        y_test_pred = model.predict(X_test)

        # ---------- Classification Report ----------
        classification_rep = classification_report(
            y_test, y_test_pred, output_dict=True, zero_division=0
        )

        for label, metrics in classification_rep.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Optuna optimization target ‚Üí fraud recall
        return classification_rep["1"]["recall"]

# ------------------------------
# 8. RUN OPTUNA STUDY
# ------------------------------

with mlflow.start_run(run_name="DecisionTree_Run"):

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=30)

    mlflow.log_params({f"best_{k}": v for k, v in study.best_params.items()})

    best_params = study.best_params

    # --------------------------
    # 9. TRAIN FINAL MODEL
    # --------------------------

    final_model = DecisionTreeClassifier(**best_params, random_state=42)
    final_model.fit(X_train_bal, y_train_bal)

    y_test_pred = final_model.predict(X_test)

    classification_rep = classification_report(
        y_test, y_test_pred, output_dict=True, zero_division=0
    )

    for label, metrics in classification_rep.items():
        if isinstance(metrics, dict):
            for metric, value in metrics.items():
                mlflow.log_metric(f"final_{label}_{metric}", value)

    print("Best Params:", best_params)


2026/01/17 12:46:58 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '7630b52f00cc46ba97e506dbb555bfd0', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


üèÉ View run stately-robin-891 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/7630b52f00cc46ba97e506dbb555bfd0
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 12:47:10,881] A new study created in memory with name: no-name-068e0489-8210-4ef5-867b-8608bde3e1e8


üèÉ View run able-shoat-468 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/70950feba32845e78d6c118afa4e1418
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 12:49:26,867] Trial 0 finished with value: 0.8618381010346926 and parameters: {'max_depth': 86, 'min_samples_split': 39, 'min_samples_leaf': 5, 'criterion': 'gini', 'class_weight': {0: 10, 1: 1}}. Best is trial 0 with value: 0.8618381010346926.


üèÉ View run caring-ape-86 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/69ec06fef9724af7b5eeee9cf988cdb0
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 12:51:39,491] Trial 1 finished with value: 0.8587948874010956 and parameters: {'max_depth': 24, 'min_samples_split': 83, 'min_samples_leaf': 10, 'criterion': 'gini', 'class_weight': {0: 10, 1: 1}}. Best is trial 0 with value: 0.8618381010346926.


üèÉ View run abrasive-moth-873 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/dbe9a8205e0a49a581f8358da63ae447
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 12:53:53,011] Trial 2 finished with value: 0.8727936701156421 and parameters: {'max_depth': 144, 'min_samples_split': 46, 'min_samples_leaf': 32, 'criterion': 'entropy', 'class_weight': {0: 7, 1: 1}}. Best is trial 2 with value: 0.8727936701156421.


üèÉ View run learned-gnat-865 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/e65886c4111b4c61a0736ec009b86d77
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 12:56:01,801] Trial 3 finished with value: 0.8788800973828362 and parameters: {'max_depth': 15, 'min_samples_split': 45, 'min_samples_leaf': 31, 'criterion': 'entropy', 'class_weight': {0: 12, 1: 1}}. Best is trial 3 with value: 0.8788800973828362.


üèÉ View run calm-snake-758 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/aae83c0f307241399986aa004e4cc55f
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 12:58:14,116] Trial 4 finished with value: 0.8667072428484479 and parameters: {'max_depth': 31, 'min_samples_split': 38, 'min_samples_leaf': 14, 'criterion': 'entropy', 'class_weight': {0: 10, 1: 1}}. Best is trial 3 with value: 0.8788800973828362.


üèÉ View run spiffy-midge-120 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/3fd31ad6c2e74117a88b9ecfc5113516
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:00:26,275] Trial 5 finished with value: 0.8800973828362751 and parameters: {'max_depth': 66, 'min_samples_split': 87, 'min_samples_leaf': 37, 'criterion': 'gini', 'class_weight': {0: 5, 1: 1}}. Best is trial 5 with value: 0.8800973828362751.


üèÉ View run vaunted-dog-33 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/b3cbfc6d2fd045468de44d4cca46caa8
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:02:38,226] Trial 6 finished with value: 0.8648813146682898 and parameters: {'max_depth': 97, 'min_samples_split': 78, 'min_samples_leaf': 14, 'criterion': 'entropy', 'class_weight': {0: 10, 1: 1}}. Best is trial 5 with value: 0.8800973828362751.


üèÉ View run fortunate-frog-71 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/994e1c8564df4ff885a919b5c8eed864
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:04:49,674] Trial 7 finished with value: 0.877054169202678 and parameters: {'max_depth': 69, 'min_samples_split': 72, 'min_samples_leaf': 20, 'criterion': 'gini', 'class_weight': {0: 5, 1: 1}}. Best is trial 5 with value: 0.8800973828362751.


üèÉ View run honorable-grub-212 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/7a1a34007a9947a08c063dec5829afba
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:07:01,701] Trial 8 finished with value: 0.8557516737674985 and parameters: {'max_depth': 109, 'min_samples_split': 49, 'min_samples_leaf': 9, 'criterion': 'gini', 'class_weight': {0: 12, 1: 1}}. Best is trial 5 with value: 0.8800973828362751.


üèÉ View run wise-donkey-635 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/9ac7a88cf46f435cba693ab7fa1dfdd0
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:09:12,752] Trial 9 finished with value: 0.8691418137553256 and parameters: {'max_depth': 45, 'min_samples_split': 55, 'min_samples_leaf': 14, 'criterion': 'gini', 'class_weight': {0: 7, 1: 1}}. Best is trial 5 with value: 0.8800973828362751.


üèÉ View run unique-chimp-344 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/93aca319567f425d97200beafcc89cf6
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:11:22,264] Trial 10 finished with value: 0.8782714546561169 and parameters: {'max_depth': 62, 'min_samples_split': 99, 'min_samples_leaf': 40, 'criterion': 'gini', 'class_weight': {0: 5, 1: 1}}. Best is trial 5 with value: 0.8800973828362751.


üèÉ View run mysterious-seal-26 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/a6b11f790d004bf283740f9c8079a337
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:13:31,693] Trial 11 finished with value: 0.8660986001217286 and parameters: {'max_depth': 18, 'min_samples_split': 62, 'min_samples_leaf': 31, 'criterion': 'entropy', 'class_weight': {0: 12, 1: 1}}. Best is trial 5 with value: 0.8800973828362751.


üèÉ View run bedecked-bug-33 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/0af9b2a332044a419022d11962b485cf
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:15:45,188] Trial 12 finished with value: 0.8849665246500305 and parameters: {'max_depth': 49, 'min_samples_split': 92, 'min_samples_leaf': 32, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run salty-foal-808 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/745f00d34c9f4d10a5a755a68c770813
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:17:59,356] Trial 13 finished with value: 0.8831405964698722 and parameters: {'max_depth': 54, 'min_samples_split': 96, 'min_samples_leaf': 39, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run delightful-wren-279 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/82b44d8c60014e98b4dcb9a9e03b86e6
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:20:11,294] Trial 14 finished with value: 0.8800973828362751 and parameters: {'max_depth': 46, 'min_samples_split': 97, 'min_samples_leaf': 24, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run traveling-shark-772 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/ece498bf207b4aa19582e54c47c83da5
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:22:25,870] Trial 15 finished with value: 0.8819233110164334 and parameters: {'max_depth': 46, 'min_samples_split': 91, 'min_samples_leaf': 25, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run welcoming-kit-592 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/895ac65fe77a4901a6b09d61c4bac2cf
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:24:38,827] Trial 16 finished with value: 0.8825319537431527 and parameters: {'max_depth': 115, 'min_samples_split': 73, 'min_samples_leaf': 36, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run calm-ox-308 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/01e16ee48b4e4aa9924866aba4e750b1
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:26:50,454] Trial 17 finished with value: 0.8819233110164334 and parameters: {'max_depth': 55, 'min_samples_split': 92, 'min_samples_leaf': 26, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run serious-mole-179 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/b2fb8541c8174a70b704333c7285b135
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:29:02,740] Trial 18 finished with value: 0.8831405964698722 and parameters: {'max_depth': 81, 'min_samples_split': 100, 'min_samples_leaf': 40, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run upset-sponge-662 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/0c889da81da544efb26fe7df46178fec
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:31:14,109] Trial 19 finished with value: 0.877054169202678 and parameters: {'max_depth': 32, 'min_samples_split': 82, 'min_samples_leaf': 34, 'criterion': 'entropy', 'class_weight': {0: 7, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run luxuriant-kite-909 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/53f40034269341c0b4d7956c69a0014e
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:33:26,459] Trial 20 finished with value: 0.8825319537431527 and parameters: {'max_depth': 78, 'min_samples_split': 64, 'min_samples_leaf': 28, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run blushing-whale-331 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/56b4c8946ecf466dbf5c9a14fdcbd4eb
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:35:41,077] Trial 21 finished with value: 0.8831405964698722 and parameters: {'max_depth': 81, 'min_samples_split': 100, 'min_samples_leaf': 39, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run mercurial-cow-567 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/350c052c461c4fa8bc5c064b8a754758
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:37:53,362] Trial 22 finished with value: 0.8837492391965917 and parameters: {'max_depth': 95, 'min_samples_split': 92, 'min_samples_leaf': 36, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run stately-kit-332 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/13df1d45b8ad4fec902a1fb59a2a245a
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:40:07,461] Trial 23 finished with value: 0.8837492391965917 and parameters: {'max_depth': 133, 'min_samples_split': 91, 'min_samples_leaf': 34, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run selective-cod-948 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/1bff3570a1c747a2817b43b7c65853de
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:42:20,184] Trial 24 finished with value: 0.8819233110164334 and parameters: {'max_depth': 142, 'min_samples_split': 88, 'min_samples_leaf': 29, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run upbeat-horse-510 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/149ea70b002d41d984511af153cddbf9
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:44:35,176] Trial 25 finished with value: 0.8794887401095557 and parameters: {'max_depth': 123, 'min_samples_split': 75, 'min_samples_leaf': 21, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run able-gull-518 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/8c30d969b9044c02a1f8e2aa7807fe07
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:46:47,364] Trial 26 finished with value: 0.8813146682897139 and parameters: {'max_depth': 132, 'min_samples_split': 83, 'min_samples_leaf': 35, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run unleashed-robin-959 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/29854b93949e43a7bacb7c13bd7ca7c3
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:48:58,029] Trial 27 finished with value: 0.8612294583079733 and parameters: {'max_depth': 95, 'min_samples_split': 91, 'min_samples_leaf': 32, 'criterion': 'entropy', 'class_weight': {0: 12, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run trusting-worm-375 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/b0f937525a944a79b709082303ee8b74
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:51:06,372] Trial 28 finished with value: 0.8709677419354839 and parameters: {'max_depth': 105, 'min_samples_split': 30, 'min_samples_leaf': 21, 'criterion': 'entropy', 'class_weight': {0: 7, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


üèÉ View run thoughtful-kit-307 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/5c05fddba9454a54822fd9fdadb759a6
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


[I 2026-01-17 13:53:17,887] Trial 29 finished with value: 0.8667072428484479 and parameters: {'max_depth': 133, 'min_samples_split': 70, 'min_samples_leaf': 35, 'criterion': 'entropy', 'class_weight': {0: 10, 1: 1}}. Best is trial 12 with value: 0.8849665246500305.


Best Params: {'max_depth': 49, 'min_samples_split': 92, 'min_samples_leaf': 32, 'criterion': 'entropy', 'class_weight': {0: 5, 1: 1}}
üèÉ View run DecisionTree_Run at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7/runs/e5a8fbf61e04466d8f67d00d4dfb7a61
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/7


# LogisticRegression

In [2]:
import dagshub
dagshub.init(repo_owner='Pranay5519', repo_name='fraud_detection', mlflow=True)


In [None]:
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
import optuna

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from imblearn.over_sampling import SMOTE
from sklearn.metrics import (
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report
)
import tempfile
import os
# ------------------------------
# 1. LOAD DATA
# ------------------------------

df = pd.read_csv(r"D:\accredian\data\cleaned_fraud.csv")

# ------------------------------
# 2. ENCODE CATEGORICAL COLUMN
# ------------------------------

df = pd.get_dummies(df, columns=['type'], drop_first=True)

# ------------------------------
# 3. DEFINE X AND y
# ------------------------------

X = df.drop(columns=['isFraud'])
y = df['isFraud']

# ------------------------------
# 4. TRAIN‚ÄìTEST SPLIT
# ------------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

# ------------------------------
# 5. SCALE FEATURES
# ------------------------------

scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------
# 6. SMOTE (TRAIN ONLY)
# ------------------------------

smote = SMOTE(random_state=42, k_neighbors=5)

X_train_bal, y_train_bal = smote.fit_resample(
    X_train_scaled,
    y_train
)

# ------------------------------
# 7. MLFLOW + OPTUNA SETUP
# ------------------------------

mlflow.set_tracking_uri("https://dagshub.com/Pranay5519/fraud_detection.mlflow")
mlflow.set_experiment("Fraud_LogReg_Optuna")

mlflow.sklearn.autolog(disable=False)

# ------------------------------
# 8. OPTUNA OBJECTIVE FUNCTION
# ------------------------------

def objective(trial):

    params = {
        "C": trial.suggest_float("C", 0.001, 10.0, log=True),
        "solver": trial.suggest_categorical("solver", ["lbfgs", "liblinear"]),
        "class_weight": trial.suggest_categorical(
            "class_weight",
            [{0: 1, 1: 1}, {0: 2, 1: 1}, {0: 5, 1: 1}]
        ),
        "max_iter": 1000
    }

    with mlflow.start_run(nested=True):

        model = LogisticRegression(**params)
        model.fit(X_train_bal, y_train_bal)

        # ---------- Predictions ----------
        y_proba = model.predict_proba(X_test_scaled)[:, 1]
        y_pred = (y_proba >= 0.5).astype(int)

        # ---------- Metrics ----------
        roc_auc = roc_auc_score(y_test, y_proba)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)

        # ---------- Log metrics ----------
        mlflow.log_metric("test_roc_auc", roc_auc)
        mlflow.log_metric("test_precision", precision)
        mlflow.log_metric("test_recall", recall)
        mlflow.log_metric("test_f1_score", f1)

        # ---------- Confusion Matrix ----------
        cm = confusion_matrix(y_test, y_pred)

        # ---------- Save artifacts ----------
        with tempfile.TemporaryDirectory() as tmpdir:
            cm_path = os.path.join(tmpdir, "confusion_matrix.txt")
            cr_path = os.path.join(tmpdir, "classification_report.txt")

            with open(cm_path, "w") as f:
                f.write(str(cm))

            with open(cr_path, "w") as f:
                f.write(classification_report(y_test, y_pred, digits=4))

            mlflow.log_artifact(cm_path)
            mlflow.log_artifact(cr_path)

        return roc_auc
# ------------------------------
# 9. RUN OPTUNA STUDY
# ------------------------------

with mlflow.start_run(run_name="Log-Reg_run"):

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=3)

    mlflow.log_params(
        {f"best_{k}": v for k, v in study.best_params.items()}
    )
    best_params = study.best_params
    
    final_model = LogisticRegression(
                                **best_params,
                                max_iter=1000
                                    )

    final_model.fit(X_train_bal, y_train_bal)
    
    # Train
    y_train_proba = final_model.predict_proba(X_train_scaled)[:, 1]
    y_train_pred = (y_train_proba >= 0.5).astype(int)

    # Test
    y_test_proba = final_model.predict_proba(X_test_scaled)[:, 1]
    y_test_pred = (y_test_proba >= 0.5).astype(int)
    metrics = {
                # TRAIN
                "train_precision": precision_score(y_train, y_train_pred, zero_division=0),
                "train_recall": recall_score(y_train, y_train_pred, zero_division=0),
                "train_f1": f1_score(y_train, y_train_pred, zero_division=0),
                "train_roc_auc": roc_auc_score(y_train, y_train_proba),

                # TEST
                "test_precision": precision_score(y_test, y_test_pred, zero_division=0),
                "test_recall": recall_score(y_test, y_test_pred, zero_division=0),
                "test_f1": f1_score(y_test, y_test_pred, zero_division=0),
                "test_roc_auc": roc_auc_score(y_test, y_test_proba),
                }
    for k, v in metrics.items():
        mlflow.log_metric(k, v)
        
    mlflow.log_metric("best_roc_auc", study.best_value)

    print("Best ROC-AUC:", study.best_value)
    print("Best Params:", study.best_params)


  from .autonotebook import tqdm as notebook_tqdm
[I 2026-01-16 21:54:03,771] A new study created in memory with name: no-name-3b3d664b-278d-4872-a63d-9bae18476c5a


üèÉ View run persistent-seal-209 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/1/runs/ae614334d184440a9fc14014e126ca23
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/1


[I 2026-01-16 21:55:09,340] Trial 0 finished with value: 0.9506688264865168 and parameters: {'C': 0.05892696682793512, 'solver': 'liblinear', 'class_weight': {0: 5, 1: 1}}. Best is trial 0 with value: 0.9506688264865168.


üèÉ View run chill-steed-658 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/1/runs/bf8aa477128d44f2b97801e97079e0ff
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/1


[I 2026-01-16 21:56:17,137] Trial 1 finished with value: 0.9721546339248937 and parameters: {'C': 0.18614995516565452, 'solver': 'lbfgs', 'class_weight': {0: 5, 1: 1}}. Best is trial 1 with value: 0.9721546339248937.


üèÉ View run defiant-grouse-645 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/1/runs/0cb4aa3aa314484d8737629ee672884e
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/1


[I 2026-01-16 21:57:15,320] Trial 2 finished with value: 0.9372275226773534 and parameters: {'C': 0.012224662915859554, 'solver': 'lbfgs', 'class_weight': {0: 5, 1: 1}}. Best is trial 1 with value: 0.9721546339248937.


Best ROC-AUC: 0.9721546339248937
Best Params: {'C': 0.18614995516565452, 'solver': 'lbfgs', 'class_weight': {0: 5, 1: 1}}
üèÉ View run Log-Reg_run at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/1/runs/ba719e181484446fa53b71906dd889fc
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/1


# Random Forest Undersampling

In [1]:
import dagshub
dagshub.init(repo_owner='Pranay5519', repo_name='fraud_detection', mlflow=True)


In [None]:
import pandas as pd
import mlflow
import mlflow.sklearn
import optuna
from mlflow.models import infer_signature
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.under_sampling import RandomUnderSampler

# ------------------------------
# 1. LOAD DATA
# ------------------------------

df = pd.read_csv(r"D:\accredian\data\cleaned_fraud.csv")
df.drop(columns=["Unnamed: 0"], inplace=True)
# ------------------------------
# 2. ENCODE CATEGORICAL COLUMN
# ------------------------------

df = pd.get_dummies(df, columns=['type'], drop_first=True)

# ------------------------------
# 3. DEFINE X AND y
# ------------------------------

X = df.drop(columns=['isFraud'])
y = df['isFraud']

# ------------------------------
# 4. TRAIN‚ÄìTEST SPLIT
# ------------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

# ------------------------------
# 5. SCALE FEATURES
# (Not required for RF, kept for consistency)
# ------------------------------

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------
# 6. UNDERSAMPLING (TRAIN ONLY)
# ------------------------------

rus = RandomUnderSampler(random_state=42)
X_train_bal, y_train_bal = rus.fit_resample(X_train_scaled, y_train)

# ------------------------------
# 7. MLFLOW SETUP
# ------------------------------

mlflow.set_tracking_uri("https://dagshub.com/Pranay5519/fraud_detection.mlflow")
mlflow.set_experiment("Fraud_RandomForest_Optuna")

mlflow.sklearn.autolog()

# ------------------------------
# 8. OPTUNA OBJECTIVE FUNCTION
# ------------------------------

def objective(trial):

    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 400),
        "max_depth": trial.suggest_int("max_depth", 5, 30),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 20),
        "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
        "class_weight": trial.suggest_categorical(
            "class_weight",
            [{0: 1, 1: 1}, {0: 2, 1: 1}, {0: 5, 1: 1}]
        ),
        "random_state": 42,
        "n_jobs": -1
    }

    with mlflow.start_run(nested=True):

        model = RandomForestClassifier(**params)
        model.fit(X_train_bal, y_train_bal)

        y_test_pred = model.predict(X_test_scaled)

        # ---------- Classification Report ----------
        report = classification_report(
            y_test, y_test_pred, output_dict=True, zero_division=0
        )

        for label, metrics in report.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Optuna optimization target ‚Üí fraud recall
        return report["1"]["precision"]

# ------------------------------
# 9. RUN OPTUNA STUDY
# ------------------------------

with mlflow.start_run(run_name="RandomForest_Run"):

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=10)

    mlflow.log_params({f"best_{k}": v for k, v in study.best_params.items()})

    best_params = study.best_params

    # --------------------------
    # 10. TRAIN FINAL MODEL
    # --------------------------

    final_model = RandomForestClassifier(
        **best_params,
        random_state=42,
        n_jobs=-1
    )

    final_model.fit(X_train_bal, y_train_bal)

    y_test_pred = final_model.predict(X_test_scaled)

    final_report = classification_report(
        y_test, y_test_pred, output_dict=True, zero_division=0
    )

    for label, metrics in final_report.items():
        if isinstance(metrics, dict):
            for metric, value in metrics.items():
                mlflow.log_metric(f"final_{label}_{metric}", value)

    print("Best Params:", best_params)


  from .autonotebook import tqdm as notebook_tqdm
2026/01/17 09:25:35 INFO mlflow.tracking.fluent: Experiment with name 'Fraud_RandomForest_Optuna' does not exist. Creating a new experiment.
[I 2026-01-17 09:25:37,363] A new study created in memory with name: no-name-5659941a-2993-4da3-a865-9bcc4385b8da


üèÉ View run valuable-ray-410 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/313d7f19facd4e7a97fbff4c2c891f1c
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


[I 2026-01-17 09:26:06,866] Trial 0 finished with value: 0.03346670770493485 and parameters: {'n_estimators': 104, 'max_depth': 12, 'min_samples_split': 9, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'class_weight': {0: 2, 1: 1}}. Best is trial 0 with value: 0.03346670770493485.


üèÉ View run merciful-moose-52 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/471e60c77fe04878b4b5ee36ae45367b
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


[I 2026-01-17 09:26:44,807] Trial 1 finished with value: 0.09868967721316714 and parameters: {'n_estimators': 280, 'max_depth': 15, 'min_samples_split': 9, 'min_samples_leaf': 19, 'max_features': 'sqrt', 'class_weight': {0: 5, 1: 1}}. Best is trial 1 with value: 0.09868967721316714.


üèÉ View run sedate-tern-569 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/a18b8a235dd24805b6803367c1f5ddfe
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


[I 2026-01-17 09:27:22,793] Trial 2 finished with value: 0.06648374841705361 and parameters: {'n_estimators': 320, 'max_depth': 19, 'min_samples_split': 19, 'min_samples_leaf': 3, 'max_features': 'log2', 'class_weight': {0: 5, 1: 1}}. Best is trial 1 with value: 0.09868967721316714.


üèÉ View run merciful-smelt-885 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/4e381f24606c4629a42186ae2d9a8fa8
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


[I 2026-01-17 09:28:00,788] Trial 3 finished with value: 0.05413569561823304 and parameters: {'n_estimators': 146, 'max_depth': 27, 'min_samples_split': 12, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': {0: 5, 1: 1}}. Best is trial 1 with value: 0.09868967721316714.


üèÉ View run angry-smelt-183 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/91428354859948bc8260766b854ddd96
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


[I 2026-01-17 09:28:38,831] Trial 4 finished with value: 0.09791983764586504 and parameters: {'n_estimators': 287, 'max_depth': 17, 'min_samples_split': 15, 'min_samples_leaf': 11, 'max_features': 'sqrt', 'class_weight': {0: 5, 1: 1}}. Best is trial 1 with value: 0.09868967721316714.


üèÉ View run worried-crow-266 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/3863338db9684c23b12b9e1eec0e881f
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


[I 2026-01-17 09:29:16,790] Trial 5 finished with value: 0.10067991631799163 and parameters: {'n_estimators': 288, 'max_depth': 29, 'min_samples_split': 5, 'min_samples_leaf': 16, 'max_features': 'log2', 'class_weight': {0: 5, 1: 1}}. Best is trial 5 with value: 0.10067991631799163.


üèÉ View run polite-ape-646 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/9f5378ae11e44948ad8e8cde474b5b4b
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


[I 2026-01-17 09:29:54,859] Trial 6 finished with value: 0.06324348139544199 and parameters: {'n_estimators': 236, 'max_depth': 29, 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'class_weight': {0: 5, 1: 1}}. Best is trial 5 with value: 0.10067991631799163.


üèÉ View run illustrious-hare-319 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/2399b894ea1d47ac965383cec11b96ef
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


[I 2026-01-17 09:30:32,825] Trial 7 finished with value: 0.031079131227217498 and parameters: {'n_estimators': 349, 'max_depth': 7, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'log2', 'class_weight': {0: 1, 1: 1}}. Best is trial 5 with value: 0.10067991631799163.


üèÉ View run carefree-toad-427 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/d1e3ed54320a4dd6805c1cf91f7b8e2a
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


[I 2026-01-17 09:31:10,811] Trial 8 finished with value: 0.03313840155945419 and parameters: {'n_estimators': 239, 'max_depth': 29, 'min_samples_split': 4, 'min_samples_leaf': 12, 'max_features': 'sqrt', 'class_weight': {0: 2, 1: 1}}. Best is trial 5 with value: 0.10067991631799163.


üèÉ View run brawny-roo-990 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/d2fa2fe8d16348c7b0648ce8546f7992
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


[I 2026-01-17 09:31:48,834] Trial 9 finished with value: 0.033398901729366444 and parameters: {'n_estimators': 299, 'max_depth': 27, 'min_samples_split': 19, 'min_samples_leaf': 12, 'max_features': 'log2', 'class_weight': {0: 2, 1: 1}}. Best is trial 5 with value: 0.10067991631799163.


Best Params: {'n_estimators': 288, 'max_depth': 29, 'min_samples_split': 5, 'min_samples_leaf': 16, 'max_features': 'log2', 'class_weight': {0: 5, 1: 1}}
üèÉ View run RandomForest_Run at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4/runs/a0632fefa7fb430a8b7c97b952392c24
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/4


# LightGBM SMOTE Oversampling

In [5]:
import pandas as pd
import mlflow
import mlflow.sklearn
import optuna

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report , accuracy_score
from imblearn.over_sampling import SMOTE
import lightgbm as lgb

# ------------------------------
# 1. LOAD DATA
# ------------------------------

df = pd.read_csv(r"D:\accredian\data\cleaned_fraud.csv")
df.drop(columns=["Unnamed: 0"], inplace=True)
# ------------------------------
# 2. ENCODE CATEGORICAL COLUMN
# ------------------------------

df = pd.get_dummies(df, columns=['type'], drop_first=True)

# ------------------------------
# 3. DEFINE X AND y
# ------------------------------

X = df.drop(columns=['isFraud'])
y = df['isFraud']

# ------------------------------
# 4. TRAIN‚ÄìTEST SPLIT
# ------------------------------

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

# ------------------------------
# 5. SCALE FEATURES
# (not mandatory for LGBM, kept for consistency)
# ------------------------------

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------
# 6. SMOTE (TRAIN ONLY)
# ------------------------------

smote = SMOTE(random_state=42, k_neighbors=5)
X_train_bal, y_train_bal = smote.fit_resample(X_train_scaled, y_train)

# ------------------------------
# 7. MLFLOW SETUP
# ------------------------------

mlflow.set_tracking_uri("https://dagshub.com/Pranay5519/fraud_detection.mlflow")
mlflow.set_experiment("Fraud_LightGBM_Optuna Exp2")

mlflow.lightgbm.autolog(disable=False)

# ------------------------------
# 8. OPTUNA OBJECTIVE FUNCTION
# ------------------------------

def objective(trial):

    params = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.5, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 20, 300),
        "max_depth": trial.suggest_int("max_depth", 3, 70),
        "min_child_samples": trial.suggest_int("min_child_samples", 10, 100),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "class_weight": trial.suggest_categorical(
            "class_weight",
            [{0: 1, 1: 1}, {0: 2, 1: 1}, {0: 5, 1: 1}]
        ),
        "random_state": 42,
        "n_jobs": -1
    }

    with mlflow.start_run(nested=True):

        model = lgb.LGBMClassifier(**params)
        model.fit(X_train_bal, y_train_bal)

        y_test_pred = model.predict(X_test_scaled)

        # ---------- Classification Report ----------
        report = classification_report(
            y_test,
            y_test_pred,
            output_dict=True,
            zero_division=0
        )
        # log accuracy score
        mlflow.log_metric("accuracy", accuracy_score(y_test, y_test_pred))
        # ---------- Log metrics ----------
        for label, metrics in report.items():
            if isinstance(metrics, dict):
                for metric, value in metrics.items():
                    mlflow.log_metric(f"{label}_{metric}", value)

        # Optuna optimization target ‚Üí FRAUD PRECISION
        return report["1"]["precision"]

# ------------------------------
# 9. RUN OPTUNA STUDY
# ------------------------------

with mlflow.start_run(run_name="LightGBM_Run"):

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=20)

    mlflow.log_params(
        {f"best_{k}": v for k, v in study.best_params.items()}
    )

    best_params = study.best_params

    # --------------------------
    # 10. TRAIN FINAL MODEL
    # --------------------------

    final_model = lgb.LGBMClassifier(
        **best_params,
        random_state=42,
        n_jobs=-1
    )

    final_model.fit(X_train_bal, y_train_bal)

    y_test_pred = final_model.predict(X_test_scaled)

    final_report = classification_report(
        y_test,
        y_test_pred,
        output_dict=True,
        zero_division=0
    )
    # log accuracy score
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_test_pred))
    for label, metrics in final_report.items():
        if isinstance(metrics, dict):
            for metric, value in metrics.items():
                mlflow.log_metric(f"{label}_{metric}", value)
                #mlflow.log_metric(f"{label}_{metric}", value)
    print("Best Params:", best_params)


2026/01/17 11:01:35 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'd512a8c55c7e4507b78dd7f7fd47254d', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


üèÉ View run capable-mink-743 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/d512a8c55c7e4507b78dd7f7fd47254d
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:01:48,651] A new study created in memory with name: no-name-1cf677bf-f698-4636-970f-7879848f0875


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053244 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




üèÉ View run mysterious-dove-850 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/25854e13db8247cfb8a9d81a5161ddd0
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:03:07,636] Trial 0 finished with value: 0.07716718675622786 and parameters: {'n_estimators': 342, 'learning_rate': 0.15841451215292526, 'num_leaves': 160, 'max_depth': 45, 'min_child_samples': 13, 'subsample': 0.919851528253562, 'colsample_bytree': 0.991492713185355, 'class_weight': {0: 1, 1: 1}}. Best is trial 0 with value: 0.07716718675622786.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.209334 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333333 -> initscore=-0.693147
[LightGBM] [Info] Start training from score -0.693147




üèÉ View run fun-quail-374 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/470b58efeb64498f939ee9db1ce12e7d
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:06:20,580] Trial 1 finished with value: 0.12331313131313132 and parameters: {'n_estimators': 351, 'learning_rate': 0.034731791232880525, 'num_leaves': 85, 'max_depth': 67, 'min_child_samples': 72, 'subsample': 0.8911265000421036, 'colsample_bytree': 0.9436933529536189, 'class_weight': {0: 2, 1: 1}}. Best is trial 1 with value: 0.12331313131313132.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.066950 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




üèÉ View run nervous-hare-501 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/e499975026e44a79a2d13b761bff433c
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:08:07,309] Trial 2 finished with value: 0.02540640187699011 and parameters: {'n_estimators': 483, 'learning_rate': 0.18346022024708822, 'num_leaves': 293, 'max_depth': 65, 'min_child_samples': 90, 'subsample': 0.7780903609942215, 'colsample_bytree': 0.7583931359332616, 'class_weight': {0: 1, 1: 1}}. Best is trial 1 with value: 0.12331313131313132.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.052902 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




üèÉ View run illustrious-hawk-851 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/544f85fb72794d8c9860300af32a8416
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:10:06,140] Trial 3 finished with value: 0.09579905260533533 and parameters: {'n_estimators': 488, 'learning_rate': 0.011207119889774896, 'num_leaves': 266, 'max_depth': 44, 'min_child_samples': 11, 'subsample': 0.7328226220833365, 'colsample_bytree': 0.917246380557865, 'class_weight': {0: 1, 1: 1}}. Best is trial 1 with value: 0.12331313131313132.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.067822 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333333 -> initscore=-0.693147
[LightGBM] [Info] Start training from score -0.693147




üèÉ View run peaceful-skink-475 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/f0f3403241874337962a23cbdeeda65b
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:10:47,188] Trial 4 finished with value: 0.11067165252409032 and parameters: {'n_estimators': 136, 'learning_rate': 0.2845745044028196, 'num_leaves': 26, 'max_depth': 66, 'min_child_samples': 55, 'subsample': 0.6479561945004498, 'colsample_bytree': 0.6112982895497147, 'class_weight': {0: 2, 1: 1}}. Best is trial 1 with value: 0.12331313131313132.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053305 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333333 -> initscore=-0.693147
[LightGBM] [Info] Start training from score -0.693147




üèÉ View run fearless-wren-659 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/2d0dcdcc4ba34aeab6972118e15661f2
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:11:40,008] Trial 5 finished with value: 0.14632921562719695 and parameters: {'n_estimators': 187, 'learning_rate': 0.21598599268734434, 'num_leaves': 181, 'max_depth': 64, 'min_child_samples': 22, 'subsample': 0.7853546892128662, 'colsample_bytree': 0.8106367265956919, 'class_weight': {0: 2, 1: 1}}. Best is trial 5 with value: 0.14632921562719695.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.055001 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run peaceful-horse-351 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/b14c0847552c4603882b35633844996e
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:12:52,561] Trial 6 finished with value: 0.21382228490832159 and parameters: {'n_estimators': 398, 'learning_rate': 0.024682514555689226, 'num_leaves': 29, 'max_depth': 40, 'min_child_samples': 66, 'subsample': 0.9507498403160686, 'colsample_bytree': 0.7522316232813369, 'class_weight': {0: 5, 1: 1}}. Best is trial 6 with value: 0.21382228490832159.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.058606 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333333 -> initscore=-0.693147
[LightGBM] [Info] Start training from score -0.693147




üèÉ View run efficient-rook-898 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/4d052fd6d271468697757527cfc61dc9
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:14:17,943] Trial 7 finished with value: 0.09100917431192661 and parameters: {'n_estimators': 400, 'learning_rate': 0.12248796399646412, 'num_leaves': 247, 'max_depth': 9, 'min_child_samples': 100, 'subsample': 0.9513842034832898, 'colsample_bytree': 0.6680703971720567, 'class_weight': {0: 2, 1: 1}}. Best is trial 6 with value: 0.21382228490832159.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.065772 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run rogue-pug-836 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/45cc5097eb84442586de42e64c934eb7
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:16:07,945] Trial 8 finished with value: 0.0216337187616561 and parameters: {'n_estimators': 452, 'learning_rate': 0.16099151581291948, 'num_leaves': 298, 'max_depth': 45, 'min_child_samples': 68, 'subsample': 0.7202923786890069, 'colsample_bytree': 0.8473606931183613, 'class_weight': {0: 5, 1: 1}}. Best is trial 6 with value: 0.21382228490832159.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.053028 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run invincible-newt-620 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/6925ed7ec3f04cd58f10fe3d153d290d
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:17:23,097] Trial 9 finished with value: 0.2709433962264151 and parameters: {'n_estimators': 354, 'learning_rate': 0.110593594614525, 'num_leaves': 212, 'max_depth': 11, 'min_child_samples': 84, 'subsample': 0.6077242239184953, 'colsample_bytree': 0.7543627379257855, 'class_weight': {0: 5, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.065030 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run polite-fawn-548 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/bc3e290f3da24bddbf2e65040265f383
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:18:07,933] Trial 10 finished with value: 0.2120701754385965 and parameters: {'n_estimators': 245, 'learning_rate': 0.44401230168971517, 'num_leaves': 195, 'max_depth': 3, 'min_child_samples': 43, 'subsample': 0.6011538430977207, 'colsample_bytree': 0.6923255419298138, 'class_weight': {0: 5, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.051904 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run placid-croc-320 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/04468dd71d684b6bbf35f9fec2373021
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:19:10,517] Trial 11 finished with value: 0.24196443139174417 and parameters: {'n_estimators': 291, 'learning_rate': 0.04032196943507167, 'num_leaves': 121, 'max_depth': 22, 'min_child_samples': 79, 'subsample': 0.8624507161717218, 'colsample_bytree': 0.7712371258721815, 'class_weight': {0: 5, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.074658 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run delicate-snail-32 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/4924e5dcbe22456ea54630c399c1ca81
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:20:12,895] Trial 12 finished with value: 0.2631483824326767 and parameters: {'n_estimators': 283, 'learning_rate': 0.06770719996893305, 'num_leaves': 129, 'max_depth': 18, 'min_child_samples': 84, 'subsample': 0.8544284003827926, 'colsample_bytree': 0.8569945490313634, 'class_weight': {0: 5, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.050156 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run calm-skunk-894 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/a3381df7f05b4d14b461f714f0df36f7
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:20:53,901] Trial 13 finished with value: 0.233015773856005 and parameters: {'n_estimators': 57, 'learning_rate': 0.07670215389688408, 'num_leaves': 223, 'max_depth': 22, 'min_child_samples': 86, 'subsample': 0.8508007065880768, 'colsample_bytree': 0.8651525016815726, 'class_weight': {0: 5, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.065652 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run trusting-shad-125 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/ba8dcbdbd4b24672bda1a0f1d6b175a1
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:21:54,927] Trial 14 finished with value: 0.24920235096557514 and parameters: {'n_estimators': 262, 'learning_rate': 0.07217719452049637, 'num_leaves': 119, 'max_depth': 23, 'min_child_samples': 96, 'subsample': 0.7042317986104475, 'colsample_bytree': 0.7051306879846424, 'class_weight': {0: 5, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.047615 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run bemused-foal-852 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/a5f2d0ef52e34796b40102f09246b639
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:22:57,976] Trial 15 finished with value: 0.26474872541879096 and parameters: {'n_estimators': 313, 'learning_rate': 0.09417598340315711, 'num_leaves': 122, 'max_depth': 12, 'min_child_samples': 58, 'subsample': 0.8259475786058535, 'colsample_bytree': 0.8397467524894888, 'class_weight': {0: 5, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.049001 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run adorable-grouse-5 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/d63e058c3a04417a92c51de8509a4abd
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:24:02,862] Trial 16 finished with value: 0.26022040526128687 and parameters: {'n_estimators': 373, 'learning_rate': 0.10107643737304617, 'num_leaves': 71, 'max_depth': 30, 'min_child_samples': 40, 'subsample': 0.6571317240115849, 'colsample_bytree': 0.7945716593556285, 'class_weight': {0: 5, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.045930 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run bald-moth-593 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/dd0bcf1bc05a4e969053ac74889971b4
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:24:56,788] Trial 17 finished with value: 0.24494530991050711 and parameters: {'n_estimators': 205, 'learning_rate': 0.042850846007322266, 'num_leaves': 216, 'max_depth': 12, 'min_child_samples': 52, 'subsample': 0.7852561943199883, 'colsample_bytree': 0.9184877972507806, 'class_weight': {0: 5, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.047589 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




üèÉ View run suave-shrimp-118 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/d2d3ef864fa5419fb3e706950229fbcc
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:26:06,442] Trial 18 finished with value: 0.15519056643503895 and parameters: {'n_estimators': 328, 'learning_rate': 0.3299520025677696, 'num_leaves': 159, 'max_depth': 33, 'min_child_samples': 32, 'subsample': 0.8192238151476868, 'colsample_bytree': 0.724591687055901, 'class_weight': {0: 5, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.043939 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




üèÉ View run rogue-wasp-598 at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/8cddb9acf05e410dba18defa2c2fda25
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6


[I 2026-01-17 11:27:02,627] Trial 19 finished with value: 0.03199796962242786 and parameters: {'n_estimators': 412, 'learning_rate': 0.017296640985136242, 'num_leaves': 83, 'max_depth': 3, 'min_child_samples': 61, 'subsample': 0.7438999929173415, 'colsample_bytree': 0.6499109778125517, 'class_weight': {0: 1, 1: 1}}. Best is trial 9 with value: 0.2709433962264151.


[LightGBM] [Info] Number of positive: 5083526, number of negative: 5083526
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.055969 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 10167052, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166667 -> initscore=-1.609438
[LightGBM] [Info] Start training from score -1.609438




Best Params: {'n_estimators': 354, 'learning_rate': 0.110593594614525, 'num_leaves': 212, 'max_depth': 11, 'min_child_samples': 84, 'subsample': 0.6077242239184953, 'colsample_bytree': 0.7543627379257855, 'class_weight': {0: 5, 1: 1}}
üèÉ View run LightGBM_Run at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6/runs/3138f215fd7149a7a55685a1ca5da6a3
üß™ View experiment at: https://dagshub.com/Pranay5519/fraud_detection.mlflow/#/experiments/6
