In [5]:
# =========================
# 1. Import Libraries
# =========================
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# =========================
# 2. Load Dataset
# =========================
df = pd.read_csv("balanced_fraud_dataset.csv")
df=df.sample(500)
df.to_csv("data.csv",index=False)

# =========================
# 3. Basic Inspection
# =========================
print(df.shape)
print(df.info())
print(df.isnull().sum())

# =========================
# 4. Drop Unnecessary Columns
# (IDs that don't help model)
# =========================
df = df.drop(["nameOrig", "nameDest"], axis=1)

# =========================
# 5. Feature Engineering
# =========================
# Balance difference features
df["orgBalanceDiff"] = df["oldbalanceOrg"] - df["newbalanceOrig"]
df["destBalanceDiff"] = df["newbalanceDest"] - df["oldbalanceDest"]

# =========================
# 6. Encode Categorical Feature
# =========================
le = LabelEncoder()
df["type"] = le.fit_transform(df["type"])

# =========================
# 7. Define Features and Target
# =========================
X = df.drop("isFraud", axis=1)
y = df["isFraud"]

# =========================
# 8. Train-Test Split
# =========================
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)

# =========================
# 9. Feature Scaling
# =========================
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Training data shape:", X_train_scaled.shape)
print("Testing data shape:", X_test_scaled.shape)

(500, 11)
<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, 11310 to 13426
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   step            500 non-null    int64  
 1   type            500 non-null    object 
 2   amount          500 non-null    float64
 3   nameOrig        500 non-null    object 
 4   oldbalanceOrg   500 non-null    float64
 5   newbalanceOrig  500 non-null    float64
 6   nameDest        500 non-null    object 
 7   oldbalanceDest  500 non-null    float64
 8   newbalanceDest  500 non-null    float64
 9   isFraud         500 non-null    int64  
 10  isFlaggedFraud  500 non-null    int64  
dtypes: float64(5), int64(3), object(3)
memory usage: 46.9+ KB
None
step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int6

In [None]:
import mlflow
import mlflow.sklearn
import dagshub
# from sklearn.model_selection import train_test_split, GridSearchCV
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# df=pd.read_csv("data.csv")


# import dagshub

mlflow.set_tracking_uri('https://dagshub.com/ArchitSaki/Fraud-Detection-System--End-to-end-ml-project-.mlflow')
dagshub.init(repo_owner='ArchitSaki', repo_name='Fraud-Detection-System--End-to-end-ml-project-', mlflow=True)

# mlflow.set_experiment("Logistic Regression Baseline")
# mlflow.set_experiment("Logistic Regression Baseline")

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [10]:
models = {
    "LogisticRegression": LogisticRegression(),
    "RandomForest": RandomForestClassifier(),
    "DecisionTree": DecisionTreeClassifier(),
    "SVM": SVC()
}
mlflow.set_experiment("Classification_Model_Comparison")

for model_name, model in models.items():

    with mlflow.start_run(run_name=model_name):

        # Train model
        model.fit(X_train, y_train)

        # Predictions
        y_pred = model.predict(X_test)

        # Metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')

        # Log parameters
        mlflow.log_param("model", model_name)

        # Log metrics
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)

        # Log model
        mlflow.sklearn.log_model(model, "model")

        print(f"{model_name} logged to MLflow")

2026/02/24 16:03:54 INFO mlflow.tracking.fluent: Experiment with name 'Classification_Model_Comparison' does not exist. Creating a new experiment.
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression logged to MLflow
üèÉ View run LogisticRegression at: https://dagshub.com/ArchitSaki/Fraud-Detection-System--End-to-end-ml-project-.mlflow/#/experiments/1/runs/dd6043655a0e4e0e8c3ac5a0c478eef8
üß™ View experiment at: https://dagshub.com/ArchitSaki/Fraud-Detection-System--End-to-end-ml-project-.mlflow/#/experiments/1




RandomForest logged to MLflow
üèÉ View run RandomForest at: https://dagshub.com/ArchitSaki/Fraud-Detection-System--End-to-end-ml-project-.mlflow/#/experiments/1/runs/f2a590f4b30041448725c67f55d5f092
üß™ View experiment at: https://dagshub.com/ArchitSaki/Fraud-Detection-System--End-to-end-ml-project-.mlflow/#/experiments/1




DecisionTree logged to MLflow
üèÉ View run DecisionTree at: https://dagshub.com/ArchitSaki/Fraud-Detection-System--End-to-end-ml-project-.mlflow/#/experiments/1/runs/9204c47e401f402399226bbd6848f86b
üß™ View experiment at: https://dagshub.com/ArchitSaki/Fraud-Detection-System--End-to-end-ml-project-.mlflow/#/experiments/1




SVM logged to MLflow
üèÉ View run SVM at: https://dagshub.com/ArchitSaki/Fraud-Detection-System--End-to-end-ml-project-.mlflow/#/experiments/1/runs/4529ac8643274a73a982fa864dd62fa3
üß™ View experiment at: https://dagshub.com/ArchitSaki/Fraud-Detection-System--End-to-end-ml-project-.mlflow/#/experiments/1
