In [21]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix

from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier

import joblib



In [22]:
df = pd.read_csv("dataset.csv")


In [23]:
target_col = df.columns[-1]
print("Target column:", target_col)



Target column: is_fraud


In [24]:
X = df.drop(target_col, axis=1)
y = df[target_col]




In [25]:
X = X.select_dtypes(include=["number"])



In [26]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [27]:
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)


In [28]:
model = XGBClassifier(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric="logloss",
    random_state=42
)

model.fit(X_train, y_train)


In [29]:
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]


In [30]:
print("Classification Report:\n")
print(classification_report(y_test, y_pred))

roc_auc = roc_auc_score(y_test, y_prob)
print("ROC-AUC Score:", roc_auc)

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.96      0.98    257815
           1       0.10      0.81      0.18      1520

    accuracy                           0.96    259335
   macro avg       0.55      0.88      0.58    259335
weighted avg       0.99      0.96      0.97    259335

ROC-AUC Score: 0.9550792285778152

Confusion Matrix:
[[247026  10789]
 [   294   1226]]


In [31]:
joblib.dump(model, "fraud_model.pkl")
print("Model saved as fraud_model.pkl")


Model saved as fraud_model.pkl
