In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier

# -----------------------------
# Read the transaction table from disk
# -----------------------------
df = pd.read_csv("fraud_data1.csv")

# -----------------------------
# Separate the label column from the feature columns
# -----------------------------
TARGET_COLUMN = "is_fraud"   # edit this if the label column has another name

# X: every column except the label; y: the label column on its own.
X = df.drop(columns=TARGET_COLUMN)
y = df.loc[:, TARGET_COLUMN]

# -----------------------------
# Integer-encode object-dtype feature columns
# -----------------------------
# One LabelEncoder is created per object-dtype column and kept in
# `label_encoders`, keyed by column name, so it stays available after fitting.
object_columns = X.select_dtypes(include=["object"]).columns
label_encoders = {column: LabelEncoder() for column in object_columns}

for column, encoder in label_encoders.items():
    # Values are cast to str before fitting, so every entry (missing values
    # included) is treated as a string label.
    X[column] = encoder.fit_transform(X[column].astype(str))

# -----------------------------
# Hold out a stratified test set
# -----------------------------
# 20% of the rows go to the test split; the labels are passed as `stratify`
# and the seed is fixed so the split is repeatable.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# -----------------------------
# Train XGBoost Classifier
# -----------------------------
# The positive class is up-weighted by the negative/positive ratio of the
# TRAINING labels only, so the held-out test labels never influence how the
# model is configured.
n_negative = int((y_train == 0).sum())
n_positive = int((y_train == 1).sum())
if n_positive == 0:
    # Without this guard the ratio would silently become inf/nan.
    raise ValueError(
        "No positive (label == 1) rows in the training split; "
        "cannot compute scale_pos_weight."
    )

model = XGBClassifier(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    scale_pos_weight=n_negative / n_positive,  # handle class imbalance
    eval_metric="logloss",
    random_state=42,
)

model.fit(X_train, y_train)

# -----------------------------
# Evaluate on the held-out split
# -----------------------------
y_pred = model.predict(X_test)

# Each metric is printed under its own heading, in this order.
for heading, metric in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric(y_test, y_pred))

# -----------------------------
# Score every transaction in the dataset
# -----------------------------
# X is the full feature matrix, so rows that were used for training are
# scored here as well as the held-out rows.
class_probabilities = model.predict_proba(X)
df["fraud_probability"] = class_probabilities[:, 1]  # second probability column
df["fraud_prediction"] = model.predict(X)

flagged_mask = df["fraud_prediction"] == 1
fraud_transactions = df.loc[flagged_mask]

# -----------------------------
# Report the flagged transactions
# -----------------------------
print("\nDetected Fraudulent Transactions:")
print(fraud_transactions.head(20))  # only the first 20 flagged rows are shown

print(f"\nTotal Fraudulent Transactions Detected: {len(fraud_transactions)}")