In [None]:
%pip install scikit-learn

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import joblib

# XGBoost-only workflow for the project.
# Read the transaction log into a DataFrame.
DATA_PATH = r"C:\Users\hp\Desktop\Fraud_Payment_Detection2\Fraud_Detection_Dataset\PS_20174392719_1491204439457_log.csv"
df = pd.read_csv(DATA_PATH)

# Replace the 'type' column with integer codes from a LabelEncoder.
le = LabelEncoder()
le.fit(df['type'])
df['type'] = le.transform(df['type'])

# Discard the 'nameOrig' / 'nameDest' columns; they are not used as features.
df = df.drop(columns=['nameOrig', 'nameDest'])

# Target is 'isFraud'; the features are every remaining column except the
# two fraud columns.
y = df['isFraud']
X = df.drop(columns=['isFraud', 'isFlaggedFraud'])

# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Build and fit the XGBoost model on the training split.
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X_train, y_train)

# Predict class labels for the label-based metrics, and positive-class
# probabilities for ROC AUC.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Evaluation
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
# ROC AUC is a ranking metric: it must be given continuous scores. Passing
# the hard 0/1 predictions would collapse the curve to a single threshold
# and misreport the score.
print("✅ ROC AUC:", roc_auc_score(y_test, y_proba))
print("✅ Classification Report:\n", classification_report(y_test, y_pred))
print("✅ Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Save the fitted model to the current working directory.
joblib.dump(model, "best_fraud_model_xgb.pkl")


In [None]:
%pip install pandas
%pip install numpy
%pip install joblib
%pip install xgboost

import pandas as pd
import numpy as np
import joblib
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

# 1. Load Dataset
df = pd.read_csv(r"C:\Users\hp\Desktop\Fraud_Payment_Detection2\Fraud_Detection_Dataset\PS_20174392719_1491204439457_log.csv")

# 2. Preprocessing
# Integer-encode the 'type' column.
le = LabelEncoder()
df['type'] = le.fit_transform(df['type'])

# Explicit feature list; 'isFraud' is the target.
X = df[['step', 'type', 'amount', 'oldbalanceOrg', 'newbalanceOrg', 'oldbalanceDest', 'newbalanceDest']]
y = df['isFraud']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows influence the mean/variance applied to the training data (data
# leakage), which makes the held-out metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# NOTE(review): the fitted `scaler` and `le` are not persisted in this cell;
# whatever loads the saved model presumably has to apply the same transforms
# to incoming data — confirm against the consuming application.

# 3. Define Models
# Build each candidate classifier, then register it under its display name.
# The dict's insertion order is the order in which the models are trained.
random_forest_clf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
decision_tree_clf = DecisionTreeClassifier(random_state=42)
extra_trees_clf = ExtraTreesClassifier(n_estimators=100, random_state=42, n_jobs=-1)
# probability=True makes SVC expose predict_proba, which the evaluation
# loop calls on every model.
svm_clf = SVC(random_state=42, probability=True)
xgb_clf = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42,
    n_jobs=-1,
)

models = {
    "Random Forest": random_forest_clf,
    "Decision Tree": decision_tree_clf,
    "Extra Trees": extra_trees_clf,
    "SVM": svm_clf,
    "XGBoost": xgb_clf,
}

# 4. Train & Evaluate
# Fit every candidate model, print its metrics on the test split, and keep
# the model with the highest ROC AUC saved on disk.
import os

results = []      # (name, accuracy, roc_auc) per model, in training order
best_auc = 0
best_model = None

# Create the output directory up front: joblib.dump does not create missing
# directories, and failing only after a model has finished training would
# waste the whole fit.
MODEL_PATH = "flask/payments.pkl"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

for name, model in models.items():
    print(f"\nTraining: {name}")
    # perf_counter is a monotonic clock intended for measuring durations.
    start = time.perf_counter()
    model.fit(X_train, y_train)
    end = time.perf_counter()

    # Class labels feed the label-based metrics; positive-class
    # probabilities feed ROC AUC.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred)

    print(f"⏱️ Time Taken: {end - start:.2f}s")
    print(f"✅ Accuracy: {acc:.4f}")
    print(f"✅ ROC AUC: {auc:.4f}")
    print(f"✅ Confusion Matrix:\n{cm}")
    print(f"✅ Classification Report:\n{cr}")

    results.append((name, acc, auc))

    # Overwrite the saved file whenever a model beats the best AUC so far.
    if auc > best_auc:
        best_auc = auc
        best_model = model
        joblib.dump(best_model, MODEL_PATH)  # Save best model here

# 5. Summary Table
# Print one aligned row per trained model: name, accuracy and ROC AUC.
print("\nSummary:")
for model_name, accuracy, roc_auc in results:
    summary_row = f"{model_name:15} | Accuracy: {accuracy:.4f} | ROC AUC: {roc_auc:.4f}"
    print(summary_row)
