<a href="https://colab.research.google.com/github/Samiha9864/MyWork/blob/main/CreaditCardFraud.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Mount Google Drive at /content/drive; the dataset CSVs are read from
# /content/drive/MyDrive further down. Requires the Colab runtime
# (`google.colab` is imported here).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
!pip install xgboost lightgbm catboost tensorflow
!pip install catboost

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

# ---------- Load Dataset ----------
TRAIN_CSV = "/content/drive/MyDrive/fraudTrain.csv/fraudTrain.csv"
TEST_CSV = "/content/drive/MyDrive/fraudTest.csv/fraudTest.csv"
train_data = pd.read_csv(TRAIN_CSV)
test_data = pd.read_csv(TEST_CSV)

# ---------- Clean Data ----------
# Columns removed from both frames (only those actually present), followed by
# removal of every row that still contains a missing value.
cols_to_drop = [
    'Unnamed: 0', 'cc_num', 'first', 'last', 'street', 'city', 'state',
    'zip', 'dob', 'trans_num', 'trans_date_trans_time',
]
for frame in (train_data, test_data):
    present = [col for col in cols_to_drop if col in frame.columns]
    frame.drop(columns=present, inplace=True)
    frame.dropna(inplace=True)

# ---------- Encode Categorical Features ----------
# The encoder learns its categories from the training frame only; categories
# that first appear in the test frame are mapped to -1.
cat_cols = [col for col in ("merchant", "category", "gender", "job") if col in train_data.columns]
encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
train_data[cat_cols] = encoder.fit_transform(train_data[cat_cols])
test_data[cat_cols] = encoder.transform(test_data[cat_cols])

# ---------- Split Features & Target ----------
TARGET = "is_fraud"
y = train_data[TARGET]
X = train_data.drop(columns=[TARGET])

# 80/20 split of the training file, stratified on the target.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

y_test = test_data[TARGET]
X_test = test_data.drop(columns=[TARGET])

# ---------- Define Evaluation Function ----------
def evaluate_model(name, model, X_test, y_test):
    """Score a fitted binary classifier on held-out data and print a report.

    Parameters
    ----------
    name : str
        Label shown in the report header.
    model : object
        Fitted estimator exposing ``predict(X)``. The output may be hard
        labels or scores in [0, 1], in any array shape (e.g. ``(n, 1)``);
        it is flattened and rounded to integer labels.
    X_test : array-like
        Feature matrix passed to ``model.predict``.
    y_test : array-like
        True labels, expected to be 0/1.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, confusion_matrix)``. The
        confusion matrix is always 2x2 with rows/columns ordered ``[0, 1]``.
    """
    # asarray + ravel also copes with list outputs and (n, 1) arrays.
    y_pred = np.asarray(model.predict(X_test)).ravel()
    # np.round rounds halves to even, so a score of exactly 0.5 becomes 0.
    y_pred = np.round(y_pred).astype(int)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    # Fixing the label order keeps the matrix 2x2 even when one class is
    # missing from both y_test and the predictions.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

    print(f"\n{name} Model Evaluation:")
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print("Confusion Matrix:\n", cm)
    return acc, prec, rec, f1, cm


# ======================================================
# 1️⃣ XGBoost
# ======================================================
# `use_label_encoder` is intentionally not passed: the installed XGBoost
# ignores it and warns 'Parameters: { "use_label_encoder" } are not used.'
xgb_model = xgb.XGBClassifier(
    n_estimators=200, max_depth=6, learning_rate=0.1,
    eval_metric='logloss', n_jobs=-1, random_state=42
)
xgb_model.fit(X_train, y_train)
evaluate_model("XGBoost", xgb_model, X_test, y_test)


# ======================================================
# 2️⃣ LightGBM
# ======================================================
# Gradient-boosted trees: 200 estimators at learning rate 0.1, all cores,
# fixed seed; every other hyper-parameter is left at the library default.
lgb_params = dict(n_estimators=200, learning_rate=0.1, random_state=42, n_jobs=-1)
lgb_model = lgb.LGBMClassifier(**lgb_params)
lgb_model.fit(X_train, y_train)
evaluate_model("LightGBM", lgb_model, X_test, y_test)


# ======================================================
# 3️⃣ CatBoost
# ======================================================
# 300 boosting iterations of depth-6 trees; training log output is silenced.
cat_model = CatBoostClassifier(
    iterations=300,
    depth=6,
    learning_rate=0.1,
    random_seed=42,
    verbose=False,
)
cat_model.fit(X_train, y_train)
evaluate_model("CatBoost", cat_model, X_test, y_test)


# ======================================================
# 4️⃣ Random Forest
# ======================================================
# 200 trees grown on all available cores with a fixed seed.
rf_params = dict(n_estimators=200, random_state=42, n_jobs=-1)
rf_model = RandomForestClassifier(**rf_params)
rf_model.fit(X_train, y_train)
evaluate_model("Random Forest", rf_model, X_test, y_test)


# ======================================================
# 5️⃣ Logistic Regression
# ======================================================
# Standardise features: the scaler's statistics come from X_train only and
# are then applied unchanged to X_test.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# class_weight='balanced' reweights the two classes during fitting.
lr_model = LogisticRegression(
    class_weight='balanced',
    max_iter=1000,
    n_jobs=-1,
    random_state=42,
)
lr_model.fit(X_train_scaled, y_train)
evaluate_model("Logistic Regression", lr_model, X_test_scaled, y_test)


# ======================================================
# 6️⃣ Neural Network
# ======================================================
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling repeat between runs, in line with the fixed seed (42)
# given to every other model in this notebook. GPU kernels may still add a
# small amount of run-to-run variation.
tf.keras.utils.set_random_seed(42)

# Two hidden ReLU layers with dropout, ending in a single sigmoid unit.
input_dim = X_train_scaled.shape[1]
nn_model = models.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid')
])

nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Stop once val_loss has failed to improve for 3 epochs and restore the
# weights from the best epoch.
es = callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# validation_split reserves 20% of the training rows for the early-stopping monitor.
nn_model.fit(X_train_scaled, y_train, validation_split=0.2, epochs=20, batch_size=256, callbacks=[es], verbose=1)

# Sigmoid outputs strictly above 0.5 are labelled as class 1.
y_pred_nn = (nn_model.predict(X_test_scaled) > 0.5).astype(int).flatten()
acc = accuracy_score(y_test, y_pred_nn)
prec = precision_score(y_test, y_pred_nn, zero_division=0)
rec = recall_score(y_test, y_pred_nn, zero_division=0)
f1 = f1_score(y_test, y_pred_nn, zero_division=0)
cm = confusion_matrix(y_test, y_pred_nn)

print("\nNeural Network Model Evaluation:")
print(f"Accuracy:  {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall:    {rec:.4f}")
print(f"F1 Score:  {f1:.4f}")
print("Confusion Matrix:\n", cm)




Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



XGBoost Model Evaluation:
Accuracy:  0.9968
Precision: 0.6289
Recall:    0.4424
F1 Score:  0.5194
Confusion Matrix:
 [[553014    560]
 [  1196    949]]
[LightGBM] [Info] Number of positive: 6005, number of negative: 1031335
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.037216 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2290
[LightGBM] [Info] Number of data points in the train set: 1037340, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.005789 -> initscore=-5.146017
[LightGBM] [Info] Start training from score -5.146017

LightGBM Model Evaluation:
Accuracy:  0.9950
Precision: 0.3979
Recall:    0.5524
F1 Score:  0.4626
Confusion Matrix:
 [[551781   1793]
 [   960   1185]]

CatBoost Model Evaluation:
Accuracy:  0.9969
Precision: 0.6160
Recall:    0.4951
F1 Score:  0.5490
Confusion Matrix:
 [[552912    6

In [8]:
# Make sure the names used below (Y, X, X_scaled, X_test_scaled) are bound.

# Probe each name with a bare reference: a NameError means it is not bound in
# this kernel, in which case it is rebuilt from train_data.
# NOTE(review): the training cell binds the target as lowercase `y`, not `Y`,
# so after that cell alone this fallback is what defines `Y` — confirm which
# name later code expects.
try:
    Y
except NameError:
    Y = train_data["is_fraud"]

try:
    X
except NameError:
    X = train_data.drop(columns=["is_fraud"])

# Unconditionally fit a fresh scaler on the full matrix X and rescale X_test.
# NOTE(review): this rebinds `scaler` and `X_test_scaled`, which the training
# cell derived from X_train only; lr_model and nn_model were fitted on that
# earlier scaling — confirm before scoring them on these arrays.
# The import repeats the one in the main import cell.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test)
