<a href="https://colab.research.google.com/github/SAYEDASHRAF1218/CODSOFT.2/blob/main/creditcard_fraud_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE

# 2. Load datasets
# Both CSVs go through the same steps: read the whole file, then keep a 10%
# random sample drawn with a fixed seed so reruns select the same rows.
train_df, test_df = (
    pd.read_csv(csv_path).sample(frac=0.1, random_state=42)
    for csv_path in ("fraudTrain.csv", "fraudTest.csv")
)

# Report (rows, columns) of each sampled frame as a quick sanity check.
print("Training shape:", train_df.shape)
print("Test shape:", test_df.shape)

# 3. Split features and target (target is is_fraud)
y_train = train_df["is_fraud"]
y_test = test_df["is_fraud"]

# Columns kept out of the feature matrix.
# NOTE(review): despite the name, this list also appears to contain
# identifier-like fields ('Unnamed: 0', 'cc_num', 'zip') -- presumably excluded
# because they are labels rather than quantities; confirm against the CSV schema.
non_numerical_cols = [
    'Unnamed: 0', 'trans_date_trans_time', 'cc_num', 'merchant', 'category',
    'first', 'last', 'street', 'city', 'state', 'zip', 'job', 'dob',
    'trans_num', 'gender',
]

# Remove the target and the excluded columns from each frame in a single pass;
# whatever remains is the model input.
dropped_cols = ["is_fraud"] + non_numerical_cols
X_train = train_df.drop(columns=dropped_cols)
X_test = test_df.drop(columns=dropped_cols)


# 4. Scale features
# The scaler is fitted on the original (un-resampled) training rows only and
# then applied unchanged to the test rows, so the learned mean/std describe the
# real training distribution rather than a synthetically balanced one.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 5. Handle class imbalance on the training set
# SMOTE creates synthetic minority samples by interpolating between nearest
# neighbours. It therefore runs on the standardised matrix: on raw features the
# neighbour distances would be dominated by whichever column has the largest
# numeric range. Only the training data is resampled; the test set keeps its
# natural class ratio.
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train_scaled, y_train)

# 6. Define models
# Maps a display name to an unfitted estimator. Insertion order is the order in
# which the models are later trained and reported.
models = {}
# Raised iteration cap (1000) for the logistic-regression solver.
models["Logistic Regression"] = LogisticRegression(max_iter=1000)
# Tree-based models are seeded so repeated runs build the same trees.
models["Decision Tree"] = DecisionTreeClassifier(random_state=42)
models["Random Forest"] = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

# 7. Train and evaluate
# Each model is fitted on the resampled training data and scored on the
# untouched test set.
for name, model in models.items():
    print(f"\n===== {name} =====")
    model.fit(X_train_res, y_train_res)

    # Hard 0/1 labels drive the confusion matrix and classification report.
    y_pred = model.predict(X_test)
    # Continuous score for the positive class, used for ROC-AUC. Passing hard
    # labels to roc_auc_score would collapse the ROC curve to a single
    # operating point and report balanced accuracy instead of the real AUC.
    # Assumes the labels are 0/1 so that column 1 is the fraud class.
    y_score = model.predict_proba(X_test)[:, 1]

    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred), "\n")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("ROC-AUC Score:", roc_auc_score(y_test, y_score))

Training shape: (129668, 23)
Test shape: (55572, 23)

===== Logistic Regression =====
Confusion Matrix:
[[52693  2663]
 [   44   172]] 

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.97     55356
           1       0.06      0.80      0.11       216

    accuracy                           0.95     55572
   macro avg       0.53      0.87      0.54     55572
weighted avg       1.00      0.95      0.97     55572

ROC-AUC Score: 0.874094748336023

===== Decision Tree =====
Confusion Matrix:
[[53591  1765]
 [  135    81]] 

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.97      0.98     55356
           1       0.04      0.38      0.08       216

    accuracy                           0.97     55572
   macro avg       0.52      0.67      0.53     55572
weighted avg       0.99      0.97      0.98     55572

ROC-AUC Score: 0.6715577353855047

===== Random Fore