<a href="https://colab.research.google.com/github/MANOJ-80/Fraud-Detection-NM/blob/main/NM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas scikit-learn xgboost lightgbm catboost matplotlib seaborn




In [None]:
# 📌 Import Libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

import warnings
warnings.filterwarnings("ignore")

## 📌 Load Dataset
# Location of the credit-card CSV inside the Colab runtime; change here if the
# file lives somewhere else.
DATA_PATH = '/content/creditcard.csv'
data = pd.read_csv(DATA_PATH)

# 📌 Check for missing values in 'Class'
missing_values = data['Class'].isnull().sum()
print(f"Missing values in 'Class': {missing_values}")

# 📌 If there are any missing values — drop those rows
if missing_values > 0:
    data = data.dropna(subset=['Class'])
    print("Dropped rows with missing target values.")

# 📌 Define Features (X) and Target (y)
X = data.drop('Class', axis=1)
y = data['Class']
print(X)

# 📌 Split the Dataset (Stratify keeps class distribution same)
# The split is done BEFORE scaling so the scaler never sees test rows;
# fitting it on the full dataset would leak test-set statistics into training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 📌 Standardize 'Time' and 'Amount'
# Fit on the training split only, then apply the same fitted transform to the
# test split. `X` itself is intentionally left unscaled.
SCALED_COLUMNS = ['Time', 'Amount']
scaler = StandardScaler()
# Explicit copies so the column assignment below never writes into `X`.
X_train = X_train.copy()
X_test = X_test.copy()
X_train[SCALED_COLUMNS] = scaler.fit_transform(X_train[SCALED_COLUMNS])
X_test[SCALED_COLUMNS] = scaler.transform(X_test[SCALED_COLUMNS])

# 📌 Confirm no NaNs now
print(f"Remaining missing values in y_train: {y_train.isnull().sum()}")
print(f"Remaining missing values in y_test: {y_test.isnull().sum()}")



Missing values in 'Class': 0
            Time         V1         V2        V3        V4        V5  \
0            0.0  -1.359807  -0.072781  2.536347  1.378155 -0.338321   
1            0.0   1.191857   0.266151  0.166480  0.448154  0.060018   
2            1.0  -1.358354  -1.340163  1.773209  0.379780 -0.503198   
3            1.0  -0.966272  -0.185226  1.792993 -0.863291 -0.010309   
4            2.0  -1.158233   0.877737  1.548718  0.403034 -0.407193   
...          ...        ...        ...       ...       ...       ...   
284802  172786.0 -11.881118  10.071785 -9.834783 -2.066656 -5.364473   
284803  172787.0  -0.732789  -0.055080  2.035030 -0.738589  0.868229   
284804  172788.0   1.919565  -0.301254 -3.249640 -0.557828  2.630515   
284805  172788.0  -0.240440   0.530483  0.702510  0.689799 -0.377961   
284806  172792.0  -0.533413  -0.189733  0.703337 -0.506271 -0.012546   

              V6        V7        V8        V9  ...       V20       V21  \
0       0.462388  0.239599  0.0

In [None]:
# Train a 100-tree Random Forest on the training split and evaluate it on the
# held-out test split.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
# ROC AUC is a ranking metric: it must be computed from a continuous score
# (here the predicted probability of class 1), not from thresholded 0/1 labels.
y_proba_rf = rf_model.predict_proba(X_test)[:, 1]

print("📊 Random Forest Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_rf):.4f}")
print(f"ROC AUC Score: {roc_auc_score(y_test, y_proba_rf):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))
print("Classification Report:")
print(classification_report(y_test, y_pred_rf))


📊 Random Forest Results:
Accuracy: 0.9996
ROC AUC Score: 0.9081
Confusion Matrix:
[[56859     5]
 [   18    80]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.94      0.82      0.87        98

    accuracy                           1.00     56962
   macro avg       0.97      0.91      0.94     56962
weighted avg       1.00      1.00      1.00     56962



In [None]:
# Train an XGBoost classifier on the training split and evaluate it on the
# held-out test split.
# NOTE(review): `use_label_encoder` is reported as deprecated in recent XGBoost
# releases — confirm against the installed version before removing it.
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)
# ROC AUC is a ranking metric: it must be computed from a continuous score
# (here the predicted probability of class 1), not from thresholded 0/1 labels.
y_proba_xgb = xgb_model.predict_proba(X_test)[:, 1]

print("📊 XGBoost Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_xgb):.4f}")
print(f"ROC AUC Score: {roc_auc_score(y_test, y_proba_xgb):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_xgb))
print("Classification Report:")
print(classification_report(y_test, y_pred_xgb))


📊 XGBoost Results:
Accuracy: 0.9995
ROC AUC Score: 0.9030
Confusion Matrix:
[[56857     7]
 [   19    79]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.92      0.81      0.86        98

    accuracy                           1.00     56962
   macro avg       0.96      0.90      0.93     56962
weighted avg       1.00      1.00      1.00     56962



In [None]:
# Train a LightGBM classifier (library defaults, fixed seed) on the training
# split and evaluate it on the held-out test split.
lgbm_model = LGBMClassifier(random_state=42)
lgbm_model.fit(X_train, y_train)
y_pred_lgbm = lgbm_model.predict(X_test)
# ROC AUC is a ranking metric: it must be computed from a continuous score
# (here the predicted probability of class 1), not from thresholded 0/1 labels.
y_proba_lgbm = lgbm_model.predict_proba(X_test)[:, 1]

print("📊 LightGBM Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_lgbm):.4f}")
print(f"ROC AUC Score: {roc_auc_score(y_test, y_proba_lgbm):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_lgbm))
print("Classification Report:")
print(classification_report(y_test, y_pred_lgbm))


[LightGBM] [Info] Number of positive: 394, number of negative: 227451
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.035844 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7650
[LightGBM] [Info] Number of data points in the train set: 227845, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.001729 -> initscore=-6.358339
[LightGBM] [Info] Start training from score -6.358339
📊 LightGBM Results:
Accuracy: 0.9970
ROC AUC Score: 0.7540
Confusion Matrix:
[[56740   124]
 [   48    50]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.29      0.51      0.37        98

    accuracy                           1.00     56962
   macro avg       0.64      0.75      0.68     56962
weighted avg       1.00      1.00      

In [None]:
# Train a CatBoost classifier (training log silenced via verbose=0) on the
# training split and evaluate it on the held-out test split.
cat_model = CatBoostClassifier(verbose=0, random_state=42)
cat_model.fit(X_train, y_train)
y_pred_cat = cat_model.predict(X_test)
# ROC AUC is a ranking metric: it must be computed from a continuous score
# (here the predicted probability of class 1), not from thresholded 0/1 labels.
y_proba_cat = cat_model.predict_proba(X_test)[:, 1]

print("📊 CatBoost Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_cat):.4f}")
print(f"ROC AUC Score: {roc_auc_score(y_test, y_proba_cat):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_cat))
print("Classification Report:")
print(classification_report(y_test, y_pred_cat))


📊 CatBoost Results:
Accuracy: 0.9996
ROC AUC Score: 0.9132
Confusion Matrix:
[[56860     4]
 [   17    81]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.95      0.83      0.89        98

    accuracy                           1.00     56962
   macro avg       0.98      0.91      0.94     56962
weighted avg       1.00      1.00      1.00     56962



In [None]:
# Train a 100-estimator AdaBoost classifier on the training split and evaluate
# it on the held-out test split.
ada_model = AdaBoostClassifier(n_estimators=100, random_state=42)
ada_model.fit(X_train, y_train)
y_pred_ada = ada_model.predict(X_test)
# ROC AUC is a ranking metric: it must be computed from a continuous score
# (here the predicted probability of class 1), not from thresholded 0/1 labels.
y_proba_ada = ada_model.predict_proba(X_test)[:, 1]

print("📊 AdaBoost Results:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_ada):.4f}")
print(f"ROC AUC Score: {roc_auc_score(y_test, y_proba_ada):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_ada))
print("Classification Report:")
print(classification_report(y_test, y_pred_ada))


📊 AdaBoost Results:
Accuracy: 0.9990
ROC AUC Score: 0.8671
Confusion Matrix:
[[56834    30]
 [   26    72]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.71      0.73      0.72        98

    accuracy                           1.00     56962
   macro avg       0.85      0.87      0.86     56962
weighted avg       1.00      1.00      1.00     56962



In [None]:
import joblib

# Persist the trained Random Forest so it can be reloaded without retraining.
joblib.dump(rf_model, 'random_forest_model.pkl')
# The forest was trained on standardized 'Time'/'Amount' columns, so the fitted
# scaler is saved alongside it: raw transactions must be passed through the
# same scaler before calling rf_model.predict().
joblib.dump(scaler, 'scaler.pkl')

print("✅ Random Forest model saved as 'random_forest_model.pkl'")
print("✅ Fitted scaler saved as 'scaler.pkl'")


✅ Random Forest model saved as 'random_forest_model.pkl'


In [None]:
# Offer the saved model as a browser download when running inside Google Colab.
# `google.colab` only exists on Colab runtimes, so the import is guarded to keep
# "Restart & Run All" working in other Jupyter environments.
try:
    from google.colab import files
except ImportError:
    files = None
    print("Not running in Google Colab; 'random_forest_model.pkl' is in the working directory.")
else:
    files.download('random_forest_model.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>