In [2]:
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import pandas as pd

df = pd.read_csv('creditcard.csv')

X = df.drop(['Class'],axis = 1)
y =df['Class']

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2 , random_state = 42)
print(f"Before SMOTE: {y_train.value_counts()}")

smote = SMOTE(random_state=42)

X_train_smote, y_train_smote = smote.fit_resample(X_train,y_train)
print(f"After SMOTE: {y_train_smote.value_counts()}")


Before SMOTE: Class
0    227451
1       394
Name: count, dtype: int64
After SMOTE: Class
0    227451
1    227451
Name: count, dtype: int64


In [None]:
# from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:

# # 1. Logistic Regression
# lr_model = LogisticRegression(max_iter=1000,solver = 'saga')
# lr_model.fit(X_train_smote, y_train_smote)
# y_pred_lr = lr_model.predict(X_test)

# 2. Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_smote, y_train_smote)
y_pred_rf = rf_model.predict(X_test)

# 3. XGBoost Classifier
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train_smote, y_train_smote)
y_pred_xgb = xgb_model.predict(X_test)

# Function to evaluate model
def evaluate_model(y_test, y_pred, model_name):
    print(f"----- {model_name} -----")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("\n")

# Evaluate all models
# evaluate_model(y_test, y_pred_lr, "Logistic Regression")
evaluate_model(y_test, y_pred_rf, "Random Forest")
evaluate_model(y_test, y_pred_xgb, "XGBoost")



In [14]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score

# Predict on Test Set
y_pred_rf = rf_model.predict(X_test)
y_pred_xgb = xgb_model.predict(X_test)

# Evaluation Function
def evaluate_model(y_test, y_pred, model_name):
    print(f"----- {model_name} -----")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("\n")

# Evaluate Random Forest
evaluate_model(y_test, y_pred_rf, "Random Forest")

# Evaluate XGBoost
evaluate_model(y_test, y_pred_xgb, "XGBoost")


----- Random Forest -----
Accuracy: 0.9995
Confusion Matrix:
[[56852    12]
 [   15    83]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.87      0.85      0.86        98

    accuracy                           1.00     56962
   macro avg       0.94      0.92      0.93     56962
weighted avg       1.00      1.00      1.00     56962



----- XGBoost -----
Accuracy: 0.9994
Confusion Matrix:
[[56843    21]
 [   14    84]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.80      0.86      0.83        98

    accuracy                           1.00     56962
   macro avg       0.90      0.93      0.91     56962
weighted avg       1.00      1.00      1.00     56962





In [17]:
import joblib

# Save the XGBoost Model
joblib.dump(xgb_model, 'credit_card_fraud_xgb_model.pkl')



['credit_card_fraud_xgb_model.pkl']