**Importing the Dependencies**

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


**Load the Dataset**

In [None]:
# Load the transactions from "creditcard.csv" (path is relative to the
# notebook's working directory) into `df`, the frame the later cells work on.
df = pd.read_csv("creditcard.csv")
# Uncomment to preview the first rows:
# df.head()

In [None]:
# Column names, dtypes, non-null counts and memory usage of the loaded frame.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

In [None]:
# Count the missing (NaN) entries in every column.
df.isna().sum()

In [None]:
# Number of legit (Class == 0) and fraud (Class == 1) transactions.

df['Class'].value_counts()

# The dataset is imbalanced, so it has to be balanced before training.
# Two common options are over-sampling the minority class, e.g. with SMOTE
# (Synthetic Minority Over-sampling Technique), or under-sampling the majority
# class. This notebook under-samples the legit transactions further below.

In [None]:
# Split the transactions by label: Class 0 rows are legit, Class 1 rows are fraud.
legit = df.loc[df["Class"] == 0]
fraud = df.loc[df["Class"] == 1]

In [None]:
# (rows, columns) of each subset, legit first and fraud second.
for subset in (legit, fraud):
    print(subset.shape)

In [None]:
# Summary statistics of the transaction amount for legit rows.
legit["Amount"].describe()

In [None]:
# Summary statistics of the transaction amount for fraud rows.
fraud["Amount"].describe()

In [None]:
# Compare the per-class mean of every column (Class 0 = legit, Class 1 = fraud).

df.groupby('Class').mean()

**Exploratory Data Analysis**  

**Distribution of Fraud vs Non-Fraud**  

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Bar chart of the number of rows per class.
ax = sns.countplot(data=df, x="Class")
ax.set_title("Distribution of Fraud vs Non-Fraud")
plt.show()


**Correlation Heatmap**

In [None]:
# Heatmap of the pairwise correlations between all columns of `df`.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(df.corr(), cmap="coolwarm", cbar=True, ax=ax)
ax.set_title("Feature Correlation Heatmap")
plt.show()


**Transaction Amount vs Fraud**

In [None]:
# Box plot of the transaction amount, one box per class.
ax = sns.boxplot(data=df, x="Class", y="Amount")
ax.set_title("Transaction Amount vs Fraud")
plt.show()

**Under-Sampling**

In [None]:
# Under-sample the majority class: draw exactly as many legit rows as there are
# fraud rows so the two classes end up balanced. Deriving the count from `fraud`
# avoids a hard-coded row count, and the fixed seed makes Restart & Run All
# reproduce the same sample (and therefore the same downstream metrics).
Legit_sample = legit.sample(n=len(fraud), random_state=42)

**Concatenate the two DataFrames**

In [None]:
# Stack the sampled legit rows and all fraud rows row-wise into one frame.
new_dataset = pd.concat([Legit_sample, fraud])

In [None]:
# Class counts of the combined frame, to check the result of the under-sampling.
new_dataset['Class'].value_counts()

In [None]:
# Per-class column means of the combined (under-sampled) frame.
new_dataset.groupby('Class').mean()

**Splitting the data into Features & Targets**

In [None]:
# The label column is the target; every other column is a feature.
Y = new_dataset["Class"]
X = new_dataset.drop(columns="Class")

In [None]:
# Show the feature frame followed by the target series.
for part in (X, Y):
    print(part)

**Split the data into Training and Testing Data**

In [None]:
# Hold out 20% of the rows for testing, stratified on the label so both splits
# keep the same class ratio; random_state pins the split for reproducibility.
# NOTE(review): a test fraction of 0.6 would leave only 40% of this small,
# under-sampled dataset for training, so the conventional 0.2 is used here.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2
)

In [None]:
# Shapes of the full feature frame, the training split and the test split.
print(*(part.shape for part in (X, X_train, X_test)))

**Logistic Regression**

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression (iteration cap raised to 1000) on the training
# split, then predict labels for the test split.
lr = LogisticRegression(max_iter=1000).fit(X_train, Y_train)
y_pred_lr = lr.predict(X_test)


In [None]:
# Baseline logistic regression used by the accuracy cells below. The iteration
# cap is raised from the default (100) to 1000, matching `lr`, so the solver has
# room to converge on the unscaled features instead of stopping early with a
# ConvergenceWarning.
model = LogisticRegression(max_iter=1000)

In [None]:
# Train the baseline logistic-regression model on the training split.
model.fit(X_train,Y_train)

**Random Forest**

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 100-tree random forest (seeded for reproducibility) on the training
# split, then predict labels for the test split.
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, Y_train)
y_pred_rf = rf.predict(X_test)

**XGBoost**

In [None]:
from xgboost import XGBClassifier

# Fit a gradient-boosted tree classifier with log-loss as the evaluation metric
# and predict labels for the test split.
# `use_label_encoder` is deprecated and no longer has any effect in current
# XGBoost releases (passing it only triggers a warning), so it is not set; the
# Class labels are already the integers 0 and 1.
xgb = XGBClassifier(eval_metric="logloss")
xgb.fit(X_train, Y_train)
y_pred_xgb = xgb.predict(X_test)


**Model Evaluation**

In [None]:
# Accuracy on training data

X_train_prediction = model.predict(X_train)
# accuracy_score is documented as (y_true, y_pred); pass both by keyword so the
# ground truth and the predictions cannot be swapped.
training_data_accuracy = accuracy_score(y_true=Y_train, y_pred=X_train_prediction)

In [None]:
# Report the fraction of training rows the baseline model labels correctly.
print("Accuracy on Training Data : ", training_data_accuracy)

In [None]:
# Accuracy on test data

X_test_prediction = model.predict(X_test)
# accuracy_score is documented as (y_true, y_pred); pass both by keyword so the
# ground truth and the predictions cannot be swapped.
test_data_accuracy = accuracy_score(y_true=Y_test, y_pred=X_test_prediction)


In [None]:
# Report the fraction of held-out test rows the baseline model labels correctly.
print("Accuracy on Test Data : ", test_data_accuracy)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

def evaluate_model(Y_test, Y_pred, model_name):
    """Print a classification report and show the confusion matrix for one model.

    Args:
        Y_test: Ground-truth labels.
        Y_pred: Labels predicted for the same samples, in the same order.
        model_name: Name printed above the report and used in the plot title.

    Returns:
        None. The report goes to stdout and the matrix is shown as a figure.
    """
    print(f"{model_name} ")
    print(classification_report(Y_test, Y_pred))

    # Build the matrix once and hand it straight to the display helper.
    matrix = confusion_matrix(Y_test, Y_pred)
    ConfusionMatrixDisplay(confusion_matrix=matrix).plot()
    plt.title(f"{model_name} Confusion Matrix")
    plt.show()

# Evaluate all models, in the order they were trained
for name, predictions in (
    ("Logistic Regression", y_pred_lr),
    ("Random Forest", y_pred_rf),
    ("XGBoost", y_pred_xgb),
):
    evaluate_model(Y_test, predictions, name)


**ROC Curve (Receiver Operating Characteristic Curve)**



In [None]:
from sklearn.metrics import roc_curve, auc

def plot_roc(model, X_test, Y_test, model_name):
    """Draw one model's ROC curve on the current matplotlib axes.

    Args:
        model: Fitted classifier exposing ``predict_proba``.
        X_test: Feature rows to score.
        Y_test: Ground-truth labels for ``X_test``.
        model_name: Name used in the legend entry, followed by the AUC.

    Returns:
        None. The curve is only added to the plot; the caller creates the
        figure and calls ``plt.legend()`` / ``plt.show()``.
    """
    # Column 1 of predict_proba -- presumably the fraud (Class == 1) probability.
    scores = model.predict_proba(X_test)[:, 1]
    false_pos, true_pos, _thresholds = roc_curve(Y_test, scores)
    roc_auc = auc(false_pos, true_pos)
    plt.plot(false_pos, true_pos, label=f'{model_name} (AUC = {roc_auc:.2f})')

# Overlay the ROC curves of the three fitted models on a single figure.
plt.figure(figsize=(8,6))
plot_roc(lr, X_test, Y_test, "Logistic Regression")
plot_roc(rf, X_test, Y_test, "Random Forest")  # legend label typo fixed
plot_roc(xgb, X_test, Y_test, "XGBoost")
# Dashed black diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0,1], [0,1], 'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve Comparison")
plt.legend()
plt.show()
