In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_auc_score,
    roc_curve
)

import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Read ../data/creditcard.csv into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="../data/creditcard.csv")
df.head(n=5)

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Share of each `Class` value, expressed as a percentage of all rows.
df["Class"].value_counts(normalize=True).mul(100)

In [None]:
# Separate the predictors from the target.
# X holds every column except "Class"; y is the "Class" column itself,
# i.e. the 0/1 label the model is trained to predict and is later scored against.
X = df.drop(columns="Class")
y = df["Class"]


In [None]:
# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
# Stratifying on y keeps the fraud share the same in the training and test
# splits as in the full data (about 0.17%, per the original author's note).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Standardize the `Amount` column. The scaler learns its statistics from the
# training split only; the test split is transformed with that same fitted
# scaler. No other column is modified here.
scaler = StandardScaler().fit(X_train[["Amount"]])
X_train["Amount"] = scaler.transform(X_train[["Amount"]])
X_test["Amount"] = scaler.transform(X_test[["Amount"]])



In [None]:
# Logistic-regression classifier:
#   - class_weight="balanced" asks scikit-learn to reweight the classes
#     (the target is heavily skewed towards non-fraud),
#   - max_iter=1000 allows the solver up to 1000 iterations,
#   - random_state=42 pins the seed.
model = LogisticRegression(class_weight="balanced", max_iter=1000, random_state=42)


In [None]:
# Fit on the training split, predict hard labels for the held-out split,
# and print per-class precision / recall / F1.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# digits=4 matches the precision of the threshold-adjusted report printed
# at the end of the notebook, so the two reports can be compared directly.
print(classification_report(y_test, y_pred, digits=4))

In [None]:
# Confusion matrix of the default predictions, drawn as an annotated heatmap.
# Rows are the actual classes, columns the predicted classes; fmt="d" prints
# the cell counts as plain integers.
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("Actual")
# Title added so the figure stands alone, like the ROC plot further down.
plt.title("Confusion Matrix")
plt.show()


In [None]:
# Predicted probability of the positive class for each test row: the second
# column of predict_proba, which this notebook treats as "fraud".
y_probs = model.predict_proba(X_test)[:, 1]

# Area under the ROC curve, computed from the probabilities rather than from
# hard labels.
roc_auc = roc_auc_score(y_true=y_test, y_score=y_probs)
print(f"ROC-AUC Score: {roc_auc:.4f}")


In [None]:
# False-positive rate, true-positive rate and the score cut-offs that
# produce them, computed from the fraud probabilities.
fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_probs)

plt.figure(figsize=(6, 5))
plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {roc_auc:.4f})")
plt.plot([0, 1], [0, 1], color="gray", linestyle="--")  # chance-level diagonal
plt.title("ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend()
plt.show()


In [None]:
# Probability cut-off above which a transaction is flagged as fraud.
# Kept as a named constant so the threshold can be tuned in one place
# instead of being repeated in both a comment and the comparison.
FRAUD_THRESHOLD = 0.3

# Flag a row as fraud (1) when its predicted fraud probability is strictly
# greater than the threshold, otherwise 0, then report per-class metrics.
y_pred_adjusted = (y_probs > FRAUD_THRESHOLD).astype(int)

print(classification_report(y_test, y_pred_adjusted, digits=4))
