In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report


In [2]:
# Load the raw dataset; the "Class" column is used as the target below.
df = pd.read_csv("../data/raw/creditcard.csv")

# Target is the "Class" column; every remaining column is a feature.
y = df["Class"]
X = df.drop(columns=["Class"])

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits so nothing from the test set leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [3]:
# Final model: logistic regression with balanced class weights, fit on the
# scaled training split.
final_model = LogisticRegression(
    class_weight="balanced",
    random_state=42,
    max_iter=1000,
).fit(X_train_scaled, y_train)

# Keep only column 1 of predict_proba for every test row, i.e. the probability
# assigned to the second entry of final_model.classes_.
y_proba = final_model.predict_proba(X_test_scaled)[:, 1]


In [4]:
def risk_bucket(prob, low_threshold=0.2, high_threshold=0.7):
    """Map a predicted probability to a coarse risk label.

    Parameters
    ----------
    prob : float
        Predicted probability to bucket.
    low_threshold : float, default 0.2
        Values strictly below this are labelled "LOW".
    high_threshold : float, default 0.7
        Values at or above this are labelled "HIGH"; values in
        [low_threshold, high_threshold) are labelled "MEDIUM".

    Returns
    -------
    str
        One of "LOW", "MEDIUM" or "HIGH".

    Raises
    ------
    ValueError
        If ``low_threshold`` is greater than ``high_threshold``.

    Notes
    -----
    A NaN ``prob`` compares False against both thresholds and therefore
    falls through to "HIGH".
    """
    if low_threshold > high_threshold:
        raise ValueError(
            "low_threshold must not exceed high_threshold "
            f"(got {low_threshold!r} > {high_threshold!r})"
        )

    if prob < low_threshold:
        return "LOW"
    if prob < high_threshold:
        return "MEDIUM"
    return "HIGH"


In [5]:
# Bucket every test-set probability into a risk label, then show how many
# rows landed in each bucket.
risk_levels = pd.Series(y_proba).map(risk_bucket)
risk_levels.value_counts()


LOW       51355
MEDIUM     4873
HIGH        734
Name: count, dtype: int64

In [6]:
def decision_logic(risk):
    """Translate a risk label into the action to take.

    "LOW" maps to "ALLOW" and "MEDIUM" maps to "REQUIRE_VERIFICATION".
    Every other value (including "HIGH") maps to "FLAG_FOR_REVIEW".
    """
    # Labels with a dedicated action, checked in order; anything that matches
    # none of them falls through to the review action.
    routed = (
        ("LOW", "ALLOW"),
        ("MEDIUM", "REQUIRE_VERIFICATION"),
    )
    for level, action in routed:
        if risk == level:
            return action
    return "FLAG_FOR_REVIEW"


In [7]:
# Turn each risk label into its action and tally the actions.
decisions = risk_levels.map(decision_logic)
decisions.value_counts()


ALLOW                   51355
REQUIRE_VERIFICATION     4873
FLAG_FOR_REVIEW           734
Name: count, dtype: int64

In [8]:
# One row per test sample: true label, assigned risk bucket, resulting action.
# The underlying arrays are used so the columns are combined by position:
# y_test keeps its index from the split, while risk_levels and decisions
# carry the fresh default index of pd.Series(y_proba).
results = pd.DataFrame(
    {
        "Actual": y_test.values,
        "Risk": risk_levels.values,
        "Decision": decisions.values,
    }
)

# Count of test rows for each (actual class, risk bucket) combination.
pd.crosstab(index=results["Actual"], columns=results["Risk"])


Risk,HIGH,LOW,MEDIUM
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,645,51349,4870
1,89,6,3


In [9]:
# Per-class precision/recall/F1 when a probability of at least 0.5 is
# treated as a positive prediction.
print(
    classification_report(
        y_true=y_test,
        y_pred=np.where(y_proba >= 0.5, 1, 0),
        digits=4,
    )
)


              precision    recall  f1-score   support

           0     0.9999    0.9756    0.9876     56864
           1     0.0610    0.9184    0.1144        98

    accuracy                         0.9755     56962
   macro avg     0.5304    0.9470    0.5510     56962
weighted avg     0.9982    0.9755    0.9861     56962

