In [1]:
import pandas as pd
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, classification_report

import joblib


In [5]:
# Read the training and test CSV files; both paths are resolved relative to
# the parent of the current working directory.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ("../train.csv", "../test.csv")
)

In [7]:
# Separate the "default" target column from the remaining feature columns.
# drop() returns a new frame, so train_df / test_df are left unmodified.
y_train = train_df["default"]
X_train = train_df.drop(columns="default")

y_test = test_df["default"]
X_test = test_df.drop(columns="default")


In [9]:
# Column names grouped by how they are preprocessed downstream:
# numeric columns are scaled, categorical columns are one-hot encoded.
numeric_features = ["LIMIT_BAL", "AGE", "UTILIZATION_RATE", "PAY_DELAY_MONTHS"]
categorical_features = ["SEX", "EDUCATION", "MARRIAGE"]


In [11]:
# One single-step pipeline per column type.
# Numeric columns are standardized with StandardScaler.
numeric_transformer = Pipeline([("scaler", StandardScaler())])

# Categorical columns are one-hot encoded; handle_unknown="ignore" lets
# transform() accept categories that were not present during fit instead of
# raising an error.
categorical_transformer = Pipeline(
    [("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

In [13]:
# Apply each transformer to its own column group. No `remainder` is given,
# so the ColumnTransformer default applies to columns in neither list.
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features),
])


In [15]:
# Full estimator: the column preprocessor followed by a logistic-regression
# classifier. class_weight="balanced" reweights classes inversely to their
# frequency; random_state is fixed to 42.
model = Pipeline([
    ("preprocessor", preprocessor),
    (
        "classifier",
        LogisticRegression(random_state=42, class_weight="balanced", max_iter=1000),
    ),
])


In [21]:
# Fit the whole pipeline (preprocessing and classifier) on the training split.
# Left as a bare expression so the cell still displays the fitted estimator.
model.fit(X=X_train, y=y_train)


In [23]:
# Score the test split: hard class labels, plus the second predict_proba
# column (index 1) as the per-row probability used for ranking.
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)[:, 1]

# ROC-AUC is computed from the probabilities; the classification report is
# computed from the hard labels.
print("ROC-AUC:", roc_auc_score(y_test, y_pred_proba))
print(classification_report(y_test, y_pred))


ROC-AUC: 0.7092528878143534
              precision    recall  f1-score   support

           0       0.60      0.68      0.64       566
           1       0.70      0.62      0.66       684

    accuracy                           0.65      1250
   macro avg       0.65      0.65      0.65      1250
weighted avg       0.65      0.65      0.65      1250



In [31]:
# Persist the fitted pipeline to disk with joblib. Left as a bare expression
# so the cell still displays the list of written file names.
joblib.dump(value=model, filename="../Notebooks/credit_risk_logistic_model.joblib")


['../Notebooks/credit_risk_logistic_model.joblib']