In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# =======================
# 1. Load Dataset
# =======================
# Read the credit-approval table from the working directory.
df = pd.read_csv("cc_approvals.data")

# "approved" is the prediction target; all remaining columns are kept as features.
y = df.loc[:, "approved"]
X = df.drop(columns=["approved"])


In [None]:
# =======================
# 2. Define Columns
# =======================
# Feature columns grouped by the preprocessing each group receives downstream.
numeric = [
    "age",
    "income",
    "credit_score",
    "debt_to_income_ratio",
]
categorical = [
    "employment_status",
]

In [None]:
# Preprocessor: standardize the numeric columns and one-hot encode the
# categorical ones. Columns that appear in neither list are dropped, which is
# ColumnTransformer's default `remainder` behavior.
preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), numeric),
        (
            "cat",
            # Categories not seen during fit are encoded as all zeros rather than raising.
            OneHotEncoder(handle_unknown="ignore"),
            categorical,
        ),
    ]
)

### Using LogisticRegression

In [None]:
# =======================
# 3. Build Pipeline - Using LogisticRegression
# =======================
# Chain preprocessing and the classifier so that both are fitted together and
# the same transformations are applied automatically at prediction time.
clf = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("model", LogisticRegression()),  # library-default hyperparameters
    ]
)

In [None]:
# =======================
# 4. Train-Test Split
# =======================
# Hold out 30% of the rows for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

In [None]:
# =======================
# 5. Train Model
# =======================
# Fits every pipeline step (preprocessor, then classifier) on the training rows only.
clf.fit(X=X_train, y=y_train)

In [None]:
# =======================
# 6. Predictions
# =======================
# Hard class labels for the held-out rows.
y_pred = clf.predict(X=X_test)
# Column 1 of predict_proba is the probability of the second entry in
# clf.classes_ (presumably the "approved" class; confirm the label encoding).
y_prob = clf.predict_proba(X=X_test)[:, 1]

In [None]:
# =======================
# 7. Evaluation
# =======================
# Text report of per-class metrics on the held-out set.
print("\n📊 Classification Report:\n")
print(classification_report(y_true=y_test, y_pred=y_pred))

# AUC is computed from the predicted probabilities, not the hard labels,
# and rounded to three decimals for display.
print("ROC AUC Score:", round(roc_auc_score(y_true=y_test, y_score=y_prob), 3))

In [None]:
# 8. Confusion Matrix
# Counts of held-out rows by actual class (rows) versus predicted class (columns).
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set(xlabel="Predicted", ylabel="Actual", title="Confusion Matrix")
plt.show()

In [None]:
#9. ROC Curve
# Trade-off between true-positive and false-positive rate across score thresholds.
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(fpr, tpr, label=f"ROC Curve (AUC={roc_auc_score(y_test, y_prob):.2f})")
ax.plot([0, 1], [0, 1], "--", color="gray")  # diagonal reference line
ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curve",
)
ax.legend()
plt.show()

### Decision Tree

In [None]:
# =======================
# 10. Build Pipeline - Using Decision Tree
# =======================
# `Pipeline` is already imported in the notebook's first cell, so only the
# names that are new to this section are imported here.
from sklearn.base import clone
from sklearn.tree import DecisionTreeClassifier

# Pipeline fits its steps in place, so passing the very same `preprocessor`
# object that lives inside `clf` would make `dt_clf.fit(...)` refit the
# transformer the logistic-regression pipeline depends on. `clone` gives this
# pipeline its own unfitted copy with identical settings.
dt_clf = Pipeline(steps=[
    ("preprocessor", clone(preprocessor)),
    ("model", DecisionTreeClassifier(random_state=42))  # seeded for reproducible trees
])


In [None]:
# =======================
# 11. Define Input Data (same schema as training)
# =======================
# One hypothetical applicant, using the same column names the pipeline was
# trained on (`pd` comes from the notebook's import cell).
new_data = pd.DataFrame([{
    "age": 30,
    "income": 55000,
    "credit_score": 710,
    "debt_to_income_ratio": 0.22,
    "employment_status": "Employed"
}])

print("📄 Input Data:\n", new_data)

# =======================
# 11b. Predict (in-memory model)
# =======================
# Score with the fitted `clf` pipeline. `loaded_model` is only created by the
# "Load Model" cell further down, so using it here raised NameError on a fresh
# Restart & Run All; `clf` is the same pipeline that is later saved and reloaded.
# Separate result names keep the held-out `y_pred` / `y_prob` used by the
# evaluation cells from being overwritten.
new_pred = clf.predict(new_data)
new_prob = clf.predict_proba(new_data)[:, 1]

print("Predicted Class:", new_pred[0])       # 0 = Reject, 1 = Approve
print("Approval Probability:", new_prob[0])

In [None]:
# =======================
# 12. Train-Test Split
# =======================
# 70/30 split with a fixed seed so the partition is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

In [None]:
# =======================
# 13. Train Model
# =======================
# Fits the decision-tree pipeline (preprocessing + tree) on the training rows.
dt_clf.fit(X=X_train, y=y_train)

In [None]:
import joblib
# =======================
# 14. Save Model
# =======================
# joblib is imported in this cell because no earlier cell brings it into
# scope; without the import this cell fails with NameError on a fresh kernel.
# Note: the object persisted is `clf` (the logistic-regression pipeline,
# preprocessing included), not `dt_clf`.
joblib.dump(clf, "credit_pipeline.pkl")
print("✅ Pipeline saved as credit_pipeline.pkl")

In [1]:
import joblib
# =======================
# 15. Load Model (Offline Use)
# =======================
# Restore the pipeline written by the save step. joblib.load unpickles the
# file, so only load artifacts that come from a trusted source.
loaded_model = joblib.load(filename="credit_pipeline.pkl")
print("✅ Pipeline loaded successfully")


✅ Pipeline loaded successfully


In [2]:
import pandas as pd
# =======================
# 16. Define Input Data (same schema as training)
# =======================
# A single applicant, written column-wise: one-element list per feature.
new_data = pd.DataFrame({
    "age": [30],
    "income": [55000],
    "credit_score": [710],
    "debt_to_income_ratio": [0.22],
    "employment_status": ["Employed"],
})

print("📄 Input Data:\n", new_data)

# =======================
# 17. Predict
# =======================
# The reloaded pipeline applies its own preprocessing, so raw feature values
# are passed straight in.
y_pred = loaded_model.predict(X=new_data)
# Column 1 of predict_proba is reported as the approval probability.
y_prob = loaded_model.predict_proba(X=new_data)[:, 1]

print("Predicted Class:", y_pred[0])       # 0 = Reject, 1 = Approve
print("Approval Probability:", y_prob[0])

📄 Input Data:
    age  income  credit_score  debt_to_income_ratio employment_status
0   30   55000           710                  0.22          Employed
Predicted Class: 1
Approval Probability: 0.8021064830028617
