# In [None]:
import pandas as pd
import numpy as np

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

# ===============================
# 1. Custom Validator
# ===============================
class CreditHistoryValidator(BaseEstimator, TransformerMixin):
    """Pass-through pipeline step that rejects out-of-range input rows.

    Nothing is learned in ``fit``. ``transform`` returns a copy of the input
    with its values untouched, or raises ``ValueError`` when any row breaks
    one of these rules:

    * ``Credit_History`` must lie in the closed range ``[0, 3]``.
    * ``ApplicantIncome`` must not be negative.
    * ``CoapplicantIncome`` must not be negative (checked only when the
      column is present).

    Missing values never trigger a rule, because comparisons against NaN
    evaluate to False.
    """

    def fit(self, X: pd.DataFrame, y=None) -> "CreditHistoryValidator":
        """Return ``self`` unchanged; the validator has no state to learn."""
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Validate ``X`` and return a copy of it.

        Args:
            X: Frame containing at least the ``Credit_History`` and
                ``ApplicantIncome`` columns.

        Returns:
            A copy of ``X`` with identical contents.

        Raises:
            ValueError: If any row violates one of the class-level rules.
                The first failing rule, in the order listed on the class,
                determines the message.
            KeyError: If ``Credit_History`` or ``ApplicantIncome`` is absent.
        """
        X = X.copy()

        if ((X["Credit_History"] < 0) | (X["Credit_History"] > 3)).any():
            raise ValueError("Credit_History should be between 0 and 3")

        if (X["ApplicantIncome"] < 0).any():
            raise ValueError("ApplicantIncome cannot be negative")

        # CoapplicantIncome is log-transformed (np.log1p) alongside
        # ApplicantIncome further down the pipeline; a negative value there
        # would become NaN/-inf or a silently accepted bogus feature, so it
        # is rejected here with an explicit message instead.
        if "CoapplicantIncome" in X.columns and (X["CoapplicantIncome"] < 0).any():
            raise ValueError("CoapplicantIncome cannot be negative")

        return X

# ===============================
# 2. Load dataset
# ===============================
# Read the raw loan records and immediately discard rows that have no
# Loan_Status label, since they cannot be used for supervised training.
df = pd.read_csv("Loan dataset_classification.csv").dropna(
    subset=["Loan_Status"]
)

# ===============================
# 3. Features & Target
# ===============================
# Target: encode approval as 1 ("Y") / 0 ("N"). Any other label maps to NaN.
y = df["Loan_Status"].map({"Y": 1, "N": 0})

# Features: everything except the target, the row identifier and the two
# columns this model does not use.
X = df.drop(
    columns=["Loan_Status", "Loan_ID", "Gender", "Dependents"],
)

# ===============================
# 4. Train-test split
# ===============================
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ===============================
# 5. Column groups
# ===============================
# Income columns: routed through the log-transforming numeric pipeline.
log_cols = [
    "ApplicantIncome",
    "CoapplicantIncome",
]
# Other numeric columns: imputed and scaled without a log transform.
num_cols = [
    "LoanAmount",
    "Loan_Amount_Term",
    "Credit_History",
]
# Categorical columns: imputed and one-hot encoded.
cat_cols = [
    "Married",
    "Education",
    "Self_Employed",
    "Property_Area",
]

# ===============================
# 6. Pipelines
# ===============================
# Income columns: fill gaps with the median, apply log1p (log(1 + x), so a
# zero income stays finite), then standardise.
log_numeric_pipeline = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("log", FunctionTransformer(func=np.log1p, validate=False)),
    ("scaler", StandardScaler()),
])

# Remaining numeric columns: median imputation followed by standardisation.
numeric_pipeline = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode. handle_unknown="ignore" lets prediction proceed on categories that
# were not present during fitting instead of raising.
categorical_pipeline = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder(handle_unknown="ignore")),
])

# Route each column group to its pipeline; the outputs are concatenated in
# the order listed here.
preprocessor = ColumnTransformer(transformers=[
    ("log_num", log_numeric_pipeline, log_cols),
    ("num", numeric_pipeline, num_cols),
    ("cat", categorical_pipeline, cat_cols),
])

# ===============================
# 7. FINAL MODEL PIPELINE (WITH VALIDATION)
# ===============================
# Step order matters: validation runs first so bad rows are rejected before
# any imputation or scaling, then features are prepared, then the classifier
# is applied.
model_pipeline = Pipeline(steps=[
    ("credit_history_validation", CreditHistoryValidator()),
    ("preprocess", preprocessor),
    ("model", LogisticRegression(class_weight="balanced", max_iter=3000)),
])

# ===============================
# 8. Train model
# ===============================
# Fit every pipeline step (validation, preprocessing, classifier) on the
# training split only.
model_pipeline.fit(X=X_train, y=y_train)

# ===============================
# 9. Accuracy check
# ===============================
# Score the fitted pipeline on the held-out split.
y_pred = model_pipeline.predict(X=X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

# ===============================
# 10. Manual prediction (VALID input)
# ===============================
# A single hand-written applicant that satisfies every validation rule.
# Built column by column; each list holds the value for the one row.
manual_input = pd.DataFrame({
    # Applicant profile
    "Married": ["Yes"],
    "Education": ["Graduate"],
    "Self_Employed": ["No"],
    "Property_Area": ["Urban"],
    # Financials
    "ApplicantIncome": [5000],
    "CoapplicantIncome": [2000],
    "LoanAmount": [150],
    "Loan_Amount_Term": [360],
    "Credit_History": [1],
})

# Run the hand-written applicant through the full pipeline; the printed
# array holds the predicted class (1 = "Y", 0 = "N").
print(
    "\nManual Prediction:",
    model_pipeline.predict(X=manual_input),
)

# ===============================
# 11. Manual prediction (INVALID input demo)
# ===============================
# A single hand-written applicant that deliberately breaks two validation
# rules. Built column by column; each list holds the value for the one row.
invalid_input = pd.DataFrame({
    # Applicant profile
    "Married": ["Yes"],
    "Education": ["Graduate"],
    "Self_Employed": ["No"],
    "Property_Area": ["Urban"],
    # Financials
    "ApplicantIncome": [-1000],  # invalid: income is negative
    "CoapplicantIncome": [2000],
    "LoanAmount": [150],
    "Loan_Amount_Term": [360],
    "Credit_History": [5],       # invalid: outside the accepted 0-3 range
})

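# Uncomment the call below to see the validation step reject this input:
# it raises ValueError("Credit_History should be between 0 and 3"), because
# the Credit_History check runs before the ApplicantIncome check.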
# model_pipeline.predict(invalid_input)
