In [1]:
import pandas as pd
import numpy as np

# Modeling libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor

# ------------------
# Configuration
# ------------------
TARGET_COL = "Loan_Amount"  # column the regressor is trained to predict
N_FEATURE_COLS = 15         # the first N columns of the CSV are used as inputs
TEST_SIZE = 0.2             # fraction of rows held out for evaluation
SEED = 42                   # shared seed for the split and the model

# ------------------
# Load dataset
# ------------------
file_path = "loan_recommendation_dataset.csv"
df = pd.read_csv(file_path)

# ------------------
# Features & Target
# ------------------
# Inputs are chosen by position. If the target ever falls inside that slice
# (e.g. the CSV columns get reordered) it would leak into the features, so it
# is dropped explicitly; errors="ignore" makes this a no-op when it is absent.
X = df.iloc[:, :N_FEATURE_COLS].drop(columns=[TARGET_COL], errors="ignore")
y = df[TARGET_COL]

# Identify categorical and numerical columns.
# Classify by "is this dtype numeric?" rather than by the dtype name "object":
# text columns are not guaranteed to be stored as object (e.g. pandas
# StringDtype), and any non-numeric column sent to StandardScaler would fail.
numeric_cols = [
    col for col, dtype in X.dtypes.items()
    if pd.api.types.is_numeric_dtype(dtype)
]
categorical_cols = [col for col in X.columns if col not in numeric_cols]

# ------------------
# Preprocessing
# ------------------
# Scale numeric inputs and one-hot encode the rest; categories unseen during
# fit are encoded as all-zeros instead of raising at predict time.
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numeric_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols)
])

# ------------------
# XGBoost Model with given hyperparameters
# ------------------
xgb_model = XGBRegressor(
    colsample_bytree=1.0,
    learning_rate=0.2,
    max_depth=3,
    n_estimators=300,
    reg_alpha=0.1,
    reg_lambda=1,
    subsample=0.9,
    random_state=SEED,
    n_jobs=-1
)

# Build pipeline: preprocessing is fitted inside the pipeline, so it only ever
# sees the rows passed to fit().
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", xgb_model)
])

# ------------------
# Train/Test Split
# ------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# ------------------
# Train model
# ------------------
pipeline.fit(X_train, y_train)

# ------------------
# Predictions & Evaluation
# ------------------
y_pred = pipeline.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # RMSE = sqrt(MSE)

print("Model Performance (XGBoost):")
print(f"R² Score   : {r2:.4f}")
print(f"MAE        : {mae:.2f}")
print(f"RMSE       : {rmse:.2f}")


Model Performance (XGBoost):
R² Score   : 0.9384
MAE        : 13879.02
RMSE       : 29346.43


In [2]:
import pickle

# ------------------
# Save model as pickle
# ------------------
# One variable for the artifact path so the save and the reload below cannot
# drift apart.
model_path = "loan_recommendation_model.pkl"

with open(model_path, "wb") as f:
    pickle.dump(pipeline, f)

print("✅ Model saved as loan_recommendation_model.pkl")

# ------------------
# Load model back (for testing)
# ------------------
# NOTE: unpickling can execute arbitrary code stored in the file. This is fine
# here because the file was written just above; never load a pickle from an
# untrusted source.
with open(model_path, "rb") as f:
    loaded_model = pickle.load(f)

# Round-trip check: the reloaded pipeline must reproduce the predictions of
# the in-memory pipeline on the held-out set, otherwise the artifact is bad.
assert np.allclose(loaded_model.predict(X_test), y_pred), (
    "Reloaded model predictions differ from the in-memory pipeline"
)

# Test prediction with loaded model (first five held-out rows, by position)
sample_pred = loaded_model.predict(X_test.iloc[:5])
print("🔍 Sample Predictions (first 5):", sample_pred)


✅ Model saved as loan_recommendation_model.pkl
🔍 Sample Predictions (first 5): [112200.49  86638.63 138507.48 121003.42 255031.4 ]
