In [37]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

In [38]:
# Load the HELOC dataset (path is resolved relative to the notebook's working directory).
data_path = 'heloc_cleaned_optimized_knn.csv'
df = pd.read_csv(data_path)

# Assuming the first column is the target variable; every remaining column is a feature.
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

# Split BEFORE fitting any preprocessing so held-out rows can never influence a
# fitted transformer. The shuffle depends only on the row count and random_state,
# so this yields the same partition as splitting an already-transformed matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Apply StandardScaler with specified parameters.
# NOTE: with_mean=False and with_std=False switch off both centering and scaling,
# so this scaler currently passes values through unchanged. Fitting it on the
# training rows only keeps the pipeline leak-free if either flag is enabled later.
scaler = StandardScaler(with_mean=False, with_std=False)
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full transformed feature matrix, kept under its original name for any cell
# that still refers to it.
X_scaled = scaler.transform(X)

# Define the XGBoost classifier with the given hyperparameters.
# - objective: "binary:logistic" is the documented objective for binary
#   classification ("reg:logistic" is the regression-flavoured variant).
#   Assumes the target is a binary 0/1 label — TODO confirm against the dataset.
# - learning_rate: the scikit-learn wrapper's name for xgboost's `eta`.
# - random_state: written out explicitly so the row/column subsampling below is
#   pinned; 0 is XGBoost's documented default seed.
xgb_model = XGBClassifier(
    booster="gbtree",
    colsample_bytree=0.5,   # fraction of columns sampled per tree
    learning_rate=0.01,
    gamma=0,
    max_depth=6,
    max_leaves=7,
    n_estimators=400,
    objective="binary:logistic",
    reg_alpha=0,
    reg_lambda=0.8333333333333334,
    subsample=0.6,          # fraction of rows sampled per boosting round
    tree_method="auto",
    random_state=0,
)

# Fit the classifier on the training partition.
xgb_model.fit(X=X_train, y=y_train)

# Predict labels for the held-out rows, then report the share predicted correctly.
y_pred = xgb_model.predict(X=X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy:.4f}")


Model Accuracy: 0.7334


In [39]:
import joblib

In [40]:
# Persist the fitted model and the scaler side by side so both can be reloaded together.
# NOTE: joblib files are pickle-based; only load them from trusted sources.
joblib.dump(value=xgb_model, filename="xgboost_heloc_model.pkl")
joblib.dump(value=scaler, filename="scaler_heloc.pkl")

['scaler_heloc.pkl']