In [None]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from scipy.stats import randint, uniform

In [None]:
# Load the training set from disk
data = pd.read_csv('train.csv')

# Target vector: the sale price column
y = data['SalePrice']

# Feature matrix: every column except the row identifier and the target
X = data.drop(['Id', 'SalePrice'], axis=1)

In [None]:
# Partition the feature columns by dtype: object columns are treated as
# categorical, int64/float64 columns as numeric
categorical_features = X.select_dtypes(include=['object']).columns
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns

# Numeric branch: fill missing values with the column median, then standardize
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent value, then
# one-hot encode; categories not seen during fit are ignored at transform time
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own branch and concatenate the results
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

In [None]:
# Meta learner: combines the base learners' predictions into the final output
meta_learner = Lasso(alpha=0.1)

# Base learners, all seeded identically for reproducible fits
xgb = XGBRegressor(objective='reg:squarederror', random_state=42)
gb = GradientBoostingRegressor(random_state=42)
rf = RandomForestRegressor(random_state=42)

# Stacked ensemble; the names given here ('rf', 'gb', 'xgb') are the prefixes
# used to address each base learner's hyperparameters
stacking_regressor = StackingRegressor(
    final_estimator=meta_learner,
    estimators=[
        ('rf', rf),
        ('gb', gb),
        ('xgb', xgb),
    ],
)

In [None]:
# End-to-end pipeline: preprocessing followed by the stacked regressor
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', stacking_regressor),
])

In [None]:
# Hyperparameter search space for RandomizedSearchCV.
#
# Keys follow scikit-learn's nested-parameter convention, in which every level
# is joined by a DOUBLE underscore:
#     <pipeline step>__<stacked estimator name>__<parameter>
# Single underscores are not resolved to nested estimators, so the search
# would reject such keys as invalid parameters of the pipeline.
#
# Distribution semantics (scipy.stats):
#   randint(low, high)  -> integers in [low, high)
#   uniform(loc, scale) -> floats in [loc, loc + scale]
param_grid = {
    # Random forest base learner
    'regressor__rf__n_estimators': randint(100, 500),
    'regressor__rf__max_depth': randint(3, 10),
    # Gradient boosting base learner
    'regressor__gb__n_estimators': randint(100, 500),
    'regressor__gb__learning_rate': uniform(0.01, 0.1),
    'regressor__gb__max_depth': randint(3, 10),
    # XGBoost base learner
    'regressor__xgb__n_estimators': randint(100, 500),
    'regressor__xgb__learning_rate': uniform(0.01, 0.1),
    'regressor__xgb__max_depth': randint(3, 10),
    # Lasso meta learner (the stacking regressor's final_estimator)
    'regressor__final_estimator__alpha': uniform(0.01, 1.0)
}

In [None]:
# Hold out 20% of the rows as a test set; the seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Set up the randomized search: 50 sampled candidates, 5-fold cross-validation,
# scored by negative mean squared error, using all available cores
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=50,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
    verbose=2,
    random_state=42,
)

In [None]:
# Run the hyperparameter search on the training split only
random_search.fit(
    X_train,
    y_train,
)

# Report the best hyperparameter combination found by the search
print(
    "Best Parameters:",
    random_search.best_params_,
)