<a href="https://colab.research.google.com/github/NovrianPratama/MachineLearning/blob/main/Hyperparameter_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Hyperparameter Tuning dalam Regresi**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Load the California housing data as a (features, target) pair.
X, y = fetch_california_housing(return_X_y=True)

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training split only,
# and those fitted statistics are then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Report the shape of each scaled split.
print(f"Jumlah Baris pada X_train: {X_train.shape}")
print(f"Jumlah Baris pada X_test: {X_test.shape}")

Jumlah Baris pada X_train: (14448, 8)
Jumlah Baris pada X_test: (6192, 8)


In [None]:
# Baseline: a random forest with default hyperparameters, seeded so the result
# is reproducible. `fit` returns the estimator itself, so construction and
# training can be chained.
model_rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Score the untuned model on the held-out split; this MSE is the reference
# value that the tuned models are compared against.
y_pred = model_rf.predict(X_test)
initial_mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"MSE sebelum tuning: {initial_mse:.2f}")

MSE sebelum tuning: 0.26


## **Random Search**

In [None]:
from sklearn.model_selection import RandomizedSearchCV
import time

# Candidate values the random search samples from.
# np.arange excludes its stop value, so n_estimators is drawn from
# {100, 200, 300, 400} and max_depth from {10, 20, 30, 40}.
param_dist = {
    'n_estimators' : np.arange(100, 500, 100),
    'max_depth' : np.arange(10, 50, 10),
    'min_samples_split' : np.arange(2, 11, 2),
    'min_samples_leaf' : np.arange(1, 5),
    'bootstrap' : [True, False]
}

# 5 sampled candidates x 3 CV folds = 15 fits.
# NOTE(review): no `scoring` is passed, so candidates are ranked by the
# estimator's default score (R^2 for regressors) while the report below uses
# MSE -- confirm this is intended.
random_search = RandomizedSearchCV(
    estimator=model_rf,
    param_distributions=param_dist,
    n_iter=5,
    cv=3,
    n_jobs=-1,
    verbose=2,
    random_state=42,
)

# Time only the search itself. Previously the timer also covered the test-set
# prediction and metric computation, so the reported "tuning" time was
# slightly inflated. perf_counter is a monotonic clock meant for intervals.
start_time = time.perf_counter()
random_search.fit(X_train, y_train)
end_time = time.perf_counter()
execution_time = end_time - start_time

# Best hyperparameters found and the corresponding refitted estimator.
print(f"Best Random Search: {random_search.best_params_}")
best_rf_random = random_search.best_estimator_

# Evaluate the tuned model on the held-out split.
y_pred_random = best_rf_random.predict(X_test)
random_mse = mean_squared_error(y_test, y_pred_random)
print(f"MSE setelah tuning Random Search: {random_mse:.2f}")

print(f"Waktu Eksekusi Tuning: {execution_time:.4f} Detik")

Fitting 3 folds for each of 5 candidates, totalling 15 fits
Best Random Search: {'n_estimators': 300, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_depth': 20, 'bootstrap': True}
MSE setelah tuning Random Search: 0.25
Waktu Eksekusi Tuning: 429.3167 Detik


## Hyperparameter Bayes Optimization

In [None]:
pip install -U scikit-optimize




In [None]:
import time  # imported here so the cell does not depend on an earlier cell's import

import skopt
from skopt import BayesSearchCV

# Search space for the Bayesian optimizer: a (low, high) tuple describes a
# numeric range, a list describes categorical choices.
param_space = {
    'n_estimators': (100, 500),
    'max_depth': (10, 50),
    'min_samples_split': (2, 10),
    'min_samples_leaf': (1, 4),
    'bootstrap': [True, False]
}

# 32 optimizer iterations, each cross-validated on 3 folds.
# NOTE(review): no `scoring` is passed, so candidates are ranked by the
# estimator's default score (R^2 for regressors) while the report below uses
# MSE -- confirm this is intended.
bayes_search = BayesSearchCV(
    estimator=model_rf,
    search_spaces=param_space,
    n_iter=32,
    cv=3,
    n_jobs=-1,
    verbose=2,
    random_state=42,
)

# Time only the search itself. Previously the timer also covered the test-set
# prediction and metric computation. perf_counter is a monotonic clock meant
# for intervals.
start_time = time.perf_counter()
bayes_search.fit(X_train, y_train)
end_time = time.perf_counter()
execution_time = end_time - start_time

# Best hyperparameters found and the corresponding refitted estimator.
print(f"Best Bayes Optimation: {bayes_search.best_params_}")
best_rf_bayes = bayes_search.best_estimator_

# Evaluate the tuned model on the held-out split.
y_pred_bayes = best_rf_bayes.predict(X_test)
bayes_mse = mean_squared_error(y_test, y_pred_bayes)
print(f"MSE setelah tuning Bayes Optimation: {bayes_mse:.2f}")

print(f"Waktu Eksekusi Tuning: {execution_time:.4f} Detik")

## Hyperparameter Tuning GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV
import time

# Exhaustive grid: 3 * 3 * 3 * 3 * 2 = 162 combinations, each cross-validated
# on 3 folds (486 fits in total), so this cell is by far the most expensive.
param_grid = {
    'n_estimators' : [100, 200, 300],
    'max_depth' : [10, 20, 30],
    'min_samples_split' : [2, 5, 10],
    'min_samples_leaf' : [1, 2, 4],
    'bootstrap' : [True, False]
}

# NOTE(review): no `scoring` is passed, so candidates are ranked by the
# estimator's default score (R^2 for regressors) while the report below uses
# MSE -- confirm this is intended.
grid_search = GridSearchCV(
    estimator=model_rf,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
)

# Time only the search itself, so the figure is comparable with the other
# tuning methods and is not inflated by the evaluation below. perf_counter is
# a monotonic clock meant for intervals.
start_time = time.perf_counter()
grid_search.fit(X_train, y_train)
end_time = time.perf_counter()
execution_time = end_time - start_time

# Best hyperparameters found and the corresponding refitted estimator.
print(f"Best output Grid Search: {grid_search.best_params_}")
best_rf_grid = grid_search.best_estimator_

# Evaluate the tuned model on the held-out split.
y_pred_grid = best_rf_grid.predict(X_test)
grid_mse = mean_squared_error(y_test, y_pred_grid)
# Message typo fixed: "turning" -> "tuning".
print(f"MSE setelah tuning: {grid_mse:.2f}")

print(f"Waktu eksekusi: {execution_time:.4f} Detik")