<a href="https://colab.research.google.com/github/NovrianPratama/MachineLearning/blob/main/Hyperparameter_Tuning_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [3]:
# Download the German Credit dataset ('credit-g', version 1) from OpenML
# as a pandas feature frame X and a target series y.
X, y = fetch_openml(name='credit-g', version=1, return_X_y=True, as_frame=True)

# Convert the string target into integer labels (good = 1, bad = 0).
le = LabelEncoder()
y = le.fit_transform(y)

# One-hot encode the categorical features; drop_first=True drops the first
# level of each categorical column to avoid a redundant dummy column.
X_encoded = pd.get_dummies(X, drop_first=True)

# Hold out 30% of the rows as the test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.3, random_state=42)

# Report the number of rows and columns in each feature split.
# The second label previously said "y_train" although the value printed is X_test.shape.
print(f"Jumlah baris dan kolom pada X_train: {X_train.shape}")
print(f"Jumlah baris dan kolom pada X_test: {X_test.shape}")

Jumlah baris dan kolom pada X_train: (700, 48)
Jumlah baris dan kolom pada y_train: (300, 48)


In [4]:
# Baseline: a RandomForest with default hyperparameters, used as the
# reference point for the tuning experiments that follow.
from sklearn.ensemble import RandomForestClassifier

# fit() returns the estimator itself, so construction and training can be chained.
model_rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Accuracy on the held-out test set before any tuning.
initial_score = model_rf.score(X_test, y_test)
print(f"Skor sebelum tuning: {initial_score:.2f}")

Skor sebelum tuning: 0.76


## Hyperparameter Tuning with GridSearchCV

In [7]:
# Exhaustive grid search over RandomForest hyperparameters.
from sklearn.model_selection import GridSearchCV

# Candidate values per hyperparameter: 3 * 3 * 3 * 2 = 54 combinations.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
    criterion=['gini', 'entropy'],
)

# 3-fold cross-validation on the training data, using all available cores.
grid_search = GridSearchCV(
    estimator=model_rf,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

# Best hyperparameter combination and the estimator refit with it.
best_model_rf_grid = grid_search.best_estimator_
print(f"Best parameters (Grid Search): {grid_search.best_params_}")

# Accuracy of the tuned model on the held-out test set.
grid_search_score = best_model_rf_grid.score(X_test, y_test)
print(f"Accuracy after Grid Search: {grid_search_score:.2f}")

Fitting 3 folds for each of 54 candidates, totalling 162 fits
Best parameters (Grid Search): {'criterion': 'gini', 'max_depth': 30, 'min_samples_split': 5, 'n_estimators': 200}
Accuracy after Grid Search: 0.76


## Hyperparameter Tuning with RandomizedSearchCV

In [10]:
# Randomized search: evaluates a fixed number of sampled combinations
# instead of the full grid.
from sklearn.model_selection import RandomizedSearchCV

# Search space: five evenly spaced integer values for the tree count and depth,
# plus the same split/criterion choices used in the grid search.
n_estimators_choices = np.linspace(100, 500, 5, dtype=int)
max_depth_choices = np.linspace(10, 50, 5, dtype=int)
param_dist = {
    'n_estimators': n_estimators_choices,
    'max_depth': max_depth_choices,
    'min_samples_split': [2, 5, 10],
    'criterion': ['gini', 'entropy'],
}

# 20 sampled candidates, 3-fold cross-validation, seeded for reproducibility.
random_search = RandomizedSearchCV(
    estimator=model_rf,
    param_distributions=param_dist,
    n_iter=20,
    cv=3,
    n_jobs=-1,
    verbose=2,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Best sampled hyperparameters and the estimator refit with them.
best_model_random = random_search.best_estimator_
print(f"Best Paramter Random Search: {random_search.best_params_}")

# Accuracy of the tuned model on the held-out test set.
random_search_score = best_model_random.score(X_test, y_test)
print(f"Accuracy after Random Search: {random_search_score:.2f}")

Fitting 3 folds for each of 20 candidates, totalling 60 fits
Best Paramter Random Search: {'n_estimators': 200, 'min_samples_split': 5, 'max_depth': 30, 'criterion': 'gini'}
Accuracy after Random Search: 0.76


In [13]:
pip install scikit-optimize


Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-24.12.1-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-24.12.1-py3-none-any.whl (25 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-24.12.1 scikit-optimize-0.10.2


In [14]:
# Bayesian optimization of the RandomForest hyperparameters with scikit-optimize.
import skopt
from skopt import BayesSearchCV

# Search space: (low, high) integer tuples give the bounds for the numeric
# hyperparameters; the list enumerates the allowed split criteria.
param_space = dict(
    n_estimators=(100, 500),
    max_depth=(10, 50),
    min_samples_split=(2, 10),
    criterion=['gini', 'entropy'],
)

# 32 optimization iterations, 3-fold cross-validation, seeded for reproducibility.
bayes_search = BayesSearchCV(
    estimator=model_rf,
    search_spaces=param_space,
    n_iter=32,
    cv=3,
    n_jobs=-1,
    verbose=2,
    random_state=42,
)
bayes_search.fit(X_train, y_train)

# Best hyperparameters found and the estimator refit with them.
best_rf_bayes = bayes_search.best_estimator_
print(f"Best parameters (Bayesian Optimization): {bayes_search.best_params_}")

# Accuracy of the tuned model on the held-out test set.
bayes_search_score = best_rf_bayes.score(X_test, y_test)
print(f"Accuracy after Bayesian Optimization: {bayes_search_score:.2f}")

Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fits
Fitting 3 folds for each of 1 candidates, totalling 3 fi