<a href="https://colab.research.google.com/github/RifatMuhtasim/Data_Science_Workflow/blob/main/5.1.Optuna_Regression_Hyperparameter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
try:
    import optuna
except:
    !pip install --quiet optuna
    import optuna

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

def objective(X, y):
    """Return the mean 5-fold cross-validated RMSE of a default LinearRegression.

    Args:
        X: Feature matrix, forwarded unchanged to ``cross_val_score``.
        y: Target values, forwarded unchanged to ``cross_val_score``.

    Returns:
        The mean of the per-fold RMSE values.
    """
    # numpy is not imported before this point in the notebook, so import it
    # locally to keep the function runnable on its own.
    import numpy as np

    # The scorer reports *negative* MSE (higher is better); flip the sign back.
    fold_mse = -cross_val_score(
        LinearRegression(), X, y,
        scoring='neg_mean_squared_error', cv=5, n_jobs=-1,
    )
    # Root per fold first, then average across folds.
    return np.sqrt(fold_mse).mean()

# Baseline score for plain linear regression.
# NOTE(review): X and y are not defined in the visible cells — they must be
# created before this cell is run.
rmse_result = objective(X, y)
print(f"RMSE: {rmse_result}")

In [None]:
# Compare multiple models
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor


def objective(X, y, model):
    """Return the mean 5-fold cross-validated RMSE of ``model``.

    Args:
        X: Feature matrix, forwarded unchanged to ``cross_val_score``.
        y: Target values, forwarded unchanged to ``cross_val_score``.
        model: Estimator instance to evaluate.

    Returns:
        The mean of the per-fold RMSE values.
    """
    # numpy is not imported before this point in the notebook, so import it
    # locally to keep the function runnable on its own.
    import numpy as np

    # The scorer reports *negative* MSE (higher is better); flip the sign back.
    fold_mse = -cross_val_score(
        model, X, y, scoring='neg_mean_squared_error', cv=5, n_jobs=-1
    )
    # Root per fold first, then average across folds.
    return np.sqrt(fold_mse).mean()


# XGBRegressor is not imported by this cell's import list; bring it in here so
# the comparison does not fail with a NameError.
from xgboost import XGBRegressor

# Candidate regressors, all with default hyperparameters.
models = {
    'Linear_Regression': LinearRegression(),
    'Random_Forest': RandomForestRegressor(),
    'Decision_Tree': DecisionTreeRegressor(),
    'XGB_Regressor': XGBRegressor(),
    'SVM': SVR(),
    'KNearest_Neighbors': KNeighborsRegressor(),
}

# Score every candidate with the shared cross-validation objective.
for model_name, model in models.items():
    result = objective(X, y, model=model)
    print(f"{model_name} RMSE is: ", result)

# 1. Optuna

In [None]:
# Linear Regression

import optuna
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import numpy as np

def LinearRegression_Optuna(X, y):
    """Tune ``LinearRegression`` with Optuna and return the best trial.

    Runs 100 trials that minimise the mean 5-fold cross-validated RMSE; the
    only hyperparameter searched is ``fit_intercept``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Target values forwarded to ``cross_val_score``.

    Returns:
        The study's best trial.
    """
    def _mean_cv_rmse(trial):
        # X and y are captured from the enclosing call.
        use_intercept = trial.suggest_categorical('fit_intercept', [True, False])
        model = LinearRegression(fit_intercept=use_intercept)
        neg_mse = cross_val_score(
            model, X, y, cv=5, scoring='neg_mean_squared_error', n_jobs=-1
        )
        # The scorer yields negative MSE; negate before the per-fold root.
        return np.sqrt(-neg_mse).mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(_mean_cv_rmse, n_trials=100)
    return study.best_trial

# Run the Linear Regression study and report the best trial's objective value
# (mean CV RMSE) and the hyperparameters that produced it.
result = LinearRegression_Optuna(X, y)
print(f"Linear Regression RMSE: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
#  XGBRegressor

import optuna
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import numpy as np

def XGBRegressor_Optuna(X, y):
    """Tune ``XGBRegressor`` with Optuna and return the best trial.

    Runs 100 trials that minimise the mean 5-fold cross-validated RMSE.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Target values forwarded to ``cross_val_score``.

    Returns:
        The study's best trial.
    """
    def _mean_cv_rmse(trial):
        # Sampled hyperparameters; X and y come from the enclosing call.
        sampled = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
            'subsample': trial.suggest_float('subsample', 0.6, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-5, 100, log=True),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 100, log=True),
        }
        # Fixed settings are passed separately from the searched ones.
        model = XGBRegressor(random_state=42, n_jobs=-1, **sampled)
        neg_mse = cross_val_score(
            model, X, y, cv=5, scoring='neg_mean_squared_error', n_jobs=-1
        )
        # The scorer yields negative MSE; negate before the per-fold root.
        return np.sqrt(-neg_mse).mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(_mean_cv_rmse, n_trials=100)
    return study.best_trial

# Run the XGBoost study and report the best trial's objective value
# (mean CV RMSE) and the hyperparameters that produced it.
result = XGBRegressor_Optuna(X, y)
print(f"XGBoost Regressor RMSE: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
# DecisionTreeRegressor

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import numpy as np

def DecisionTreeRegressor_Optuna(X, y):
    """Tune ``DecisionTreeRegressor`` with Optuna and return the best trial.

    Runs 100 trials that minimise the mean 5-fold cross-validated RMSE.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Target values forwarded to ``cross_val_score``.

    Returns:
        The study's best trial.
    """
    def objective(trial, X, y):
        params = {
            'max_depth': trial.suggest_int('max_depth', 3, 20),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 20),
            # 'auto' was removed in scikit-learn 1.3 and made every trial that
            # sampled it fail; it meant "all features", which None still covers.
            'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
            'random_state': 42
        }

        reg = DecisionTreeRegressor(**params)
        # The scorer yields negative MSE; negate before the per-fold root.
        mse_scores = -cross_val_score(reg, X, y, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
        rmse_scores = np.sqrt(mse_scores)
        return rmse_scores.mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: objective(trial, X, y), n_trials=100)
    return study.best_trial

# Run the decision-tree study and report the best trial's objective value
# (mean CV RMSE) and the hyperparameters that produced it.
result = DecisionTreeRegressor_Optuna(X, y)
print(f"DecisionTreeRegressor RMSE: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
# RandomForestRegressor

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import optuna
import numpy as np

def RandomForestRegressor_Optuna(X, y):
    """Tune ``RandomForestRegressor`` with Optuna and return the best trial.

    Runs 100 trials that minimise the mean 5-fold cross-validated RMSE.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Target values forwarded to ``cross_val_score``.

    Returns:
        The study's best trial.
    """
    def objective(trial, X, y):
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000, step=100),
            'max_depth': trial.suggest_int('max_depth', 3, 20),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 20),
            # 'auto' was removed in scikit-learn 1.3 and made every trial that
            # sampled it fail; None is the equivalent "use all features" choice.
            'max_features': trial.suggest_categorical('max_features', [None, 'sqrt', 'log2']),
            'random_state': 42
        }

        reg = RandomForestRegressor(**params)
        # The scorer yields negative MSE; negate before the per-fold root.
        mse_scores = -cross_val_score(reg, X, y, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
        rmse_scores = np.sqrt(mse_scores)
        return rmse_scores.mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: objective(trial, X, y), n_trials=100)
    return study.best_trial

# Run the random-forest study and report the best trial's objective value
# (mean CV RMSE) and the hyperparameters that produced it.
result = RandomForestRegressor_Optuna(X, y)
print(f"RandomForestRegressor RMSE: {result.value}")
print(f"Best Hyperparameters: {result.params}")


In [None]:
# SVR

from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import optuna
import numpy as np

def SVMRegressor_Optuna(X, y):
    """Tune ``SVR`` with Optuna and return the best trial.

    Runs 100 trials that minimise the mean 5-fold cross-validated RMSE.
    ``degree`` is only sampled for the polynomial kernel, and ``gamma`` only
    for the non-linear kernels.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Target values forwarded to ``cross_val_score``.

    Returns:
        The study's best trial.
    """
    def _mean_cv_rmse(trial):
        # X and y are captured from the enclosing call.
        kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])
        svr_kwargs = {
            'kernel': kernel,
            'C': trial.suggest_float('C', 1e-3, 1e3, log=True),
            'epsilon': trial.suggest_float('epsilon', 1e-3, 1e1, log=True),
        }

        # Kernel-specific hyperparameters are sampled only when they apply.
        if kernel == 'poly':
            svr_kwargs['degree'] = trial.suggest_int('degree', 2, 5)
        if kernel != 'linear':
            svr_kwargs['gamma'] = trial.suggest_categorical('gamma', ['scale', 'auto'])

        model = SVR(**svr_kwargs)
        neg_mse = cross_val_score(
            model, X, y, cv=5, scoring='neg_mean_squared_error', n_jobs=-1
        )
        # The scorer yields negative MSE; negate before the per-fold root.
        return np.sqrt(-neg_mse).mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(_mean_cv_rmse, n_trials=100)
    return study.best_trial

# Run the SVR study and report the best trial's objective value
# (mean CV RMSE) and the hyperparameters that produced it.
result = SVMRegressor_Optuna(X, y)
print(f"SVM Regressor RMSE: {result.value}")
print(f"Best Hyperparameters: {result.params}")


In [None]:
# KNeighborsRegressor

from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import optuna
import numpy as np

def KNNRegressor_Optuna(X, y):
    """Tune ``KNeighborsRegressor`` with Optuna and return the best trial.

    Runs 100 trials that minimise the mean 5-fold cross-validated RMSE.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Target values forwarded to ``cross_val_score``.

    Returns:
        The study's best trial.
    """
    def _mean_cv_rmse(trial):
        # X and y are captured from the enclosing call.
        n_neighbors = trial.suggest_int('n_neighbors', 1, 20)
        weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
        algorithm = trial.suggest_categorical('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute'])
        # Minkowski power: 1 is Manhattan distance, 2 is Euclidean distance.
        p = trial.suggest_categorical('p', [1, 2])

        model = KNeighborsRegressor(
            n_neighbors=n_neighbors, weights=weights, algorithm=algorithm, p=p
        )
        neg_mse = cross_val_score(
            model, X, y, cv=5, scoring='neg_mean_squared_error', n_jobs=-1
        )
        # The scorer yields negative MSE; negate before the per-fold root.
        return np.sqrt(-neg_mse).mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(_mean_cv_rmse, n_trials=100)
    return study.best_trial

# Run the k-nearest-neighbours study and report the best trial's objective
# value (mean CV RMSE) and the hyperparameters that produced it.
result = KNNRegressor_Optuna(X, y)
print(f"K-Nearest Neighbors Regressor RMSE: {result.value}")
print(f"Best Hyperparameters: {result.params}")

## 2. Multiple Optuna Models

In [None]:
import optuna
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score
import numpy as np

def Optuna_Hyperparameter_tuning(X, y):
    """Search over model family and hyperparameters in a single Optuna study.

    Each trial first picks either a random forest or an XGBoost regressor,
    then samples that model's hyperparameters. The study runs 100 trials and
    minimises the mean 5-fold cross-validated RMSE.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Target values forwarded to ``cross_val_score``.

    Returns:
        The study's best trial; its ``params`` include the chosen
        ``regressor`` plus the ``rf_``- or ``xgb_``-prefixed settings.
    """
    def objective(trial, X, y):
        regressor = trial.suggest_categorical("regressor", ['RandomForest', 'XGB'])

        if regressor == "RandomForest":
            rf_params = {
                'n_estimators': trial.suggest_int("rf_n_estimators", 100, 1000, step=100),
                'max_depth': trial.suggest_int("rf_max_depth", 3, 20),
                'min_samples_split': trial.suggest_int("rf_min_samples_split", 2, 20),
                'min_samples_leaf': trial.suggest_int("rf_min_samples_leaf", 1, 20),
                # 'auto' was removed in scikit-learn 1.3 and made every trial
                # that sampled it fail; None is the equivalent "all features".
                'max_features': trial.suggest_categorical("rf_max_features", [None, 'sqrt', 'log2']),
                'random_state': 42,
                'n_jobs': -1
            }
            reg = RandomForestRegressor(**rf_params)

        else:
            xgb_params = {
                'n_estimators': trial.suggest_int('xgb_n_estimators', 100, 1000),
                'max_depth': trial.suggest_int('xgb_max_depth', 3, 10),
                'learning_rate': trial.suggest_float('xgb_learning_rate', 0.01, 0.3, log=True),
                'subsample': trial.suggest_float('xgb_subsample', 0.5, 1.0),
                'colsample_bytree': trial.suggest_float('xgb_colsample_bytree', 0.5, 1.0),
                'gamma': trial.suggest_float('xgb_gamma', 1e-8, 1.0, log=True),
                'min_child_weight': trial.suggest_int('xgb_min_child_weight', 1, 300),
                'reg_alpha': trial.suggest_float('xgb_reg_alpha', 1e-8, 1.0, log=True),
                'reg_lambda': trial.suggest_float('xgb_reg_lambda', 1e-8, 1.0, log=True),
                'random_state': 42,
                'n_jobs': -1
            }
            reg = XGBRegressor(**xgb_params)

        # The scorer yields negative MSE; negate before the per-fold root.
        mse_scores = -cross_val_score(reg, X, y, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
        rmse_scores = np.sqrt(mse_scores)
        return rmse_scores.mean()

    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: objective(trial=trial, X=X, y=y), n_trials=100)
    return study.best_trial

# Run the combined model-selection study and report the best trial's objective
# value (mean CV RMSE) and the hyperparameters that produced it.
result = Optuna_Hyperparameter_tuning(X=X, y=y)
print(f"RMSE: {result.value}")
print(f"Best Hyperparameters: {result.params}")