In [2]:
#In this file, Optuna automatically optimizes the fit_intercept hyperparameter of a linear regression model to
#minimize the RMSE (Root Mean Squared Error) on the test dataset.

In [1]:
# Install the third-party packages this notebook imports into the kernel's environment.
# numpy (imported below) is not listed explicitly; it is installed as a dependency of scikit-learn.
%pip install optuna scikit-learn

In [17]:
import optuna
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [18]:
# 1. Load the diabetes dataset and separate it into features (X) and target (y)
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

In [28]:
# 2. Hold out 20% of the samples as the test set; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [29]:
# 3. Define the objective function to optimize hyperparameters
def objective(trial):
    """Score a single Optuna trial for the linear regression model.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the ``fit_intercept`` hyperparameter
        (either ``True`` or ``False``).

    Returns
    -------
    float
        Root Mean Squared Error (RMSE) of the fitted model's predictions
        on ``X_test`` compared against ``y_test``.

    Notes
    -----
    NOTE(review): the score is computed on the test split, so the
    hyperparameter is selected against the same data that is later used to
    report the final RMSE. Consider a separate validation split or
    cross-validation on the training data if an unbiased estimate is needed.
    """
    # Let Optuna decide whether the model should fit an intercept term.
    use_intercept = trial.suggest_categorical('fit_intercept', [True, False])

    # Build the model with the suggested setting and train it on the training split.
    regressor = LinearRegression(fit_intercept=use_intercept).fit(X_train, y_train)

    # Predict on the test split and measure the mean squared error.
    test_mse = mean_squared_error(y_test, regressor.predict(X_test))

    # RMSE is the square root of the MSE; this is the value Optuna minimizes.
    return np.sqrt(test_mse)

In [30]:
# 4. Create an Optuna study and optimize the objective function
# TPESampler is the sampler Optuna uses by default for a single-objective study;
# it is passed explicitly here only so that it can be seeded. With a fixed seed the
# sequence of suggested hyperparameters is the same on every run, matching the
# fixed random_state already used for the train/test split.
study = optuna.create_study(
    direction="minimize",  # Minimize RMSE
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)  # Perform 10 trials

[I 2024-12-23 19:02:41,380] A new study created in memory with name: no-name-c668acac-2d08-4a2c-86e2-987fc76beb48
[I 2024-12-23 19:02:41,411] Trial 0 finished with value: 53.85344583676593 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 53.85344583676593.
[I 2024-12-23 19:02:41,417] Trial 1 finished with value: 53.85344583676593 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 53.85344583676593.
[I 2024-12-23 19:02:41,424] Trial 2 finished with value: 167.21769319292116 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 53.85344583676593.
[I 2024-12-23 19:02:41,427] Trial 3 finished with value: 167.21769319292116 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 53.85344583676593.
[I 2024-12-23 19:02:41,436] Trial 4 finished with value: 53.85344583676593 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 53.85344583676593.
[I 2024-12-23 19:02:41,440] Trial 5 finished with value: 167.217693

In [31]:
# 5. Report the best trial: the lowest RMSE found and the hyperparameters that produced it
print(
    "Best trial:",
    f"  RMSE: {study.best_value}",
    f"  Best parameters: {study.best_params}",
    sep="\n",
)

Best trial:
  RMSE: 53.85344583676593
  Best parameters: {'fit_intercept': True}
