In [2]:
! pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.4-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.4-py3-none-any.whl (247 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.0/247.0 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.4 colorlog-6.9.0 optuna-4.4.0


In [3]:
import optuna
from sklearn.linear_model import LinearRegression
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Carregar dados do California Housing
X, y = fetch_california_housing(return_X_y=True)

# Dividir em treino e validação
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.3, random_state=42)

def objective(trial):
    # Hiperparâmetro exemplo: fit_intercept (único relevante na regressão linear pura)
    fit_intercept = trial.suggest_categorical('fit_intercept', [True, False])
    model = LinearRegression(fit_intercept=fit_intercept)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    mse = mean_squared_error(y_val, preds)
    return mse

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)

print("Melhor configuração:", study.best_params)
print("Melhor MSE:", study.best_value)

[I 2025-08-02 18:22:34,401] A new study created in memory with name: no-name-79aceacc-7406-41a2-b16b-5b508ea18714
[I 2025-08-02 18:22:34,448] Trial 0 finished with value: 0.5305677824766757 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 0.5305677824766757.
[I 2025-08-02 18:22:34,469] Trial 1 finished with value: 0.604878579654339 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 0.5305677824766757.
[I 2025-08-02 18:22:34,494] Trial 2 finished with value: 0.604878579654339 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 0.5305677824766757.
[I 2025-08-02 18:22:34,504] Trial 3 finished with value: 0.5305677824766757 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 0.5305677824766757.
[I 2025-08-02 18:22:34,534] Trial 4 finished with value: 0.604878579654339 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 0.5305677824766757.
[I 2025-08-02 18:22:34,549] Trial 5 finished with value: 0.53

Melhor configuração: {'fit_intercept': True}
Melhor MSE: 0.5305677824766757


In [6]:
import optuna
from sklearn.datasets import fetch_california_housing
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Carregar dados
X, y = fetch_california_housing(return_X_y=True)

# Divisão treino/validação
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.3, random_state=42)

def objective(trial):
    # Definir hiperparâmetros a serem otimizados
    n_neighbors = trial.suggest_int("n_neighbors", 1, 30)
    weights = trial.suggest_categorical("weights", ["uniform", "distance"])
    p = trial.suggest_int("p", 1, 3)

    # Criar modelo KNN
    model = KNeighborsRegressor(
        n_neighbors=n_neighbors,
        weights=weights,
        p=p
    )

    # Treinar e validar
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    mse = mean_squared_error(y_val, preds)
    return mse  # Minimizar erro quadrático médio

# Criar estudo e otimizar
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

print("Melhores hiperparâmetros:", study.best_params)
print("Melhor MSE:", study.best_value)

[I 2025-08-02 18:24:59,000] A new study created in memory with name: no-name-02ec44da-0130-4ce9-be6e-56c92716072a
[I 2025-08-02 18:24:59,050] Trial 0 finished with value: 1.2750616141570847 and parameters: {'n_neighbors': 2, 'weights': 'distance', 'p': 2}. Best is trial 0 with value: 1.2750616141570847.
[I 2025-08-02 18:24:59,138] Trial 1 finished with value: 1.1479127563890883 and parameters: {'n_neighbors': 29, 'weights': 'distance', 'p': 2}. Best is trial 1 with value: 1.1479127563890883.
[I 2025-08-02 18:24:59,207] Trial 2 finished with value: 0.9980707800621474 and parameters: {'n_neighbors': 9, 'weights': 'uniform', 'p': 1}. Best is trial 2 with value: 0.9980707800621474.
[I 2025-08-02 18:24:59,398] Trial 3 finished with value: 1.1788897269548972 and parameters: {'n_neighbors': 5, 'weights': 'uniform', 'p': 3}. Best is trial 2 with value: 0.9980707800621474.
[I 2025-08-02 18:24:59,654] Trial 4 finished with value: 1.1443594337009955 and parameters: {'n_neighbors': 16, 'weights': 

Melhores hiperparâmetros: {'n_neighbors': 9, 'weights': 'distance', 'p': 1}
Melhor MSE: 0.9672981588573515
