In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


# Gráficos na escala de -1 até 1

In [None]:
# MLP_50K: optimization history restricted to the first 20,000 trials.

import joblib
import optuna
import optuna.visualization as vis

# Load the pickled study (the original notebook describes it as the 50,000-trial run).
study = joblib.load("optuna_mlp_study.pkl")

# Keep the earliest 20,000 trials; the slice applies no filter on trial state.
first_trials = study.trials[:20_000]

# Copy those trials into a new in-memory study that optimizes in the same
# direction as the source study. Every plot below should use
# `filtered_study` rather than `study`.
filtered_study = optuna.create_study(direction=study.direction)
filtered_study.add_trials(first_trials)

# Optimization history for the 20k subset, with the y-axis limited to [-1, 1].
fig_history = vis.plot_optimization_history(filtered_study)
history_layout = {
    "yaxis": {"range": [-1, 1], "title": "NSE"},
    "title": "Optimization History (NSE range between –1 and 1)",
}
fig_history.update_layout(**history_layout)

# Export as a standalone HTML page that pulls plotly.js from the CDN.
fig_history.write_html(
    "optuna_history_20k.html",
    full_html=True,
    include_plotlyjs="cdn",
    auto_open=False,
)

[I 2025-05-06 18:40:39,946] A new study created in memory with name: no-name-d48037dc-0852-4ad6-944a-8d54e2fd505b


In [None]:
# Slice plot of the filtered study (no `params` given, so Optuna picks the
# parameters to draw), with the y-axis limited to [-1, 1].
fig_slice = vis.plot_slice(filtered_study)

slice_layout = {
    "title": "Hyperparameters vs NSE",
    "yaxis": {"range": [-1, 1], "title": "NSE"},
}
fig_slice.update_layout(**slice_layout)

# Export as a standalone HTML page that pulls plotly.js from the CDN.
fig_slice.write_html(
    "optuna_slice_20k.html",
    full_html=True,
    include_plotlyjs="cdn",
    auto_open=False,
)

In [None]:
# Slice plot restricted to the three most important hyperparameters.
import optuna
import optuna.visualization as vis

# 1. Select the three most important hyperparameters.
#    `imp_df` is only created by the random-forest importance cell further
#    down, so on a fresh kernel (Restart & Run All) it does not exist yet and
#    the original lookup raised NameError. Reuse it when it is already in the
#    kernel (keeps the previous result for out-of-order runs); otherwise rank
#    the parameters here with Optuna's random-forest (mean-decrease-impurity)
#    evaluator, seeded so the ranking is reproducible.
if "imp_df" in globals():
    top3_params = imp_df["parameter"].head(3).tolist()
else:
    param_importances = optuna.importance.get_param_importances(
        filtered_study,
        evaluator=optuna.importance.MeanDecreaseImpurityImportanceEvaluator(
            n_trees=100, seed=0
        ),
    )
    # The returned mapping is ordered from most to least important.
    top3_params = list(param_importances)[:3]

# 2. Draw the slice plot for those parameters only.
fig_slice = vis.plot_slice(
    filtered_study,
    params=top3_params,  # list with the selected parameter names
)
fig_slice.update_layout(
    yaxis=dict(range=[-1, 1], title="NSE"),
    title="Hyperparameters vs NSE (Top 3)"
)

# Export as a standalone HTML page that pulls plotly.js from the CDN.
fig_slice.write_html(
    "optuna_slice_top3.html",
    include_plotlyjs="cdn",
    full_html=True,
    auto_open=False
)


# Importância com Random Forest e coordenadas paralelas, independentes do Optuna

In [None]:
# 1) Hyperparameter importance estimated with a RandomForestRegressor

import joblib
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.ensemble import RandomForestRegressor

# 1. Load the study and keep the first 20k COMPLETE trials.
#    The slice follows the order of `study.trials`; it does not select the
#    best-scoring trials.
study = joblib.load("optuna_mlp_study.pkl")
trials = [t for t in study.trials if t.state.name == "COMPLETE"][:20_000]

# 2. Build a DataFrame with one row per trial: its sampled parameters plus
#    the objective value in the "value" column.
records = [{**t.params, "value": t.value} for t in trials]
df = pd.DataFrame(records)

# 3. Replace object-typed (categorical) columns with integer codes in place,
#    then split into feature matrix X and target y. `df` is left in this
#    encoded form for the cells that follow.
for col in df.select_dtypes(include="object"):
    df[col] = pd.factorize(df[col])[0]
param_cols = df.columns.drop("value")
X = df[param_cols].to_numpy()
y = df["value"].to_numpy()

# 4. Fit the forest on all available cores; the fixed seed keeps the
#    importances reproducible.
rf = RandomForestRegressor(n_estimators=100, n_jobs=-1, random_state=0)
rf.fit(X, y)

# 5. Importance table sorted from most to least important, then plot and save.
imp_df = pd.DataFrame({
    "parameter": param_cols,
    "importance": rf.feature_importances_
}).sort_values("importance", ascending=False)

fig_imp = px.bar(
    imp_df,
    x="importance",
    y="parameter",
    orientation="h",
    # The previous title said "top 20 k trials", but the data are the first
    # 20k complete trials, not the best ones.
    title="Importance of Hyperparameters (MLP, first 20 k complete trials)"
)
fig_imp.update_layout(yaxis={"categoryorder": "total ascending"})
fig_imp.write_html(
    "optuna_mlp_importance_rf.html",
    include_plotlyjs="cdn",
    full_html=True,
    auto_open=False,
)


In [None]:
import plotly.express as px

# Hand-picked hyperparameter columns to draw as axes.
dims = [
    "n_layers",
    "neurons",
    "alpha",
    "learning_rate",
    "learning_rate_init",
    "tol",
    "n_iter_no_change",
    "max_fun",
]

# Keep only the trials whose objective value (NSE) is positive.
df_par = df.loc[df["value"] > 0]

# Colour the lines by objective value on a green scale running from 0 up to
# the best value present in the filtered frame.
best_value = df_par["value"].max()
fig_par = px.parallel_coordinates(
    df_par,
    dimensions=dims,
    color="value",
    color_continuous_scale="Greens",
    range_color=[0, best_value],
    title="Parallel Coordinates (NSE > 0)",
)

# Render inline.
fig_par.show()

# (Optional) export as a standalone HTML page using the plotly.js CDN build.
fig_par.write_html(
    "optuna_mlp_parallel_selected_greens.html",
    full_html=True,
    include_plotlyjs="cdn",
    auto_open=False,
)


In [None]:
from joblib import load

# 1) Deserialize the saved model from disk.
model = load("best_mlp_model.joblib")

# 2) Report which class the loaded object is an instance of.
model_class = type(model)
print(model_class)


<class 'sklearn.neural_network._multilayer_perceptron.MLPRegressor'>


In [None]:
# Print every hyperparameter of the loaded model together with its value.
model_params = model.get_params()
print(model_params)


{'activation': 'logistic', 'alpha': 3.092166230303286e-05, 'batch_size': 14, 'beta_1': 0.9, 'beta_2': 0.999, 'early_stopping': False, 'epsilon': 1e-08, 'hidden_layer_sizes': (3, 3, 3), 'learning_rate': 'invscaling', 'learning_rate_init': 0.05263808111852234, 'max_fun': 91661, 'max_iter': 20000, 'momentum': 0.9, 'n_iter_no_change': 448, 'nesterovs_momentum': True, 'power_t': 0.5, 'random_state': 42, 'shuffle': True, 'solver': 'lbfgs', 'tol': 3.0362952700266083e-06, 'validation_fraction': 0.1, 'verbose': False, 'warm_start': True}


In [None]:
import joblib
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.ensemble import RandomForestRegressor


def _is_complete_lbfgs(trial):
    """Return True for a COMPLETE trial whose sampled solver is "lbfgs"."""
    return trial.state.name == "COMPLETE" and trial.params.get("solver") == "lbfgs"


# 1. Load the study and keep only the complete trials that used solver="lbfgs".
study = joblib.load("optuna_mlp_study.pkl")
trials_lbfgs = list(filter(_is_complete_lbfgs, study.trials))

# 2. One row per trial: sampled parameters plus the objective in "value".
records = [{**trial.params, "value": trial.value} for trial in trials_lbfgs]
df_lbfgs = pd.DataFrame(records)

# 3. Integer-encode any object-typed (categorical) columns in place, then
#    split into feature matrix X and target y.
for col in df_lbfgs.select_dtypes(include="object").columns:
    codes, _ = pd.factorize(df_lbfgs[col])
    df_lbfgs[col] = codes
features = df_lbfgs.drop(columns="value")
X = features.to_numpy()
y = df_lbfgs["value"].to_numpy()

# 4. Fit a seeded random forest whose feature importances serve as the
#    hyperparameter importance estimate.
# NOTE(review): scikit-learn documents some MLPRegressor parameters (e.g.
# learning_rate, learning_rate_init, n_iter_no_change) as only effective for
# the sgd/adam solvers; if the study sampled them for lbfgs trials too, their
# importance here is probably noise — confirm against the search space.
rf = RandomForestRegressor(n_estimators=100, n_jobs=-1, random_state=0)
rf.fit(X, y)

# 5. Importance table, most important parameter first.
importance_table = pd.DataFrame(
    {"parameter": features.columns, "importance": rf.feature_importances_}
)
imp_df_lbfgs = importance_table.sort_values("importance", ascending=False)

# 6. Horizontal bar chart of the three most important parameters, each bar
#    labelled with its value to two decimals.
top3 = imp_df_lbfgs.head(3)

fig_top3 = px.bar(
    top3,
    x="importance",
    y="parameter",
    orientation="h",
    title="MLP Regressor (solver=lbfgs): Top 3 Hyperparameters Importance",
    labels={"importance": "Importance", "parameter": "Hyperparameter"},
    text_auto=".2f",
)
fig_top3.update_layout(yaxis={"categoryorder": "total ascending"})
fig_top3.update_traces(textposition="outside")

fig_top3.show()

# (Optional) export as a standalone HTML page using the plotly.js CDN build.
fig_top3.write_html(
    "optuna_mlp_importance_top3_lbfgs.html",
    full_html=True,
    include_plotlyjs="cdn",
    auto_open=False,
)


In [None]:
import optuna
import optuna.visualization as vis

# 1) Top-3 hyperparameters for the lbfgs solver (hard-coded; they can also be
#    read from imp_df_lbfgs).
top3_params = ['alpha', 'tol', 'n_iter_no_change']

# 2) Restrict the plotted trials to solver="lbfgs".
#    `filtered_study` holds the first 20k trials of the full study with no
#    solver filter, so plotting it directly under an "(lbfgs)" title
#    mislabelled the data. Build a study containing only its complete lbfgs
#    trials and plot that instead.
lbfgs_trials = [
    t for t in filtered_study.trials
    if t.state.name == "COMPLETE" and t.params.get("solver") == "lbfgs"
]
lbfgs_study = optuna.create_study(direction=filtered_study.direction)
lbfgs_study.add_trials(lbfgs_trials)

# 3) Slice plot for the selected parameters, y-axis limited to [-1, 1].
fig_slice_top3 = vis.plot_slice(
    lbfgs_study,
    params=top3_params
)
fig_slice_top3.update_layout(
    yaxis=dict(range=[-1, 1], title="NSE"),
    title="Slice Plot — Top 3 Hyperparameters (lbfgs)"
)
fig_slice_top3.show()

# (Optional) export as a standalone HTML page using the plotly.js CDN build.
fig_slice_top3.write_html(
    "optuna_slice_top3_lbfgs.html",
    include_plotlyjs="cdn",
    full_html=True,
    auto_open=False
)
