In [27]:
import optuna
import numpy as np
import sklearn.datasets
import sklearn.model_selection
import sklearn.svm
from tensorairspace.agent.ihdp.model import IHDPAgent
import gym
def unit_step(x):
    return np.deg2rad(5) * (x > 1000)

t0 = 0  # Начальное время
tn = 20  # Время моделирования
dt = 0.01  # Дискретизация
number_time_steps = int(((tn - t0) / dt) + 1)  # Количество шагов моделирования
time = list(np.arange(0, number_time_steps * dt, dt))  # Массив с шагов dt
t = np.linspace(-0, len(time), len(time))

reference_signals = np.reshape(unit_step(t), [1, -1])


def objective(trial):

    env = gym.make('LinearLongitudinalF16-v0',
               number_time_steps=number_time_steps,
               initial_state=[[0], [0], [0], [0]],
               reference_signal=reference_signals,
               tracking_states=["alpha"])
    env.reset()

    actor_settings = {
        "start_training": trial.suggest_int("start_training", 1, 20, log=True),
        "layers": (trial.suggest_int("layers", 1, 100, log=True), 1),
        "activations": ('tanh', 'tanh'),
        "learning_rate": trial.suggest_int("learning_rate", 1, 20, log=True),
        "learning_rate_exponent_limit": 10,
        "type_PE": "combined",
        "amplitude_3211": 15,
        "pulse_length_3211": 5 / dt,
        "maximum_input": 25,
        "maximum_q_rate": 20,
        "WB_limits": 30,
        "NN_initial": 120,
        "cascade_actor": False,
        "learning_rate_cascaded": 1.2
    }
    incremental_settings = {
        "number_time_steps": number_time_steps,
        "dt": dt,
        "input_magnitude_limits":25,
        "input_rate_limits":60,
    }
    critic_settings = {
        "Q_weights": [trial.suggest_float('Q_weights', 0, 100)],
        "start_training": -1,
        "gamma": trial.suggest_float('gamma', 0, 0.99),
        "learning_rate": trial.suggest_int("learning_rate", 1, 20, log=True),
        "learning_rate_exponent_limit": 10,
        "layers": (trial.suggest_int("layers", 1, 100, log=True),1),
        "activations": ("tanh", "linear"),
        "WB_limits": 30,
        "NN_initial": 120,
        "indices_tracking_states": env.indices_tracking_states
    }

    model = IHDPAgent(actor_settings, critic_settings, incremental_settings, env.tracking_states, env.state_space, env.control_space, number_time_steps, env.indices_tracking_states)
    xt = np.array([[np.deg2rad(3)], [0]])
    for step in range(number_time_steps):
        ut = model.predict(xt, reference_signals, step)
        xt, reward, done, info = env.step(np.array(ut))
    return reward

In [28]:
study = optuna.create_study(direction="minimize")

[32m[I 2022-02-09 18:44:33,808][0m A new study created in memory with name: no-name-cf898c56-ec28-4c7b-9b82-0e0bebb41ac3[0m


In [30]:
study.optimize(objective, n_trials=20)

[0]


[32m[I 2022-02-09 18:48:55,259][0m Trial 5 finished with value: -0.09829259083171762 and parameters: {'start_training': 1, 'layers': 4, 'learning_rate': 14, 'Q_weights': 27.44095968225787, 'gamma': 0.30305118117670277}. Best is trial 0 with value: -0.1206735140742464.[0m


[0]


[32m[I 2022-02-09 18:49:28,933][0m Trial 6 finished with value: -0.40441091235410254 and parameters: {'start_training': 6, 'layers': 51, 'learning_rate': 3, 'Q_weights': 58.52638192353954, 'gamma': 0.3776920060424658}. Best is trial 6 with value: -0.40441091235410254.[0m


[0]


[32m[I 2022-02-09 18:49:58,469][0m Trial 7 finished with value: -0.08244226900780334 and parameters: {'start_training': 1, 'layers': 12, 'learning_rate': 1, 'Q_weights': 50.46935945321298, 'gamma': 0.4796719785570008}. Best is trial 6 with value: -0.40441091235410254.[0m


[0]


[32m[I 2022-02-09 18:50:26,986][0m Trial 8 finished with value: -0.04705089232658564 and parameters: {'start_training': 1, 'layers': 27, 'learning_rate': 1, 'Q_weights': 81.93577526164867, 'gamma': 0.4752188468106958}. Best is trial 6 with value: -0.40441091235410254.[0m


[0]


[32m[I 2022-02-09 18:50:54,967][0m Trial 9 finished with value: -0.10082665528503788 and parameters: {'start_training': 4, 'layers': 1, 'learning_rate': 1, 'Q_weights': 69.33489012781567, 'gamma': 0.698920985121834}. Best is trial 6 with value: -0.40441091235410254.[0m


[0]


[32m[I 2022-02-09 18:51:23,063][0m Trial 10 finished with value: -0.19738287935830784 and parameters: {'start_training': 16, 'layers': 15, 'learning_rate': 5, 'Q_weights': 49.776626213359044, 'gamma': 0.04648672575460505}. Best is trial 6 with value: -0.40441091235410254.[0m


[0]


[32m[I 2022-02-09 18:51:51,205][0m Trial 11 finished with value: -0.17135269941130704 and parameters: {'start_training': 19, 'layers': 17, 'learning_rate': 5, 'Q_weights': 53.63285564836726, 'gamma': 0.008641584458457557}. Best is trial 6 with value: -0.40441091235410254.[0m


[0]


[32m[I 2022-02-09 18:52:18,336][0m Trial 12 finished with value: -0.12202962190949994 and parameters: {'start_training': 15, 'layers': 40, 'learning_rate': 6, 'Q_weights': 35.67474220994948, 'gamma': 0.11116542999987818}. Best is trial 6 with value: -0.40441091235410254.[0m


[0]


[32m[I 2022-02-09 18:52:46,771][0m Trial 13 finished with value: -0.08808368813651138 and parameters: {'start_training': 8, 'layers': 5, 'learning_rate': 7, 'Q_weights': 59.87614490946035, 'gamma': 0.6827490438331016}. Best is trial 6 with value: -0.40441091235410254.[0m


[0]


[32m[I 2022-02-09 18:53:15,239][0m Trial 14 finished with value: -0.08915332534584898 and parameters: {'start_training': 10, 'layers': 9, 'learning_rate': 2, 'Q_weights': 38.66290558436249, 'gamma': 0.1706248861116266}. Best is trial 6 with value: -0.40441091235410254.[0m


[0]


KeyboardInterrupt: 

In [23]:
study.best_trial


ValueError: No trials are completed yet.