# Result Notebook

## Imports

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import multimodal_mazes
import seaborn as sns

## Parameters, Hyperparameters and Helper Functions

In [2]:
def environment_hyperparameters():
    """Return the fixed environment settings used throughout this notebook.

    Returns:
        tuple: ``(width, height, pk, pk_hw, capture_radius)`` with the
        values ``(21, 20, 40, 20, 1)``.
    """
    grid_dims = (21, 20)      # (width, height)
    pk_values = (40, 20)      # (pk, pk_hw); pk_hw is presumably half of pk - TODO confirm
    catch_radius = 1          # capture_radius
    return (*grid_dims, *pk_values, catch_radius)

def agent_hyperparameters(width, height, pk_hw):
    """Return the DDPG agent settings plus the agent's starting location.

    The learning rates, hidden size, gamma and tau are the same values that
    appear in the "Best parameters" line of the Optuna output saved in this
    notebook.

    Args:
        width: Environment width; half of it (integer division) offsets the
            second location coordinate.
        height: Environment height; half of it (integer division) offsets the
            first location coordinate.
        pk_hw: Offset added to both location coordinates.

    Returns:
        tuple: ``(input_dim, hidden_dim, action_dim, actor_lr, critic_lr,
        gamma, tau, channels, location)`` where ``channels`` and ``location``
        are NumPy arrays of length 2.
    """
    network_sizes = (8, 10, 2)  # input_dim, hidden_dim, action_dim
    learning_rates = (1.0506603198418105e-05, 0.0005747127451203138)  # actor_lr, critic_lr
    discount_and_tau = (0.9197484689502473, 0.0009655446721377356)  # gamma, tau

    active_channels = np.array([1, 1])

    # Start position: pk_hw offset plus half the height / width.
    # Alternative start previously tried: np.array([pk_hw, pk_hw + width//2])
    first_coord = pk_hw + height // 2
    second_coord = pk_hw + width // 2
    start_location = np.array([first_coord, second_coord])

    return (*network_sizes, *learning_rates, *discount_and_tau, active_channels, start_location)

def static_hyperparamters():
    """Return the trial settings for the "Static" scenario.

    Note: the function name is misspelled ("hyperparamters"); it is kept
    as-is because other cells in this notebook call it by this name.

    Returns:
        tuple: ``(noise, n_prey, n_steps, scenario, motion, case,
        multisensory, speed, pe, pc)``.
    """
    return (
        0.49798430512987657,  # noise (equals 'noise_scale' in the saved Optuna best parameters)
        1,                    # n_prey
        50,                   # n_steps
        "Static",             # scenario
        None,                 # motion
        None,                 # case
        "Unisensory",         # multisensory
        0,                    # speed
        1,                    # pe
        0.0,                  # pc
    )

def constant_hyperparameters():
    """Return the trial settings for the "Constant" scenario with "Linear" motion.

    Returns:
        tuple: ``(noise, n_prey, n_steps, scenario, motion, case,
        multisensory, speed, pe, pc)``.
    """
    return (
        0,             # noise
        1,             # n_prey
        50,            # n_steps
        "Constant",    # scenario
        "Linear",      # motion
        None,          # case
        "Unisensory",  # multisensory
        1,             # speed
        1,             # pe
        0.0,           # pc
    )

## Static Training Test

In [None]:
# Collect environment, agent and static-scenario settings from the helper functions.
(width, height, pk, pk_hw, capture_radius) = environment_hyperparameters()
(
    input_dim, hidden_dim, action_dim,
    actor_lr, critic_lr, gamma, tau,
    channels, location,
) = agent_hyperparameters(width, height, pk_hw)
(
    sensor_noise, n_prey, n_steps, scenario, motion,
    case, multisensory, speed, pe, pc,
) = static_hyperparamters()

# Build the DDPG agent, wrap it in the continuous linear-prey evaluator and train it.
agent = multimodal_mazes.DDPGAgent(
    input_dim,
    hidden_dim,
    action_dim,
    actor_lr,
    critic_lr,
    gamma,
    tau,
    channels,
    capture_radius,
    location,
)
training_evaluator = multimodal_mazes.LinearPreyEvaluatorContinuous(
    width, height, pk_hw,
    agent,
    sensor_noise, n_prey, capture_radius,
    n_steps, n_steps,  # n_steps fills two consecutive positional slots - TODO confirm against the signature
    scenario, motion, case, multisensory,
    speed, pe, pc,
)
training_evaluator.train_RL(training_trials=10000)

In [None]:
# Show the training plots, with both optional plot flags switched on.
training_evaluator.training_plots(
    plot_training_lengths=True,
    plot_first_5_last_5=True,
)

## Optuna Hyperparameter Search

In [3]:
# Notebook-level settings that ddpg_objective reads as globals.
(width, height, pk, pk_hw, capture_radius) = environment_hyperparameters()

(
    input_dim,
    hidden_dim,
    action_dim,
    actor_lr,
    critic_lr,
    gamma,
    tau,
    channels,
    location,
) = agent_hyperparameters(width, height, pk_hw)

(
    sensor_noise,
    n_prey,
    n_steps,
    scenario,
    motion,
    case,
    multisensory,
    speed,
    pe,
    pc,
) = static_hyperparamters()

In [4]:
import optuna
from optuna import TrialPruned

def ddpg_objective(trial, n_episodes=1000, prune_check_interval=50):
    """Optuna objective: train a fresh DDPG agent and return its mean episode reward.

    Six hyperparameters are sampled from ``trial``; everything else
    (``input_dim``, ``action_dim``, ``channels``, ``capture_radius``,
    ``location``, ``width``, ``height``, ``pk_hw``, ``n_prey``, ``n_steps``,
    ``scenario``, ``motion``, ``case``, ``multisensory``, ``speed``, ``pe``,
    ``pc``) is read from the notebook's global namespace, so the setup cell
    must have been run first.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters, report
            intermediate values and query the pruner.
        n_episodes: Number of training trials to run. It is also the divisor
            of the returned average. Defaults to 1000.
        prune_check_interval: The pruner is consulted on every episode whose
            index is a multiple of this value. Defaults to 50.

    Returns:
        float: Sum of all per-step rewards over all episodes, divided by
        ``n_episodes``.

    Raises:
        ValueError: If ``n_episodes`` or ``prune_check_interval`` is not positive.
        TrialPruned: If the pruner asks for the trial to be stopped.
    """
    if n_episodes <= 0:
        raise ValueError("n_episodes must be a positive integer")
    if prune_check_interval <= 0:
        raise ValueError("prune_check_interval must be a positive integer")

    # Search space. Learning rates are sampled on a log scale.
    actor_lr = trial.suggest_float('actor_lr', 1e-5, 1e-2, log=True)
    critic_lr = trial.suggest_float('critic_lr', 1e-5, 1e-2, log=True)
    hidden_dim = trial.suggest_int('hidden_dim', 8, 256)  # Hidden dimensions for networks
    gamma = trial.suggest_float('gamma', 0.8, 0.999)  # Discount factor
    tau = trial.suggest_float('tau', 1e-5, 1e-2)  # For soft update of target networks
    # Registered with Optuna as 'noise_scale', but passed to each trial in the
    # sensor-noise positional slot (the argument right after the agent).
    sensor_noise = trial.suggest_float('noise_scale', 0, 0.5)

    # Create the agent with the hyperparameters from Optuna
    agent = multimodal_mazes.DDPGAgent(
        input_dim,
        hidden_dim,
        action_dim,
        actor_lr,
        critic_lr,
        gamma,
        tau,
        channels,
        capture_radius,
        location,
    )

    total_reward = 0

    for episode in tqdm(range(n_episodes)):
        # The same agent object is passed to every new trial instance.
        training_trial_instance = multimodal_mazes.PredatorTrialContinuous(
            width, height, pk_hw,
            agent,
            sensor_noise, n_prey, capture_radius,
            n_steps, n_steps,
            scenario, motion, case, multisensory,
            speed, pe, pc,
        )
        training_trial_data = training_trial_instance.run_training_trial()

        total_reward += sum(training_trial_data['rewards'])

        # The intermediate value is the cumulative reward so far, not an average.
        trial.report(total_reward, episode)

        # Only consult the pruner periodically (including at episode 0).
        if episode % prune_check_interval == 0 and trial.should_prune():
            raise TrialPruned()

    avg_reward = total_reward / n_episodes

    return avg_reward


In [None]:
# Set up a maximising study (higher average reward is better) with a median pruner.
pruner = optuna.pruners.MedianPruner(n_warmup_steps=5)
study = optuna.create_study(
    direction='maximize',
    pruner=pruner,
    study_name='DDPG Agent Parameter Tuning',
)

# Run 100 trials of the DDPG objective (adjust based on available time).
study.optimize(ddpg_objective, n_trials=100)

# Report the best hyperparameters found by the study.
best_params = study.best_params
print(f"Best parameters: {best_params}")

[I 2024-09-27 15:37:50,736] A new study created in memory with name: DDPG Agent Parameter Tuning
100%|██████████| 1000/1000 [04:04<00:00,  4.09it/s]
[I 2024-09-27 15:41:56,086] Trial 0 finished with value: -417.0585519371758 and parameters: {'actor_lr': 0.0087723640956809, 'critic_lr': 1.5837867985802622e-05, 'hidden_dim': 239, 'gamma': 0.9559845620931456, 'tau': 0.007190300323120642, 'noise_scale': 0.3487406545346927}. Best is trial 0 with value: -417.0585519371758.
100%|██████████| 1000/1000 [03:31<00:00,  4.74it/s]
[I 2024-09-27 15:45:27,116] Trial 1 finished with value: -324.33721673456057 and parameters: {'actor_lr': 0.0009764142920234925, 'critic_lr': 0.00014198380155647632, 'hidden_dim': 241, 'gamma': 0.8040801667714881, 'tau': 0.008899282791554606, 'noise_scale': 0.3973617204717981}. Best is trial 1 with value: -324.33721673456057.
100%|██████████| 1000/1000 [02:51<00:00,  5.83it/s]
[I 2024-09-27 15:48:18,727] Trial 2 finished with value: -746.7387637383708 and parameters: {'actor_lr': 0.0009346194570029213, 'critic_lr': 0.003953651559753783, 'hidden_dim': 145, 'gamma': 0.9883549896706009, 'tau': 0.000773567996835442, 'noise_scale': 0.026528784020361795}. Best is trial 1 with value: -324.33721673456057.
100%|██████████| 1000/1000 [03:59<00:00,  4.18it/s]
[I 2024-09-27 15:52:18,207] Trial 3 finished with value: -800.1864961399702 and parameters: {'actor_lr': 1.5740784198163642e-05, 'critic_lr': 0.0038524675662617637, 'hidden_dim': 229, 'gamma': 0.9867768434083477, 'tau': 0.004199554331677343, 'noise_scale': 0.0373331078043076}. Best is trial 1 with value: -324.33721673456057.
100%|██████████| 1000/1000 [02:49<00:00,  5.88it/s]
[I 2024-09-27 15:55:08,203] Trial 4 finished with value: -781.0711772616644 and parameters: {'actor_lr': 0.0006028524919719193, 'critic_lr': 5.889631327484927e-05, 'hidden_dim': 138, 'gamma': 0.9916123023924568, 'tau': 0.007577590823517911, 'noise_scale': 0.10736156221503468}. Best is trial 1 with value: -324.33721673456057.
100%|██████████| 1000/1000 [03:51<00:00,  4.32it/s]
[I 2024-09-27 15:58:59,893] Trial 5 finished with value: -811.0558677284141 and parameters: {'actor_lr': 0.00010014764165911945, 'critic_lr': 1.1270249036277267e-05, 'hidden_dim': 215, 'gamma': 0.8634895188699411, 'tau': 0.00951430115512719, 'noise_scale': 0.03546387520753236}. Best is trial 1 with value: -324.33721673456057.
100%|██████████| 1000/1000 [02:23<00:00,  6.97it/s]
[I 2024-09-27 16:01:23,338] Trial 6 finished with value: -729.3592905029918 and parameters: {'actor_lr': 0.0006840448034259966, 'critic_lr': 0.0008076908006126648, 'hidden_dim': 101, 'gamma': 0.8266172062484001, 'tau': 0.005021742042401841, 'noise_scale': 0.10088122037753611}. Best is trial 1 with value: -324.33721673456057.
100%|██████████| 1000/1000 [02:08<00:00,  7.76it/s]
[I 2024-09-27 16:03:32,164] Trial 7 finished with value: -266.5839134865015 and parameters: {'actor_lr': 3.156555341888829e-05, 'critic_lr': 0.00013767780322158463, 'hidden_dim': 102, 'gamma': 0.979758738158909, 'tau': 0.009308090322271517, 'noise_scale': 0.4484443819902037}. Best is trial 7 with value: -266.5839134865015.
100%|██████████| 1000/1000 [02:19<00:00,  7.15it/s]
[I 2024-09-27 16:05:52,112] Trial 8 finished with value: -987.0452388298746 and parameters: {'actor_lr': 0.001264541297527209, 'critic_lr': 2.4359886091447854e-05, 'hidden_dim': 48, 'gamma': 0.8042595921657423, 'tau': 0.0006611580890929267, 'noise_scale': 0.013883448945294308}. Best is trial 7 with value: -266.5839134865015.
100%|██████████| 1000/1000 [02:28<00:00,  6.73it/s]
[I 2024-09-27 16:08:20,671] Trial 9 finished with value: -783.5608680887241 and parameters: {'actor_lr': 0.0005420955290819098, 'critic_lr': 0.00038974932084839196, 'hidden_dim': 70, 'gamma': 0.9540729396144148, 'tau': 0.006404864928814715, 'noise_scale': 0.0004948111594013094}. Best is trial 7 with value: -266.5839134865015.
100%|██████████| 1000/1000 [01:30<00:00, 11.06it/s]
[I 2024-09-27 16:09:51,076] Trial 10 finished with value: -244.47979636717886 and parameters: {'actor_lr': 1.2731638101785453e-05, 'critic_lr': 0.000817515236292217, 'hidden_dim': 8, 'gamma': 0.9115849218071568, 'tau': 0.003078162165583782, 'noise_scale': 0.4939903351966581}. Best is trial 10 with value: -244.47979636717886.
100%|██████████| 1000/1000 [01:30<00:00, 11.03it/s]
[I 2024-09-27 16:11:21,776] Trial 11 finished with value: -226.3789257077783 and parameters: {'actor_lr': 1.3193409368890702e-05, 'critic_lr': 0.001035505525557857, 'hidden_dim': 11, 'gamma': 0.9061981926573026, 'tau': 0.0031788495251878677, 'noise_scale': 0.49671835741773906}. Best is trial 11 with value: -226.3789257077783.
...
100%|██████████| 1000/1000 [01:19<00:00, 12.58it/s]
[I 2024-09-27 18:44:00,174] Trial 98 finished with value: -192.45302088383679 and parameters: {'actor_lr': 1.543848219611113e-05, 'critic_lr': 0.0004363089313868234, 'hidden_dim': 19, 'gamma': 0.9557418507490619, 'tau': 0.0032328123276313234, 'noise_scale': 0.48754761786617307}. Best is trial 46 with value: -134.2939284916707.
100%|██████████| 1000/1000 [01:22<00:00, 12.13it/s]
[I 2024-09-27 18:45:22,620] Trial 99 finished with value: -201.80471189814202 and parameters: {'actor_lr': 1.1416834016311754e-05, 'critic_lr': 0.0003279227369016091, 'hidden_dim': 40, 'gamma': 0.8748100278595965, 'tau': 0.0023443285915074395, 'noise_scale': 0.45887538929301386}. Best is trial 46 with value: -134.2939284916707.
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
Best parameters: {'actor_lr': 1.0506603198418105e-05, 'critic_lr': 0.0005747127451203138, 'hidden_dim': 10, 'gamma': 0.9197484689502473, 'tau': 0.0009655446721377356, 'noise_scale': 0.49798430512987657}

In [23]:
# # Visualize optimization history
# optuna.visualization.plot_optimization_history(study)

# # Visualize the hyperparameter importance
# optuna.visualization.plot_param_importances(study)