In [1]:
!pip install numpy gymnasium wandb matplotlib



In [2]:
import numpy as np
import gymnasium as gym
import wandb
import random
import matplotlib.pyplot as plt
from collections import defaultdict

# Set random seed for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    gym.utils.seeding.np_random(seed)
    return seed

# Discretize the continuous state space
class StateDiscretizer:
    def __init__(self, n_bins=10):
        self.n_bins = n_bins
        # CartPole-v1 state space bounds
        self.cart_position_bins = np.linspace(-2.4, 2.4, n_bins)
        self.cart_velocity_bins = np.linspace(-4, 4, n_bins)
        self.pole_angle_bins = np.linspace(-0.2095, 0.2095, n_bins)
        self.pole_velocity_bins = np.linspace(-4, 4, n_bins)

    def discretize(self, state):
        cart_pos, cart_vel, pole_angle, pole_vel = state

        # Discretize each dimension
        cart_pos_bin = np.digitize(cart_pos, self.cart_position_bins)
        cart_vel_bin = np.digitize(cart_vel, self.cart_velocity_bins)
        pole_angle_bin = np.digitize(pole_angle, self.pole_angle_bins)
        pole_vel_bin = np.digitize(pole_vel, self.pole_velocity_bins)

        # Combine into a single discrete state
        return (cart_pos_bin, cart_vel_bin, pole_angle_bin, pole_vel_bin)

# SARSA Agent with epsilon-greedy exploration
class SARSAAgent:
    def __init__(self, state_discretizer, action_space, alpha=0.1, gamma=0.99, epsilon=0.1):
        self.state_discretizer = state_discretizer
        self.action_space = action_space
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = defaultdict(lambda: np.zeros(action_space.n))

    def select_action(self, state):
        state = self.state_discretizer.discretize(state)

        # Epsilon-greedy policy
        if np.random.random() < self.epsilon:
            return self.action_space.sample()  # Explore
        else:
            return np.argmax(self.q_table[state])  # Exploit

    def update(self, state, action, reward, next_state, next_action, done):
        state = self.state_discretizer.discretize(state)
        next_state = self.state_discretizer.discretize(next_state)

        # SARSA update rule
        current_q = self.q_table[state][action]

        if done:
            target_q = reward
        else:
            target_q = reward + self.gamma * self.q_table[next_state][next_action]

        # Update Q-value
        self.q_table[state][action] += self.alpha * (target_q - current_q)

# Q-Learning Agent with Softmax exploration
class QLearningAgent:
    def __init__(self, state_discretizer, action_space, alpha=0.1, gamma=0.99, temperature=1.0):
        self.state_discretizer = state_discretizer
        self.action_space = action_space
        self.alpha = alpha
        self.gamma = gamma
        self.temperature = temperature
        self.q_table = defaultdict(lambda: np.zeros(action_space.n))

    def select_action(self, state):
        state = self.state_discretizer.discretize(state)

        # Softmax exploration
        q_values = self.q_table[state]
        exp_q = np.exp(q_values / self.temperature)
        probabilities = exp_q / np.sum(exp_q)

        # Sample action based on probabilities
        return np.random.choice(self.action_space.n, p=probabilities)

    def update(self, state, action, reward, next_state, done):
        state = self.state_discretizer.discretize(state)
        next_state = self.state_discretizer.discretize(next_state)

        # Q-Learning update rule
        current_q = self.q_table[state][action]

        if done:
            target_q = reward
        else:
            target_q = reward + self.gamma * np.max(self.q_table[next_state])

        # Update Q-value
        self.q_table[state][action] += self.alpha * (target_q - current_q)

# Training function for SARSA
def train_sarsa(config=None):
    with wandb.init(config=config):
        config = wandb.config

        # Fixed seeds
        seeds = [42, 123, 456, 789, 101]
        all_final_rewards = []

        for seed in seeds:
            # Set environment
            env = gym.make("CartPole-v1")

            # Set random seed
            set_seed(seed)

            # Initialize state discretizer
            state_discretizer = StateDiscretizer(n_bins=config.n_bins)

            # Initialize SARSA agent
            agent = SARSAAgent(
                state_discretizer=state_discretizer,
                action_space=env.action_space,
                alpha=config.alpha,
                gamma=config.gamma,
                epsilon=config.epsilon
            )

            # Training loop
            episode_rewards = []

            for episode in range(config.n_episodes):
                state, _ = env.reset(seed=seed + episode)
                action = agent.select_action(state)
                done = False
                episode_reward = 0

                while not done:
                    next_state, reward, terminated, truncated, _ = env.step(action)
                    done = terminated or truncated
                    episode_reward += reward

                    # Select next action using current policy
                    next_action = agent.select_action(next_state)

                    # Update Q-values
                    agent.update(state, action, reward, next_state, next_action, done)

                    # Move to next state and action
                    state = next_state
                    action = next_action

                episode_rewards.append(episode_reward)

                # Log individual seed performance
                wandb.log({
                    f"seed_{seed}_episode": episode,
                    f"seed_{seed}_reward": episode_reward,
                })

            # Store final performance (avg of last 50 episodes)
            final_reward = np.mean(episode_rewards[-50:])
            all_final_rewards.append(final_reward)
            env.close()

        # Calculate average performance across all seeds
        avg_final_reward = np.mean(all_final_rewards)

        # Log the average performance
        wandb.log({
            "average_reward_across_seeds": avg_final_reward
        })

        return avg_final_reward

# Training function for Q-Learning
# Modified training function for Q-Learning
def train_qlearning(config=None):
    with wandb.init(config=config):
        config = wandb.config

        # Fixed seeds
        seeds = [42, 123, 456, 789, 101]
        all_final_rewards = []

        for seed in seeds:
            # Set environment
            env = gym.make("CartPole-v1")

            # Set random seed
            set_seed(seed)

            # Initialize state discretizer
            state_discretizer = StateDiscretizer(n_bins=config.n_bins)

            # Initialize Q-Learning agent
            agent = QLearningAgent(
                state_discretizer=state_discretizer,
                action_space=env.action_space,
                alpha=config.alpha,
                gamma=config.gamma,
                temperature=config.temperature
            )

            # Training loop
            episode_rewards = []

            for episode in range(config.n_episodes):
                state, _ = env.reset(seed=seed + episode)
                done = False
                episode_reward = 0

                while not done:
                    action = agent.select_action(state)
                    next_state, reward, terminated, truncated, _ = env.step(action)
                    done = terminated or truncated
                    episode_reward += reward

                    # Update Q-values
                    agent.update(state, action, reward, next_state, done)

                    # Move to next state
                    state = next_state

                episode_rewards.append(episode_reward)

                # Log individual seed performance
                wandb.log({
                    f"seed_{seed}_episode": episode,
                    f"seed_{seed}_reward": episode_reward,
                })

            # Store final performance (avg of last 50 episodes)
            final_reward = np.mean(episode_rewards[-50:])
            all_final_rewards.append(final_reward)
            env.close()

        # Calculate average performance across all seeds
        avg_final_reward = np.mean(all_final_rewards)

        # Log the average performance
        wandb.log({
            "average_reward_across_seeds": avg_final_reward
        })

        return avg_final_reward


# Sweep configuration for SARSA
# Updated sweep configuration for SARSA
sarsa_sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'average_reward_across_seeds',
        'goal': 'maximize'
    },
    'parameters': {
        'alpha': {
            'min': 0.01,
            'max': 0.5
        },
        'gamma': {
            'value': 0.99
        },
        'epsilon': {
            'min': 0.01,
            'max': 0.3
        },
        'n_bins': {
            'values': [10, 15, 20]
        },
        'n_episodes': {
            'value': 1000
        }
    }
}

# Sweep configuration for Q-Learning
qlearning_sweep_config = {
    'method': 'bayes',  # Bayesian optimization
    'metric': {
        'name': 'average_reward',
        'goal': 'maximize'
    },
    'parameters': {
        'alpha': {
            'min': 0.01,
            'max': 0.5
        },
        'gamma': {
            'value': 0.99  # Fixed as recommended
        },
        'temperature': {
            'min': 0.1,
            'max': 2.0
        },
        'n_bins': {
            'values': [10, 15, 20]
        },
        'n_episodes': {
            'value': 1000
        },
        'seed': {
            'values': [42, 123, 456, 789, 101]  # 5 random seeds
        }
    }
}







In [3]:
# Initialize and run the SARSA sweep
def run_sarsa_sweep():
    sweep_id = wandb.sweep(sarsa_sweep_config, project="RL_CartPole_SARSA")
    wandb.agent(sweep_id, function=train_sarsa, count=50)

# Initialize and run the Q-Learning sweep
def run_qlearning_sweep():
    sweep_id = wandb.sweep(qlearning_sweep_config, project="RL_CartPole_QLearning")
    wandb.agent(sweep_id, function=train_qlearning, count=50)
# Run the sweeps
if __name__ == "__main__":
    # Uncomment one of these to run the respective sweep
    run_sarsa_sweep()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: axs367lv
Sweep URL: https://wandb.ai/abhijithvinod-indian-institue-of-technology-madras/RL_CartPole_SARSA/sweeps/axs367lv


[34m[1mwandb[0m: Agent Starting Run: cu4cnh5n with config:
[34m[1mwandb[0m: 	alpha: 0.41040395202036256
[34m[1mwandb[0m: 	epsilon: 0.21065430640253097
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: Currently logged in as: [33mabhijithvinod[0m ([33mabhijithvinod-indian-institue-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█
seed_101_reward,▁▁▁▁▂▁▃▂▂▂▂▂▃▃▂▅▃▂▂▂▃▁▂▂▂▄▁▅▃▃▃▃▃▂▂▂█▂▁▄
seed_123_episode,▁▁▁▁▁▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇███
seed_123_reward,▁▂▁▂▁▂▂▂▂▃▄▂▄▃▂▃▄▂▇▂▅▅█▃▂▃▂▂▆▇▃▃▃▄▃▃▅▅▃▃
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇████
seed_42_reward,▁▁▁▁▂▁▂▂▂▃▂▂▂▃█▃▃▃▇▃▄▄▃▄▇▅▃▃▅▄▂▁▄▄▆▄▄▅▅▃
seed_456_episode,▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇██
seed_456_reward,▁▁▁▁▁▁▃▂▁▂▂▂▂▂▁▂▂▆▂▁▂▄▂▃▃▂▁▂▃▃▂▂▃▂▃▂▃▃█▂
seed_789_episode,▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██

0,1
average_reward_across_seeds,88.148
seed_101_episode,999.0
seed_101_reward,133.0
seed_123_episode,999.0
seed_123_reward,112.0
seed_42_episode,999.0
seed_42_reward,100.0
seed_456_episode,999.0
seed_456_reward,123.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 4jpgoygl with config:
[34m[1mwandb[0m: 	alpha: 0.17452136835253076
[34m[1mwandb[0m: 	epsilon: 0.1599508038989445
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇████
seed_101_reward,▅▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▂▂▁▃▂▂▂▂▅▁▆▃▃▅▅▁▅▂▆▃▆█▇▆
seed_123_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇█
seed_123_reward,▁▂▁▁▁▁▁▁▁▂▂▂▂▁▁▃▂▃▂▂▂▁▄▄▄▂▂▆▃█▅▄▅▂▇▅▅▁▅▄
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
seed_42_reward,▂▃▂▁▁▂▂▁▂▂▂▁▁▄▂▁▃▄▄▃▃▃▃▃▂▂▂▄▄▁▃▇▆▆▇▄█▇█▁
seed_456_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇█
seed_456_reward,▁▁▂▂▁▁▁▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▂▃▂▄▅▃▆▅▃█▃▆▅
seed_789_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇███

0,1
average_reward_across_seeds,49.2
seed_101_episode,999.0
seed_101_reward,57.0
seed_123_episode,999.0
seed_123_reward,79.0
seed_42_episode,999.0
seed_42_reward,59.0
seed_456_episode,999.0
seed_456_reward,27.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: c8x1eqe8 with config:
[34m[1mwandb[0m: 	alpha: 0.026185772491887825
[34m[1mwandb[0m: 	epsilon: 0.2980713188350562
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
seed_101_reward,▁▁▂▃▂▂▃▂▆▁▂▃▂▃▄▆▁▁▁▁▄▁▁▃▄▅▅▁▅▂▂▁▂▃▃█▁▃▁▃
seed_123_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇█
seed_123_reward,▂█▄▂▅▁▇▂▃▁▁▁▅▁▁▁▁▁▃▆▅▁▂▁▂▄▅▅▂▁▂▃▂▂▆▂▃▁▇▁
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇█████
seed_42_reward,▂▃▃▂▃▄▃▃▅▄▅▂▃▂▂▆▁▄▂▂▁▂▂█▂▃▂▂▃▄▂▂▂▂▁▁▂▃▃▂
seed_456_episode,▁▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇██
seed_456_reward,▁▂▂▃▄▁▁▁▂▂▃▂▂▂▁█▂▄▁▂▃▃▁▂▂▂▃▂▁▃▁▂▁▁▂▁▂▆▁▃
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███

0,1
average_reward_across_seeds,17.18
seed_101_episode,999.0
seed_101_reward,22.0
seed_123_episode,999.0
seed_123_reward,41.0
seed_42_episode,999.0
seed_42_reward,11.0
seed_456_episode,999.0
seed_456_reward,28.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: eyatf8eq with config:
[34m[1mwandb[0m: 	alpha: 0.011566259510949136
[34m[1mwandb[0m: 	epsilon: 0.24256618487479287
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
seed_101_reward,▁▃▁▅▂▄▂▃▄▄▂▆▂▁▂▂▂▄▂▂▃▄█▄▃▁▃▃▂▂▃▂▁▅▆▂▂▄▃█
seed_123_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇██
seed_123_reward,▁▁▄▂▂▃▁▂▁▁▁▁▁▂▁▂▂▂▁▂▂▁█▂▁▂▁▁▁▁▁▃▁▂▁▂▂▂▁▁
seed_42_episode,▁▁▁▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇███
seed_42_reward,▃▂▄▁▄▁▂▁▃▁▁▂▁▂▁▃▅▅▁▁▅▅▁▁▂▅▂▂▁▁▁▇▄▂▂▁▂█▂▁
seed_456_episode,▁▁▁▂▂▃▃▃▃▃▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇██████
seed_456_reward,▁▃▁▂▃▁▂▃▂▁▃▄▂▂▁▂▃▁▂▃▁▃▃█▂▃▅▁▁▂▂▂▃▂▄▂▂▁▁▁
seed_789_episode,▁▁▁▁▁▁▁▁▁▁▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇█

0,1
average_reward_across_seeds,13.608
seed_101_episode,999.0
seed_101_reward,25.0
seed_123_episode,999.0
seed_123_reward,9.0
seed_42_episode,999.0
seed_42_reward,9.0
seed_456_episode,999.0
seed_456_reward,14.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: opv8ta4l with config:
[34m[1mwandb[0m: 	alpha: 0.4707214526307355
[34m[1mwandb[0m: 	epsilon: 0.15893610149966508
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_101_reward,▁▁▁▁▁▂▂▂▃▁▄▃▃▄▄▂█▁▆▃▃▃▂▃▂▃▂▃▂▃▃▂▂▃▂▃▃▄▃▆
seed_123_episode,▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█
seed_123_reward,▁▁▁▁▁▁▁▁▂▂▁▃▂▃▃▂▃▂▂▃▂▃▃▂▄▂▁▃▃▄▂▇█▂▄▇▄▂▃▃
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
seed_42_reward,▁▁▁▁▁▂▁▁▁▂▂▁▂▅▃▄▃▅▃▂▄▆▆▄▆▄▃▅▇▃▂▃█▄▅▄▄▄▄█
seed_456_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇████
seed_456_reward,▁▁▁▁▁▂▂▃▃▃▂▅▄▃▃▃▃▄▅▆▄▄▄▂▄▇▆▅▅▅▅▄▅▂▅▅▃█▄▅
seed_789_episode,▁▁▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇███

0,1
average_reward_across_seeds,98.136
seed_101_episode,999.0
seed_101_reward,51.0
seed_123_episode,999.0
seed_123_reward,86.0
seed_42_episode,999.0
seed_42_reward,84.0
seed_456_episode,999.0
seed_456_reward,54.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: v83f71pd with config:
[34m[1mwandb[0m: 	alpha: 0.49880082540963705
[34m[1mwandb[0m: 	epsilon: 0.061578102449030295
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇██
seed_101_reward,▁▁▁▁▁▁▁▁▁▁▂▄▁▁▁▁▂▁▂▄▃▁▁▂▁▂▂▂▆█▃▁▃▄▇▄▅▄▆▄
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
seed_123_reward,▁▁▁▁▁▁▁▁▆▁▁▁▁▁▁▁▃▁▁▄▂▃▂█▇▆▄▂▂▂▁▇▄▂▄▃▆▄▆▃
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇██
seed_42_reward,▂▁▂▁▁▁▁▁▁▁▁▂▁▁▁▄▁▁▄▂█▇▆▁▃▅▅▁▄▆▇▆▄▅▇▂▃▃▇▅
seed_456_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇█████
seed_456_reward,▂▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁█▃▃▂▂▁▂▁▃▃▃▃▃▁▁▂▂▁▄▂▂▁▂▂
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇█

0,1
average_reward_across_seeds,71.216
seed_101_episode,999.0
seed_101_reward,91.0
seed_123_episode,999.0
seed_123_reward,79.0
seed_42_episode,999.0
seed_42_reward,41.0
seed_456_episode,999.0
seed_456_reward,60.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: m59i5398 with config:
[34m[1mwandb[0m: 	alpha: 0.497038972586999
[34m[1mwandb[0m: 	epsilon: 0.16892771658108116
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇██
seed_101_reward,▁▁▁▁▁▃▂▃▁▂▄▃▅▂▂▃▃▃▂▂▄▂▂▂▂▁▃▃▃▃▂▅▃▂▄▂▃▄█▃
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇█
seed_123_reward,▁▁▁▁▁▁▁▂▂▁▁▁▂▂▃▂▄▄▃▄▃▃▆▃▅▅▄▇▃▄▅▅▄▂▆▆▃▃█▅
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇███
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▂▁▂▁▂▃▃▅▂▃▂▅▂▇▂▃▃▄▆█▄▄▅▃▅▄▃▅▆▆
seed_456_episode,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
seed_456_reward,▁▁▁▁▁▁▁▁▂▂▂▁▂▂▃▄▃▅▇▂▅▃▃▄▄▄▄▄▃▆▅▃▄▆▆▄▅▅██
seed_789_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇███

0,1
average_reward_across_seeds,100.532
seed_101_episode,999.0
seed_101_reward,223.0
seed_123_episode,999.0
seed_123_reward,140.0
seed_42_episode,999.0
seed_42_reward,79.0
seed_456_episode,999.0
seed_456_reward,323.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: mifg96n8 with config:
[34m[1mwandb[0m: 	alpha: 0.499620243766804
[34m[1mwandb[0m: 	epsilon: 0.2921404639962764
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇██
seed_101_reward,▂▁▁▁▁▂▂▁▄▃▂▃▂▃▂▄▂▆▁▂▃▃▃▃▄▂█▃▃▃▃▃▂▂▆▃▄▂▁▂
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
seed_123_reward,▄▂▁▁▁▃▁▅▁▂▅▃▂▄▄▂▃▄▅▅▄▁▁▂▂▂▃▄▄▂█▂▃▁▄▆▁▄▂▂
seed_42_episode,▁▁▁▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇███
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▂▁▁█▂▃▂▃▅▂▂▂▁▄▃▄▂▃▁▃▄▅▄▅▅▂▇▆▃▃
seed_456_episode,▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
seed_456_reward,▁▁▂▃▃▁▂▃▁▂▂▄▃▄▃▂▅▃▅▃▃▂▅▄▄▂▅▄▄▂▄▃▂█▃▄▅▄▅▃
seed_789_episode,▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇█████

0,1
average_reward_across_seeds,80.816
seed_101_episode,999.0
seed_101_reward,101.0
seed_123_episode,999.0
seed_123_reward,150.0
seed_42_episode,999.0
seed_42_reward,45.0
seed_456_episode,999.0
seed_456_reward,165.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: qfqrc3hl with config:
[34m[1mwandb[0m: 	alpha: 0.479355980641493
[34m[1mwandb[0m: 	epsilon: 0.24237281340916653
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▂▂▂▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████████
seed_101_reward,▁▁▂▁▂▁▂▂▂▃▂▂▃▃▃▂▃▃▄▃▂▂▂▃▃▂▃▇▂▃▁▃▅▃▃▂▁█▃▄
seed_123_episode,▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇███████
seed_123_reward,▁▁▁▁▂▁▁▁▁▄▇█▂▂▂▁▃▂▂▂▁▄▂▂▄▄▄▃▁▄▅▂▆▇▃▃▄▄▂▅
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇█
seed_42_reward,▁▂▂▂▁▄▂▂▂▂▃▁▂▃▄▁▃▂▂▁▃▂▂▂▃▄▂▄▃▃▆▃▃▄▁▅▃█▂▁
seed_456_episode,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇██
seed_456_reward,▁▂▂▂▁▁▁▅▄▂▂▃▁▂▃▃▂▆▃▅▃▂▁▅▂▄▂▂▅▄▁▁▂▅▇▄▃█▆▄
seed_789_episode,▁▁▁▁▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██

0,1
average_reward_across_seeds,78.56
seed_101_episode,999.0
seed_101_reward,80.0
seed_123_episode,999.0
seed_123_reward,108.0
seed_42_episode,999.0
seed_42_reward,56.0
seed_456_episode,999.0
seed_456_reward,96.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wdkd46kx with config:
[34m[1mwandb[0m: 	alpha: 0.48938156748358275
[34m[1mwandb[0m: 	epsilon: 0.1925309693820992
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇█████
seed_101_reward,▁▁▁▁▁▁▁▂▁▁▄▃▃▂▄▅█▂▃▃▃▄▃▂▃▃▃▁▄▅▃▂▁▃▃▂▄▄▁▆
seed_123_episode,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
seed_123_reward,▁▁▁▁▂▁▁▁▁▁▂▂▁▁▂▂▃▃▅▃▂▃▂▃▅▃█▄▆▂▅▃▃▃▄▃▄▃▄▄
seed_42_episode,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▇▇▇▇█████
seed_42_reward,▄▁▁▃▂▃▅▂▃▄▅▁█▅▄▄▄▇▂▂▄▃▄▃▂▇▄▃▄▄▃▅▅▅▂▂▂▃▄▄
seed_456_episode,▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇██
seed_456_reward,▁▁▁▁▁▁▁▂▂▄▂▂▂▃▁▂▃▃▃▆▂▄▁▁▂▇▂▃▃▃▄▃▂▂█▄▄▃▃▇
seed_789_episode,▁▁▁▁▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇████

0,1
average_reward_across_seeds,90.56
seed_101_episode,999.0
seed_101_reward,175.0
seed_123_episode,999.0
seed_123_reward,136.0
seed_42_episode,999.0
seed_42_reward,60.0
seed_456_episode,999.0
seed_456_reward,91.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: hdpwt7m6 with config:
[34m[1mwandb[0m: 	alpha: 0.495504891784024
[34m[1mwandb[0m: 	epsilon: 0.11960677262539932
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇█████
seed_101_reward,▁▁▁▁▁▁▁▁▂▂▂▂▁▁▃▃▇▃▂▃▃▄▃▅▅▄▂▅▂▅▃▄▃▅▇▅▄▆█▄
seed_123_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▇▇▇▇▇███
seed_123_reward,▁▁▁▁▁▂▁▁▂▅▂▃▆▅▄▃▄▅▅▄▃█▅▃▃▇▄▄▇▄▆▄▅▄▃▃▂▁▅▄
seed_42_episode,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▂▂▁▁▁▂▁▃▂▃▃▂▄▃▄▃▃▂▄▅▂▃▄▃█
seed_456_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇█████
seed_456_reward,▆▃▁▂▁▄▂▁▃▁▄▄▁▂▁▅▂▄▅▂▅▄▅▂▄▄▄█▃▇▄▃▃▄▆▄▅▄▇▅
seed_789_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███

0,1
average_reward_across_seeds,88.152
seed_101_episode,999.0
seed_101_reward,15.0
seed_123_episode,999.0
seed_123_reward,90.0
seed_42_episode,999.0
seed_42_reward,54.0
seed_456_episode,999.0
seed_456_reward,28.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: cg6bp8o7 with config:
[34m[1mwandb[0m: 	alpha: 0.22360612119964
[34m[1mwandb[0m: 	epsilon: 0.011618611334325731
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███████
seed_101_reward,▃█▃▆▁▃█▃▆▁▃▁▃▆█▃▃▃▆▁▆▆▆▁▃▃▆▃▁▃▆▁▃▁▆█▆▆▃█
seed_123_episode,▁▁▂▂▂▂▂▂▂▂▂▂▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
seed_123_reward,▆▆▃▃▆▁▃▆▆▃█▃▁█▃▆▆▃▃▆▁▆▆▆█▃▆▁▆▆▆▆▃▁▁▃▃▃▃▃
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇████
seed_42_reward,▃▂▇▃▇▂▃▂▂▃▃▇▂▅▃▅▂▃▂▂▃▂▃▂▂▂▁▂▁▂▃▃▆▄█▂▂▁▆█
seed_456_episode,▁▁▁▁▁▁▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇███
seed_456_reward,█▆▆▅▃▆▁▃▆▃▃▃▆▆▃▅▆▃▆▃▆▃▆▃▃▆▆▆█▆▆▆▆▃▆▃▃▆▆▃
seed_789_episode,▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇██

0,1
average_reward_across_seeds,10.54
seed_101_episode,999.0
seed_101_reward,9.0
seed_123_episode,999.0
seed_123_reward,9.0
seed_42_episode,999.0
seed_42_reward,8.0
seed_456_episode,999.0
seed_456_reward,12.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: ipekqbbv with config:
[34m[1mwandb[0m: 	alpha: 0.4855342066681259
[34m[1mwandb[0m: 	epsilon: 0.17134967190765055
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇█████
seed_101_reward,▁▁▁▂▁▁▂▂▁▁▁▆▆▄▄▄▄▅▃▄▅▃▆▂▄▆▅▄▅▄▁▃█▁▅▄▃▄▅▅
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇███
seed_123_reward,▁▃▁▁█▅▂▃▄▄▃▇▃▂▄▂█▅▂▄▂▆▃▃▃▃▄▄▄▆▂▄▄▅▅▇▁▄▄▆
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇██
seed_42_reward,▁▁▂▁▁▂▁▂▁▁▃▄▁▁▂▄▆▄▃█▄▇█▄▄▇▂█▃▇█▇▆▆▅▆▅▇█▃
seed_456_episode,▁▁▁▂▂▂▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇█████
seed_456_reward,▁▁▁▁▁▄▂▂▄▁▂▂▂▃█▄▅▃▃▄▄▁▄▅▃▃▅▂▂▃▃▄▃▂▆▅▅▄▄▅
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇████

0,1
average_reward_across_seeds,85.132
seed_101_episode,999.0
seed_101_reward,74.0
seed_123_episode,999.0
seed_123_reward,133.0
seed_42_episode,999.0
seed_42_reward,76.0
seed_456_episode,999.0
seed_456_reward,44.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8j5ezmlr with config:
[34m[1mwandb[0m: 	alpha: 0.44599172795783854
[34m[1mwandb[0m: 	epsilon: 0.058512736417160566
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
seed_101_reward,▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▂▂▂▃▂█▂▂▃▆▃▂▂▂▃▂▄▅▆▅▂█▆▆█
seed_123_episode,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▁▁▂▆▆▃▅▆▄▇▆▅▂▂▁▁▅█
seed_42_episode,▁▁▁▁▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂▁▁▂▂▂▁▂▂▂▁▂▁▃▂▁▂▂▄▃▃▃▄▃█▂
seed_456_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_456_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▃▅▃▅▂▁█▄▅▃▄▄▆▆
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇██

0,1
average_reward_across_seeds,61.592
seed_101_episode,999.0
seed_101_reward,71.0
seed_123_episode,999.0
seed_123_reward,59.0
seed_42_episode,999.0
seed_42_reward,20.0
seed_456_episode,999.0
seed_456_reward,24.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: v0xv1fma with config:
[34m[1mwandb[0m: 	alpha: 0.18975989934124332
[34m[1mwandb[0m: 	epsilon: 0.04214355569572176
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███
seed_101_reward,▂▂▁▂▂▂▁▂▂▂▂▂▂▃▂▂▂▂▂▃▂▂▃▂▂▂▂▃▃▃▂▂▂▂▅▇█▂▇▂
seed_123_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇█
seed_123_reward,▃▂▂▃▁▁▁▂▃▁▂▃▂▃▃▂▂▃▃▃▁▂▂▃▃▃▃▇▆▃▄▄█▃▇▄▆▇▃▆
seed_42_episode,▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
seed_42_reward,▂▁▂▃▃▅▁▂▃▃▁▂▄▁▅▂▂▅▃▃▂▅▅▃▄▄▁▂▅▅▂▂▂▅▅▃█▃▆▂
seed_456_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
seed_456_reward,▂▂▁▁▂▂▂▂▃▆▂▁▁▂▂▃▂▁▂▂▃▂▁▁▃▁▅▂▃▂▅▃▆▅▆▄▄▃▆█
seed_789_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▇▇█████

0,1
average_reward_across_seeds,15.38
seed_101_episode,999.0
seed_101_reward,12.0
seed_123_episode,999.0
seed_123_reward,13.0
seed_42_episode,999.0
seed_42_reward,12.0
seed_456_episode,999.0
seed_456_reward,32.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u8ssgah3 with config:
[34m[1mwandb[0m: 	alpha: 0.459803693739701
[34m[1mwandb[0m: 	epsilon: 0.14483110510089456
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
seed_101_reward,▁▁▁▁▁▁▂▁▁▁▂▁▁▁▁▃▃▃▂▄▂▃▁▇▅▅▄▂▂█▅▄▂▄▂▁▃▃▄▃
seed_123_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█
seed_123_reward,▁▁▁▁▁▁▁▁▁▂▁▂▄▃▄▃▇▃▁▄▄▃▃▃▄▃█▃▄▃▄▁█▅▆▆▄▂▁▃
seed_42_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
seed_42_reward,▁▁▁▂▁▁▁▂▁▁▂▂▂▂▂▂▂▁▄▄▄▁▇▄▆▂▇▃▃▅█▄▆▅▆▆▆▄▄▅
seed_456_episode,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇████
seed_456_reward,▁▁▁▁▁▁▁▁▂▁▂▂▂▂▁▂▄▂▄▁▂▂▂▅▆▄▂▃▃▄▂▃▂▃▄▅▆█▄▃
seed_789_episode,▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██

0,1
average_reward_across_seeds,95.132
seed_101_episode,999.0
seed_101_reward,143.0
seed_123_episode,999.0
seed_123_reward,58.0
seed_42_episode,999.0
seed_42_reward,38.0
seed_456_episode,999.0
seed_456_reward,69.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: am24w6zy with config:
[34m[1mwandb[0m: 	alpha: 0.4902088077095058
[34m[1mwandb[0m: 	epsilon: 0.15457860481720737
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇████
seed_101_reward,▁▁▁▁▁▁▂▁▁▂▂▂▁▂▃▃▂▄▃▃▃▅▃▅▅▆▄▃▁▃▅▆▂█▄▃▄▄▇▇
seed_123_episode,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇██
seed_123_reward,▁▁▁▁▁▁▁▂▂▄▂▃█▄▂▂▃▃▃▆▃▁▂▅▃▄▅▃▅▄▃▃▁▄▂▄▆▃▅▃
seed_42_episode,▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇█
seed_42_reward,▁▃▃▂▂▄▃▄▃▂▂▂▆▄▇▄▁▄▅▄▃▅▃▃▃▄▅▄█▁▃▅▄▇▅▃▃▇▅▃
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
seed_456_reward,▂▂▁▂▁▅▄▁▃▃▃█▅▂▄▃▄▃▅▆▃▄▇▄▃▄▃▂▇▆▆█▃▆▆▆▄█▅▆
seed_789_episode,▁▁▁▁▁▁▂▂▂▂▂▂▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇███

0,1
average_reward_across_seeds,89.868
seed_101_episode,999.0
seed_101_reward,92.0
seed_123_episode,999.0
seed_123_reward,68.0
seed_42_episode,999.0
seed_42_reward,49.0
seed_456_episode,999.0
seed_456_reward,98.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wtt8gc8s with config:
[34m[1mwandb[0m: 	alpha: 0.2005080827177665
[34m[1mwandb[0m: 	epsilon: 0.1976414909162829
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇█
seed_101_reward,▂▁▁▁▁▁▂▂▂█▁▁▃▂▂▂▂▃▁▄▄▂▃▃▂▆▅▅▃▄▂▃▂▃▄▅▄▄▆▄
seed_123_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇████
seed_123_reward,▁▁▁▁▁▁▂▁▂▂▂▂▂▂▂▃▂▂▂▃▄▄▆▃▆▇▅▅▅▅▆▆▆█▅▆▅▆▆▇
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇███
seed_42_reward,▁▁▂▁▁▁▂▁▂▁▂▂▂▂▇▂▂▄▄▆▄▅▅▅▅▄▄▆▃▅█▅▅▇▅▄▁▆▆▄
seed_456_episode,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇█
seed_456_reward,▁▂▁▁▃▁▁▃▃▃▄▃▂▃▃▁▂▄▄▅▂▂▃▃▃▃▂▆▄▄▄▅▄▅▄▆▆█▅▆
seed_789_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇█

0,1
average_reward_across_seeds,84.236
seed_101_episode,999.0
seed_101_reward,67.0
seed_123_episode,999.0
seed_123_reward,64.0
seed_42_episode,999.0
seed_42_reward,162.0
seed_456_episode,999.0
seed_456_reward,156.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 0kknt8ri with config:
[34m[1mwandb[0m: 	alpha: 0.34983368542858323
[34m[1mwandb[0m: 	epsilon: 0.17420567775970408
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇███
seed_101_reward,▂▁▁▃▂▁▃▁▂▃▃▂▃▁▂▃▃▃▃▂▃▃▂▃▅▄▂▄▄▆▄▇█▇▅▅▅▄▃▂
seed_123_episode,▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇█
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▂▁▂▁▂▂▂▂▂▁▂▇█▅▃▅▂▄▂▅▅▆▆█▄▇▆▄▅▆
seed_42_episode,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
seed_42_reward,▁▁▁▁▁▁▁▂▂▂▁▁▂▂▁▁▃▃▄▃▄▃▅▅▃▃▆█▅▅█▃▃▄▂▅▄▂▄▅
seed_456_episode,▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_456_reward,▁▁▂▁▁▁▂▁▂▄▂▆▄▆▃▃▂▂▂▃▄▂▂▂▃▂▃▃▃▃▃▆▂▃▁▃▄▄▂█
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇███

0,1
average_reward_across_seeds,89.74
seed_101_episode,999.0
seed_101_reward,84.0
seed_123_episode,999.0
seed_123_reward,108.0
seed_42_episode,999.0
seed_42_reward,45.0
seed_456_episode,999.0
seed_456_reward,117.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: qz2nvl4y with config:
[34m[1mwandb[0m: 	alpha: 0.19389941779505865
[34m[1mwandb[0m: 	epsilon: 0.14608325357884402
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇████
seed_101_reward,▁▁▁▂▁▁▁▁▁▁▁▄▂▂▂▂▃▃▃▃▃▃▃▂▄▄▄▃▄▄▆▅▆▇▆█▆▆▃▆
seed_123_episode,▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇█
seed_123_reward,▁▁▂▁▁▁▁▁▂▂▁▂▁▂▂▂▄▃▂▃▂▂▃▃▃▄▂▄▅▅▅▆▅▇█▇█▆▄█
seed_42_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇███████
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▄▄▄▄▂▄▃▃▄▄▄▅▄▄▄▅▅▄▄█▂▄
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
seed_456_reward,▁▁▁▁▁▁▁▁▁▁▁▁▇▃▄▃▃▆▃▃▁▃▅▂▂▃▁▂▃▅▃▄█▃▁▃▄▃▁▄
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█

0,1
average_reward_across_seeds,69.92
seed_101_episode,999.0
seed_101_reward,72.0
seed_123_episode,999.0
seed_123_reward,43.0
seed_42_episode,999.0
seed_42_reward,116.0
seed_456_episode,999.0
seed_456_reward,64.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: vu53f3y6 with config:
[34m[1mwandb[0m: 	alpha: 0.4017517101368918
[34m[1mwandb[0m: 	epsilon: 0.03318286322752691
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▁▂▂▂▂▂▂▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇███
seed_101_reward,▂▃▂▂▂▂▃▁▂▂▂▂▁▂▂▂▂▄▃▅▃▅▃▅▄▆▇▅▄▇▇▆▇▆▅█▄▇█▇
seed_123_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇███
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▃▂▁▂▃▃▆▇▇▄▁▆▃▇▅▄▂▆▅█
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇████
seed_42_reward,▁▁▁▁▁▁▁▂▁▁▂▂▂▁▂▃▂▂▂▁▂▂▃▂▂▂▃▃▃▃▃▃▃▃▂▃▂▅▃█
seed_456_episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇███
seed_456_reward,▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▂▁▂▂▄▂▂▄▆▆▄▇▇██▂▆▇▇▇▆▆▆
seed_789_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇███

0,1
average_reward_across_seeds,49.336
seed_101_episode,999.0
seed_101_reward,17.0
seed_123_episode,999.0
seed_123_reward,65.0
seed_42_episode,999.0
seed_42_reward,47.0
seed_456_episode,999.0
seed_456_reward,22.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ze0xw7za with config:
[34m[1mwandb[0m: 	alpha: 0.08498484080885027
[34m[1mwandb[0m: 	epsilon: 0.09135262797017328
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇████
seed_101_reward,▁▁▁▄▂▁▁▁▂▂▁▁▁▁▁▁▁▃▁▁▁▁▁▁▃▁█▂▁▁▁▁▂▁▁▁▂▁▁▁
seed_123_episode,▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█████
seed_123_reward,▂▃▃▂▃▄▃▂▁▃▃▃▃▇▃▅▂▂█▃▂▂▃▃▃▃▂▃█▃▂▄▄▂▃▃▂▂▄▂
seed_42_episode,▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇████
seed_42_reward,▅▆▇▃▅▂▁▃▃▅▅▂▅▃▃▂▅▅▅▃▅▅▄▆▁▂▇▃▇▃▇▄█▂▇▄▄▃▄▄
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
seed_456_reward,▁▃▅▁▂▄▂▁▁▁▃▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▂▁▂▂▂▁▂▁▁▃▁▃▁
seed_789_episode,▁▁▁▁▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
average_reward_across_seeds,17.676
seed_101_episode,999.0
seed_101_reward,9.0
seed_123_episode,999.0
seed_123_reward,9.0
seed_42_episode,999.0
seed_42_reward,11.0
seed_456_episode,999.0
seed_456_reward,48.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: avvy493y with config:
[34m[1mwandb[0m: 	alpha: 0.22698490075995575
[34m[1mwandb[0m: 	epsilon: 0.16052179179162043
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇██
seed_101_reward,▁▁▁▁▁▁▄▆▆▃▅▁▁▁▁▇▅▅▅▅▇▄▅▆▇▆▆█▃▅▆▄██▇▇▄▆█▃
seed_123_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇████
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂▁▁▃▄▂▄▂▄▂▄▄▃▅▂▆▄▄▅▅▆▅▅▄██
seed_42_episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇█
seed_42_reward,▁▁▁▁▁▁▂▁▁▁▁▂▂▂▂▄▄▄▂▄▅▄▄▁▅▄▂▂▂▄▄▄▃▅▄▆▅▅█▆
seed_456_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇███
seed_456_reward,▂▂▁▁▁▂▁▂▂▂▂▂▃▃▃▂▂▂▃▃▆▅▅▅▅▅▆▆▆▆█▇▅▆▇▄▅█▆▅
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇██

0,1
average_reward_across_seeds,81.328
seed_101_episode,999.0
seed_101_reward,83.0
seed_123_episode,999.0
seed_123_reward,58.0
seed_42_episode,999.0
seed_42_reward,56.0
seed_456_episode,999.0
seed_456_reward,82.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: yhvbbjel with config:
[34m[1mwandb[0m: 	alpha: 0.3741969225103649
[34m[1mwandb[0m: 	epsilon: 0.187561095372338
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇██
seed_101_reward,▂▂▂▁▁▁▂▃▁▅▃▃▂▃▃▄▃▃▄▇▆▄▅▃▃▂▆▂██▃▄▂▇▆▄▇▃▅▆
seed_123_episode,▁▁▁▁▁▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇██████
seed_123_reward,▁▁▁▁▁▁▁▁▁▂▂▁▂▂▂▂▂▂▃▃▃▂▃▁▂▃▃▂▃▃▃▄▂▅▄▁▂▃█▅
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇███
seed_42_reward,▁▁▁▁▁▁▁▁▂▂▅▄▃▃▃▃▃▃▁▃▅▃▃▃▄▃▄█▆▄▅▃▄█▂▄▄▂▂▅
seed_456_episode,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▇▇▇▇▇▇▇▇███
seed_456_reward,▁▁▁▂▁▂▂▁▅▁▃▅▁▂▃▃▃▂▂▄▃▂▃▃▇▃▁▃▁▃▃▄▃▃▆▃▂█▅▃
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇█

0,1
average_reward_across_seeds,90.316
seed_101_episode,999.0
seed_101_reward,42.0
seed_123_episode,999.0
seed_123_reward,46.0
seed_42_episode,999.0
seed_42_reward,23.0
seed_456_episode,999.0
seed_456_reward,77.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: vtmh2zqs with config:
[34m[1mwandb[0m: 	alpha: 0.14860445595276958
[34m[1mwandb[0m: 	epsilon: 0.14940723609827686
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇███
seed_101_reward,▁▁▁▁▁▅▁▁▁▁▁▁▅▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▂▁█▁▁▄▂▃▁
seed_123_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇█
seed_123_reward,▄▃▂▄▅▁▅▂▂▂▄█▄▃▂▄▂▂▂▂▂▂▂▁▄▅▄▂▄▆▂▃▂▂▄▇▄▂▅▅
seed_42_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇█
seed_42_reward,▁▂▂▁▁▁▁▃▁▁▁▁█▁▂▁▁▆▅▃▅▂▂▅▃▃▃▅▂▅▁▁▅▁▄▅▄█▁▅
seed_456_episode,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇█
seed_456_reward,▂▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁▂▁▁▁▁▁▂▁▁▁▄▁▁▁▂▁▁▂▁▁▁▅█
seed_789_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇█

0,1
average_reward_across_seeds,34.204
seed_101_episode,999.0
seed_101_reward,14.0
seed_123_episode,999.0
seed_123_reward,10.0
seed_42_episode,999.0
seed_42_reward,75.0
seed_456_episode,999.0
seed_456_reward,40.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 1uqpmfwt with config:
[34m[1mwandb[0m: 	alpha: 0.28461540176535627
[34m[1mwandb[0m: 	epsilon: 0.10413347767383407
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇██
seed_101_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄▄▁▂▂▂▃▂▅█▄▄▆▅▃▄▃▆▇
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇█████
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▆▆▄▆▂▁▁▁█
seed_42_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇██████
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▁█▃▂▃▂▆▁▃▃▁▂▄▂
seed_456_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇████
seed_456_reward,▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▂▁▁▁▁▁▄▂▃▂▅▆▂█▃▆▁▂▂▂▁▃▃▃▃
seed_789_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▆▇▇▇▇▇▇▇███

0,1
average_reward_across_seeds,63.936
seed_101_episode,999.0
seed_101_reward,38.0
seed_123_episode,999.0
seed_123_reward,17.0
seed_42_episode,999.0
seed_42_reward,73.0
seed_456_episode,999.0
seed_456_reward,53.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: wkw13jy7 with config:
[34m[1mwandb[0m: 	alpha: 0.0886011117833053
[34m[1mwandb[0m: 	epsilon: 0.19741126101573703
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▇▇▇██
seed_101_reward,▁▁▂▂▁▃▂▁▁▁▇▁▁▁▃█▁▂▂▃▁▆▄▁▃▁▁▃▁▁▂▃▆▃▁▃▆▃▅█
seed_123_episode,▁▁▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇██
seed_123_reward,▁▁▃▁▂▆▁▁▁▁▁▂▁▃▁▁▁▁▁▅▁▂▂▂▁▁▂▁▁▂▁█▁▁▂▂▁▂▃▁
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▆▆▆▆▆▇▇▇▇▇▇▇███
seed_42_reward,▂▃▂▂▂▃▃▂▃▁▆▃▄▁▂▄▂▂▃▂▁▂▁▂▃▁▂▃▄▃▄▂▂▇▃▄▅▂▂█
seed_456_episode,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇█
seed_456_reward,▃▇▂▃▂▁▁▂▂▂▂▁▆▁▂▁▆▂▁▂▃▂▁▁▁▂▁▁▂█▃▂▂▁▁▁▂▂▂▂
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇█████

0,1
average_reward_across_seeds,18.876
seed_101_episode,999.0
seed_101_reward,93.0
seed_123_episode,999.0
seed_123_reward,14.0
seed_42_episode,999.0
seed_42_reward,11.0
seed_456_episode,999.0
seed_456_reward,16.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: h80j44ol with config:
[34m[1mwandb[0m: 	alpha: 0.4148597381601265
[34m[1mwandb[0m: 	epsilon: 0.06081796301175036
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇████
seed_101_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▃▃█▃▄▂▂▃▂▂▃▃▃▃▃▂▂▃▃▃▃▃▂▂▂
seed_123_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇█
seed_123_reward,▁▁▁▂▆▂▁▁▁▁▂▁▅▄▂▅▇█▅▄▅▄▆▂▃▄▄▆▅▆▇▄▄▇▇▇▇▆▇█
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇███
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▂▃▁▂▁▅▄▁▇▅▃█▃▆▅█▄▆▇█▅▅▆▅█▆▆▇▃▅
seed_456_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_456_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▁▁▂▃▁▂▂▃▃▄▅▃▂██▃▁▄▆▅▂█
seed_789_episode,▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
average_reward_across_seeds,66.004
seed_101_episode,999.0
seed_101_reward,69.0
seed_123_episode,999.0
seed_123_reward,55.0
seed_42_episode,999.0
seed_42_reward,69.0
seed_456_episode,999.0
seed_456_reward,32.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: i8sor0ji with config:
[34m[1mwandb[0m: 	alpha: 0.048509901545161646
[34m[1mwandb[0m: 	epsilon: 0.1706737043370652
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
seed_101_reward,▁▂▂▁▂▁▂▁▁▅▃▂▂▂▂▃▂▂▁▃▂▂▂▂▁▁▂▁▁▂▁▂▁█▁▂▃▂▁▁
seed_123_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
seed_123_reward,▁▂▃▄▂▂▂█▂▂▂▃▃▄▃▁▂▃▂▃▃▂▁▁▂▁▃▃▁▅▄▄▂▃▃▁▆▃▃▂
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█████
seed_42_reward,▁▂▁▁▆▂▁▁▂▁▂▂▁▂▁▁▁▁▁▁▂▁▃▂▁▁▁▁▂▂▁█▁▁▂▁▂▁▁▁
seed_456_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇████
seed_456_reward,▂▂▁▃▂▂▁▂▂▂▅▄▂▂▂█▃▂▄▃▂▂▃▁▂▂▁▄▄▁▃█▃▂▆▁▂▂▃▁
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▇▇▇▇▇█████

0,1
average_reward_across_seeds,13.404
seed_101_episode,999.0
seed_101_reward,9.0
seed_123_episode,999.0
seed_123_reward,10.0
seed_42_episode,999.0
seed_42_reward,9.0
seed_456_episode,999.0
seed_456_reward,10.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 1a05ahfg with config:
[34m[1mwandb[0m: 	alpha: 0.39970611881272144
[34m[1mwandb[0m: 	epsilon: 0.15037991455115535
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇████
seed_101_reward,▁▁▁▁▁▁▁▁▂▄▂▁▁▄▃▃▄▄▃▂▅▅▃▆▅▅▅▄▃▄█▅▄▃▅▅▂▅▆▇
seed_123_episode,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇████
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▁▂▂▂▅▁▄▃█▃▃▃▃▄▅▄▅▂▆▅▄▃▄▄▇▂▂▃▃▆
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇████
seed_42_reward,▁▁▁▁▁▁▁▂▁▂▂▂▂▃▁▁▁▁▄▂▃▂▄▂▃▂▃▅▂▃▅▃▃▃▅▃▂▂█▂
seed_456_episode,▁▁▁▁▂▂▂▂▂▂▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
seed_456_reward,▁▁▂▃▂█▄▄▅▃▄▂▂▃▃▂▂▃▃▄▁▃▃▃▄▂▃▃▃▃▃▂▅▂▂█▃▁▄▇
seed_789_episode,▁▁▁▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇███

0,1
average_reward_across_seeds,96.968
seed_101_episode,999.0
seed_101_reward,139.0
seed_123_episode,999.0
seed_123_reward,62.0
seed_42_episode,999.0
seed_42_reward,144.0
seed_456_episode,999.0
seed_456_reward,135.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: oxbqclbg with config:
[34m[1mwandb[0m: 	alpha: 0.4397631554547789
[34m[1mwandb[0m: 	epsilon: 0.1565716780592825
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█
seed_101_reward,▁▁▁▁▁▁▂▂▁▁▁▁▁▂▂▃▃▃▂▃▃▃▄▃▆▃▃▃▅▅█▃▅▂▃▃▃▄▅▆
seed_123_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇███
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▂▃▂▁▁▂▁▂▂▃▅▃█▃▃▄▂▂▃▃▃▃▆▃▄▃▄▃▄▄
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇████
seed_42_reward,▁▁▁▂▂▂▁▁▁▁▃▃▃▂▃▃▁▁▂▂▁▂▄▃▃▃▄▁▃▃▂▂▃▃▄▃▃▃█▄
seed_456_episode,▁▁▁▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇██
seed_456_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▃▂▃▂█▂▁▄▁▃▂▃▃▃▁▄▄▃▃▇▂▄▁▄▂▆
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇█

0,1
average_reward_across_seeds,85.248
seed_101_episode,999.0
seed_101_reward,156.0
seed_123_episode,999.0
seed_123_reward,77.0
seed_42_episode,999.0
seed_42_reward,75.0
seed_456_episode,999.0
seed_456_reward,47.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: ysfvskn7 with config:
[34m[1mwandb[0m: 	alpha: 0.38627945985528345
[34m[1mwandb[0m: 	epsilon: 0.12967986950289626
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▇▇▇████
seed_101_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▃▂▂▂▂▂▂▁▂▂▂▄▂▂█▂▂
seed_123_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇██
seed_123_reward,▁▁▂▁▁▁▁▁▂▁▂▁▂▁▂▂▁▂▂▁▆▂▂▄▆▆▄▃▄▆▆▂▆▆▅▆▄▅▄█
seed_42_episode,▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇███
seed_42_reward,▁▁▁▁▁▂▁▂▃▂▄▃▂▂▃▂▂▃▂▂▃▂▃▂▃▂▂▃▂█▅▃▃▁▂▂▁▂▃▄
seed_456_episode,▁▁▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇█
seed_456_reward,▁▁▁▁▁▁▁▁▂▃▂▃▃▁▂▂▃▃▂▃█▆▂▃▄▃▁▃▂▃▆▄▂▄▆▁▂▃▃▃
seed_789_episode,▁▁▁▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇████

0,1
average_reward_across_seeds,88.792
seed_101_episode,999.0
seed_101_reward,128.0
seed_123_episode,999.0
seed_123_reward,100.0
seed_42_episode,999.0
seed_42_reward,102.0
seed_456_episode,999.0
seed_456_reward,53.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 6vkw86ix with config:
[34m[1mwandb[0m: 	alpha: 0.4621009704181956
[34m[1mwandb[0m: 	epsilon: 0.16238445015704306
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█
seed_101_reward,▁▁▁▁▁▁▁▁▁▁▂▂▁▃▁▄▃▃▁▁▂▃▃▂▂▃▂▂▂▂▃▂▃▁▂█▅▅▃▃
seed_123_episode,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇██
seed_123_reward,▁▁▂▁▁▁▁▁▁▂▁▂▁▁▁▂▄▁▄▂▁▅▅▄▃▄▅█▇▅▁▅▄▃▅▄▂▃▃▄
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇█████
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▁▂▅▂▆▄▄▃▄▂▆▄▆▅▅▇▃▂▃█▄▂▂▄▃▆▂▆▆▄
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
seed_456_reward,▁▁▁▁▁▁▁▁▂▁▂▂▂▂▃▄▂▆▃▁▂▃▃▂▃▃▃▄▃▂▃▂▄▃▄▃█▅▃▇
seed_789_episode,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇███

0,1
average_reward_across_seeds,91.996
seed_101_episode,999.0
seed_101_reward,22.0
seed_123_episode,999.0
seed_123_reward,23.0
seed_42_episode,999.0
seed_42_reward,156.0
seed_456_episode,999.0
seed_456_reward,103.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 99qnx2mx with config:
[34m[1mwandb[0m: 	alpha: 0.499295219427611
[34m[1mwandb[0m: 	epsilon: 0.20903287465088607
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
seed_101_reward,▁▁▁▂▂▁▂▂▂▃▃▃▂▄▁▄▂▃▃▅▅▃▃▅▅█▄▃▃▆▄▅▅▄█▄▅▄▆▆
seed_123_episode,▁▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█
seed_123_reward,▁▁▁▁▁▁▂▁▁▂▁▃▃▅▂▂▁▃▂▅▃▃▂▃▃█▄▅▃▃▃▃▂▃▄▃▁▄▃▃
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▇▇▇▇▇█████
seed_42_reward,▁▁▂▁▁▁▂▄▃▁▄▆▅▄▂▂▂▅▃▁▂▄▅▄▃▄▃▄█▇▃▃▃▃▄▆▅▄▄▅
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇███
seed_456_reward,▁▁▁▁▃▁▁▁▁▂▃▁▁▄▅▂▃▂▅▁▃▃▂▂█▃▃▃▄▃▁▃▃▄▅▃▄▂▃▄
seed_789_episode,▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇█████

0,1
average_reward_across_seeds,84.436
seed_101_episode,999.0
seed_101_reward,45.0
seed_123_episode,999.0
seed_123_reward,91.0
seed_42_episode,999.0
seed_42_reward,54.0
seed_456_episode,999.0
seed_456_reward,90.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: n5r8ps60 with config:
[34m[1mwandb[0m: 	alpha: 0.29803986705299934
[34m[1mwandb[0m: 	epsilon: 0.20378148153533415
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_101_reward,▁▁▁▁▁▂▁▃▆▂▄▆▆▃▅▃▅▄▄▇▄▄▄▃▆▃▅▃▅▂▄▇▃▆▄▁█▇▇█
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇█
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▂▁▅▁▃▄▂▃▃▃▃▅█▃▄▇▃▃▅▃▄▄▄
seed_42_episode,▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇█
seed_42_reward,▁▂▂▁▁▃▄▄▃▅▂▅▄▄▃▄▂▅▅▁▃▅▆▂▄▆▁█▁▅▄▅▄▂▆▆▅▄▃▂
seed_456_episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇█
seed_456_reward,▁▁▁▁▁▁▁▁▁▁█▁▂▃▁▂▃▂▂▁▃▂▂▃▃▃▂▃▃▂▃▂▃▁▃▃▂▃▂▃
seed_789_episode,▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇█

0,1
average_reward_across_seeds,91.848
seed_101_episode,999.0
seed_101_reward,62.0
seed_123_episode,999.0
seed_123_reward,58.0
seed_42_episode,999.0
seed_42_reward,160.0
seed_456_episode,999.0
seed_456_reward,44.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 72tzti2k with config:
[34m[1mwandb[0m: 	alpha: 0.4912281816301928
[34m[1mwandb[0m: 	epsilon: 0.16284490730944923
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▇▇▇▇████
seed_101_reward,▁▁▁▁▁▁▂▁▁▂▂▄▂▄▄▃▇▄▃▃▄▄▂█▆▆▆▅▄▃█▄▅▂▃▃▄▁▄▇
seed_123_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇████
seed_123_reward,▁▁▂▁▁▁▂▂▁▁▂▁▂▃▂▃▃▄▄▂▄▃▇█▄▄▄▄▃▄▃▇▄▅▆▆█▅▅▆
seed_42_episode,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
seed_42_reward,▁▁▁▁▁▂▆▄▄▅▂▃▄▄▅█▆▃▅▆▂▃▂▅▅▅█▄▅▄▇▇▅▃▂▃▆▃▂▅
seed_456_episode,▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇███
seed_456_reward,▁▁▂▂▁▂▁▅▂▄▁▄▄▄▄▃▇▃▄▄▅▅▄▃▅▄▄▇█▂▃▄█▄▄▃▅▃▄▇
seed_789_episode,▁▁▁▁▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇█

0,1
average_reward_across_seeds,97.436
seed_101_episode,999.0
seed_101_reward,196.0
seed_123_episode,999.0
seed_123_reward,99.0
seed_42_episode,999.0
seed_42_reward,93.0
seed_456_episode,999.0
seed_456_reward,59.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 8fiqsb1h with config:
[34m[1mwandb[0m: 	alpha: 0.3117386985541326
[34m[1mwandb[0m: 	epsilon: 0.1969189314859723
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇█████
seed_101_reward,▁▁▁▁▂▁▁▂▂▁▂▁▁▂▂▂▁▃▃▄▃▄▃▄▃▄▇▅▆▂▄▃▇▆▆▂▅▄▅█
seed_123_episode,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
seed_123_reward,▁▁▁▁▂▁▁▂▄▃█▃▂▆▃▃▅▅▄▅▃▆█▇▆▂▄▄▇▆▅▄▄▄█▇▂▁▄▄
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇███
seed_42_reward,▂▂▁▁▁▁▁▃▁▁▁▂▂▂▁▃▂▃▂▄▂▂█▁▁▃▅▃▄▆▄▃▄▁▄▄▂█▄▆
seed_456_episode,▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▇▇▇▇█████
seed_456_reward,▁▁▁▃▁▁▁▁▁▁▂▁▁▁▂▁▂▁▂▄▄▂▃▅▃▅▃▆▇█▃▃▄▅▆▄▆▃▃▅
seed_789_episode,▁▁▁▁▁▁▁▁▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▇▇▇▇▇████████

0,1
average_reward_across_seeds,90.572
seed_101_episode,999.0
seed_101_reward,101.0
seed_123_episode,999.0
seed_123_reward,71.0
seed_42_episode,999.0
seed_42_reward,74.0
seed_456_episode,999.0
seed_456_reward,100.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 7m851s03 with config:
[34m[1mwandb[0m: 	alpha: 0.4509150761055378
[34m[1mwandb[0m: 	epsilon: 0.13612839930007706
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
seed_101_reward,▁▁▁▁▂▁▂▂▁▁▁▂▂▁▂▂▁▂▃▂▃▇▄▃▄▄▁▅█▅▇▆▃▅▃▃▄▃▆▅
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▁▃▃▃▂▂▃▄▂▃▃▂▄▅▃▄▄▄▄▅▃▄▃▂▄▂▄█▂█
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇███
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▁▄▂▁▃▃█▃▄▄▃▂▃▃▃▅▃▅▃▃▅▂▄▄▂▅▂▅▂▄
seed_456_episode,▁▁▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇████
seed_456_reward,▂▂▂▃▂▁▄▁▄▄▄▄▄▃▂▅▃▃▄▄▂▅▂▆▃▅▆▂▅██▅▃▃▆▄▂▅▆▆
seed_789_episode,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████

0,1
average_reward_across_seeds,89.116
seed_101_episode,999.0
seed_101_reward,83.0
seed_123_episode,999.0
seed_123_reward,158.0
seed_42_episode,999.0
seed_42_reward,136.0
seed_456_episode,999.0
seed_456_reward,46.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: su4kqbd8 with config:
[34m[1mwandb[0m: 	alpha: 0.4842064396003078
[34m[1mwandb[0m: 	epsilon: 0.1647830641543609
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇███
seed_101_reward,▁▁▁▁▂▁▁▃▂▁▂▂▃▄▃▂▁▃▄▃▄▄▆▃█▂▃▄▁▅▄▁▅▂▅▄▆▄█▅
seed_123_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇█████
seed_123_reward,▁▁▁▁▁▁▁▂▁▁▂▂▁▁▁▃▂▂▂▃▂▃▂▃▂▃▂▆█▄▃▃▃▃▄▂▃▂▄▃
seed_42_episode,▁▁▁▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇█
seed_42_reward,▁▁▁▂▂▂▅▂▄▁▁▁▂▅▄▄▇▁▂▄▆▄▃█▂▃▅▄▅▃▅▄▄▃▇▆▂▅▅▃
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇██
seed_456_reward,▁▁▁▁▂▁▁▁▂▁▂▄█▄▄▄▇▆▃▂▃▄▄▁▄▃▄▄▅▄▂▃▃▄▂▃▄▅▇▃
seed_789_episode,▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇█████

0,1
average_reward_across_seeds,97.848
seed_101_episode,999.0
seed_101_reward,95.0
seed_123_episode,999.0
seed_123_reward,131.0
seed_42_episode,999.0
seed_42_reward,132.0
seed_456_episode,999.0
seed_456_reward,60.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: we0zjdm2 with config:
[34m[1mwandb[0m: 	alpha: 0.3807356673391468
[34m[1mwandb[0m: 	epsilon: 0.1555447624395171
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇███
seed_101_reward,▁▁▁▁▁▁▁▁▂▁▃▂▂▃▂▃▃▂▃▂▅▅▃▄▃▂▂▃▃▂█▆▄▅▂▅▃▄▂▄
seed_123_episode,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇██
seed_123_reward,▁▁▁▁▁▁▁▁▂▁▂▁▃▃▂▅▂▂▃▂▃▄▃▃▂▂█▇▅▆▃▁▂▄▃▄▄▂▇▃
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█
seed_42_reward,▂▁▂▂▁▁▁▁▁▂▃▁▂▄▄▃▂▃▃▅▃▄▆▅▄▃▅▅▇▂▃▂▇▄▆▄▅▆█▅
seed_456_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇███
seed_456_reward,▁▁▁▁▁▂▁▂▁▂▂▄▂▃▃▂▅▂▆▆▃▂▆▄▃▂▄▃▆▅▃▆▅▁▃▄█▂▅█
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇██

0,1
average_reward_across_seeds,104.9
seed_101_episode,999.0
seed_101_reward,222.0
seed_123_episode,999.0
seed_123_reward,162.0
seed_42_episode,999.0
seed_42_reward,94.0
seed_456_episode,999.0
seed_456_reward,116.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: odzo9a0n with config:
[34m[1mwandb[0m: 	alpha: 0.3899695520332141
[34m[1mwandb[0m: 	epsilon: 0.14176349073774502
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇██
seed_101_reward,▁▁▁▂▁▁▁▁▂▁▂▃▄▂▃▄▄▅▃▄▄▃▄▂▆▃▂█▂▅▂█▃▅▄▅▅▃▂▃
seed_123_episode,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇████
seed_123_reward,▁▁▁▁▁▁▁▂▁▁▂▁▂▃▂▃▃▃▂▃▃▃▄▃▃▂▄▃▅▂▄█▅▅▆▃▅▅▄▃
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇████
seed_42_reward,▂▁▁▁▁▃▁▄▂▁▃▃▃▂▃▃▂▂▃▃▂▁▂▃▄▃▃▃▄▃▃▂▃▃█▃▂▄▃▅
seed_456_episode,▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇████
seed_456_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▅▆▄▁▂▃▃▂▇▂▃▆▂▆▃▄▅▅▂▅▃▃█▆
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇██

0,1
average_reward_across_seeds,95.188
seed_101_episode,999.0
seed_101_reward,87.0
seed_123_episode,999.0
seed_123_reward,64.0
seed_42_episode,999.0
seed_42_reward,30.0
seed_456_episode,999.0
seed_456_reward,168.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: rpcqfxzp with config:
[34m[1mwandb[0m: 	alpha: 0.3679970002952621
[34m[1mwandb[0m: 	epsilon: 0.17026729447516015
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
seed_101_reward,▁▁▁▁▁▁▁▂▂▁▂▃▁▁▁▂▅▄▃▃▆▄▆▃▂▅▄▅▄▃▆▅█▆▆▅▃▆▆▄
seed_123_episode,▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇████
seed_123_reward,▁▁▁▁▁▁▁▂▁▁▁▂▁▄▁▃▂▃▃▄▃▁▂▂▂▃▃▅▅▃▂▃▅▃▄▅▃█▃▁
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇█████
seed_42_reward,▁▁▁▁▁▁▂▂▁▁▄▁▄▁▃▆▁▃▂▂▄▃▃▂▂▄▄▆▅▄▁▅▅▄▆▆▅█▂▅
seed_456_episode,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██
seed_456_reward,▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▂▂▂▁▂▂▃▄▂▄▂▄▂▂▂█▂▃▃▆▄▅
seed_789_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇▇█

0,1
average_reward_across_seeds,87.444
seed_101_episode,999.0
seed_101_reward,86.0
seed_123_episode,999.0
seed_123_reward,73.0
seed_42_episode,999.0
seed_42_reward,53.0
seed_456_episode,999.0
seed_456_reward,34.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: jj7j0llj with config:
[34m[1mwandb[0m: 	alpha: 0.3233930279011794
[34m[1mwandb[0m: 	epsilon: 0.1476593088779125
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇███
seed_101_reward,▁▁▁▁▁▁▁▁▁▁▁▂▃▃▂▂▂▃▂▃▂▄█▂▃▃▇▄▄▂▆▃▂▅▅▅▄▄▄▄
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▂▃▅▁▄▂▃▄▃▁▂▂▂▃▂▃▂▂▃█▃▃
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇████
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▂▂▄▃▂▅▄▁▃▄▂▃▃▃▃▄▅▄▃▄▄█
seed_456_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇██
seed_456_reward,▁▁▁▁▁▁▁▂▁▁▃▁▂▂▄▃▃▅▄▃▂▄▂▅▃▆▃▄▄▂▁▄▂▂█▄▅▃▁█
seed_789_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██

0,1
average_reward_across_seeds,97.436
seed_101_episode,999.0
seed_101_reward,106.0
seed_123_episode,999.0
seed_123_reward,180.0
seed_42_episode,999.0
seed_42_reward,71.0
seed_456_episode,999.0
seed_456_reward,40.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 62yqu8qc with config:
[34m[1mwandb[0m: 	alpha: 0.35556958471817757
[34m[1mwandb[0m: 	epsilon: 0.16006383659663864
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇██
seed_101_reward,▁▁▁▁▁▁▁▁▁▂▁▁▂▂▂▃▅▃▅▂▁▂▂▃▂▃▂▄▄▃▄▆▂█▃▄▃▅▇▃
seed_123_episode,▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇██
seed_123_reward,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▁▂▂▁▂▂▁▂▂▂▂▄▇▂▄▂▂▁▆▃▂▃▃█
seed_42_episode,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇██
seed_42_reward,▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▄▂▅▃▂▂▁▅▃▂▂▄▃▄▆▆▅▅▄▃▄▅█▅▄
seed_456_episode,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_456_reward,▁▁▁▁▁▁▂▂▂▃█▃▁▃▁▂▂▂▂▃▅▃▃▃▄▄▃▅▂▃▃▃▃▃▆▃▂▃▂▄
seed_789_episode,▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████

0,1
average_reward_across_seeds,98.192
seed_101_episode,999.0
seed_101_reward,109.0
seed_123_episode,999.0
seed_123_reward,181.0
seed_42_episode,999.0
seed_42_reward,67.0
seed_456_episode,999.0
seed_456_reward,110.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 21ljppgu with config:
[34m[1mwandb[0m: 	alpha: 0.4927892856971906
[34m[1mwandb[0m: 	epsilon: 0.162154687530269
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇██
seed_101_reward,▁▂▂▁▁▁▁▁▂▇▆▂▁▄▂▃▄▃▂▃▃▄▄▃▅▃▂▅▃▇█▅▄▄▅▅▆▅▄▄
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇██
seed_123_reward,▁▁▁▁▂▁▁▁▁▂▂▃▃▂▃▁▃▂▃▃▂▄▂▃▃▅▃▃▂▃▂▄▃▃▃▄▂█▄▄
seed_42_episode,▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆██
seed_42_reward,▁▁▁▁▁▂▁▂▁▁▁▂▂▂▃▂▂▂▂█▂▃▁▁▂▂▂▂▂▂▂▃▂▂▃▇▂▂▃▂
seed_456_episode,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
seed_456_reward,▁▁▁▁▂▁▁▁▁▂▂▂▂▁▂▂▂▂▆▅▅▄▅▄▄▃▄▂▃▃▃▂▃▄▇▆█▅▄▃
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▇▇▇▇▇████

0,1
average_reward_across_seeds,91.78
seed_101_episode,999.0
seed_101_reward,91.0
seed_123_episode,999.0
seed_123_reward,217.0
seed_42_episode,999.0
seed_42_reward,76.0
seed_456_episode,999.0
seed_456_reward,59.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: zarxd8zu with config:
[34m[1mwandb[0m: 	alpha: 0.2584519240557413
[34m[1mwandb[0m: 	epsilon: 0.2544136652646607
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_101_reward,▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▂▂▃▂▂▂▄▂▄▂█▃▄▄▁▆▄▇▄▇▃▄▃▂▄
seed_123_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▇▇▇▇▇▇███
seed_123_reward,▁▁▁▁▁▁▁▁▁▂▁▁▂▂▂▂▂▂▃▄▄▃▁▂▂▃▃▁▃▆▂▄▃█▃▃▄▂▆▆
seed_42_episode,▁▁▁▁▁▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇█
seed_42_reward,▁▁▁▃▃▂▁▂▄▃▂▂▁▃▃▄▃▃▆▃▇▁▅▃▆▆▆▃▆█▅▂▄▅▅▃▃▅▃▇
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇████
seed_456_reward,▁▁▁▁▁▁▂▂▁▂▃▃▂▂▃█▄▃▆▇▇▃▄▃▁▁▂▂▆▄▄▄▅▅▅▄▅▃▃▃
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████

0,1
average_reward_across_seeds,88.1
seed_101_episode,999.0
seed_101_reward,86.0
seed_123_episode,999.0
seed_123_reward,67.0
seed_42_episode,999.0
seed_42_reward,69.0
seed_456_episode,999.0
seed_456_reward,102.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: lhnpqbpj with config:
[34m[1mwandb[0m: 	alpha: 0.14932756227229607
[34m[1mwandb[0m: 	epsilon: 0.2857612732658191
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇██
seed_101_reward,▁▁▁▁▁▁▁▁▁▂▁▂▁▃▂▂▂▂▁▂▁▃▂▂▂▃▂▅▇▂▇▂▄▅▅▇▄█▇▇
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇███
seed_123_reward,▁▂▁▁▁▂▂▂▁▁▂▁▁▂▂▂▄▂▂▂▂▃▃▂▂▄▃▅▂▂▄▆█▃▅██▇▅▆
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_42_reward,▂▁▁▁▃▁▁▂▁▁▁▁▃▂▂▂▃▁▂▃▃▃▄▄▃▃▄▃▆▃▂▃█▃▂▃▄▃▄█
seed_456_episode,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
seed_456_reward,▁▁▁▁▁▁▁▁▁▁▃▂▃▁▁▁▂▃▂▂▃▄▂▄▁▄▃▃▂▃▂▂▃▃▂█▃▃▃▃
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇██

0,1
average_reward_across_seeds,70.26
seed_101_episode,999.0
seed_101_reward,21.0
seed_123_episode,999.0
seed_123_reward,55.0
seed_42_episode,999.0
seed_42_reward,60.0
seed_456_episode,999.0
seed_456_reward,53.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: lmov8zuk with config:
[34m[1mwandb[0m: 	alpha: 0.14499263595294287
[34m[1mwandb[0m: 	epsilon: 0.21671113074889783
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇██
seed_101_reward,▁▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▄▁▂▅▁▁▄▁▂▄▁▃▃▃▄▄▃▁█▂▃▆▄
seed_123_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▆▆▇▇▇▇▇▇█████
seed_123_reward,▁▂▃▂▃▃▂▅▃▃▃▃▂▄▃▃▂▃▂▅▁▃▃▃▅▅▇▅▅▆▅▃▃▅▅█▅▅▇▇
seed_42_episode,▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▇▇▇▇▇▇████
seed_42_reward,▁▁▁▁▁▂▁▁▁▂▂▁▂▁▄▁▁▁▂▂▄▁▃▃▁▂▂▁▃▃▁▄▇▁▅▃█▂▃▃
seed_456_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
seed_456_reward,▂█▂▁▂▁▂▁▁▁▂▂▂▁▂▃▁▂▂▁▄▁▃▄▁▃▂▂▄▂▂▄▂▃▂▂▃▃▅▃
seed_789_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██

0,1
average_reward_across_seeds,58.708
seed_101_episode,999.0
seed_101_reward,79.0
seed_123_episode,999.0
seed_123_reward,16.0
seed_42_episode,999.0
seed_42_reward,99.0
seed_456_episode,999.0
seed_456_reward,20.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: z89sxp0k with config:
[34m[1mwandb[0m: 	alpha: 0.27964880484316645
[34m[1mwandb[0m: 	epsilon: 0.027240058365611223
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇█████
seed_101_reward,▂▂▁▂▂▂▁▁▂▂▁▂▂▂▁▁▂▁▁▂▂▁▂▁▁▁▁▂▁▁▂▁▁▂▂▂█▂▁▂
seed_123_episode,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
seed_123_reward,▅▁▁▁▅▃▅▃▃▃▅▅█▃▅▅▃▅▅▅▅▅▅▆▆▅▅▁▆▆▃▃▆▆▁▁▆▅▆▆
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇█
seed_42_reward,▁▁▁▁▂▁▂▁▁▂▁▁▁▂▂▂▂▂▁▃▂▂▂▂▁▂▂▂▁▁▁▁▁▂▁▂▁▂▂█
seed_456_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇█████
seed_456_reward,▁▁▂▂▁▃▁▁▁▂▁▁▁▁▁▁▁█▁▁▁▁▁▂▁▁▁▁▁▂▃▁▁▁▁▂▁▂▃▂
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇██

0,1
average_reward_across_seeds,13.296
seed_101_episode,999.0
seed_101_reward,9.0
seed_123_episode,999.0
seed_123_reward,9.0
seed_42_episode,999.0
seed_42_reward,10.0
seed_456_episode,999.0
seed_456_reward,13.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: gvoxqbps with config:
[34m[1mwandb[0m: 	alpha: 0.4264348091896062
[34m[1mwandb[0m: 	epsilon: 0.28660574958994933
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇███
seed_101_reward,▁▁▂▁▁▂▃▁▃▆▆▄▂▆▂▃▃▄▅▂█▇▄▂▄▄▄▃▅▃▃▃▅▁▅▃▃▂▅▃
seed_123_episode,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇██
seed_123_reward,▂▂▁▁▁▁▁▁▁▂▃▄▃▄▃▂▄▃▄▇▃▄▂▄▄▄▃▄▅█▂▃▄▂▂▃▂▂▄▄
seed_42_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇████
seed_42_reward,▁▁▁▁▁▁▁▂▂▃▄▃█▁▂▇▂▄▂▄▃▃▅▂▂▇▄▆▃▅▄▄▄▄▄▆▅▅▃▆
seed_456_episode,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇█
seed_456_reward,▁▁▂▁▁▂▁▂▄▂▂▄▂▃▃▄▁▄▃▂▃▂▃▅▂▃▆▃▃▄▄█▄▄▃▇▄▂▃▂
seed_789_episode,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇██

0,1
average_reward_across_seeds,84.408
seed_101_episode,999.0
seed_101_reward,73.0
seed_123_episode,999.0
seed_123_reward,36.0
seed_42_episode,999.0
seed_42_reward,82.0
seed_456_episode,999.0
seed_456_reward,90.0
seed_789_episode,999.0


In [4]:
run_qlearning_sweep()

Create sweep with ID: kreeqo81
Sweep URL: https://wandb.ai/abhijithvinod-indian-institue-of-technology-madras/RL_CartPole_QLearning/sweeps/kreeqo81


[34m[1mwandb[0m: Agent Starting Run: nep5lbdk with config:
[34m[1mwandb[0m: 	alpha: 0.06738119229195272
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	temperature: 1.8862275116226144


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇██
seed_101_reward,▂▂▁▁▁▂▃▁▁▁▄▃▂▃▁▁▂▃▂▆▄▃▃▃▅▃▆▅▄█▇▆▆▆▅▅▆▄█▅
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇█████
seed_123_reward,▁▁▂▁▂▁▁▁▁▂▂▁▁▂▂▂▂▃▃▃▄▂▄▃▂▄█▄▄▃▆▆▃▇▄▄▄▅▃▄
seed_42_episode,▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇█████
seed_42_reward,▂▃▂▂▁▁▂▁▁▁▃▃▆▃▃▆▄▃▅▃▅▄▇▆▆▇█▆▄▆▇█▅█▇▆▃▇█▆
seed_456_episode,▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
seed_456_reward,▁▁▁▁▁▃▁▂▃▃▁▂▂▂▁▂▁▁▃▄▄▁▇▇▃▃▄▆▄█▆▃▅▅▅▅▇▅▆▅
seed_789_episode,▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇███

0,1
average_reward_across_seeds,130.3
seed_101_episode,999.0
seed_101_reward,124.0
seed_123_episode,999.0
seed_123_reward,188.0
seed_42_episode,999.0
seed_42_reward,140.0
seed_456_episode,999.0
seed_456_reward,97.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: dcuhpydd with config:
[34m[1mwandb[0m: 	alpha: 0.0766772249504152
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 101
[34m[1mwandb[0m: 	temperature: 1.2610958209564451


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇██
seed_101_reward,▂▁▂▁▁▂▂▂▁▁▂▂▂▁▂▃▁▄▄▃▃▅▆▄▅▅▃▅▄▆▅▆▆█▂▇▄▆▅▆
seed_123_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇█████
seed_123_reward,▁▁▂▂▃▂▁▁▃▂▁▂▁▃▂▆▆▂▅▁▂▆▆▅▇▃▇▄▇▆▃▆▅▄█▃▇▄▂█
seed_42_episode,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇██
seed_42_reward,▁▁▁▁▂▃▂▅▁▁▄▄▁▂▅▆▃▆▆▅▆▆▂▆▁▅▆▃▄▃▅▇▆█▃▂▇▃▂█
seed_456_episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇███
seed_456_reward,▂▃▁▁▃▁▃▁▂▂▂▃▃▆▃▃▆▃▇▅▄█▂▇█▅▅▆▆▆▅█▆▄▇▅▆▇▆▅
seed_789_episode,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█████

0,1
average_reward_across_seeds,106.356
seed_101_episode,999.0
seed_101_reward,114.0
seed_123_episode,999.0
seed_123_reward,60.0
seed_42_episode,999.0
seed_42_reward,167.0
seed_456_episode,999.0
seed_456_reward,131.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: kl0psjem with config:
[34m[1mwandb[0m: 	alpha: 0.4401088395571876
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	temperature: 1.4323146757916057


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██
seed_101_reward,▁▃▃▂▅▂▃▅▃▂▃▄▅▂▂▄▅▇▅▅▅▂▇▄▅▂▄▅█▄▄▄▄▅▅▇█▄▆▄
seed_123_episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇███
seed_123_reward,▁▂▃▂▆▄▄▂▄▃▄▃▃▃▆▆▂▃▄▂▆▃▆▅▃▃▆▆█▄▃▃▄▇▃▃▃▃▃▄
seed_42_episode,▁▁▁▁▁▁▁▁▁▂▂▂▂▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇███
seed_42_reward,▁▂▁▂▂▃▄▄▂▄▂▄▁▁▂▁▂▄▄▂▁▅▁▃▆▂▃▃▁█▄▂▄▃▁▂▄▂▂▄
seed_456_episode,▁▁▁▁▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
seed_456_reward,▁▁▂▃▆▄▆▇▅▇▄▅▇▅▅▄▄▅▅▄█▆▄▁▇▅▄▄▃▄▅▄▅▆▇▄▄▄▆█
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇███

0,1
average_reward_across_seeds,170.316
seed_101_episode,999.0
seed_101_reward,124.0
seed_123_episode,999.0
seed_123_reward,240.0
seed_42_episode,999.0
seed_42_reward,178.0
seed_456_episode,999.0
seed_456_reward,203.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wz8x68a7 with config:
[34m[1mwandb[0m: 	alpha: 0.17227121892027367
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.4122984906127094


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇██
seed_101_reward,▁▁▁▁▁▂▁▄▁▄▂█▂▂▃▂▃▂▅▂▃▂▂▃▅▄▃▅▄▂█▄▃▂▃▂▄▂▂█
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇████
seed_123_reward,▂▁▃▁▃▄▃▃▂▄▁▆▂▃▃▄▂▃▄█▄▂▂▅▅▂▄▂▃▃▂▃▃▂▄▃▃▃▄▃
seed_42_episode,▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇█████
seed_42_reward,▁▁▁▁▁▁▁▅▃▁▃▃▂▂▄▂▁▃▆▆▄▁▁▁▂▅▂▃▅▇▄▃▄▁▂█▃▅▅▅
seed_456_episode,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇██
seed_456_reward,▁▁▂▁▁▁▁▂▃▃▃▄▃▁▁▂▃▁▆▄▃▅▄▁▁▇▆▁▆▅▆▁▅▂▅█▅▂▂▆
seed_789_episode,▁▁▁▁▁▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇███

0,1
average_reward_across_seeds,185.488
seed_101_episode,999.0
seed_101_reward,110.0
seed_123_episode,999.0
seed_123_reward,118.0
seed_42_episode,999.0
seed_42_reward,41.0
seed_456_episode,999.0
seed_456_reward,500.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: fjibd1c1 with config:
[34m[1mwandb[0m: 	alpha: 0.1967228999158775
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.6525584011396486


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇██
seed_101_reward,▂▁▁▂▂▂▃▃▃▄▃▄▅▅▄▄▅▄▅█▄▄▅▆▅▆▅▅▅▅▆▅▅▆▅▆▆▅▆▅
seed_123_episode,▁▁▁▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇██
seed_123_reward,▂▁▃▄▁▃▁▃▅▅▃▄▃▄▄▅▄▅▅▄▆▇▃▆▆▄█▄▆▆█▆▅▆▇▆▄▆▆▇
seed_42_episode,▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
seed_42_reward,▁▂▂▃▂▃▃▃▂▂▂▂▂▃▃▃▄▃▃▄▃▃▄▄▅▄▄▅▄▄▄▃▆▄▅▃█▅▇▃
seed_456_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇█
seed_456_reward,▁▁▂▁▁▂▂▃▅▄▅▄▃▄▃▄▃▆▄▅▇▄▄▆▅▄▇▆▆▅▆▅▄▇▅▆▇▅▇█
seed_789_episode,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇█████

0,1
average_reward_across_seeds,178.224
seed_101_episode,999.0
seed_101_reward,161.0
seed_123_episode,999.0
seed_123_reward,129.0
seed_42_episode,999.0
seed_42_reward,356.0
seed_456_episode,999.0
seed_456_reward,254.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: dushmm0j with config:
[34m[1mwandb[0m: 	alpha: 0.37554218681989576
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.337701598055116


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇█
seed_101_reward,▂▁▁▁▁▃▂▆▂▄▂▂▁▃▁▁▁▁▄▁▃▄▅▄█▃▃▃▃▃▆▃▂▃▃▄▁▃▂▇
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇█
seed_123_reward,▁▁▁▂▃▄▄▅▅▅▅▅▆▅▅▅▇▅▆▅▇▇▆▅█▆▆▆▆▆▆▇▆▆█▅▅▅▅▄
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇█████
seed_42_reward,▃▁▂▂▆▃▁▃▃▃▂▂▃▅▂▂▅▃▃▇▄▂█▄▃▄▆▄▄▂▅▆▅▄▂▄▅▄▄▃
seed_456_episode,▁▁▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
seed_456_reward,▁▁▁▂▄▂▃▄▄▂▃▄▃▃▃▅▃▃▃▃▅▃▃▃▃▂█▄▄▃▆▃▄▄█▅▃▃▃▃
seed_789_episode,▁▁▁▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇█████

0,1
average_reward_across_seeds,169.024
seed_101_episode,999.0
seed_101_reward,31.0
seed_123_episode,999.0
seed_123_reward,171.0
seed_42_episode,999.0
seed_42_reward,114.0
seed_456_episode,999.0
seed_456_reward,142.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 8ovc1466 with config:
[34m[1mwandb[0m: 	alpha: 0.24010571859497365
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 101
[34m[1mwandb[0m: 	temperature: 1.8159116470410328


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇███
seed_101_reward,▁▂▂▁▁▃▄▁▃▃▆▆▆▅▅▄▆▆▅▄▆▆▇█▆▃█▇█▇█▇█▇▇▇█▇▇▃
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▇▇▇▇██████
seed_123_reward,▂▂▁▂▁▄▃▄▃▃▆▄▆▄▆▆▅▆▄▅▆█▆▅▆▅▇▆▆▅▆▅▆▆▆▆▇▇▆▆
seed_42_episode,▁▁▁▁▁▂▂▂▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████
seed_42_reward,▁▁▁▁▁▃▃▃▅▆▅▅▅▅█▄▅▆▄▅▆▅▅▆▅▆▆▅▆▆▇▅▆▇▆▆▅▆▇▇
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
seed_456_reward,▂▃▁▂▃▅▅▅▄▅▆▇▇▆██▆▇██▆▇▇█▇▇█▇▇▇█▇█▇█▇▇▇█▇
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇██

0,1
average_reward_across_seeds,160.824
seed_101_episode,999.0
seed_101_reward,175.0
seed_123_episode,999.0
seed_123_reward,140.0
seed_42_episode,999.0
seed_42_reward,169.0
seed_456_episode,999.0
seed_456_reward,129.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: cls6qb6l with config:
[34m[1mwandb[0m: 	alpha: 0.15373843087387992
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	temperature: 0.9683671424163732


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█
seed_101_reward,▂▁▂▁▁▁▁▁▂▂▃▃▅▅▅▇▅▅▄▅▅▆▅▅▇▆▆▇▆▆██▆▇▇▆█▆▆▆
seed_123_episode,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇█
seed_123_reward,▁▃▃▂▃▃▆▆▄▅▄▄▃▄▃▆▄▃▄▃▅▅▄▆▃▅▃▅█▃▂▄▅▇▅▄▄█▄▅
seed_42_episode,▁▁▁▁▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇████
seed_42_reward,▁▂▄▅▅▄▄▄▅▆▄▄▆▅▇▆▆▆▇▇▇▆█▇▇▇▆▇█▇▃▇▇▇▃▇████
seed_456_episode,▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇██
seed_456_reward,▃▂▂▅▂▁▂▂▄▃▃▃▂▆▇▇▃▄▆▃▃▇▃▃▃▇▇█▆▃▃▃▃▄▇▄▄██▄
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇██

0,1
average_reward_across_seeds,102.808
seed_101_episode,999.0
seed_101_reward,150.0
seed_123_episode,999.0
seed_123_reward,71.0
seed_42_episode,999.0
seed_42_reward,127.0
seed_456_episode,999.0
seed_456_reward,109.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: kjaptsn3 with config:
[34m[1mwandb[0m: 	alpha: 0.4475751649683387
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.764265548800562


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
seed_101_reward,▁▁▁▁▄▂▁▃▅▄▄▇██▇▆▅▆▅▆▆▇▆▄▇▇▄▆▆▇▇▇▇▆▆▇▄▇▇▇
seed_123_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██████
seed_123_reward,▂▁▂▂▅▃▇▃▃▄▄▇▄▄▄▄▆▅▄▅▅▆▄▇▄▄▅▆▆▇▆█▅▅▇▄▇▅▇▇
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█████
seed_42_reward,▁▂▁▃▆▃▃▄▅▅▃▄▅▄▄▃▅▅▅▅▅▅▅▅▆▆▆▃▇▇▃▇▇█▇▄▆▇▇▇
seed_456_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_456_reward,▂▁▃▄▄▄▅▁▁▄▂▅▅▄▆▇▅█▁█▆█▇▂▇█▁▇▇███▇▂▇████▇
seed_789_episode,▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇████

0,1
average_reward_across_seeds,135.472
seed_101_episode,999.0
seed_101_reward,119.0
seed_123_episode,999.0
seed_123_reward,271.0
seed_42_episode,999.0
seed_42_reward,103.0
seed_456_episode,999.0
seed_456_reward,115.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: eftdqk3a with config:
[34m[1mwandb[0m: 	alpha: 0.45870235234381906
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	temperature: 0.6251276004919866


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇██████
seed_101_reward,▂▁▁▃▂▂▂▄▃▂▄▇▂▃▂▂▅▃▃█▆▂▂▂▂▃▄▃▇▂▃▂▃▄▄▄▃▂▆▃
seed_123_episode,▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇████
seed_123_reward,▃▁▁▂▂▇▅▂▄▆▂▂▁▆▆▅▃▁▂▅▂▅█▄▆▃▅█▂▁▇█▆▆▂▂▅▁▅▁
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇█████
seed_42_reward,▅▅▃▄▇▇▅▆▄▄▆██▅▅▄▅▄▂▃▃▅▆█▃▄▅▄▄▅▄▄▄▅▅▇▁█▄▅
seed_456_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇███
seed_456_reward,▃▂▇▂▂▂▁▁▂▁▇▁▂▂▇▂▂▁▁▁▂▂▂▁▃▁▂▂▂█▁▂▇▁▂▁▇▁▁▂
seed_789_episode,▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇███

0,1
average_reward_across_seeds,66.812
seed_101_episode,999.0
seed_101_reward,28.0
seed_123_episode,999.0
seed_123_reward,68.0
seed_42_episode,999.0
seed_42_reward,96.0
seed_456_episode,999.0
seed_456_reward,27.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 1cwz8is4 with config:
[34m[1mwandb[0m: 	alpha: 0.06585706539752455
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 101
[34m[1mwandb[0m: 	temperature: 0.6533538161914733


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇██
seed_101_reward,▁▁▁▁▁▁▁▂▂▁▁▂▁▆▃▆▃▁▂▄▃▃▂▃▂▄▂▂▃█▄▄▂▄▃▂▅▄▁▃
seed_123_episode,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇█████
seed_123_reward,▁▁▁▄▂▁▅▄▃▆▃▁▁▅▄▃▅▆▅▃▃▁▄▆▃█▇▇▄▁▅▅█▂▆▆▄▅█▂
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇███
seed_42_reward,▁▁▁▁▂█▂▃▁▄▄▃▃▂▁▁▁▃▂█▅▆▄▄▅▇▇▅▅▄▆▂▃▃▁▂▁▄▅▁
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇██████
seed_456_reward,▁▂▁▁▁▂▃▁▄▃▁▁▄▁▂▂▁▁▃▂▃▁▃▁▅▂▄▅█▂▁▄▄▄▅█▂▂█▁
seed_789_episode,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇████

0,1
average_reward_across_seeds,105.744
seed_101_episode,999.0
seed_101_reward,265.0
seed_123_episode,999.0
seed_123_reward,131.0
seed_42_episode,999.0
seed_42_reward,74.0
seed_456_episode,999.0
seed_456_reward,37.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 368j2zr5 with config:
[34m[1mwandb[0m: 	alpha: 0.2750517141978133
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	temperature: 1.748836299196283


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▃▃▄▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇███
seed_101_reward,▁▂▁▁▂▂▁▁▁▂▁▄▂▁▁▅▃▁▆▁▂█▆▃▃▄▃▆▆▇▆▂▆▂▂▂▆▆▄▃
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇██
seed_123_reward,▁▁▁▂▁▃▄▇▅▄▄▄▄▆▅▄█▅▅▅▆▅▅▆▅▄▇▅▅▅▆▄▆▄▅▅▆▇▆▇
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇███
seed_42_reward,▂▁▂▃▂▂▅▄▃▄▅▃▄▄▆▃▃▄▄▄▅▆▆▄▇▃▃▅▆▆▄▄▃▃▄▆▃█▃▄
seed_456_episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇█
seed_456_reward,▁▁▁▁▁▁▂▃▃▃▃▂▃▄▃▂▃▅▃▄▃▃▅▄▅▄█▄▆▄▅▃▃▇▅▃▄▇▃▅
seed_789_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇███

0,1
average_reward_across_seeds,172.952
seed_101_episode,999.0
seed_101_reward,174.0
seed_123_episode,999.0
seed_123_reward,187.0
seed_42_episode,999.0
seed_42_reward,193.0
seed_456_episode,999.0
seed_456_reward,500.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: l0cbanwj with config:
[34m[1mwandb[0m: 	alpha: 0.1035584804735406
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 789
[34m[1mwandb[0m: 	temperature: 0.4104005745139181


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
seed_101_reward,▂▄▁▂▃▂▄▃▄▁▁▃▁▁▄▃▆▄▃▁▃▁█▃▁▁▇▂▂▃▅▄▁▁▃▁▆▄▃▁
seed_123_episode,▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█
seed_123_reward,▁▅▂▂▃▄▂▂▄▃▃█▄▂▃▄▁▃▃▄▄▃▃▂▂▄▅▂▁▁▅▃▄▃▇▆▄▆▅▂
seed_42_episode,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇█████
seed_42_reward,▁▁▁▁▁▃▃▃▁▃▃▃▂▃▄▃▁▃▅▃▄▃▄▃▃▃▆▅▃█▄█▆▄▅▂▃▃▂▃
seed_456_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_456_reward,▁▄▄▂▃▆▆▅▃▃▆▄▄▄▄▂▄▃▃▄▅▁▄▃▅▃▁▃▃▆█▄█▅▁▃▁▇▇▂
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇█████

0,1
average_reward_across_seeds,77.888
seed_101_episode,999.0
seed_101_reward,15.0
seed_123_episode,999.0
seed_123_reward,190.0
seed_42_episode,999.0
seed_42_reward,101.0
seed_456_episode,999.0
seed_456_reward,74.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: h8g4wh1t with config:
[34m[1mwandb[0m: 	alpha: 0.23629947463808623
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 789
[34m[1mwandb[0m: 	temperature: 1.8975349223477904


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇████
seed_101_reward,▁▁▁▂▃▅▆▂▅▅▅▇▇▆▅▇▅▆▇▅▆▂▅▅▄▆▄▅▃▆▆▆▇▅▆▅█▅▇▃
seed_123_episode,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█
seed_123_reward,▁▁▁▁▅▄▂▂▅▁▃▄▅▅▄▆▄▅▄▅▄▅▅▅▅▇▅▅▆▇▆▆█▇▆█▆▆▆▇
seed_42_episode,▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▆▆▆▆▆▆▆▇▇▇█████
seed_42_reward,▁▁▂▂▂▃▃▄▃▄▄▃▄▄▃▄▄▄▄▄▅▄▂▄▄▅▆▅▅▅▆▆▅▇▆▅█▇▆█
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇██
seed_456_reward,▁▂▁▁▁▂▃▃▃▂▄▄▅▆▃▃▃▃▄█▃▆▄▅▅▅▇▃▅▅█▅▇▅▆▇██▇▇
seed_789_episode,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇████

0,1
average_reward_across_seeds,280.072
seed_101_episode,999.0
seed_101_reward,170.0
seed_123_episode,999.0
seed_123_reward,304.0
seed_42_episode,999.0
seed_42_reward,380.0
seed_456_episode,999.0
seed_456_reward,321.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 8iyb6f5t with config:
[34m[1mwandb[0m: 	alpha: 0.20241954640986257
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 0.19568233972860188


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇██
seed_101_reward,▃▅▂▃▂▂▂▂▂▁▂▂█▂▂▁▃▁▂▁▂▃▂▂▁▂▄▄▂▇▂▃▁▂▄▂▁▁▄▂
seed_123_episode,▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇█
seed_123_reward,▂▃▃█▃▄▃▁▂▁▂▁▃▁▁▃▃▂▄█▂▄▃▁▂▂▁▂▁▂▃▂▄▂▂▁▂▃▁▃
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇██
seed_42_reward,██▃▃▅▄▆▇▆▂▄▆▆▆▆▆▃▄▃▂▂▁▇▆▅▄███▂▁▄▄▃▃▃▃▄▄▃
seed_456_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇██
seed_456_reward,▃▂▆▃▅▂▃▂▃▃▂▃▁▆▃▃▂▂▂▂▂█▄▃▂▂▃▃▂▁▂▂▁▃▃▂▃▂▂▂
seed_789_episode,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇████

0,1
average_reward_across_seeds,43.452
seed_101_episode,999.0
seed_101_reward,41.0
seed_123_episode,999.0
seed_123_reward,27.0
seed_42_episode,999.0
seed_42_reward,33.0
seed_456_episode,999.0
seed_456_reward,31.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: g4zglvqg with config:
[34m[1mwandb[0m: 	alpha: 0.4666182852776158
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	temperature: 0.18387582913031


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇██
seed_101_reward,▃█▅█▆▃▆▄▇▃▂▄▆▁▆▃█▃▄▄▃▂▂▁▁▁█▄▃▁▆▆▄▇▂▁▂▁▃▇
seed_123_episode,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_123_reward,▃▁▁▃▁▁▁▁▁▁▄▂▃▂▁▃▃▃▁▄▁▃▁▁█▁▁▃▁▃▃▄▃▃▃▇▁▃▃▁
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇█
seed_42_reward,▁▁▁▃▁▁▅▂▄▃▂▃▃▂▃▅▂▅▅▁▁▁▁▁▂▂▁▃▆▄▃▄▆█▄▅▁▂▁▁
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇███
seed_456_reward,▂▁▂▁▂▂▂▁▁▁▂▁▂▃▁▂▁▂▁▂▁▃▂▅▂▁▂▁▃▃▂▂▂▁▅▂█▂▁▃
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▇▇▇█████

0,1
average_reward_across_seeds,26.3
seed_101_episode,999.0
seed_101_reward,32.0
seed_123_episode,999.0
seed_123_reward,10.0
seed_42_episode,999.0
seed_42_reward,13.0
seed_456_episode,999.0
seed_456_reward,14.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: eyrkvs1j with config:
[34m[1mwandb[0m: 	alpha: 0.3546081740770661
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	temperature: 1.4419055738082442


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇█
seed_101_reward,▁▂▄▂▄▄▄▃▂▃▂▂▄▃▆▆▅▇▅▆▄█▄▂▄▄▄▄▅▅▅▅▁▇▆▆▇▇▅▅
seed_123_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇█
seed_123_reward,▁▁▃▃▅▄▅▂▂█▂▂▂▂█▂▂▂▅▂▅▂▅▃▂▂▄▄▂▄▅▅▃▇▅▂▄▂▂▆
seed_42_episode,▁▁▁▁▂▂▃▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇█
seed_42_reward,▁▁▁▃▂▃▄▂▄▃▃▂▃▃▄▃▂▄▅▆▅█▃▃▃▃▃▃▄▄▃▅▅▃▂▆▄▃▄▁
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
seed_456_reward,▁▁▃▃▃▃▁▄▃▆▃▆▃▂▁▄▁▅▄▆▆▄▂▄█▃▄▅▆▅▁▄▇▄▁▂▄▄▄▃
seed_789_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇█

0,1
average_reward_across_seeds,152.32
seed_101_episode,999.0
seed_101_reward,135.0
seed_123_episode,999.0
seed_123_reward,257.0
seed_42_episode,999.0
seed_42_reward,188.0
seed_456_episode,999.0
seed_456_reward,163.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 069c27em with config:
[34m[1mwandb[0m: 	alpha: 0.34777063004694847
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 789
[34m[1mwandb[0m: 	temperature: 1.7747857705186434


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
seed_101_reward,▂▁▃▁▂▃▄▃▇▆▄▄▂▃▂▇▆▆▅▄▆▃▃▃▁█▅▆▂▃▃▆▄▃▆▆▆▅▁▇
seed_123_episode,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇███
seed_123_reward,▁▁▂▂▃▂▃▄▃▃▅▂▃▃▃▅▃▃▃▄▄▅▄▄▃▄▄▃█▂▃▃▄█▃▄▄▃▃▄
seed_42_episode,▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_42_reward,▁▁▁▁▂▂▂▃█▃▄▂▇▃▃▄▃▃▃▃▃▃▃▅▃▃▃▃▃▃▃▃▃▃▃▄▄▃▃▃
seed_456_episode,▁▁▁▁▁▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇███
seed_456_reward,▁▁▁▃▁▂▃▄▃▄▃▄▂▄▁▄▄█▄▂▁▄▄▄▆▃▆▃▂▅▅▅▂▂▄▃▁▄▅▁
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇█

0,1
average_reward_across_seeds,197.752
seed_101_episode,999.0
seed_101_reward,356.0
seed_123_episode,999.0
seed_123_reward,428.0
seed_42_episode,999.0
seed_42_reward,150.0
seed_456_episode,999.0
seed_456_reward,200.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 1cigmz5a with config:
[34m[1mwandb[0m: 	alpha: 0.48213931162854623
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 101
[34m[1mwandb[0m: 	temperature: 1.1380605315964518


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████
seed_101_reward,▁▂▃▅▄▂▅▅▅▄▅▅▅▄▅▅▅▅▆▇▆▇▇▆▆▅▆▆█▅▄▇▆▇█▆▆▆█▃
seed_123_episode,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇██
seed_123_reward,▁▂▁▄█▂▃▃▄▅▄▄▄▄▂▆▄▅▄▄▅▅█▅▅▅▄▅▅▅▄▅▅▅▅▆▅▅▄▄
seed_42_episode,▁▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇██
seed_42_reward,▁▁▂▃▃▃▄▃▅▅▃▃▃▃▃█▃▄▆▃▆▄▇▃▃▄▃▃▃█▄▃▄▄▆▄▇▅▄█
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
seed_456_reward,▂▅▁▁▁▃▂▄▃▅▅▅▃▅▄▄▆▂▅▂█▇▄▇▇▂▂▄███▃█▄▇▃█▃█▂
seed_789_episode,▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇████

0,1
average_reward_across_seeds,128.068
seed_101_episode,999.0
seed_101_reward,249.0
seed_123_episode,999.0
seed_123_reward,237.0
seed_42_episode,999.0
seed_42_reward,58.0
seed_456_episode,999.0
seed_456_reward,28.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: oprnk0hq with config:
[34m[1mwandb[0m: 	alpha: 0.41572333392336647
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	temperature: 0.18223575762639305


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇██
seed_101_reward,▁▂▁▂▂▂▁▁▃▂▂▂█▃▂▂▃▁▂▁▁▂▁▂▂▂▂▁▁▂▁▁▁▁▁▃▂▃▁▂
seed_123_episode,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
seed_123_reward,▁▂▂▂▁▄▂█▁▂▂▁▂▄▃▇▂▂▄▃█▅▂▃▁▂▃▁▂▃▁▂▁▁▂▁▁▇▁▂
seed_42_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇████
seed_42_reward,▂▂▁▆▄▄▃▂▂▄▄▁▄▃▂▂▂▄▂▅▅▅▃▂▆▃▁▂▄▁▅▃▂▂▂▅▄▄▃█
seed_456_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇██
seed_456_reward,▁▃▃▃▃▁▄▃▄▄▁▅▂▂▅▃▅▂▃█▁▇▁▆▂▃▁▅▁▄▅▂▂▇█▃▁▃▃▂
seed_789_episode,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇██

0,1
average_reward_across_seeds,25.42
seed_101_episode,999.0
seed_101_reward,18.0
seed_123_episode,999.0
seed_123_reward,21.0
seed_42_episode,999.0
seed_42_reward,19.0
seed_456_episode,999.0
seed_456_reward,67.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 93qce5b3 with config:
[34m[1mwandb[0m: 	alpha: 0.31053236216600677
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.0909228416110373


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇███
seed_101_reward,▁▃▄▄▅▄▄▄▅▁▅▃▆▅▆▆▄▄▆▆▂▅▆▅▇▄▆▇▆▅▆█▇▅▆█▇▆▄▆
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇████
seed_123_reward,▁▂▁▄▃▄▄▂▄▄▄▄▄▄▄▆▄▄▄▅▃▄▅▅▅▆▄▅▄▅▄▆█▅▄▇█▆▆▄
seed_42_episode,▁▁▁▁▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇███████
seed_42_reward,▆▁▁▂▁▂▁▂▂▁▁▂▂▂▂▂▂▂▁▂▂▂▂▁▂▁▂▂▂▂▂▂▃▅▃▃▄█▆▅
seed_456_episode,▁▁▁▁▁▁▁▁▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_456_reward,▃▁▁▂▁▃▂▄▂▂▃▃▄▃▂▃▂▂▂▄▂▇▃▄▄▂▁▂▂▂▁▂█▂▂▂▂▁▃▂
seed_789_episode,▁▁▁▂▂▂▂▂▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇██████

0,1
average_reward_across_seeds,75.8
seed_101_episode,999.0
seed_101_reward,94.0
seed_123_episode,999.0
seed_123_reward,68.0
seed_42_episode,999.0
seed_42_reward,77.0
seed_456_episode,999.0
seed_456_reward,103.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: moupepjb with config:
[34m[1mwandb[0m: 	alpha: 0.12076396041988616
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.8013764232782523


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_101_reward,▁▁▁▁▂▁▁▁▁▂▅▄▅▅▃▅▅▆▅▅▄▄▅▅▆▅▅█▅██▃▆▆▇█▇▆▇▆
seed_123_episode,▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇█████
seed_123_reward,▂▁▁▂▁▂▂▂▆▃▃▃▄▄▃▄▅▃▄▄▇▂▂▅▆█▅▆█▅▆▃▄▄▆▄▄▅▄▂
seed_42_episode,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█████
seed_42_reward,▁▁▁▁▁▂▂▁▃▂▄▃▃▅▆▃▄▅▅▆▅▆▅▇▆▆▂▆▇▅▇▅▅██▇▇▆▅▇
seed_456_episode,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇███
seed_456_reward,▄▂▂▁▁▄▃▁▃▆▅▂▅▅▃▅▅▄▆▅▆▃▅▆▆▅▅▃▅▅▆▄██▆▆▅▇▇▆
seed_789_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
average_reward_across_seeds,183.208
seed_101_episode,999.0
seed_101_reward,114.0
seed_123_episode,999.0
seed_123_reward,139.0
seed_42_episode,999.0
seed_42_reward,201.0
seed_456_episode,999.0
seed_456_reward,153.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: wzm031j9 with config:
[34m[1mwandb[0m: 	alpha: 0.3642381172417675
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	temperature: 1.5064733102434136


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████
seed_101_reward,▁▁▂▂▂▅▄▃▄▅▆▆▅▅▆▅▆▇▅▆▄▅▆▅▄██▄▄▄▄▆█▄▇▄▅▅▇▆
seed_123_episode,▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇███
seed_123_reward,▂▂▂▂▅▃▇▅▅▁█▅▅▆▅▆▆▆▆▆▅▄▅▅▅█▄▆▆▆▆▅█▄▆▆▆▅▅█
seed_42_episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇█████
seed_42_reward,▁▁▁▁▁▁▃▃▅▃▅▆▃▆▇█▅▃█▅██▂█▇▃██▂▄▄▆▄█▇▄▂▅▂▂
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
seed_456_reward,▁▁▁▁▁▁▁▃▃▁▇▄▃▆▃▃▆▆▃▄▄▄▆▃▅▅▅▄▅▃▄▅▇▆▅▃█▅▄▅
seed_789_episode,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇██████

0,1
average_reward_across_seeds,197.204
seed_101_episode,999.0
seed_101_reward,208.0
seed_123_episode,999.0
seed_123_reward,274.0
seed_42_episode,999.0
seed_42_reward,105.0
seed_456_episode,999.0
seed_456_reward,158.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: j91mhop2 with config:
[34m[1mwandb[0m: 	alpha: 0.2229142362500994
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 789
[34m[1mwandb[0m: 	temperature: 1.361179947089153


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇██
seed_101_reward,▁▁▁▁▁▁▁▁▂▃▁▂▂▂▂▃▂▂▂▅▂▂▂▂▃▂▂▂▃▁▂▂▃▃▃▂█▃▂▄
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
seed_123_reward,▅▁▁▁▁▅▅▂▂▄▂▇▂▆█▂▄▄▆▃▄▂▇▂▂▄▃▅▃█▃▄▃▃▂▂▆█▆▅
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇█████
seed_42_reward,▁▁▁▁▂▁▆▂▃▇▄█▇▄▇▇▄▄▄▄█▇▅▅▅▃█▄▄▅█▆▅▄▅▄▃▇▅▄
seed_456_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇██
seed_456_reward,▁▁▁▁▁▁▃▁▁▃▃▄▄▄▄▁▅▁▄▃▂▄▄▃▄▂▅▄▄▂▃▂▆█▂▂▇▄▄▇
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███

0,1
average_reward_across_seeds,192.5
seed_101_episode,999.0
seed_101_reward,278.0
seed_123_episode,999.0
seed_123_reward,299.0
seed_42_episode,999.0
seed_42_reward,500.0
seed_456_episode,999.0
seed_456_reward,108.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: zzibgr7g with config:
[34m[1mwandb[0m: 	alpha: 0.3896125835857428
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 101
[34m[1mwandb[0m: 	temperature: 0.8773297098601173


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇█
seed_101_reward,▃▁▇▅▇▅▇▃▄█▇▇▇▆▅▂▃▃▂▂▅▄█▆▅▆▂▄▅▂▂▃▇▃▆▅▄▅▃▄
seed_123_episode,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇███
seed_123_reward,▁▁▄▁▃▁▁▄▃▇▃▄▅▂▂▂▄▁▅▂▄▂▃▃▃▁▄▄█▄▂▂▁▄▇▃▄▄▅▁
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇████
seed_42_reward,▂▃▂▂▂▁▁▃▅▄▂▃▄▄▅██▄▃▅▅▆▇▂▇▃▄▄▅▂▅▄▂▅▅▅▆▂▃▆
seed_456_episode,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇███
seed_456_reward,▁▂▃▃▃▄▃▄▄▄▄▃▄▃▃▆▄▂▄▄▄▄▄▂▅▅▅▅▃▄▄▄▄▄▄█▄▅▄▃
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇████

0,1
average_reward_across_seeds,107.352
seed_101_episode,999.0
seed_101_reward,77.0
seed_123_episode,999.0
seed_123_reward,172.0
seed_42_episode,999.0
seed_42_reward,11.0
seed_456_episode,999.0
seed_456_reward,108.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 36ohph4f with config:
[34m[1mwandb[0m: 	alpha: 0.2791261845092483
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 789
[34m[1mwandb[0m: 	temperature: 1.9076240018838009


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇█
seed_101_reward,▁▁▂▃▃▁▅▅▁▂▅▃▂▅▄▅▃▅▄▆▅▄▅▆▃▆▃▆▅▆▅█▇▅▄▅▆▇▃▆
seed_123_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██
seed_123_reward,▁▂▁▂▅▄▁▆▂▄▂▄▅▄▃▅▃▄▃█▂▅▄▅▃▄▆▇▇▅▇▅▂▆▆▆▇▇▆▆
seed_42_episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇██
seed_42_reward,▁▁▂▃▂▂▄▂▁▄▃█▅▄▆▇█▇▇██▆▆▇▆▇▆█▇▇▇██▇▇▇█▇█▇
seed_456_episode,▁▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
seed_456_reward,▁▄▅▇▁▄▅▅▃▃▄▃▃▅▄▆▄▄▄▆▆▆▇▄▄▆▇█▅▅▆▄▅▆▅▅▆█▆▅
seed_789_episode,▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████

0,1
average_reward_across_seeds,137.772
seed_101_episode,999.0
seed_101_reward,128.0
seed_123_episode,999.0
seed_123_reward,133.0
seed_42_episode,999.0
seed_42_reward,183.0
seed_456_episode,999.0
seed_456_reward,133.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yqbunrxg with config:
[34m[1mwandb[0m: 	alpha: 0.05199434545496757
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.9993890270127976


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇█
seed_101_reward,▁▂▂▁▁▂▂▁▁▁▂▂▁▂▃▂▃▁▂▄▂▄▄▃▄▂▂▂▆▄█▄▄▄▃▇▇▃▄▄
seed_123_episode,▁▁▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
seed_123_reward,▁▁▁▁▂▁▁▁▂▂▁▂▂▂▁▁▂▁▁▂▂▂▃▂▂▁▄▂▃▃▂▂▂▂▂▃▂▂▂█
seed_42_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_42_reward,▂▁▁▁▁▂▂▁▁▁▂▂▂▁▂▁▁▃▁▂▂▃▄▃▂▁▂▃▃▃▂▃▃▃▂█▂▃▂▂
seed_456_episode,▁▁▁▁▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▇▇▇▇▇▇▇██████
seed_456_reward,▁▁▁▁▁▂▂▂▂▂▃▂▂▂▂▃▃▁▃▂▅▂▂▃▂▅▂▂▃▂▄▅▂▃▅▄▄▃▄█
seed_789_episode,▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇████

0,1
average_reward_across_seeds,128.244
seed_101_episode,999.0
seed_101_reward,124.0
seed_123_episode,999.0
seed_123_reward,355.0
seed_42_episode,999.0
seed_42_reward,148.0
seed_456_episode,999.0
seed_456_reward,55.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g7y683h0 with config:
[34m[1mwandb[0m: 	alpha: 0.1392560577209923
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	temperature: 1.977586041920268


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇███
seed_101_reward,▁▁▁▁▁▁▁▁▁▂▂▂▂▁▂▃▄▅▄▂▃▂▂▂▃█▂▂▃▄▂▂▇▃▅▄▃▃▂▂
seed_123_episode,▁▁▁▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇██████
seed_123_reward,▁▂▁▁▃▂▁▁▁▂▃▁▂▃▃▃▃▄▄▆▄▅▄▄▃▄▅▆▃▆▆▅▅▅█▇▇▇▅▄
seed_42_episode,▁▁▁▁▁▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
seed_42_reward,▁▁▁▁▁▁▁▁▂▂▄▂▄▂▂▂▄▃▃▂▄▄▃▃▅▃▅▂▅▄▃▄▃▃▇▄▄█▅▆
seed_456_episode,▁▁▁▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
seed_456_reward,▁▁▂▂▁▂▆▆▃▅▃▃▃▃▃▄▂▂▃▆▃▂▂▃▄▃▂▇▆▄▄▄▃█▃▄▅▇▃▅
seed_789_episode,▁▁▁▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇████

0,1
average_reward_across_seeds,170.6
seed_101_episode,999.0
seed_101_reward,199.0
seed_123_episode,999.0
seed_123_reward,180.0
seed_42_episode,999.0
seed_42_reward,188.0
seed_456_episode,999.0
seed_456_reward,120.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: mqd2un6f with config:
[34m[1mwandb[0m: 	alpha: 0.333579133119166
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 101
[34m[1mwandb[0m: 	temperature: 1.551547752589765


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇███████
seed_101_reward,▁▁▂▂▂▄▃▂▂▂▂▃▄▂█▄▂▂▂▃▄▃▂▂▂▂▂▄▄▃▃▅▃▄▂▄▂▆▃▄
seed_123_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇██
seed_123_reward,▁▂▁▂▁▂▄▆▃▃▂▄▂▅▃▃▅▃▃▃▃▃█▃▂▃▃█▂▂▂▂▃▃▃▄█▄▂▇
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇██
seed_42_reward,▁▁▂▁▃▁▂▃▄▁▆▄▄▅▄▂▂▅▇▆▇▃▆▆▁▅▃▆▅▅▅▆▇▅▅▃▇▅▃█
seed_456_episode,▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_456_reward,▂▁▁▁▄▄▃▄▂▄▂▄▁▄▂▄▄▄▅▃▆▁▅▄▅▄▅▃▅█▅▅▅▄▂▄▂▂▆▅
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█

0,1
average_reward_across_seeds,172.82
seed_101_episode,999.0
seed_101_reward,262.0
seed_123_episode,999.0
seed_123_reward,172.0
seed_42_episode,999.0
seed_42_reward,148.0
seed_456_episode,999.0
seed_456_reward,156.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 67ufsz7j with config:
[34m[1mwandb[0m: 	alpha: 0.386837466898987
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 101
[34m[1mwandb[0m: 	temperature: 0.46609176174595435


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_101_reward,▂▃▂▂▇▂▃▁▃▅▂▅▃▂▁▁▇█▂▂▁█▃▂▂▂▁▁▂▃▆▇▃▄▁▄▆▂▂▂
seed_123_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
seed_123_reward,▁▁▁▁▂▂▁▅▁▁▂▁▃▁▄▂█▁▁▂▁█▁▁▁▆▁▁▁▁▁▁▁▁▇▁▁▁▁█
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇███
seed_42_reward,▁▄▁▅▁▄▄▄▃▄▁▃█▁▄▁▁▁▁▂▃▂▁▁▆▁▄▁▆▁▁▄▄▃▁▅▁▁▄▁
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
seed_456_reward,▂█▃▁▅▃▄▃▃▇▄█▁▂▃▂▂▅▂▂▂▃▄▄▃▃▃▆▄▂▄▃▄▁▄▂▁▂▄▃
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
average_reward_across_seeds,54.78
seed_101_episode,999.0
seed_101_reward,69.0
seed_123_episode,999.0
seed_123_reward,31.0
seed_42_episode,999.0
seed_42_reward,67.0
seed_456_episode,999.0
seed_456_reward,34.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: gq11esae with config:
[34m[1mwandb[0m: 	alpha: 0.16966668634830356
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	temperature: 0.9583772547918896


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇██
seed_101_reward,▁▃▁▅▄▃▄▃▂▄▃▃▄▄▂▃▄▃▂▂▃▄▆▂▃▃▄▄▂▃▃▃▆▄▃▅▄▄█▄
seed_123_episode,▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_123_reward,▃▁▃▃▂▃▂▃▄▅▄▂▆▄▃▆▇▆▅▄██▇▅▇▇▅▇▇▇▅▆▆▅▆▆█▆▆▅
seed_42_episode,▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇███
seed_42_reward,▁▁▄▆▄▅▇▃▂▄▄▄▅▅▄▄▅▆▆▆▆▅▅▅▅▅▅▅▅▃▆█▅▆▆▅▅▅▆█
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇████
seed_456_reward,▁▁▂▂▁▁▂▃▂▂▃▄▅▂▃▂▃▂▄▂▄▄▄▄▂▄█▄▃▃▄▃▂▄▄▄▂▃▃▂
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇████

0,1
average_reward_across_seeds,85.136
seed_101_episode,999.0
seed_101_reward,73.0
seed_123_episode,999.0
seed_123_reward,94.0
seed_42_episode,999.0
seed_42_reward,83.0
seed_456_episode,999.0
seed_456_reward,73.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: psrp1fnq with config:
[34m[1mwandb[0m: 	alpha: 0.49471549249337143
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	temperature: 0.5109684485299866


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇███
seed_101_reward,▂▁▁█▂▃▄▅█▃▂▅▂▅▂▂▅▄▄▂▃▂▃▁▅▃▃▃▃▃▃▄▁▂▂▁▃▂▄▁
seed_123_episode,▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇██
seed_123_reward,▂▅▁▁▁▅█▁▄▁▁▅▄▅▅▄▅▁▄▅▄▁▄▄▁▅▁▁▄▄▄▄▁▁▁▁▁▄▄▄
seed_42_episode,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇████
seed_42_reward,▃▁▂▂▄▁▃▁▂▃▃▁▂▁▃▂▆▆▄▆▂▅▅▆███▇▇▅▇▄▇▅▄▅▆▄▆█
seed_456_episode,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▅▅▅▅▅▅▅▆▆▇▇▇▇▇█████
seed_456_reward,▃▃▃▁▂▂▁▂▂▃▃▃▅▂▃█▃▂▃▄▃▃▃▃▂▃▆▂▇▅▃▃▁▃▂▆▃▆▆▁
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇█████

0,1
average_reward_across_seeds,37.26
seed_101_episode,999.0
seed_101_reward,41.0
seed_123_episode,999.0
seed_123_reward,52.0
seed_42_episode,999.0
seed_42_reward,27.0
seed_456_episode,999.0
seed_456_reward,16.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: l70dlc0w with config:
[34m[1mwandb[0m: 	alpha: 0.06531826328093572
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.4800694662112472


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█
seed_101_reward,▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▂▂▁▂▂▂▁▂▂▂▂▂▃▂▃▃▃▂▂▃▄█▂▂▂
seed_123_episode,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
seed_123_reward,▂▂▁▁▂▁▂▂▁▁▃▁▁▁▃▁▄▁▂▁▁▁▆▂▅█▅▆▂▂▆▄▇▆▂▆▆▅▃▇
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▇▇███
seed_42_reward,▁▁▁▁▂▁▂▂▃▂▂▁▂▂▁▂▁▂▂▂▁▂▂▂▂▃▂▄▂▂▂▄▂▂▅▆█▅▃▄
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇███
seed_456_reward,▂▂▁▂▁▁▂▁▂▃▂▁▁▃▃▁▁█▂▁▅▂▃▆▂▇▆▁▃▃▃▆▂▂█▄▅▄▁▇
seed_789_episode,▁▁▁▁▂▂▂▂▂▃▃▄▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇██

0,1
average_reward_across_seeds,120.896
seed_101_episode,999.0
seed_101_reward,214.0
seed_123_episode,999.0
seed_123_reward,77.0
seed_42_episode,999.0
seed_42_reward,120.0
seed_456_episode,999.0
seed_456_reward,500.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 874ik242 with config:
[34m[1mwandb[0m: 	alpha: 0.3979447687895547
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	temperature: 1.4011001965489205


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇████
seed_101_reward,▄▃▅▅▄▅▄▅▆▆▃▄▅▁▆▄▃█▃▂▆▆▄▆▆▇▇▇▂▅▅▆▆▃▂▃▄▄▄▂
seed_123_episode,▁▁▁▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇███
seed_123_reward,▁▁▂▂▂▂▅▃▃▃▃▂▄▂▃▃▂▃▂▂▃▃▆▄▃▂▆█▅▄▅▃▃▆▃▄▅▅▅▃
seed_42_episode,▁▁▁▁▁▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇███
seed_42_reward,▁▂▁▃▃▁▄▃▂▄▅▄▁▄▂▅▅▅▄▇▅▄█▆▇▅▅▇▄▅▅▆▆▇▇▆█▇▄▆
seed_456_episode,▁▁▁▂▂▂▃▃▃▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇██████
seed_456_reward,▁▃▁▄▄▃▃▅▃▃▃▃▄▂▄▃▅▄▃▃▆▂▆▇▂▃▇▃██▄▃▇█▄▃▇▇█▃
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇███

0,1
average_reward_across_seeds,172.408
seed_101_episode,999.0
seed_101_reward,105.0
seed_123_episode,999.0
seed_123_reward,197.0
seed_42_episode,999.0
seed_42_reward,164.0
seed_456_episode,999.0
seed_456_reward,189.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 3p5rpn0z with config:
[34m[1mwandb[0m: 	alpha: 0.14601429753764594
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 10
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 0.16017220106342803


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇██
seed_101_reward,▃▄▁▃█▂▄▂▄▃▇▂▃▂▂▄▂▂▂▃▄▃▁▂▃▃▅▅▃▃▂▄▂▃▂▁▄▄▂▂
seed_123_episode,▁▁▁▁▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇███
seed_123_reward,█▅▂▁▃▅▄▃▄▂▃▂▄▄▃▁▁▅▃▃▆▃▂▂▃▁▄▄▄▂▃▃▄▃▃▂▃▁▂▂
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇████
seed_42_reward,▁▁▂▃▃▃▂▃▁▂▃▅▂▁█▂▄▃▂▂▁▂▂▃▄▂▁▂▄▃▂▄▄▃▂▄▂▃▅▇
seed_456_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
seed_456_reward,▆▁▆▄▅▇▃▇▃▃▅▄▃▂▆▂▇▂▂█▆█▆▅▃▂▃▃▆▇▃▁▂▂▃▁▂▅▂▆
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
average_reward_across_seeds,25.868
seed_101_episode,999.0
seed_101_reward,41.0
seed_123_episode,999.0
seed_123_reward,10.0
seed_42_episode,999.0
seed_42_reward,20.0
seed_456_episode,999.0
seed_456_reward,17.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 8vkq226x with config:
[34m[1mwandb[0m: 	alpha: 0.4109515037326622
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	temperature: 0.8215464949882635


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
seed_101_reward,▁▁▅▂▅▂▆▁▂▅▂▆▃▆▁▂▁▁▁▅▆▂▆▁▇▁▂▄▂▅▁▂▅▂▄██▁▂█
seed_123_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇██
seed_123_reward,▃▂▁▆▄▆▆▆▅▃▂▇▁▃▃▁▆▂▆▁▁▂▆▇▄▄▁▃▅█▂▂█▆▃▇▄▇▅▄
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
seed_42_reward,▁▁▁▁▆▁▃▄▅▅▂▄▄▃▆▂▁▂▅▁▂▂█▆▆▃▆▂▄▃▇▁▄▆▇▂▆▄▃▁
seed_456_episode,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████
seed_456_reward,▃▃▃▄▅▂▁▁▃▄▄▂▃▁▄▄▁▄▂▁▁▁▄▃▁▁▄▁▄▁▁▁▂▁▇▁▄▅▁█
seed_789_episode,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇███

0,1
average_reward_across_seeds,99.02
seed_101_episode,999.0
seed_101_reward,36.0
seed_123_episode,999.0
seed_123_reward,57.0
seed_42_episode,999.0
seed_42_reward,97.0
seed_456_episode,999.0
seed_456_reward,29.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rx6i5qud with config:
[34m[1mwandb[0m: 	alpha: 0.3681456272848385
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 789
[34m[1mwandb[0m: 	temperature: 0.5991713826663416


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
seed_101_reward,▁▄▅▄▇▄▄▃▄▆▄▇▇▅▄▆▄▇▅▄▅▃▆▄▆▄▃█▆▄▅▄▇█▆▅▄▂▆▅
seed_123_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▆▆▆▆▆▆▆▇▇▇▇▇▇██████
seed_123_reward,▄▄▄▄▁▃▃▄▄▄█▇▄▅▄▆▇▄▃▇▄█▃▆▄▅▃▇▅▆▅▅▁▄▅▅▅▇▆▄
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
seed_42_reward,▂▄▄▅▅▅▅▅▄▅▅▅▄▅▅▅▅▄▅▆▅▅▅▇▆▅▃▅▅▅▁▇▆▅▆▆▂▇█▄
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_456_reward,▄▃▂▂▄▂▂▂▃▁▂▂▁▇▆▂▃▁▃▄▁▂▂▂▂▂▇▁▂▄▄▂▃▂▃█▂▂▂▂
seed_789_episode,▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇███

0,1
average_reward_across_seeds,73.172
seed_101_episode,999.0
seed_101_reward,63.0
seed_123_episode,999.0
seed_123_reward,107.0
seed_42_episode,999.0
seed_42_reward,116.0
seed_456_episode,999.0
seed_456_reward,23.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 9fdbyi9q with config:
[34m[1mwandb[0m: 	alpha: 0.2517672419831519
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.288219813610793


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇███
seed_101_reward,▁▁▁▂▄▅▄▄▄▅▆▇▅▃▄▅▆▆▆▆▅█▆▅▆▅▇▄▆▆▅▇▆▄▆▄▄▆▄▇
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
seed_123_reward,▁▁▁▁▃▂▂▃▃▃▂▃▃▃█▅▃▂▃▃▄▃▃▃▂▄▄▃▃▃▃▄▃▄▃▃▅▃▃▃
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇█
seed_42_reward,▁▂▁▂▁▁▂▁▄▁▅▇▁██▄█▁▁▅▁▁▁▇▂▄▃▅▂▃█▃▁▃▅▅▃▆▁▄
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇███
seed_456_reward,▁▁▂▂▃▄▃▆▂▂▂▆▃▃▃▃▄▃▄▄▃▃▆▂█▃▃▃▃▄▄▄▅▅▄▅▄▃▄▄
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▇▇▇▇▇█████

0,1
average_reward_across_seeds,148.248
seed_101_episode,999.0
seed_101_reward,151.0
seed_123_episode,999.0
seed_123_reward,130.0
seed_42_episode,999.0
seed_42_reward,148.0
seed_456_episode,999.0
seed_456_reward,137.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: odzw5f3n with config:
[34m[1mwandb[0m: 	alpha: 0.4347247014688747
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 101
[34m[1mwandb[0m: 	temperature: 1.3223136078270197


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇█
seed_101_reward,▃▂▂▄▂▃▄▃▄▅▅▂▂▆▄▅▂▂▄▅▁▂▃▆▅▃▆▅▆▂▂▆▆▄▃█▇▄▆▆
seed_123_episode,▁▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇████
seed_123_reward,▅▁▃▃▃▁▃▃▁▃▃▄▅▃▄▄▁▃▅▅▇▆▅▆▅▅▂▅▂▆▂▇▄▃█▅▅▇▄▇
seed_42_episode,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇███
seed_42_reward,▁▂▂▅▁▆▄▆▆▅▆▇▆▃▆▄▁▄▇▆▃▇▆▆▇▆▇█▄▆▅▆▂▇▆▅▄▂▇▅
seed_456_episode,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇█
seed_456_reward,▁▁▂▁▂▃▂▄▃▃▃▃▂▅▄▅▄▅▄▃▆▅▅▄▅▆▄▆▇▇▆▆▆▆▇█▆▆▆█
seed_789_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇███

0,1
average_reward_across_seeds,129.532
seed_101_episode,999.0
seed_101_reward,105.0
seed_123_episode,999.0
seed_123_reward,119.0
seed_42_episode,999.0
seed_42_reward,329.0
seed_456_episode,999.0
seed_456_reward,114.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: t2djtvjq with config:
[34m[1mwandb[0m: 	alpha: 0.10139552335799638
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 0.19818025379246215


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
seed_101_reward,▇▆▃▃▁▂▅▄█▄▂▂▄▄▃▂▂▂▂▂▆▂▂▃▂▂▃▂▄▂▂▂▁▁▂▁▄▁▂▂
seed_123_episode,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
seed_123_reward,▁▁▄▁▁▁▄▁▆▁▃▅▄▁▅▁▁█▁▁▆▁▁█▅▄▄▄▅▁▃▁▄▂▄▅▁█▄▄
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇█
seed_42_reward,▄▁▅▃▆█▂▃▃▂▂▂▄▆▅▃▂▂▅▁▃▅▂▂▃▆▃▄▄▅▅▆▄▁▅▂▂▃▅▆
seed_456_episode,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇████
seed_456_reward,▄▂▄█▃▁▃▂▂▂▃█▁▃▂▁▂▁▂▄▂▄▅▂▂▂▂▁▂▇▁▂▂▂▁▃▁▄▂▆
seed_789_episode,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
average_reward_across_seeds,47.068
seed_101_episode,999.0
seed_101_reward,21.0
seed_123_episode,999.0
seed_123_reward,13.0
seed_42_episode,999.0
seed_42_reward,117.0
seed_456_episode,999.0
seed_456_reward,25.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: luvqp10a with config:
[34m[1mwandb[0m: 	alpha: 0.3586636165420974
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.4301473186110298


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇███
seed_101_reward,▂▁▁▂▂▃▄▄▅▆▇▅▆▇▆▇▆▆█▆██▆▆█▇▇▇█▆▆▇▇▆▇▇▇▇▇▇
seed_123_episode,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▇████
seed_123_reward,▁▂▄▅▄▂▅▆▅▃▄▇▅▆▆▁▇▆▅▄▂▅█▄▅▄▅█▇▃▇▃▄▂▄▃▄▅▇▄
seed_42_episode,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▇▇▇▇▇███
seed_42_reward,▁▁▁▁▁▁▁▂▁▁▂▂▃▃▄▃▂▃▄▄▂▄▂▃▂▄▃▃█▃▃▃▃▃▃▃▃▃▄▃
seed_456_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇█
seed_456_reward,▁▁▂▂▅▁▂▁▁▅▁▂▂▁▆▄▂▅▄▅▂▃▃▇▃▃▄▃▃▅▄▇▃▅▃▄█▃█▆
seed_789_episode,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█████

0,1
average_reward_across_seeds,163.228
seed_101_episode,999.0
seed_101_reward,129.0
seed_123_episode,999.0
seed_123_reward,404.0
seed_42_episode,999.0
seed_42_reward,180.0
seed_456_episode,999.0
seed_456_reward,130.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: p1c93zua with config:
[34m[1mwandb[0m: 	alpha: 0.3493454251791392
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 789
[34m[1mwandb[0m: 	temperature: 1.7197178294488076


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇█
seed_101_reward,▂▃▁▁▂▂▅▂▃▅▂▂▆▃▂▄▄▄▃▂▃▃█▄▄▄▄▃▅▄▃▃▄▄▃▅▃▃▃▄
seed_123_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇████
seed_123_reward,▁▁▁▂▃█▃▆▃▂▃▄▃▂▄▅▃▆▃▄▇▄▃▂▂▅▅██▅▃█▃▄▅▄▇▅▄▄
seed_42_episode,▁▁▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
seed_42_reward,▁▁▁▁▁▁▂▂▄▅▃▃▂█▃▁▃▃▃▅▃▄▄▃▄▃▂▂▂▃▄▃▄▄▂▇▆▃▁▁
seed_456_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇█
seed_456_reward,▂▁▂▂▂▂▁▁▁▂▂▃▃▂▄▃▂▅▁▃▃▅▄▆▄▆▃▅▃█▆▅▆▅▆█▅▄▄▆
seed_789_episode,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇████

0,1
average_reward_across_seeds,170.156
seed_101_episode,999.0
seed_101_reward,145.0
seed_123_episode,999.0
seed_123_reward,493.0
seed_42_episode,999.0
seed_42_reward,169.0
seed_456_episode,999.0
seed_456_reward,115.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: uhcxbbw8 with config:
[34m[1mwandb[0m: 	alpha: 0.4746949434304494
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.9841778211432148


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇██
seed_101_reward,▂▂▂▄▁▃▄▃▅▄▅▆▅▆▆▅▆▇▆▆▇█▆▇▆▆▇▆▆▆▇▇▆█▇▇▆▃▇█
seed_123_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇██
seed_123_reward,▁▂▁▃▂▅▅▅▆▆▆▆▃▆▇▇█▅▆▆▇▇▆▆▆▇▇▆▇▆▆▇██▇▅▇▇▇█
seed_42_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█
seed_42_reward,▁▁▃▃▆▂▄▆▅▇▂▇█▄▅█▅▅█▅▅█▇██▆█▇██▅▆█▅▆▆▅▅██
seed_456_episode,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇█████
seed_456_reward,▁▂▁▁▃▂▄▅▃▃▄▂▃▄▄▇▆▅▅▆▅▅▆▆▇▇▆▅▆▅▇▆▆▆█▇█▆▆▆
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇█

0,1
average_reward_across_seeds,200.28
seed_101_episode,999.0
seed_101_reward,213.0
seed_123_episode,999.0
seed_123_reward,269.0
seed_42_episode,999.0
seed_42_reward,273.0
seed_456_episode,999.0
seed_456_reward,98.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 5g5mcjv8 with config:
[34m[1mwandb[0m: 	alpha: 0.17742515716252436
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	temperature: 0.8466098709084813


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█████
seed_101_reward,▁▁▁▁▃▂▂▂▂▂▂▂▃▃▂▃▃▁▂▂▄▁▃▃▄▄▆▄█▃▃▂▃▂▃▅▄▆▃▄
seed_123_episode,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇█████
seed_123_reward,▃▇▄▅▁▆▄▃▅▃▄▄▄▇▇▂▃▇▇▃▁▇▃▄▆▃█▆▅▅▆▅▄▃▃▆▅▆▆▄
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▂▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
seed_42_reward,▂▂▁▁▁▁▄▁▁▂▃▁▅▁█▁▁▇▁▁▃▂▆▁▅▁▃▅▇▆▁▄▂▄▅▃▁▂▄▆
seed_456_episode,▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
seed_456_reward,▁▂▂▂▃▁▃▄▁▆▂▄▂▁▃▂▁▆▃▄▂▂▄▁▁▂▂▁▃▆▄▃▆▃▃▄█▂▂▄
seed_789_episode,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇█

0,1
average_reward_across_seeds,118.404
seed_101_episode,999.0
seed_101_reward,20.0
seed_123_episode,999.0
seed_123_reward,172.0
seed_42_episode,999.0
seed_42_reward,178.0
seed_456_episode,999.0
seed_456_reward,81.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: cjhqtihe with config:
[34m[1mwandb[0m: 	alpha: 0.3619731583428314
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	temperature: 0.4774758655597322


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇███
seed_101_reward,▂▂▁▄▃▇▂▂▂▅▅▅▁▂▁▁▁▂▁█▁▁▁▂▂█▁▃▁▅▂▄▂▄▄▅▃▁▂▇
seed_123_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▆▆▆▆▆▇▇▇▇▇████
seed_123_reward,█▄█▄▁▂▂▂▄▄▄▂▅▅▅▆▃▃▅▄▄▁▁▄▄▄▄▆▆▄▃▄▅▄▅▅▄▁▅▆
seed_42_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇████
seed_42_reward,▂▂▄▆▃▅▅▅█▃▃▇█▅▅▄▇▄▆▆▆▄▄▅▅▆▆▆▄▆▃▅▁▄█▂▇▂▃▄
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█
seed_456_reward,▂█▁▃▁▃▂▂▇▂▅▄▁▁▇▂▇▆▅▄▆▂▄▄▃▅▇▂▂▃▁▃▆▂▁▇▁▃▂▂
seed_789_episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇██

0,1
average_reward_across_seeds,50.392
seed_101_episode,999.0
seed_101_reward,37.0
seed_123_episode,999.0
seed_123_reward,98.0
seed_42_episode,999.0
seed_42_reward,67.0
seed_456_episode,999.0
seed_456_reward,192.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3yt1q14v with config:
[34m[1mwandb[0m: 	alpha: 0.05480955983534928
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.5803053735590715


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇███
seed_101_reward,▁▁▁▁▁▂▂▁▁▁▂▂▂▂▁▁▂▅▇▂▅▃▄▃▆▁▄▃▂▂▃▅▄▆▄▅▄▅█▄
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇███
seed_123_reward,▃▁▂▁▂▃▂▂▃▂▂▁▁▁▁▂▁▁▂▂▄▃▁▄▄▃▇▄▇▆▅▄▆█▃▂▃▃▄▃
seed_42_episode,▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▅▅▅▆▆▆▇▇▇███
seed_42_reward,▂▂▄▁▁▃▂▁▁▂▅▃▃▂█▂▁▂▃▂▃▂▁▃▄▃▆▄▄▅▅▃▄▄▃▃▄▆▇▇
seed_456_episode,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇████
seed_456_reward,▁▁▁▁▁▁▁▁▂▁▃▂▂▃▄▁▃▂▂▃▄▃▂▂▃▃▄▄▃▄▄▆▃▅█▅▄▄▄▆
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇█

0,1
average_reward_across_seeds,123.34
seed_101_episode,999.0
seed_101_reward,233.0
seed_123_episode,999.0
seed_123_reward,69.0
seed_42_episode,999.0
seed_42_reward,74.0
seed_456_episode,999.0
seed_456_reward,92.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: czb1o41o with config:
[34m[1mwandb[0m: 	alpha: 0.1421610230719271
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 15
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	temperature: 1.3598303560901457


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇████
seed_101_reward,▁▁▁▁▁▁▁▁▂▂▁▂▁▂▃▄▄▃▃▃▁▃▃▃▄▄▁▆█▅▅▅▅▃▅▅▆▅▇▂
seed_123_episode,▁▁▁▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇██
seed_123_reward,▂▁▁▃▂▁▃▁▄▄▅▁▆▃▁▄▅▃▁▅▂▅▁▂▅▇▅▂▇▅▅█▅▅▅▅▅▆▂▅
seed_42_episode,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇█████
seed_42_reward,▁▁▂▁▂▃▁▂▃▂▅▆▃▅▆▄▅▄▄▅▆▄▄█▅▇▄▆▅▄▅▅▅▅▅▅▆▄▄▅
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇█
seed_456_reward,▁▁▂▁▁▂▄▂▃▂▄▂▂▃▃▄▃▅▂▃▃▅▁█▄▄▇▅▅▇▅▄▅▇▃▂▇▅▅▆
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████

0,1
average_reward_across_seeds,150.644
seed_101_episode,999.0
seed_101_reward,193.0
seed_123_episode,999.0
seed_123_reward,138.0
seed_42_episode,999.0
seed_42_reward,131.0
seed_456_episode,999.0
seed_456_reward,96.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qukqjiz7 with config:
[34m[1mwandb[0m: 	alpha: 0.1787291538126319
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	temperature: 1.0103187716528836


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▂▂▂▃▃▃▃▃▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇███
seed_101_reward,▁▁▂▂▄▅▅▅▆▆▅▆▇▅▇▇▅▆▅▇▆▅▇▆▇▆▇▆▇▇▆▆▇▇▆█▇▇▇█
seed_123_episode,▁▁▁▁▂▂▂▂▂▂▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇██
seed_123_reward,▁▁▄▄▄▅▄▇▅▅█▇▆▆▅▅▅▆▃▇▆▅▄▇▇▅▅▃█▆▅▇▆▇▇▄▆▆▇▆
seed_42_episode,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█████
seed_42_reward,▂▃▁▁▁▅▁▁▁▂▆█▂▅▁▁▂█▁▆▆▂▆▇▂▂▁▆▆█▆▂▂▂█▂▂▇▂▂
seed_456_episode,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▇▇████
seed_456_reward,▁▁▁▂▄▁▁▄▆▄▄▃▄▆▆▅▇▅▆▅▅▆█▅▇▆▅▆▆▅▇▆▇▆█▇▇▇▅▇
seed_789_episode,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇██

0,1
average_reward_across_seeds,105.068
seed_101_episode,999.0
seed_101_reward,110.0
seed_123_episode,999.0
seed_123_reward,92.0
seed_42_episode,999.0
seed_42_reward,48.0
seed_456_episode,999.0
seed_456_reward,100.0
seed_789_episode,999.0


[34m[1mwandb[0m: Agent Starting Run: 1dd40162 with config:
[34m[1mwandb[0m: 	alpha: 0.0584845642617268
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 789
[34m[1mwandb[0m: 	temperature: 0.877312972800229


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇████
seed_101_reward,▆▂▅▃▁▂▆▂▃▂▁▂▃▄▄▅▅▆▅▇▇▄▅▅▆█▃▇▇▃▄▆▄▅▅▅▄▆▄▆
seed_123_episode,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▄▄▄▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆█████
seed_123_reward,▂▁▁█▁▄▂▁▃▂▃▄▂▂▂▂▄▂▃▇▂▂▃▂▃▃▃▄▃▃▃▅▅▄▃▃▄▅▃▄
seed_42_episode,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇██
seed_42_reward,▁▁▃▄▂▁▄▃▅▁▁▅▆▄▅▆▃▃▇▇▃▆▃▆▁▅▇▇██▇▇▁▂▇▂▇█▂█
seed_456_episode,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
seed_456_reward,▃▁▁▁▁▁▁▁▁▂▁▂▂▁▂▂▁▁▂▁▁▂▃▃▃▁▃▂▂▂▅▂▂▂▆▅▂▂▇█
seed_789_episode,▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇████

0,1
average_reward_across_seeds,76.476
seed_101_episode,999.0
seed_101_reward,67.0
seed_123_episode,999.0
seed_123_reward,45.0
seed_42_episode,999.0
seed_42_reward,37.0
seed_456_episode,999.0
seed_456_reward,204.0
seed_789_episode,999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6ruirnka with config:
[34m[1mwandb[0m: 	alpha: 0.3382421726811943
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	n_bins: 20
[34m[1mwandb[0m: 	n_episodes: 1000
[34m[1mwandb[0m: 	seed: 456
[34m[1mwandb[0m: 	temperature: 1.5236135300583142


0,1
average_reward_across_seeds,▁
seed_101_episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇██
seed_101_reward,▁▁▁▁▅▃▅▅▃▅▅▄▆▅▆▆▆▆▄▆▆▇▆▆▆▆▆▇▆▆▆▇▇▄▆▇█▆▇▆
seed_123_episode,▁▁▁▁▁▁▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇████
seed_123_reward,▄▁▂▃▅▅▇▅▅▅▅▄▄▆▆▄▅▅▆▆▅▇█▆▇▆▇█▆▆▆▇█▇▇▇▆▇▇█
seed_42_episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
seed_42_reward,▁▄▄▅▅▂▆▅▆▅▆▆▃▆▆▇▆▆▆▅▃▂▇▇▂▃▂▇▇▇▇▃▂█▇▇▇▇▂▂
seed_456_episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██████
seed_456_reward,▁▁▄▅▃▃▃▄▃▃▂▂▃▂▄▄▄▄▄▄▅▃▅▆▆▆▄▃▅▆▄▄▆▇█▆▆▃▆▃
seed_789_episode,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██

0,1
average_reward_across_seeds,167.928
seed_101_episode,999.0
seed_101_reward,152.0
seed_123_episode,999.0
seed_123_reward,139.0
seed_42_episode,999.0
seed_42_reward,338.0
seed_456_episode,999.0
seed_456_reward,43.0
seed_789_episode,999.0


In [5]:
import numpy as np
import matplotlib.pyplot as plt
import os
import gymnasium as gym

# Function to run SARSA for n_episodes using the existing agent classes
def run_sarsa(env, n_episodes, seeds, alpha, gamma, epsilon, n_bins):
    all_rewards = []

    for seed in seeds:
        np.random.seed(seed)
        env.reset(seed=seed)

        # Initialize agent with the given hyperparameters
        state_discretizer = StateDiscretizer(n_bins=n_bins)
        agent = SARSAAgent(
            state_discretizer=state_discretizer,
            action_space=env.action_space,
            alpha=alpha,
            gamma=gamma,
            epsilon=epsilon
        )

        episode_rewards = []

        for episode in range(n_episodes):
            state, _ = env.reset(seed=seed+episode)
            action = agent.select_action(state)
            done = False
            total_reward = 0

            while not done:
                next_state, reward, terminated, truncated, _ = env.step(action)
                done = terminated or truncated
                total_reward += reward

                next_action = agent.select_action(next_state)
                agent.update(state, action, reward, next_state, next_action, done)

                state = next_state
                action = next_action

            episode_rewards.append(total_reward)

            # Print progress every 1000 episodes
            if (episode + 1) % 1000 == 0:
                print(f"SARSA - Seed {seed}, Episode {episode + 1}/{n_episodes}, Reward: {total_reward}")

        all_rewards.append(episode_rewards)

    return np.array(all_rewards)

# Function to run Q-Learning for n_episodes using the existing agent classes
def run_qlearning(env, n_episodes, seeds, alpha, gamma, temperature, n_bins):
    all_rewards = []

    for seed in seeds:
        np.random.seed(seed)
        env.reset(seed=seed)

        # Initialize agent with the given hyperparameters
        state_discretizer = StateDiscretizer(n_bins=n_bins)
        agent = QLearningAgent(
            state_discretizer=state_discretizer,
            action_space=env.action_space,
            alpha=alpha,
            gamma=gamma,
            temperature=temperature
        )

        episode_rewards = []

        for episode in range(n_episodes):
            state, _ = env.reset(seed=seed+episode)
            done = False
            total_reward = 0

            while not done:
                action = agent.select_action(state)
                next_state, reward, terminated, truncated, _ = env.step(action)
                done = terminated or truncated
                total_reward += reward

                agent.update(state, action, reward, next_state, done)

                state = next_state

            episode_rewards.append(total_reward)

            # Print progress every 1000 episodes
            if (episode + 1) % 1000 == 0:
                print(f"Q-Learning - Seed {seed}, Episode {episode + 1}/{n_episodes}, Reward: {total_reward}")

        all_rewards.append(episode_rewards)

    return np.array(all_rewards)

# Functions for plotting
def plot_results(rewards, title, filename):
    mean_rewards = np.mean(rewards, axis=0)
    std_rewards = np.std(rewards, axis=0)
    episodes = np.arange(1, len(mean_rewards) + 1)

    plt.figure(figsize=(10, 6))
    plt.plot(episodes, mean_rewards, label='Mean Reward')
    plt.fill_between(episodes, mean_rewards - std_rewards, mean_rewards + std_rewards, alpha=0.3, label='Standard Deviation')

    plt.title(title)
    plt.xlabel('Episode')
    plt.ylabel('Reward')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(filename)
    plt.close()

def plot_comparative_results(sarsa_rewards, qlearning_rewards, filename):
    sarsa_mean = np.mean(sarsa_rewards, axis=0)
    sarsa_std = np.std(sarsa_rewards, axis=0)
    qlearning_mean = np.mean(qlearning_rewards, axis=0)
    qlearning_std = np.std(qlearning_rewards, axis=0)
    episodes = np.arange(1, len(sarsa_mean) + 1)

    plt.figure(figsize=(12, 7))
    plt.plot(episodes, sarsa_mean, label='SARSA Mean', color='blue')
    plt.fill_between(episodes, sarsa_mean - sarsa_std, sarsa_mean + sarsa_std, alpha=0.2, color='blue')
    plt.plot(episodes, qlearning_mean, label='Q-Learning Mean', color='red')
    plt.fill_between(episodes, qlearning_mean - qlearning_std, qlearning_mean + qlearning_std, alpha=0.2, color='red')

    plt.title('SARSA vs Q-Learning in CartPole-v1')
    plt.xlabel('Episode')
    plt.ylabel('Reward')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(filename)
    plt.close()

# Main function
def main():
    # Create directory for results
    os.makedirs('results', exist_ok=True)

    # Initialize environment
    env = gym.make("CartPole-v1")

    # Set parameters
    n_episodes = 10000
    seeds = [42, 123, 456, 789, 101]  # 5 random seeds

    # Set best hyperparameters from wandb sweeps
    sarsa_params = {
        'alpha': 0.38074,  # Replace with your best value
        'gamma': 0.99,
        'epsilon': 0.15554,  # Replace with your best value
        'n_bins': 10 # Replace with your best value
    }

    qlearning_params = {
        'alpha': 0.2363,  # Replace with best value
        'gamma': 0.99,
        'temperature': 1.89753,  # Replace with best value
        'n_bins': 20  # Replace with best value
    }

    print("Starting SARSA training...")
    sarsa_rewards = run_sarsa(env, n_episodes, seeds, **sarsa_params)
    np.save("results/sarsa_rewards.npy", sarsa_rewards)

    print("Starting Q-Learning training...")
    qlearning_rewards = run_qlearning(env, n_episodes, seeds, **qlearning_params)
    np.save("results/qlearning_rewards.npy", qlearning_rewards)

    # Generate plots
    print("Generating plots...")
    plot_results(sarsa_rewards, 'SARSA in CartPole-v1', 'results/sarsa_plot.png')
    plot_results(qlearning_rewards, 'Q-Learning in CartPole-v1', 'results/qlearning_plot.png')
    plot_comparative_results(sarsa_rewards, qlearning_rewards, 'results/comparative_plot.png')

    print("Done! Results saved in the 'results' directory.")

if __name__ == "__main__":
    main()


Starting SARSA training...
SARSA - Seed 42, Episode 1000/10000, Reward: 27.0
SARSA - Seed 42, Episode 2000/10000, Reward: 114.0
SARSA - Seed 42, Episode 3000/10000, Reward: 174.0
SARSA - Seed 42, Episode 4000/10000, Reward: 185.0
SARSA - Seed 42, Episode 5000/10000, Reward: 13.0
SARSA - Seed 42, Episode 6000/10000, Reward: 47.0
SARSA - Seed 42, Episode 7000/10000, Reward: 182.0
SARSA - Seed 42, Episode 8000/10000, Reward: 121.0
SARSA - Seed 42, Episode 9000/10000, Reward: 184.0
SARSA - Seed 42, Episode 10000/10000, Reward: 208.0
SARSA - Seed 123, Episode 1000/10000, Reward: 174.0
SARSA - Seed 123, Episode 2000/10000, Reward: 119.0
SARSA - Seed 123, Episode 3000/10000, Reward: 45.0
SARSA - Seed 123, Episode 4000/10000, Reward: 112.0
SARSA - Seed 123, Episode 5000/10000, Reward: 66.0
SARSA - Seed 123, Episode 6000/10000, Reward: 225.0
SARSA - Seed 123, Episode 7000/10000, Reward: 261.0
SARSA - Seed 123, Episode 8000/10000, Reward: 203.0
SARSA - Seed 123, Episode 9000/10000, Reward: 170.0