In [None]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

import environments.mountain_car as mc
from useful import trees
from agents import dqn_agent
from agents import ddqn_agent
from agents import pddqn_agent

In [None]:
# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# Models

In [None]:
class mountain_car_model_1(nn.Module):
    """
    Fully connected network with one hidden layer:
    n_observations -> 256 (ReLU) -> n_actions.
    """

    def __init__(self, n_observations, n_actions):
        super().__init__()
        hidden_units = 256
        layers = [
            nn.Linear(n_observations, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_actions),
        ]
        # Attribute name and layer order are kept so saved state_dict keys stay "model.0.*" / "model.2.*".
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Returns one output per action for the input tensor x."""
        output = self.model(x)
        return output

In [None]:
class dueling_dqn(nn.Module):
    """
    Dueling network: a shared 256-unit ReLU layer feeds two linear heads,
    a single state value and one advantage per action. The heads are combined as
    value + (advantage - mean advantage over actions).
    """

    def __init__(self, n_observations, n_actions):
        super().__init__()
        self.n_observations = n_observations

        hidden_units = 256
        # Attribute names are kept so saved state_dict keys do not change.
        self.model = nn.Sequential(nn.Linear(n_observations, hidden_units), nn.ReLU())
        self.state_value = nn.Sequential(nn.Linear(hidden_units, 1))
        self.advantage = nn.Sequential(nn.Linear(hidden_units, n_actions))

    def forward(self, x):
        """
        Returns the combined output with shape (batch, n_actions).
        A single observation of shape (n_observations,) is given a batch dimension first,
        so its result has shape (1, n_actions).
        """
        is_single_observation = x.shape == torch.Size([self.n_observations])
        features = self.model(x.unsqueeze(0) if is_single_observation else x)

        value = self.state_value(features)
        advantage = self.advantage(features)
        # The mean is taken over dim 1 (actions), which is why a batch dimension is required.
        centred_advantage = advantage - advantage.mean(dim = 1, keepdim = True)

        return value + centred_advantage

# Functions

In [None]:
def testing(env, agent, n_episodes, file_name, global_episode, render):
    """
    Testing agents for the mountain car environment
    -----
    Input
    -----
    env: gym environment
    agent: training agent
    n_episodes: an integer for the number of episodes to run
    file_name: a string name of the model to import
    global_episode: an integer for the current episode in training
    render: a boolean if the episode should be printed
    ------
    Output
    ------
    reward_list: mean reward for all validation episodes
    """
    agent.main_model.load_state_dict(torch.load(f"model_weights/mountain_car/{file_name}.pth"))
    agent.main_model.eval()
    reward_list = []

    for episode in range(n_episodes):
        state, _ = env.reset()
        total_reward = 0

        for step in range(env.max_steps):
            action = agent.act(state)
            next_state, reward, termination, truncation, _ = env.step(action)

            if (global_episode + 1) % 100 == 0 and render and render_count == 0:
                print(f"Episode: {global_episode + 1} | Reward: {reward:.2f}")
                env.render()

            state = next_state
            total_reward += reward
            if termination or truncation:
                break

        reward_list.append(total_reward)

    return np.mean(reward_list)

In [None]:
def training(env, agent, v_agent, n_episodes, file_name, target_update_steps, version, render = False, optimal = False):
    """
    Training agents for the mountain car environment. Validates every 50 episodes and,
    when all episodes are completed, plots the total reward of every training episode.
    -----
    Input
    -----
    env: gym environment
    agent: training agent
    v_agent: validation agent (the saved weights are loaded into it by `testing`)
    n_episodes: number of episodes to run
    file_name: file name for saving the model (model_weights/mountain_car/<file_name>.pth)
    target_update_steps: how many steps to update the target model
    version: what is being tested (used in the plot title)
    render: if the episode should be printed
    optimal: boolean to stop as soon as a validation reward is above -105.0
    ------
    output
    ------
    validation_rewards: a list of average rewards per validation batch

    NOTE(review): when `optimal` is True and training stops early, the tuple
    (reward_list, validation_rewards) is returned instead and nothing is plotted.
    """
    reward_list = []
    validation_rewards = []
    global_steps = 0

    for episode in range(n_episodes):
        state, _ = env.reset()
        total_reward = 0

        for steps in range(env.max_steps):
            # The target network is refreshed on a global step count, not per episode.
            global_steps += 1
            if global_steps % target_update_steps == 0:
                agent.update_target()

            action = agent.act(state)
            next_state, reward, termination, truncation, _ = env.step(action)
            agent.update_memory(state, action, reward, next_state, termination)

            agent.train_step()
            agent.decay_epsilon()
            total_reward += reward
            state = next_state

            if termination or truncation:
                break

        reward_list.append(total_reward)
        # Saved after every episode because `testing` reads the weights back from this file.
        torch.save(agent.main_model.state_dict(), f"model_weights/mountain_car/{file_name}.pth")

        if (episode + 1) % 50 != 0:
            continue

        validation_reward = testing(env, v_agent, 100, file_name, episode, render)
        print(f"Episode: {episode - 48} - {episode + 1} | Average reward: {validation_reward:.2f}")
        validation_rewards.append(validation_reward)
        if optimal and validation_reward > -105.0:
            return reward_list, validation_rewards

    plot_training(reward_list, version)

    return validation_rewards

In [None]:
def plot_training(rewards, version):
    """
    Scatter plot of the total reward of each training episode
    -----
    Input
    -----
    rewards: a list of rewards, one per training episode
    version: a string appended to the title
    """
    # Episodes are numbered from 1 on the x axis.
    episode_numbers = list(range(1, len(rewards) + 1))

    fig, ax = plt.subplots(figsize = (20, 3.333))

    ax.scatter(episode_numbers, rewards, s = 10)
    ax.set_title(f"Learning Progress | {version}")
    ax.set_ylabel("Total Reward")
    ax.set_xlabel("Training Episodes")
    ax.grid()

    plt.tight_layout()
    plt.show()

In [None]:
def plot_validation(v_rewards_list, labels, version, size = (4.5, 3)):
    """
    Plots the mean validation reward of each group with a band of one standard deviation
    -----
    Input
    -----
    v_rewards_list: a list of groups, each group holding the validation rewards of its training runs
    labels: a list of strings, one legend entry per group
    version: a string for the title
    size: a tuple (width, height) for the dimensions of the graph
    """
    # The x axis assumes one validation point per 50 training episodes; its length comes from the first run.
    n_points = len(v_rewards_list[0][0])
    episodes = list(range(50, 50*n_points + 1, 50))

    fig, ax = plt.subplots(figsize = (size[0], size[1]))

    for group, legend in zip(v_rewards_list, labels):
        # Mean and spread are taken across runs, point by point.
        group_mean = np.mean(group, axis = 0)
        group_std = np.std(group, axis = 0)
        ax.plot(episodes, group_mean, label = legend)
        ax.fill_between(episodes, group_mean - group_std, group_mean + group_std, alpha = 0.25)

    ax.set_title(f"{version}")
    ax.set_ylabel("Average Total Reward")
    ax.set_xlabel("Episodes")
    ax.grid()
    ax.legend(fontsize = 8)
    # Only the lower limit is fixed; the upper limit is left as it is.
    ax.set_ylim(-205)

    plt.tight_layout()
    plt.show()

In [None]:
def best_curve(v_rewards_list, labels):
    """
    Prints, for every run of every group, the mean squared difference between that run
    and the mean curve of its group (the smallest value is the run closest to the mean)
    -----
    Input
    -----
    v_rewards_list: a list of groups, each group holding the validation rewards of its training runs
    labels: a list of strings; the label printed for a run is picked by the run's position in its group

    NOTE(review): labels are indexed per run, not per group, so the list needs at least
    as many entries as the largest group has runs. Confirm this is the intended pairing.
    """
    for group in v_rewards_list:
        mean_curve = np.mean(group, axis = 0)
        for run_index, run in enumerate(group):
            squared_distance = np.mean((run - mean_curve)**2, axis = 0)
            print(f"{squared_distance:.2f} | {labels[run_index]}")

# Algorithms

In [None]:
# DQN with MSE loss: five runs, each saving its weights to its own file.
global_dqn = []
file_name = [f"algorithms/dqn/mse_{run}" for run in range(1, 6)]
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = dqn_agent.dqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent.
    agent = dqn_agent.dqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    dqn_mse_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "DQN",
    )
    global_dqn.append(dqn_mse_v_rewards)

In [None]:
# DDQN with MSE loss: five runs, each saving its weights to its own file.
global_ddqn = []
file_name = [f"algorithms/ddqn/mse_{run}" for run in range(1, 6)]
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ddqn_mse_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "DDQN",
    )
    global_ddqn.append(ddqn_mse_v_rewards)

In [None]:
# pddqn_agent_mse (plotted later under the label "PER"): five runs, each saving its weights to its own file.
global_pddqn = []
file_name = [f"algorithms/pddqn/mse_{run}" for run in range(1, 6)]
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    # Unlike the other validation agents in this notebook it is given buffer_size = 1.
    v_agent = pddqn_agent.pddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, alpha = 0, beta = 0, lr = 0, epsilon = 0, epsilon_min = 0,
        decay_steps = 1, buffer_size = 1, batch_size = 0, n_episodes = 1, device = device,
    )

    # Training agent; n_episodes matches the n_episodes passed to `training` below.
    agent = pddqn_agent.pddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, alpha = 0.7, beta = 0.5, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01,
        decay_steps = 20000, buffer_size = 20000, batch_size = 128, n_episodes = 1500, device = device,
    )

    pddqn_mse_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "PDDQN",
    )
    global_pddqn.append(pddqn_mse_v_rewards)

In [None]:
# Dueling network trained with ddqn_agent_mse: five runs, each saving its weights to its own file.
global_dddqn = []
# Every run writes under algorithms/dddqn/. The second entry used to point at
# "algorithms/ddqn/mse_2", which overwrote the DDQN run-2 weights with this model.
file_name = ["algorithms/dddqn/mse_1", "algorithms/dddqn/mse_2", "algorithms/dddqn/mse_3", "algorithms/dddqn/mse_4", "algorithms/dddqn/mse_5"]
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(model = dueling_dqn, state_dim = 2, action_dim = env.action_space.n, gamma = 0, lr = 0, 
                                        epsilon = 0, epsilon_min = 0, decay_steps = 1, buffer_size = 0, batch_size = 0, device = device)

    # Training agent.
    agent = ddqn_agent.ddqn_agent_mse(model = dueling_dqn, state_dim = 2, action_dim = env.action_space.n, gamma = 0.99, lr = 0.0005,
                                      epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000, buffer_size = 20000, batch_size = 128, device = device)

    dddqn_mse_v_rewards = training(env = env, agent = agent, v_agent = v_agent, n_episodes = 1500, file_name = file_name[i], 
                                   target_update_steps = 1000, version = "DDDQN")

    global_dddqn.append(dddqn_mse_v_rewards)

In [None]:
# One validation curve per algorithm; labels are paired with v_rewards_list by position.
version = "Algorithms"
labels = ["DQN", "DDQN", "PER", "Duelling"]
v_rewards_list = [global_dqn, global_ddqn, global_pddqn, global_dddqn]

plot_validation(v_rewards_list, labels, version)

# Parameter Evaluation for Mountain Car Version 1
## MSE vs Huber Loss

In [None]:
# MSE loss (ddqn_agent_mse): five runs, each saving its weights to its own file.
global_mse = []
file_name = [f"parameters/loss/mse_{run}" for run in range(1, 6)]
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    mse_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "MSE",
    )
    global_mse.append(mse_v_rewards)

In [None]:
# Huber loss (ddqn_agent_huber): five runs, each saving its weights to its own file.
global_huber = []
file_name = [f"parameters/loss/huber_{run}" for run in range(1, 6)]
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_huber(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent.
    agent = ddqn_agent.ddqn_agent_huber(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    huber_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "Huber",
    )
    global_huber.append(huber_v_rewards)

In [None]:
# One validation curve per loss function; labels are paired with v_rewards_list by position.
version = "Loss Functions"
labels = ["MSE", "Huber Loss"]
v_rewards_list = [global_mse, global_huber]

plot_validation(v_rewards_list, labels, version)

## Learning Rate

In [None]:
# Learning rate 0.001: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_lr_0001 = []
file_name = ["parameters/lr/0001"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; lr = 0.001 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    lr_0001_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "0.001",
    )
    global_lr_0001.append(lr_0001_v_rewards)

In [None]:
# Learning rate 0.00075: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_lr_000075 = []
file_name = ["parameters/lr/000075"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; lr = 0.00075 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.00075, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    lr_000075_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "0.00075",
    )
    global_lr_000075.append(lr_000075_v_rewards)

In [None]:
# Learning rate 0.0005: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_lr_00005 = []
file_name = ["parameters/lr/00005"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; lr = 0.0005 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    lr_00005_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "0.0005",
    )
    global_lr_00005.append(lr_00005_v_rewards)

In [None]:
# Learning rate 0.00025: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_lr_000025 = []
file_name = ["parameters/lr/000025"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; lr = 0.00025 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.00025, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    lr_000025_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "0.00025",
    )
    global_lr_000025.append(lr_000025_v_rewards)

In [None]:
# Learning rate 0.0001: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_lr_00001 = []
file_name = ["parameters/lr/00001"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; lr = 0.0001 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    lr_00001_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "0.0001",
    )
    global_lr_00001.append(lr_00001_v_rewards)

In [None]:
# One validation curve per learning rate; labels are paired with v_rewards_list by position.
version = "Learning Rates"
labels = ["0.001", "0.00075", "0.0005", "0.00025", "0.0001"]
v_rewards_list = [global_lr_0001, global_lr_000075, global_lr_00005, global_lr_000025, global_lr_00001]

plot_validation(v_rewards_list, labels, version)

## Epsilon Minimum

In [None]:
# Epsilon minimum 0.1: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_em_01 = []
file_name = ["parameters/em/01"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; epsilon_min = 0.1 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.1, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    em_01_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "0.1",
    )
    global_em_01.append(em_01_v_rewards)

In [None]:
# Epsilon minimum 0.05: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_em_005 = []
file_name = ["parameters/em/005"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; epsilon_min = 0.05 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.05, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    em_005_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "0.05",
    )
    global_em_005.append(em_005_v_rewards)

In [None]:
# Epsilon minimum 0.01: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_em_001 = []
file_name = ["parameters/em/001"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; epsilon_min = 0.01 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    em_001_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "0.01",
    )
    global_em_001.append(em_001_v_rewards)

In [None]:
# Epsilon minimum 0.001: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_em_0001 = []
file_name = ["parameters/em/0001"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; epsilon_min = 0.001 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.001, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    em_0001_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "0.001",
    )
    global_em_0001.append(em_0001_v_rewards)

In [None]:
# One validation curve per epsilon minimum; labels are paired with v_rewards_list by position.
version = "Epsilon Minimum"
labels = ["0.1", "0.05", "0.01", "0.001"]
v_rewards_list = [global_em_01, global_em_005, global_em_001, global_em_0001]

plot_validation(v_rewards_list, labels, version)

## Decay Steps

In [None]:
# Decay steps 100000: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_ds_100000 = []
file_name = ["parameters/ds/100000"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; decay_steps = 100000 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 100000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ds_100000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "100000",
    )
    global_ds_100000.append(ds_100000_v_rewards)

In [None]:
# Decay steps 50000: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_ds_50000 = []
file_name = ["parameters/ds/50000"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; decay_steps = 50000 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 50000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ds_50000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "50000",
    )
    global_ds_50000.append(ds_50000_v_rewards)

In [None]:
# Decay steps 20000: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_ds_20000 = []
file_name = ["parameters/ds/20000"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; decay_steps = 20000 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ds_20000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "20000",
    )
    global_ds_20000.append(ds_20000_v_rewards)

In [None]:
# Decay steps 10000: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_ds_10000 = []
file_name = ["parameters/ds/10000"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; decay_steps = 10000 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 10000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ds_10000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "10000",
    )
    global_ds_10000.append(ds_10000_v_rewards)

In [None]:
# Decay steps 1000: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_ds_1000 = []
file_name = ["parameters/ds/1000"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; decay_steps = 1000 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 1000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ds_1000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "1000",
    )
    global_ds_1000.append(ds_1000_v_rewards)

In [None]:
# One validation curve per decay-steps setting; labels are paired with v_rewards_list by position.
version = "Decay Steps"
labels = ["100000", "50000", "20000", "10000", "1000"]
v_rewards_list = [global_ds_100000, global_ds_50000, global_ds_20000, global_ds_10000, global_ds_1000]

plot_validation(v_rewards_list, labels, version)

## Memory Size

In [None]:
# Buffer size 100000: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_buffer_100000 = []
file_name = ["parameters/buffer/100000"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; buffer_size = 100000 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 100000, batch_size = 128, device = device,
    )

    buffer_100000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "100000",
    )
    global_buffer_100000.append(buffer_100000_v_rewards)

In [None]:
# Buffer size 50000: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_buffer_50000 = []
file_name = ["parameters/buffer/50000"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; buffer_size = 50000 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 50000, batch_size = 128, device = device,
    )

    buffer_50000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "50000",
    )
    global_buffer_50000.append(buffer_50000_v_rewards)

In [None]:
# Buffer size 20000: five runs. All runs save to the same weights file,
# so each run overwrites the weights saved by the previous one.
global_buffer_20000 = []
file_name = ["parameters/buffer/20000"] * 5
for i in range(5):
    env = mc.mountain_car_discrete_v1()

    # Validation agent: epsilon = 0 and lr = 0; `testing` loads the saved weights into it.
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent; buffer_size = 20000 is the value under test.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    buffer_20000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1000, version = "20000",
    )
    global_buffer_20000.append(buffer_20000_v_rewards)

In [None]:
# Replay-buffer sweep, buffer_size = 10000: five training runs whose results
# are collected in global_buffer_10000.
global_buffer_10000 = []
# Every run is given the same path string.
file_name = ["parameters/buffer/10000"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; buffer_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 10000, batch_size = 128, device = device,
    )

    buffer_10000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 1000, version = "10000",
    )

    global_buffer_10000.append(buffer_10000_v_rewards)

In [None]:
# Replay-buffer sweep, buffer_size = 1000: five training runs whose results
# are collected in global_buffer_1000.
global_buffer_1000 = []
# Every run is given the same path string.
file_name = ["parameters/buffer/1000"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; buffer_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 1000, batch_size = 128, device = device,
    )

    buffer_1000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 1000, version = "1000",
    )

    global_buffer_1000.append(buffer_1000_v_rewards)

In [None]:
# Plot the five buffer-size sweeps together; labels[k] names v_rewards_list[k].
v_rewards_list = [
    global_buffer_100000,
    global_buffer_50000,
    global_buffer_20000,
    global_buffer_10000,
    global_buffer_1000,
]
labels = [
    "100000",
    "50000",
    "20000",
    "10000",
    "1000",
]
version = "Memory Size"

plot_validation(
    v_rewards_list = v_rewards_list,
    labels = labels,
    version = version,
)

## Batch Size

In [None]:
# Batch-size sweep, batch_size = 512: five training runs whose results are
# collected in global_batch_512.
global_batch_512 = []
# Every run is given the same path string.
file_name = ["parameters/batch/512"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; batch_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 512, device = device,
    )

    batch_512_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 1000, version = "512",
    )

    global_batch_512.append(batch_512_v_rewards)

In [None]:
# Batch-size sweep, batch_size = 256: five training runs whose results are
# collected in global_batch_256.
global_batch_256 = []
# Every run is given the same path string.
file_name = ["parameters/batch/256"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; batch_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 256, device = device,
    )

    batch_256_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 1000, version = "256",
    )

    global_batch_256.append(batch_256_v_rewards)

In [None]:
# Batch-size sweep, batch_size = 128: five training runs whose results are
# collected in global_batch_128.
global_batch_128 = []
# Every run is given the same path string.
file_name = ["parameters/batch/128"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; batch_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    batch_128_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 1000, version = "128",
    )

    global_batch_128.append(batch_128_v_rewards)

In [None]:
# Batch-size sweep, batch_size = 64: five training runs whose results are
# collected in global_batch_64.
global_batch_64 = []
# Every run is given the same path string.
file_name = ["parameters/batch/64"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; batch_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 64, device = device,
    )

    batch_64_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 1000, version = "64",
    )

    global_batch_64.append(batch_64_v_rewards)

In [None]:
# Batch-size sweep, batch_size = 32: five training runs whose results are
# collected in global_batch_32.
global_batch_32 = []
# Every run is given the same path string.
file_name = ["parameters/batch/32"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; batch_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 32, device = device,
    )

    batch_32_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 1000, version = "32",
    )

    global_batch_32.append(batch_32_v_rewards)

In [None]:
# Plot the five batch-size sweeps together; labels[k] names v_rewards_list[k].
v_rewards_list = [
    global_batch_512,
    global_batch_256,
    global_batch_128,
    global_batch_64,
    global_batch_32,
]
labels = [
    "512",
    "256",
    "128",
    "64",
    "32",
]
version = "Batch Size"

plot_validation(
    v_rewards_list = v_rewards_list,
    labels = labels,
    version = version,
)

## Target Network Update Frequency

In [None]:
# Target-update sweep, target_update_steps = 10000: five training runs whose
# results are collected in global_ut_10000.
global_ut_10000 = []
# Every run is given the same path string.
file_name = ["parameters/ut/10000"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; its settings are fixed, only target_update_steps varies.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ut_10000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 10000, version = "10000",
    )

    global_ut_10000.append(ut_10000_v_rewards)

In [None]:
# Target-update sweep, target_update_steps = 5000: five training runs whose
# results are collected in global_ut_5000.
global_ut_5000 = []
# Every run is given the same path string.
file_name = ["parameters/ut/5000"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; its settings are fixed, only target_update_steps varies.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ut_5000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 5000, version = "5000",
    )

    global_ut_5000.append(ut_5000_v_rewards)

In [None]:
# Target-update sweep, target_update_steps = 1000: five training runs whose
# results are collected in global_ut_1000.
global_ut_1000 = []
# Every run is given the same path string.
file_name = ["parameters/ut/1000"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; its settings are fixed, only target_update_steps varies.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ut_1000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 1000, version = "1000",
    )

    global_ut_1000.append(ut_1000_v_rewards)

In [None]:
# Target-update sweep, target_update_steps = 500: five training runs whose
# results are collected in global_ut_500.
global_ut_500 = []
# Every run is given the same path string.
file_name = ["parameters/ut/500"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; its settings are fixed, only target_update_steps varies.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ut_500_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 500, version = "500",
    )

    global_ut_500.append(ut_500_v_rewards)

In [None]:
# Target-update sweep, target_update_steps = 100: five training runs whose
# results are collected in global_ut_100.
global_ut_100 = []
# Every run is given the same path string.
file_name = ["parameters/ut/100"] * 5
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent; its settings are fixed, only target_update_steps varies.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 20000,
        buffer_size = 20000, batch_size = 128, device = device,
    )

    ut_100_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 100, version = "100",
    )

    global_ut_100.append(ut_100_v_rewards)

In [None]:
# Plot the five target-update sweeps together; labels[k] names v_rewards_list[k].
v_rewards_list = [
    global_ut_10000,
    global_ut_5000,
    global_ut_1000,
    global_ut_500,
    global_ut_100,
]
labels = [
    "10000",
    "5000",
    "1000",
    "500",
    "100",
]
version = "Target Network Update Frequency"

plot_validation(
    v_rewards_list = v_rewards_list,
    labels = labels,
    version = version,
)

# Training

In [None]:
# Ten training runs on mountain_car_discrete_v1; each run gets its own
# file name and its result is collected in global_train_v1.
global_train_v1 = []
# Produces "mcv1_1" ... "mcv1_10".
file_name = [f"mcv1_{run}" for run in range(1, 11)]
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v1()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent used for the ten runs in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.05, decay_steps = 50000,
        buffer_size = 100000, batch_size = 512, device = device,
    )

    trainv1_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = run_path, target_update_steps = 1000, version = " ",
    )

    global_train_v1.append(trainv1_v_rewards)

In [None]:
# Plot the mountain_car_discrete_v1 runs as a single labelled group.
v_rewards_list = [
    global_train_v1,
]
labels = [
    "MC.V1",
]
version = "Mountain Car Version 1"

plot_validation(
    v_rewards_list = v_rewards_list,
    labels = labels,
    version = version,
)

In [None]:
# Ten training runs on mountain_car_discrete_v2; each run gets its own
# file name and its result is collected in global_train_v2.
global_train_v2 = []
# Produces "mcv2_1" ... "mcv2_10".
file_name = [f"mcv2_{run}" for run in range(1, 11)]
for i, run_path in enumerate(file_name):
    # Fresh environment for each run.
    env = mc.mountain_car_discrete_v2()

    # Companion agent passed to training() as v_agent; all numeric
    # hyperparameters are zero except decay_steps = 1.
    # NOTE(review): presumably evaluation-only -- confirm in training().
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Learning agent used for the ten runs in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = mountain_car_model_1, state_dim = 2, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.05, decay_steps = 50000,
        buffer_size = 100000, batch_size = 512, device = device,
    )

    # NOTE(review): 1350 episodes here versus 1500 in the v1 cell -- confirm
    # the difference is intentional.
    trainv2_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1350,
        file_name = run_path, target_update_steps = 1000, version = " ",
    )

    global_train_v2.append(trainv2_v_rewards)

In [None]:
# Plot the mountain_car_discrete_v2 runs as a single labelled group.
v_rewards_list = [
    global_train_v2,
]
labels = [
    "MC.V2",
]
version = "Mountain Car Version 2"

plot_validation(
    v_rewards_list = v_rewards_list,
    labels = labels,
    version = version,
)

In [None]:
# Hand the v1 runs to best_curve() with one label per run
# ("mcv1_1" ... "mcv1_10", the same strings used as file names for those runs).
v_rewards_list = [
    global_train_v1,
]
labels = [f"mcv1_{run}" for run in range(1, 11)]

best_curve(
    v_rewards_list = v_rewards_list,
    labels = labels,
)

In [None]:
# Hand the v2 runs to best_curve() with one label per run
# ("mcv2_1" ... "mcv2_10", the same strings used as file names for those runs).
v_rewards_list = [
    global_train_v2,
]
labels = [f"mcv2_{run}" for run in range(1, 11)]

best_curve(
    v_rewards_list = v_rewards_list,
    labels = labels,
)