In [None]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

import environments.house_temp as ht
from useful import trees
from agents import dqn_agent
from agents import ddqn_agent
from agents import pddqn_agent

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# Models

In [None]:
class house_temp_model_3(nn.Module):
    """
    Fully connected network with two hidden layers of 128 units and ReLU activations
    -----
    Input
    -----
    n_observations: number of input features of the first layer
    n_actions: number of outputs of the last layer
    """
    def __init__(self, n_observations, n_actions):
        super().__init__()
        hidden_units = 128
        layers = [
            nn.Linear(n_observations, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_actions),
        ]
        # Stored as `model` so the state_dict keys stay "model.<index>.weight/bias"
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Passes x through the stacked layers and returns the result."""
        output = self.model(x)
        return output

In [None]:
class duelling_dqn_htm3(nn.Module):
    """
    Duelling network: one shared layer followed by a state-value stream and an advantage stream
    -----
    Input
    -----
    n_observations: number of input features of the shared layer
    n_actions: number of outputs of the advantage stream
    """
    def __init__(self, n_observations, n_actions):
        super().__init__()
        self.n_observations = n_observations
        hidden_units = 128

        # Shared layer feeding both streams
        self.model = nn.Sequential(nn.Linear(n_observations, hidden_units), nn.ReLU())

        # Stream producing one value per state
        self.state_value = nn.Sequential(
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
        )

        # Stream producing one value per action
        self.advantage = nn.Sequential(
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_actions),
        )

    def forward(self, x):
        """
        Combines both streams as value + (advantage - mean advantage over the action axis).
        A single 1-D observation of length n_observations is first turned into a batch of one,
        so the result always keeps a leading batch axis.
        """
        is_single_observation = x.dim() == 1 and x.size(0) == self.n_observations
        if is_single_observation:
            x = x.unsqueeze(0)

        features = self.model(x)
        state_values = self.state_value(features)
        advantages = self.advantage(features)

        # Subtracting the per-row mean centres the advantages before adding the state value
        centred_advantages = advantages - advantages.mean(dim = 1, keepdim = True)
        return state_values + centred_advantages

In [None]:
class smaller_model_3(nn.Module):
    """
    Fully connected network with two hidden layers of 64 units and ReLU activations
    -----
    Input
    -----
    n_observations: number of input features of the first layer
    n_actions: number of outputs of the last layer
    """
    def __init__(self, n_observations, n_actions):
        super().__init__()
        hidden_units = 64
        layers = [
            nn.Linear(n_observations, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_actions),
        ]
        # Stored as `model` so the state_dict keys stay "model.<index>.weight/bias"
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Passes x through the stacked layers and returns the result."""
        output = self.model(x)
        return output

In [None]:
class smaller_model_duelling(nn.Module):
    """
    Duelling network with 64-unit layers: one shared layer, a state-value stream and an advantage stream
    -----
    Input
    -----
    n_observations: number of input features of the shared layer
    n_actions: number of outputs of the advantage stream
    """
    def __init__(self, n_observations, n_actions):
        super().__init__()
        self.n_observations = n_observations
        hidden_units = 64

        # Shared layer feeding both streams
        self.model = nn.Sequential(nn.Linear(n_observations, hidden_units), nn.ReLU())

        # Stream producing one value per state
        self.state_value = nn.Sequential(
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
        )

        # Stream producing one value per action
        self.advantage = nn.Sequential(
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_actions),
        )

    def forward(self, x):
        """
        Combines both streams as value + (advantage - mean advantage over the action axis).
        A single 1-D observation of length n_observations is first turned into a batch of one,
        so the result always keeps a leading batch axis.
        """
        is_single_observation = x.dim() == 1 and x.size(0) == self.n_observations
        if is_single_observation:
            x = x.unsqueeze(0)

        features = self.model(x)
        state_values = self.state_value(features)
        advantages = self.advantage(features)

        # Subtracting the per-row mean centres the advantages before adding the state value
        centred_advantages = advantages - advantages.mean(dim = 1, keepdim = True)
        return state_values + centred_advantages

# Functions

In [None]:
def testing(env, agent, n_episodes, file_name, global_episode, render):
    """
    Testing agents for the house temperature environment 
    -----
    Input
    -----
    env: gym environment
    agent: training agent
    n_episodes: number of episodes to run
    file_name: file name for saving the model 
    global_episode: the current episode in training
    render: if the episode should be printed
    ------
    Output
    ------
    reward_list: mean reward for all validation episodes
    """
    agent.main_model.load_state_dict(torch.load(f"model_weights/house_temp/{file_name}.pth"))
    agent.main_model.eval()
    reward_list, render_count = [], 0

    for episode in range(n_episodes):
        state, _ = env.reset()
        total_reward = 0
        
        for steps in range(env.max_steps):            
            action = agent.act(state)
            next_state, reward, termination, truncation, _ = env.step(action)
            state = next_state
            total_reward += reward

            if (global_episode + 1) % 100 == 0 and render and render_count == 0:
                print(f"Episode: {global_episode + 1} | Reward: {reward:.2f}")
                env.render()
            
            if termination or truncation:
                break

        render_count = 1
        reward_list.append(total_reward)

    return np.mean(reward_list)

In [None]:
def training(env, agent, v_agent, n_episodes, file_name, target_update_steps, title, render = False):
    """
    Training agents for the house temperature environment
    -----
    Input
    -----
    env: gym environment
    agent: training agent
    v_agent: validation agent
    n_episodes: number of episodes to run
    file_name: file name (without .pth, relative to model_weights/house_temp) for saving the model
    target_update_steps: how many steps to update the target model
    title: name of the graph
    render: boolean that controls if the states should be shown
    ------
    Output
    ------
    validation_rewards: a list of average rewards per set of validation episodes
    """
    import os  # local import: only needed here to prepare the checkpoint folder

    weights_path = f"model_weights/house_temp/{file_name}.pth"
    # torch.save does not create missing folders, so make sure the target folder
    # (file_name may contain sub-folders) exists before the first checkpoint is written
    os.makedirs(os.path.dirname(weights_path), exist_ok = True)

    reward_list, validation_rewards, global_steps = [], [], 0
    for episode in range(n_episodes):
        state, _ = env.reset()
        total_reward = 0
        
        for steps in range(env.max_steps):
            # The target model is refreshed on a global step count, not per episode
            global_steps += 1
            if global_steps % target_update_steps == 0:
                agent.update_target()
            
            action = agent.act(state)
            next_state, reward, termination, truncation, _ = env.step(action)
            # Only `termination` is stored with the transition; truncation just ends the episode loop
            agent.update_memory(state, action, reward, next_state, termination)
        
            agent.train_step()
            agent.decay_epsilon()
            state = next_state
            total_reward += reward
            
            if termination or truncation:
                break
    
        reward_list.append(total_reward)
        # Checkpoint after every episode; testing() reads this file back into the validation agent
        torch.save(agent.main_model.state_dict(), weights_path)
        
        # Every 50 training episodes, run 100 validation episodes on the saved weights
        if (episode + 1) % 50 == 0:
            validation_reward = testing(env, v_agent, 100, file_name, episode, render)
            print(f"Episode {episode - 48} - {episode + 1} | Average Reward: {validation_reward:.2f}")
            validation_rewards.append(validation_reward)

    plot_training(reward_list, title, n_episodes)

    return validation_rewards

In [None]:
def plot_training(rewards, title, episodes):
    """
    Scatter plot of the training reward of every episode
    -----
    Input
    -----
    rewards: a list of rewards, one per episode
    title: a string appended to the plot title
    episodes: integer value for the total episodes (length of the x axis)
    """
    episode_axis = list(range(episodes))

    fig, axes = plt.subplots(figsize = (20, 3.333))
    axes.scatter(episode_axis, rewards, s = 10)

    # Labelling
    axes.set_xlabel("Episodes")
    axes.set_ylabel("Training Reward")
    axes.set_title(f"Learning Progress | {title}")
    axes.grid(alpha = 0.25)

    plt.tight_layout()
    plt.show()

In [None]:
def plot_validation(v_rewards, legends, title, size = (4.5, 3)):
    """
    Plots the mean validation reward of each group of runs with a one-standard-deviation band
    -----
    Input
    -----
    v_rewards: a list of groups, each group holding the validation rewards of its training runs
    legends: a list of strings, one label per group
    title: a string for the title
    size: a tuple (width, height) for the dimensions of the graph
    """
    # One point per validation round; rounds are placed 50 episodes apart
    n_points = len(v_rewards[0][0])
    x = [50 * round_number for round_number in range(1, n_points + 1)]

    width, height = size[0], size[1]
    fig, axes = plt.subplots(figsize = (width, height))

    for validation, legend in zip(v_rewards, legends):
        # Statistics are taken across the runs of the group (axis 0)
        mean = np.mean(validation, axis = 0)
        std = np.std(validation, axis = 0)
        lower, upper = mean - std, mean + std

        axes.plot(x, mean, label = legend)
        axes.fill_between(x, lower, upper, alpha = 0.25)

    axes.set_xlabel("Episodes")
    axes.set_ylabel("Average Validation Reward")
    axes.set_title(f"{title}")
    axes.legend(fontsize = 8)
    axes.grid(alpha = 0.25)
    axes.set_ylim(-300, 0)

    plt.tight_layout()
    plt.savefig("_ht.png")
    plt.show()

In [None]:
def best_curve(v_rewards_list, labels):
    """
    Prints, for every run, its mean squared distance to the mean curve of its group
    (the smallest value marks the run closest to the mean)
    -----
    Input
    -----
    v_rewards_list: a list of groups, each group holding the validation rewards of its training runs
    labels: a list of strings, one label per run position
    """
    for validation in v_rewards_list:
        global_mean = np.mean(validation, axis = 0)
        for run_index, run in enumerate(validation):
            squared_error = (run - global_mean) ** 2
            distance = np.mean(squared_error, axis = 0)
            print(f"{distance:.2f} | {labels[run_index]}")

# Algorithms

In [None]:
# DQN: five runs, each with a fresh environment and agents, saved to its own checkpoint file.
file_name = [
    "algorithms/dqn/mse_1",
    "algorithms/dqn/mse_2",
    "algorithms/dqn/mse_3",
    "algorithms/dqn/mse_4",
    "algorithms/dqn/mse_5",
]
global_dqn = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = dqn_agent.dqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = dqn_agent.dqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    dqn_mse_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "DQN",
    )
    global_dqn.append(dqn_mse_v_rewards)

In [None]:
# DDQN: five runs, each with a fresh environment and agents, saved to its own checkpoint file.
file_name = [
    "algorithms/ddqn/mse_1",
    "algorithms/ddqn/mse_2",
    "algorithms/ddqn/mse_3",
    "algorithms/ddqn/mse_4",
    "algorithms/ddqn/mse_5",
]
global_ddqn = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    ddqn_mse_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "DDQN",
    )
    global_ddqn.append(ddqn_mse_v_rewards)

In [None]:
# PDDQN: five runs, each with a fresh environment and agents, saved to its own checkpoint file.
file_name = [
    "algorithms/pddqn/mse_1",
    "algorithms/pddqn/mse_2",
    "algorithms/pddqn/mse_3",
    "algorithms/pddqn/mse_4",
    "algorithms/pddqn/mse_5",
]
global_pddqn = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = pddqn_agent.pddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, alpha = 0, beta = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 1, batch_size = 0, n_episodes = 1, device = device,
    )

    # Training agent; n_episodes matches the number of training episodes below
    agent = pddqn_agent.pddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, alpha = 0.7, beta = 0.5, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01,
        decay_steps = 57600, buffer_size = 28800, batch_size = 128, n_episodes = 1500, device = device,
    )

    pddqn_mse_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "PDDQN",
    )
    global_pddqn.append(pddqn_mse_v_rewards)

In [None]:
# Duelling DDQN: five runs, each with a fresh environment and agents, saved to its own checkpoint file.
file_name = [
    "algorithms/dddqn/mse_1",
    "algorithms/dddqn/mse_2",
    "algorithms/dddqn/mse_3",
    "algorithms/dddqn/mse_4",
    "algorithms/dddqn/mse_5",
]
global_dddqn = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = duelling_dqn_htm3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent: the DDQN agent combined with the duelling network
    agent = ddqn_agent.ddqn_agent_mse(
        model = duelling_dqn_htm3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    dddqn_mse_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "DDDQN",
    )
    global_dddqn.append(dddqn_mse_v_rewards)

In [None]:
# Compare the four algorithms: one group of five runs per legend entry.
title = "Algorithms"
legends = ["DQN", "DDQN", "PER", "Duelling"]
v_rewards = [
    global_dqn,
    global_ddqn,
    global_pddqn,
    global_dddqn,
]

plot_validation(
    v_rewards = v_rewards,
    legends = legends,
    title = title,
)

In [None]:
# Single DQN run on the smaller network; weights go to the scratch checkpoint "to_delete".
env = ht.house_temp_v4_1() 

# Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
v_agent = dqn_agent.dqn_agent_mse(model = smaller_model_3, state_dim = 9, action_dim = env.action_space.n, gamma = 0, lr = 0, 
                                  epsilon = 0, epsilon_min = 0, decay_steps = 1, buffer_size = 0, batch_size = 0, device = device)

agent = dqn_agent.dqn_agent_mse(model = smaller_model_3, state_dim = 9, action_dim = env.action_space.n, gamma = 0.99, lr = 0.0001,
                                epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600, buffer_size = 28800, batch_size = 128, device = device)

# training() requires `title`, draws the learning-progress plot itself and returns only the
# validation rewards, so the result is bound to a single name and no separate plot_training call is made.
dqn_mse_v_rewards = training(env = env, agent = agent, v_agent = v_agent, n_episodes = 1500, file_name = "to_delete", 
                             target_update_steps = 1440, title = "Deep Q-Networks", render = True)

In [None]:
# Single DDQN run on the smaller network; weights go to the scratch checkpoint "to_delete".
env = ht.house_temp_v4_1() 

# Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
v_agent = ddqn_agent.ddqn_agent_mse(model = smaller_model_3, state_dim = 9, action_dim = env.action_space.n, gamma = 0, lr = 0, 
                                    epsilon = 0, epsilon_min = 0, decay_steps = 1, buffer_size = 0, batch_size = 0, device = device)

agent = ddqn_agent.ddqn_agent_mse(model = smaller_model_3, state_dim = 9, action_dim = env.action_space.n, gamma = 0.99, lr = 0.0001,
                                  epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600, buffer_size = 28800, batch_size = 128, device = device)

# training() requires `title`, draws the learning-progress plot itself and returns only the
# validation rewards, so the result is bound to a single name and no separate plot_training call is made.
ddqn_mse_v_rewards = training(env = env, agent = agent, v_agent = v_agent, n_episodes = 1500, file_name = "to_delete", 
                              target_update_steps = 1440, title = "Double Deep Q-networks", render = True)

In [None]:
# Single prioritized-replay run on the smaller network; weights go to the scratch checkpoint "to_delete".
env = ht.house_temp_v4_1()

# Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
v_agent = pddqn_agent.pddqn_agent_mse(model = smaller_model_3, state_dim = 9, action_dim = env.action_space.n, gamma = 0, alpha = 0, 
                                      beta = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1, buffer_size = 1, batch_size = 0, 
                                      n_episodes = 1, device = device)

# n_episodes is kept equal to the 1500 training episodes below, as in the five-run PDDQN cell
# NOTE(review): confirm how pddqn_agent uses n_episodes
agent = pddqn_agent.pddqn_agent_mse(model = smaller_model_3, state_dim = 9, action_dim = env.action_space.n, gamma = 0.99, alpha = 0.7,
                                    beta = 0.5, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600, buffer_size = 28800, 
                                    batch_size = 128, n_episodes = 1500, device = device)

# training() requires `title`, draws the learning-progress plot itself and returns only the
# validation rewards, so the result is bound to a single name and no separate plot_training call is made.
pddqn_mse_v_rewards = training(env = env, agent = agent, v_agent = v_agent, n_episodes = 1500, file_name = "to_delete", 
                               target_update_steps = 1440, title = "Prioritized Replay", render = True)

In [None]:
# Single duelling DDQN run on the smaller duelling network; weights go to the scratch checkpoint "to_delete".
env = ht.house_temp_v4_1() 

# Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
v_agent = ddqn_agent.ddqn_agent_mse(model = smaller_model_duelling, state_dim = 9, action_dim = env.action_space.n, gamma = 0, lr = 0, 
                                    epsilon = 0, epsilon_min = 0, decay_steps = 1, buffer_size = 0, batch_size = 0, device = device)

agent = ddqn_agent.ddqn_agent_mse(model = smaller_model_duelling, state_dim = 9, action_dim = env.action_space.n, gamma = 0.99, lr = 0.0001,
                                  epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600, buffer_size = 28800, batch_size = 128, device = device)

# training() requires `title`, draws the learning-progress plot itself and returns only the
# validation rewards, so the result is bound to a single name and no separate plot_training call is made.
dddqn_mse_v_rewards = training(env = env, agent = agent, v_agent = v_agent, n_episodes = 1500, file_name = "to_delete", 
                               target_update_steps = 1440, title = "Duelling", render = True)

In [None]:
# plot_validation expects one group of runs per legend entry (it reads v_rewards[0][0] as a single run
# and averages over axis 0), so each single run is wrapped in a one-element group; the std band is then zero.
v_rewards = [[dqn_mse_v_rewards], [ddqn_mse_v_rewards], [pddqn_mse_v_rewards], [dddqn_mse_v_rewards]]
legends = ["DQN", "DDQN", "PER", "Duelling"]
title = "Algorithms"

plot_validation(v_rewards = v_rewards, legends = legends, title = title)

# MSE VS Huber

In [None]:
# MSE loss: five DDQN runs, each with a fresh environment and agents, saved to its own checkpoint file.
file_name = [
    "parameters/loss/mse_1",
    "parameters/loss/mse_2",
    "parameters/loss/mse_3",
    "parameters/loss/mse_4",
    "parameters/loss/mse_5",
]
global_mse = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    mse_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "MSE",
    )
    global_mse.append(mse_v_rewards)

In [None]:
# Huber loss: five DDQN runs, each with a fresh environment and agents, saved to its own checkpoint file.
file_name = [
    "parameters/loss/huber_1",
    "parameters/loss/huber_2",
    "parameters/loss/huber_3",
    "parameters/loss/huber_4",
    "parameters/loss/huber_5",
]
global_huber = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_huber(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_huber(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    huber_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "Huber",
    )
    global_huber.append(huber_v_rewards)

In [None]:
# Compare the two loss functions: one group of five runs per legend entry.
title = "Loss Functions"
legends = ["MSE", "Huber"]
v_rewards = [
    global_mse,
    global_huber,
]

plot_validation(
    v_rewards = v_rewards,
    legends = legends,
    title = title,
)

# Learning Rates

In [None]:
# Learning rate 0.0001: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/lr/00001"] * 5
global_lr_00001 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    lr_00001_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "0.0001",
    )
    global_lr_00001.append(lr_00001_v_rewards)

In [None]:
# Learning rate 0.00025: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/lr/000025"] * 5
global_lr_000025 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.00025, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    lr_000025_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "0.00025",
    )
    global_lr_000025.append(lr_000025_v_rewards)

In [None]:
# Learning rate 0.0005: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/lr/00005"] * 5
global_lr_00005 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0005, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    lr_00005_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "0.0005",
    )
    global_lr_00005.append(lr_00005_v_rewards)

In [None]:
# Learning rate 0.00075: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/lr/000075"] * 5
global_lr_000075 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.00075, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    lr_000075_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "0.00075",
    )
    global_lr_000075.append(lr_000075_v_rewards)

In [None]:
# Learning rate 0.001: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/lr/0001"] * 5
global_lr_0001 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    lr_0001_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "0.001",
    )
    global_lr_0001.append(lr_0001_v_rewards)

In [None]:
# Compare the five learning rates: one group of five runs per legend entry.
title = "Learning Rates"
legends = ["0.0001", "0.00025", "0.0005", "0.00075", "0.001"]
v_rewards = [
    global_lr_00001,
    global_lr_000025,
    global_lr_00005,
    global_lr_000075,
    global_lr_0001,
]

plot_validation(
    v_rewards = v_rewards,
    legends = legends,
    title = title,
)

# Epsilon Minimum

In [None]:
# Epsilon minimum 0.001: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/em/0001"] * 5
global_em_0001 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.001, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    em_0001_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "0.001",
    )
    global_em_0001.append(em_0001_v_rewards)

In [None]:
# Epsilon minimum 0.01: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/em/001"] * 5
global_em_001 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    em_001_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "0.01",
    )
    global_em_001.append(em_001_v_rewards)

In [None]:
# Epsilon minimum 0.05: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/em/005"] * 5
global_em_005 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.05, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    em_005_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "0.05",
    )
    global_em_005.append(em_005_v_rewards)

In [None]:
# Epsilon minimum 0.1: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/em/01"] * 5
global_em_01 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.1, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    em_01_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "0.1",
    )
    global_em_01.append(em_01_v_rewards)

In [None]:
# Compare the four epsilon minimums: one group of five runs per legend entry.
title = "Epsilon Minimum"
legends = ["0.001", "0.01", "0.05", "0.1"]
v_rewards = [
    global_em_0001,
    global_em_001,
    global_em_005,
    global_em_01,
]

plot_validation(
    v_rewards = v_rewards,
    legends = legends,
    title = title,
)

# Decay Steps

In [None]:
# Decay steps 2880: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/ds/2880"] * 5
global_ds_2880 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 2880,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    ds_2880_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "2880",
    )
    global_ds_2880.append(ds_2880_v_rewards)

In [None]:
# Decay steps 14400: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/ds/14400"] * 5
global_ds_14400 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 14400,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    ds_14400_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "14400",
    )
    global_ds_14400.append(ds_14400_v_rewards)

In [None]:
# Decay steps 28800: five DDQN runs.
# All five runs share one checkpoint path, so each run overwrites the weights saved by the previous one.
file_name = ["parameters/ds/28800"] * 5
global_ds_28800 = []

for i in range(len(file_name)):
    env = ht.house_temp_v4_1()

    # Validation agent: epsilon = 0 and placeholder learning settings; testing() loads the saved weights into it
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0, lr = 0, epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0, device = device,
    )

    # Training agent
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3, state_dim = 9, action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001, epsilon = 1.0, epsilon_min = 0.01, decay_steps = 28800,
        buffer_size = 28800, batch_size = 128, device = device,
    )

    ds_28800_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent, n_episodes = 1500,
        file_name = file_name[i], target_update_steps = 1440, title = "28800",
    )
    global_ds_28800.append(ds_28800_v_rewards)

In [None]:
# Decay-steps sweep, decay_steps = 57600: five runs on house_temp_v4_1.
global_ds_57600 = []
file_name = ["parameters/ds/57600"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; decay_steps is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128,
        device = device,
    )

    ds_57600_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "57600",
    )
    # Keep the value returned by training() for each run.
    global_ds_57600.append(ds_57600_v_rewards)

In [None]:
# Decay-steps sweep, decay_steps = 144000: five runs on house_temp_v4_1.
global_ds_144000 = []
file_name = ["parameters/ds/144000"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; decay_steps is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 144000,
        buffer_size = 28800, batch_size = 128,
        device = device,
    )

    ds_144000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "144000",
    )
    # Keep the value returned by training() for each run.
    global_ds_144000.append(ds_144000_v_rewards)

In [None]:
# Plot the collected results of the five decay-step settings side by side.
title = "Decay Steps"
legends = [str(steps) for steps in (2880, 14400, 28800, 57600, 144000)]
v_rewards = [
    global_ds_2880,
    global_ds_14400,
    global_ds_28800,
    global_ds_57600,
    global_ds_144000,
]

plot_validation(v_rewards = v_rewards, legends = legends, title = title)

# Memory Size

In [None]:
# Memory-size sweep, buffer_size = 7200: five runs on house_temp_v4_1.
global_buffer_7200 = []
file_name = ["parameters/buffer/7200"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; buffer_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 7200, batch_size = 128,
        device = device,
    )

    buffer_7200_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "7200",
    )
    # Keep the value returned by training() for each run.
    global_buffer_7200.append(buffer_7200_v_rewards)

In [None]:
# Memory-size sweep, buffer_size = 14400: five runs on house_temp_v4_1.
global_buffer_14400 = []
file_name = ["parameters/buffer/14400"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; buffer_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 14400, batch_size = 128,
        device = device,
    )

    buffer_14400_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "14400",
    )
    # Keep the value returned by training() for each run.
    global_buffer_14400.append(buffer_14400_v_rewards)

In [None]:
# Memory-size sweep, buffer_size = 28800: five runs on house_temp_v4_1.
global_buffer_28800 = []
file_name = ["parameters/buffer/28800"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; buffer_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128,
        device = device,
    )

    buffer_28800_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "28800",
    )
    # Keep the value returned by training() for each run.
    global_buffer_28800.append(buffer_28800_v_rewards)

In [None]:
# Memory-size sweep, buffer_size = 57600: five runs on house_temp_v4_1.
global_buffer_57600 = []
file_name = ["parameters/buffer/57600"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; buffer_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 57600, batch_size = 128,
        device = device,
    )

    buffer_57600_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "57600",
    )
    # Keep the value returned by training() for each run.
    global_buffer_57600.append(buffer_57600_v_rewards)

In [None]:
# Memory-size sweep, buffer_size = 144000: five runs on house_temp_v4_1.
global_buffer_144000 = []
file_name = ["parameters/buffer/144000"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; buffer_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 144000, batch_size = 128,
        device = device,
    )

    buffer_144000_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "144000",
    )
    # Keep the value returned by training() for each run.
    global_buffer_144000.append(buffer_144000_v_rewards)

In [None]:
# Plot the collected results of the five replay-buffer sizes side by side.
title = "Memory Size"
legends = [str(size) for size in (7200, 14400, 28800, 57600, 144000)]
v_rewards = [
    global_buffer_7200,
    global_buffer_14400,
    global_buffer_28800,
    global_buffer_57600,
    global_buffer_144000,
]

plot_validation(v_rewards = v_rewards, legends = legends, title = title)

# Batch Size

In [None]:
# Batch-size sweep, batch_size = 32: five runs on house_temp_v4_1.
global_batch_32 = []
file_name = ["parameters/batch/32"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; batch_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 32,
        device = device,
    )

    batch_32_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "32",
    )
    # Keep the value returned by training() for each run.
    global_batch_32.append(batch_32_v_rewards)

In [None]:
# Batch-size sweep, batch_size = 64: five runs on house_temp_v4_1.
global_batch_64 = []
file_name = ["parameters/batch/64"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; batch_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 64,
        device = device,
    )

    batch_64_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "64",
    )
    # Keep the value returned by training() for each run.
    global_batch_64.append(batch_64_v_rewards)

In [None]:
# Batch-size sweep, batch_size = 128: five runs on house_temp_v4_1.
global_batch_128 = []
file_name = ["parameters/batch/128"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; batch_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128,
        device = device,
    )

    batch_128_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "128",
    )
    # Keep the value returned by training() for each run.
    global_batch_128.append(batch_128_v_rewards)

In [None]:
# Batch-size sweep, batch_size = 256: five runs on house_temp_v4_1.
global_batch_256 = []
file_name = ["parameters/batch/256"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; batch_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 256,
        device = device,
    )

    batch_256_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "256",
    )
    # Keep the value returned by training() for each run.
    global_batch_256.append(batch_256_v_rewards)

In [None]:
# Batch-size sweep, batch_size = 512: five runs on house_temp_v4_1.
global_batch_512 = []
file_name = ["parameters/batch/512"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; batch_size is the value under study in this cell.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 512,
        device = device,
    )

    batch_512_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "512",
    )
    # Keep the value returned by training() for each run.
    global_batch_512.append(batch_512_v_rewards)

In [None]:
# Plot the collected results of the five batch sizes side by side.
title = "Batch Size"
legends = [str(size) for size in (32, 64, 128, 256, 512)]
v_rewards = [
    global_batch_32,
    global_batch_64,
    global_batch_128,
    global_batch_256,
    global_batch_512,
]

plot_validation(v_rewards = v_rewards, legends = legends, title = title)

# Target Network Update Frequency

In [None]:
# Target-update sweep, target_update_steps = 1440: five runs on house_temp_v4_1.
global_uf_1440 = []
file_name = ["parameters/uf/1440"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; identical across this sweep.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128,
        device = device,
    )

    # target_update_steps is the value under study in this cell.
    uf_1440_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 1440, title = "1440",
    )
    # Keep the value returned by training() for each run.
    global_uf_1440.append(uf_1440_v_rewards)

In [None]:
# Target-update sweep, target_update_steps = 2880: five runs on house_temp_v4_1.
global_uf_2880 = []
file_name = ["parameters/uf/2880"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; identical across this sweep.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128,
        device = device,
    )

    # target_update_steps is the value under study in this cell.
    uf_2880_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 2880, title = "2880",
    )
    # Keep the value returned by training() for each run.
    global_uf_2880.append(uf_2880_v_rewards)

In [None]:
# Target-update sweep, target_update_steps = 7200: five runs on house_temp_v4_1.
global_uf_7200 = []
file_name = ["parameters/uf/7200"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; identical across this sweep.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128,
        device = device,
    )

    # target_update_steps is the value under study in this cell.
    uf_7200_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 7200, title = "7200",
    )
    # Keep the value returned by training() for each run.
    global_uf_7200.append(uf_7200_v_rewards)

In [None]:
# Target-update sweep, target_update_steps = 14400: five runs on house_temp_v4_1.
global_uf_14400 = []
file_name = ["parameters/uf/14400"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; identical across this sweep.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128,
        device = device,
    )

    # target_update_steps is the value under study in this cell.
    uf_14400_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 14400, title = "14400",
    )
    # Keep the value returned by training() for each run.
    global_uf_14400.append(uf_14400_v_rewards)

In [None]:
# Target-update sweep, target_update_steps = 28800: five runs on house_temp_v4_1.
global_uf_28800 = []
file_name = ["parameters/uf/28800"] * 5  # NOTE(review): all five runs receive the same file name
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent; identical across this sweep.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.0001,
        epsilon = 1.0, epsilon_min = 0.01, decay_steps = 57600,
        buffer_size = 28800, batch_size = 128,
        device = device,
    )

    # target_update_steps is the value under study in this cell.
    uf_28800_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 28800, title = "28800",
    )
    # Keep the value returned by training() for each run.
    global_uf_28800.append(uf_28800_v_rewards)

In [None]:
# Plot the collected results of the five target-update intervals side by side.
title = "Target Network Update Frequency"
legends = [str(steps) for steps in (1440, 2880, 7200, 14400, 28800)]
v_rewards = [
    global_uf_1440,
    global_uf_2880,
    global_uf_7200,
    global_uf_14400,
    global_uf_28800,
]

plot_validation(v_rewards = v_rewards, legends = legends, title = title)

# Complete Runs

In [None]:
# Complete runs on house_temp_v4_0: ten runs, each with its own file name.
global_htv4_0 = []
file_name = [f"{run_number}_htv4_0" for run_number in range(1, 11)]
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_0()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent with the hyper-parameters used for the complete runs.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.00025,
        epsilon = 1.0, epsilon_min = 0.001, decay_steps = 28800,
        buffer_size = 57600, batch_size = 256,
        device = device,
    )

    htv4_0_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1000, file_name = run_file,
        target_update_steps = 14400, title = " ",
    )
    # Keep the value returned by training() for each run.
    global_htv4_0.append(htv4_0_v_rewards)

In [None]:
# Plot the V4.0 complete runs as a single group.
title = "V4.0"
legends = ["V4.0"]
v_rewards = [global_htv4_0]

plot_validation(v_rewards = v_rewards, legends = legends, title = title)

In [None]:
# Complete runs on house_temp_v4_1: ten runs, each with its own file name.
global_htv4_1 = []
file_name = [f"{run_number}_htv4_1" for run_number in range(1, 11)]
for i, run_file in enumerate(file_name):
    env = ht.house_temp_v4_1()

    # Companion agent handed to training() as v_agent; every learning/exploration
    # setting is zeroed (presumably evaluation-only - confirm in training()).
    v_agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0, lr = 0,
        epsilon = 0, epsilon_min = 0, decay_steps = 1,
        buffer_size = 0, batch_size = 0,
        device = device,
    )

    # Learning agent with the hyper-parameters used for the complete runs.
    agent = ddqn_agent.ddqn_agent_mse(
        model = house_temp_model_3,
        state_dim = 9,
        action_dim = env.action_space.n,
        gamma = 0.99, lr = 0.00025,
        epsilon = 1.0, epsilon_min = 0.001, decay_steps = 28800,
        buffer_size = 57600, batch_size = 256,
        device = device,
    )

    htv4_1_v_rewards = training(
        env = env, agent = agent, v_agent = v_agent,
        n_episodes = 1500, file_name = run_file,
        target_update_steps = 14400, title = " ",
    )
    # Keep the value returned by training() for each run.
    global_htv4_1.append(htv4_1_v_rewards)

In [None]:
# Plot the V4.1 complete runs as a single group.
title = "V4.1"
legends = ["V4.1"]
v_rewards = [global_htv4_1]

plot_validation(v_rewards = v_rewards, legends = legends, title = title)

In [None]:
# Pass the ten V4.0 runs to best_curve, labelled with the file names used in training.
labels = [f"{run_number}_htv4_0" for run_number in range(1, 11)]
v_rewards_list = [global_htv4_0]

best_curve(v_rewards_list = v_rewards_list, labels = labels)

In [None]:
# Pass the ten V4.1 runs to best_curve, labelled with the file names used in training.
labels = [f"{run_number}_htv4_1" for run_number in range(1, 11)]
v_rewards_list = [global_htv4_1]

best_curve(v_rewards_list = v_rewards_list, labels = labels)