In [None]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

import environments.house_temp as ht
from useful import trees
from agents import dqn_agent
from agents import ddqn_agent
from agents import pddqn_agent

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# Models

In [None]:
class house_temp_model_1(nn.Module):
    """
    Fully connected network with one hidden layer of 32 units
    -----
    Input
    -----
    n_observations: size of the input (state) vector
    n_actions: size of the output vector (one value per action)
    """
    def __init__(self, n_observations, n_actions):
        super().__init__()
        # The attribute stays named `model` so state_dict keys ("model.0.weight", ...) are unchanged
        layers = [
            nn.Linear(n_observations, 32),
            nn.ReLU(),
            nn.Linear(32, n_actions),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        # Pass the input through the layer stack
        output = self.model(x)
        return output

In [None]:
class house_temp_model_2(nn.Module):
    """
    Fully connected network with one hidden layer of 128 units
    -----
    Input
    -----
    n_observations: size of the input (state) vector
    n_actions: size of the output vector (one value per action)
    """
    def __init__(self, n_observations, n_actions):
        super().__init__()
        # The attribute stays named `model` so state_dict keys ("model.0.weight", ...) are unchanged
        layers = [
            nn.Linear(n_observations, 128),
            nn.ReLU(),
            nn.Linear(128, n_actions),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        # Pass the input through the layer stack
        output = self.model(x)
        return output

In [None]:
class house_temp_model_3(nn.Module):
    """
    Fully connected network with two hidden layers of 128 units each
    -----
    Input
    -----
    n_observations: size of the input (state) vector
    n_actions: size of the output vector (one value per action)
    """
    def __init__(self, n_observations, n_actions):
        super().__init__()
        # The attribute stays named `model` so state_dict keys ("model.0.weight", ...) are unchanged
        layers = [
            nn.Linear(n_observations, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, n_actions),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        # Pass the input through the layer stack
        output = self.model(x)
        return output

In [None]:
class dueling_dqn_htm3(nn.Module):
    """
    Dueling network: a shared layer feeding a state-value head and an advantage head
    -----
    Input
    -----
    n_observations: size of the input (state) vector
    n_actions: number of outputs of the advantage head
    ------
    Output
    ------
    forward returns value + (advantage - mean advantage over the action axis),
    always with a leading batch axis
    """
    def __init__(self, n_observations, n_actions):
        super().__init__()
        self.n_observations = n_observations

        # Shared first layer used by both heads
        self.model = nn.Sequential(nn.Linear(n_observations, 128), nn.ReLU())

        # Head with a single output per state
        value_layers = [nn.Linear(128, 128), nn.ReLU(), nn.Linear(128, 1)]
        self.state_value = nn.Sequential(*value_layers)

        # Head with one output per action
        advantage_layers = [nn.Linear(128, 128), nn.ReLU(), nn.Linear(128, n_actions)]
        self.advantage = nn.Sequential(*advantage_layers)

    def forward(self, x):
        # A single un-batched state vector is promoted to a batch of one
        if x.dim() == 1 and x.size(0) == self.n_observations:
            x = x.unsqueeze(0)

        features = self.model(x)
        value = self.state_value(features)
        advantage = self.advantage(features)

        # Centre the advantages per row before adding the state value
        centred_advantage = advantage - advantage.mean(dim = 1, keepdim = True)
        return value + centred_advantage

# Functions

In [None]:
def testing(env, agent, n_episodes, file_name, options = None):
    """
    Testing agents for the house temperature environment 
    -----
    Input
    -----
    env: gym environment
    agent: training agent
    n_episodes: number of episodes to run
    file_name: file name for saving the model 
    options: dict to alter the environment
    ------
    Output
    ------
    reward_array: an array of the total reward per episode
    action_dict: a dict of the total actions taken per action 
    ep_state_list: a list of states per episode
    """
    agent.main_model.load_state_dict(torch.load(f"model_weights/house_temp/{file_name}.pth"))
    agent.main_model.eval()
    reward_array, action_dict, ep_state_list, reward_list = np.zeros((n_episodes, env.max_steps)), {}, [], []

    for episode in range(n_episodes):
        state, _ = env.reset(options = options)
        total_reward, reward, state_list, steps_loss = 0, 0, [], []
        
        for steps in range(env.max_steps):
            state_list.append(state)
            action = agent.act(state)
            if action not in action_dict:
                action_dict[action] = 1
            else:
                action_dict[action] += 1

            next_state, reward, termination, truncation, _ = env.step(action)
            reward_array[episode, steps] = reward
            total_reward += reward
            
            state = next_state
            if termination or truncation:
                break

        reward_list.append(total_reward)
        ep_state_list.append(state_list)

    print(f"Average Reward: {np.mean(reward_list):.2f}")
    
    return reward_array, action_dict, ep_state_list

In [None]:
def in_target_v2(state_list):
    """
    Prints the version 2 metrics computed from recorded states
    -----
    Input
    -----
    state_list: a list of episodes, each a list of 7-element states ordered as
                (inside, outside, target, next_target, time, heater, cooler)
    ------
    Output
    ------
    None; prints the share of steps within 1.0 of the target (out of 288) and the
    share of time == 30 checks, excluding step 0, that were within range (out of 9)
    """
    in_band_counts, check_hit_counts = [], []
    for episode in state_list:
        in_band = 0
        check_hits = 0
        for step_idx, state in enumerate(episode):
            inside, _outside, target, _next_target, time, _heater, _cooler = state

            within = (inside > target - 1.0) and (inside < target + 1.0)
            if not within:
                continue

            in_band += 1
            # Step 0 never counts as a preheating/cooling check
            if step_idx > 0 and time == 30:
                check_hits += 1

        in_band_counts.append(in_band)
        check_hit_counts.append(check_hits)

    in_band_pct = 100 * np.mean(in_band_counts)/288
    check_pct = 100 * np.mean(check_hit_counts)/9
    print(f"In target temperature range: {in_band_pct:.2f}%")
    print(f"In preheating/cooling range: {check_pct:.2f}%")

In [None]:
def in_target_v3(state_list):
    """
    Analyses the states into metrics for version 3
    -----
    Input
    -----
    state_list: a list of episodes, each a list of 8-element states ordered as
                (inside, outside, target, next_target, time, heater, cooler, window)
    ------
    Output
    ------
    None; the metrics are printed. The preheating/cooling percentage is the share of
    observed checks (time == 24, excluding step 0) where the inside temperature was
    within 1.0 of the target. Empty inputs print nan instead of warning or raising.
    """
    in_target_total, preheating_total, check_total = [], [], []
    heater_total, cooler_total, window_total, ep_step = [], [], [], []
    for episode in state_list:
        in_target, preheating, checks, heater_v, cooler_v, window_v = 0, 0, 0, 0, 0, 0
        for steps, state in enumerate(episode):
            inside, outside, target, next_target, time, heater, cooler, window = state
            heater_v += heater
            cooler_v += cooler
            window_v += window
            # Energy per step: heater and cooler each weighted by 0.5
            ep_step.append(heater * 0.5 + cooler * 0.5)

            on_target = target - 1.0 < inside < target + 1.0
            if on_target:
                in_target += 1

            # Count the checks that actually occur instead of assuming a fixed number
            # per episode; step 0 is excluded from the checks.
            if time == 24 and steps > 0:
                checks += 1
                if on_target:
                    preheating += 1

        in_target_total.append(in_target)
        preheating_total.append(preheating)
        check_total.append(checks)
        heater_total.append(heater_v)
        cooler_total.append(cooler_v)
        window_total.append(window_v)

    def mean_or_nan(values):
        # np.mean warns on an empty list; report nan quietly instead
        return np.mean(values) if len(values) > 0 else float("nan")

    all_checks = sum(check_total)
    preheating_pct = 100 * sum(preheating_total) / all_checks if all_checks > 0 else float("nan")

    print(f"In target temperature range: {100 * mean_or_nan(in_target_total)/288:.2f}%")
    print(f"In preheating/cooling range: {preheating_pct:.2f}%")
    print(f"Heater Used: {100 * mean_or_nan(heater_total)/288:.2f}% | Cooler Used: {100 * mean_or_nan(cooler_total)/288:.2f}% | Window Used: {100 * mean_or_nan(window_total)/288:.2f}%")
    print(f"Average Energy Per Step: {mean_or_nan(ep_step):.2f}")

In [None]:
def in_target_v4(state_list, switch_value = 24):
    """
    Analyses the states into metrics for version 4
    -----
    Input
    -----
    state_list: a list of episodes, each a list of 9-element states ordered as
                (inside, outside, next_outside, target, next_target, time, heater, cooler, window)
    switch_value: how often the target temperature changes; a step counts as a
                  preheating/cooling check when its time entry equals this value
    ------
    Output
    ------
    None; the metrics are printed. The preheating/cooling percentage is the share of
    all observed checks (over every episode, excluding step 0) where the inside
    temperature was within 0.5 of the target. Inputs without any episode or without
    any check print nan instead of raising.
    """
    in_target_total, preheating_total, check_total = [], [], []
    heater_total, cooler_total, window_total, ep_step = [], [], [], []
    for episode in state_list:
        in_target, preheating, counter, heater_v, cooler_v, window_v = 0, 0, 0, 0, 0, 0
        for steps, state in enumerate(episode):
            inside, outside, next_outside, target, next_target, time, heater, cooler, window = state
            heater_v += heater
            cooler_v += cooler
            window_v += window
            # Energy per step: heater and cooler each weighted by 0.5
            ep_step.append(heater * 0.5 + cooler * 0.5)

            on_target = target - 0.5 < inside < target + 0.5
            if on_target:
                in_target += 1

            # Step 0 is excluded from the checks
            if time == switch_value and steps > 0:
                counter += 1
                if on_target:
                    preheating += 1

        in_target_total.append(in_target)
        preheating_total.append(preheating)
        # Keep every episode's check count: using only the last episode's counter as
        # the denominator is wrong when counts differ and fails when it is zero.
        check_total.append(counter)
        heater_total.append(heater_v)
        cooler_total.append(cooler_v)
        window_total.append(window_v)

    def mean_or_nan(values):
        # np.mean warns on an empty list; report nan quietly instead
        return np.mean(values) if len(values) > 0 else float("nan")

    all_checks = sum(check_total)
    preheating_pct = 100 * sum(preheating_total) / all_checks if all_checks > 0 else float("nan")

    print(f"In target temperature range: {100 * mean_or_nan(in_target_total)/288:.2f}%")
    print(f"In preheating/cooling range: {preheating_pct:.2f}%")
    print(f"Heater Used: {100 * mean_or_nan(heater_total)/288:.2f}% | Cooler Used: {100 * mean_or_nan(cooler_total)/288:.2f}% | Window Used: {100 * mean_or_nan(window_total)/288:.2f}%")
    print(f"Average Energy Per Step: {mean_or_nan(ep_step):.2f}")

In [None]:
def plot_ra(max_steps, rewards, actions, action_space, version):
    """
    Draws the mean reward per step (with a one standard deviation band) above a bar
    chart of how often each action was taken
    -----
    Input
    -----
    max_steps: an integer number of total steps (length of the x axis)
    rewards: rewards with episodes along axis 0 and steps along axis 1
    actions: a dict mapping each action to the number of times it was taken
    action_space: a dict mapping each action to its display label
    version: a string for the version, appended to the plot title
    """
    step_axis = list(range(max_steps))
    reward_mean = np.mean(rewards, axis = 0)
    reward_std = np.std(rewards, axis = 0)

    fig, (reward_ax, action_ax) = plt.subplots(2, 1, figsize = (20, 6.667))

    # Top panel: reward curve averaged over episodes
    reward_ax.plot(step_axis, reward_mean)
    reward_ax.fill_between(step_axis, reward_mean - reward_std, reward_mean + reward_std, alpha = 0.25)
    reward_ax.set_title(f"Testing | DDQN | MSE | {version}")
    reward_ax.set_ylabel("Mean Reward")
    reward_ax.set_xlabel("Steps")
    reward_ax.grid()

    # Bottom panel: one bar per action, labelled through action_space
    for action_id in actions:
        action_ax.bar(action_space[action_id], actions[action_id])
    action_ax.grid(axis = "y")

    plt.tight_layout()
    plt.show()

In [None]:
def random_episodes_v1(state_list):
    """
    Plots random episodes for version 1
    -----
    Input
    -----
    state_list: a non-empty list of episodes, each a list of states whose first two
                entries are plotted as the inside and target temperature
    """
    if len(state_list) == 0:
        raise ValueError("state_list is empty: there are no episodes to plot")

    fig, axes = plt.subplots(2, 4, figsize = (40, 10))

    for ax in axes.flat:
        # randint excludes its upper bound, so sampling against the real number of
        # episodes always yields a valid index
        episode = state_list[np.random.randint(len(state_list))]
        inside_temp = [state[0] for state in episode]
        target_temp = [state[1] for state in episode]
        x = list(range(len(inside_temp)))
        ax.plot(x, inside_temp)
        ax.plot(x, target_temp)
        ax.grid()

    plt.tight_layout()
    plt.show()

In [None]:
def random_episodes_v2_3(state_list, episodes, max_steps):
    """
    Plots random episodes for versions 2 and 3
    -----
    Input
    -----
    state_list: a list of episodes, each a list of states whose first three entries
                are plotted as the inside, outside and target temperature
    episodes: an integer amount of total episodes to sample from
    max_steps: an integer amount of total steps; longer episodes are cut to this length
    """
    # Never sample beyond the episodes that were actually recorded
    n_available = min(episodes, len(state_list))
    if n_available <= 0:
        raise ValueError("no episodes available to plot")

    fig, axes = plt.subplots(2, 4, figsize = (40, 10))

    for ax in axes.flat:
        # randint excludes its upper bound, so the index is always valid
        episode = state_list[np.random.randint(n_available)][:max_steps]
        # The x axis follows the episode's own length so episodes that ended early still plot
        x = list(range(len(episode)))
        inside_temp = [state[0] for state in episode]
        outside_temp = [state[1] for state in episode]
        target_temp = [state[2] for state in episode]
        ax.plot(x, inside_temp)
        ax.plot(x, outside_temp, alpha = 0.3)
        ax.plot(x, target_temp, "--", alpha = 0.5)
        ax.grid()

    plt.tight_layout()
    plt.show()

In [None]:
def random_episodes_v4(state_list, episodes, max_steps):
    """
    Plots random episodes for version 4
    -----
    Input
    -----
    state_list: a list of episodes, each a list of states; entries 0, 1 and 3 are
                plotted as the inside, outside and target temperature
    episodes: an integer amount of total episodes to sample from
    max_steps: an integer amount of total steps; longer episodes are cut to this length
    """
    # Never sample beyond the episodes that were actually recorded
    n_available = min(episodes, len(state_list))
    if n_available <= 0:
        raise ValueError("no episodes available to plot")

    fig, axes = plt.subplots(1, 6, figsize = (30, 3))

    for ax in axes:
        # randint excludes its upper bound, so the index is always valid
        episode = state_list[np.random.randint(n_available)][:max_steps]
        # The x axis follows the episode's own length so episodes that ended early still plot
        x = list(range(len(episode)))
        inside_temp = [state[0] for state in episode]
        outside_temp = [state[1] for state in episode]
        target_temp = [state[3] for state in episode]
        ax.plot(x, inside_temp)
        ax.plot(x, outside_temp, alpha = 0.3)
        ax.plot(x, target_temp, "--", alpha = 0.5)
        ax.grid(alpha = 0.25)

    plt.tight_layout()
    plt.show()

# House Temperature Version 1.0 Tests

In [None]:
# Evaluation run on V1.0: the learning hyperparameters are zeroed and the weights
# come from the checkpoint that testing() loads.
env = ht.house_temp_v1_0()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_1, state_dim=4, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

ddqn_mse_rewards_v1_0, ddqn_mse_actions_v1_0, ddqn_mse_states_v1_0 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv1_0",
)

In [None]:
# Display labels for the discrete actions, keyed by action index
action_space = dict(enumerate(["Do Nothing", "Window Switch", "Heater Switch"]))

plot_ra(
    max_steps=50, rewards=ddqn_mse_rewards_v1_0, actions=ddqn_mse_actions_v1_0,
    action_space=action_space, version="V1.0",
)

random_episodes_v1(state_list=ddqn_mse_states_v1_0)

# House Temperature Version 2.0 Tests

In [None]:
# Evaluation run on V2.0 with the default environment settings
env = ht.house_temp_v2_0()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_2, state_dim=7, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

ddqn_mse_rewards_v2_0, ddqn_mse_actions_v2_0, ddqn_mse_states_v2_0 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv2_0",
)

in_target_v2(ddqn_mse_states_v2_0)

In [None]:
# Display labels for the discrete actions, keyed by action index
action_space = dict(enumerate(["Do Nothing", "Heater Switch", "Cooler Switch"]))

plot_ra(
    max_steps=288, rewards=ddqn_mse_rewards_v2_0, actions=ddqn_mse_actions_v2_0,
    action_space=action_space, version="V2.0",
)

random_episodes_v2_3(state_list=ddqn_mse_states_v2_0, episodes=1000, max_steps=288)

### Version 2.0 with Extended Min/Max

In [None]:
# Evaluation run on V2.0 with a wider temperature range passed through the reset options
env = ht.house_temp_v2_0()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_2, state_dim=7, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

options = {
    "temp_min": -20,
    "temp_max": 50,
}

v2_0_rewards_min_max, v2_0_actions_min_max, v2_0_states_min_max = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv2_0", options=options,
)

in_target_v2(v2_0_states_min_max)

In [None]:
# Display labels for the discrete actions, keyed by action index
action_space = {0: "Do Nothing", 1: "Heater Switch", 2: "Cooler Switch"}

# These results come from the V2.0 environment and checkpoint, so the title must say V2.0
plot_ra(max_steps = 288, rewards = v2_0_rewards_min_max, actions = v2_0_actions_min_max, action_space = action_space, version = "V2.0")

random_episodes_v2_3(state_list = v2_0_states_min_max, episodes = 1000, max_steps = 288)

### Version 2.0 with Different Outside Temperature Curves

In [None]:
# Evaluation run on V2.0 with a random outside temperature curve of 576 uniform draws
env = ht.house_temp_v2_0()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_2, state_dim=7, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

options = {
    "outside_temp_curve": [np.random.uniform(0, 31) for _ in range(576)],
}

v2_0_rewards_outside_1, v2_0_actions_outside_1, v2_0_states_outside_1 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv2_0", options=options,
)

in_target_v2(v2_0_states_outside_1)

In [None]:
# Display labels for the discrete actions, keyed by action index
action_space = {0: "Do Nothing", 1: "Heater Switch", 2: "Cooler Switch"}

# These results come from the V2.0 environment and checkpoint, so the title must say V2.0
plot_ra(max_steps = 288, rewards = v2_0_rewards_outside_1, actions = v2_0_actions_outside_1, action_space = action_space, version = "V2.0")

random_episodes_v2_3(state_list = v2_0_states_outside_1, episodes = 1000, max_steps = 288)

In [None]:
# Evaluation run on V2.0 with a sinusoidal outside temperature curve built from env.x
env = ht.house_temp_v2_0()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_2, state_dim=7, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

options = {
    "outside_temp_curve": 15 * np.sin(10 * env.x) + 15,
}

v2_0_rewards_outside_2, v2_0_actions_outside_2, v2_0_states_outside_2 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv2_0", options=options,
)

in_target_v2(v2_0_states_outside_2)

In [None]:
# Display labels for the discrete actions, keyed by action index
action_space = {0: "Do Nothing", 1: "Heater Switch", 2: "Cooler Switch"}

# These results come from the V2.0 environment and checkpoint, so the title must say V2.0
plot_ra(max_steps = 288, rewards = v2_0_rewards_outside_2, actions = v2_0_actions_outside_2, action_space = action_space, version = "V2.0")

random_episodes_v2_3(state_list = v2_0_states_outside_2, episodes = 1000, max_steps = 288)

# House Temperature Version 2.1 Tests

In [None]:
# Evaluation run on V2.1 with the default environment settings
env = ht.house_temp_v2_1()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_2, state_dim=7, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

ddqn_mse_rewards_v2_1, ddqn_mse_actions_v2_1, ddqn_mse_states_v2_1 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv2_1",
)

in_target_v2(ddqn_mse_states_v2_1)

In [None]:
# Display labels for the discrete actions, keyed by action index
action_space = dict(enumerate(["Do Nothing", "Heater Switch", "Cooler Switch"]))

plot_ra(
    max_steps=288, rewards=ddqn_mse_rewards_v2_1, actions=ddqn_mse_actions_v2_1,
    action_space=action_space, version="V2.1",
)

random_episodes_v2_3(state_list=ddqn_mse_states_v2_1, episodes=1000, max_steps=288)

### Version 2.1 with Extended Min/Max

In [None]:
# Evaluation run on V2.1 with a wider temperature range passed through the reset options
env = ht.house_temp_v2_1()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_2, state_dim=7, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

options = {
    "temp_min": -20,
    "temp_max": 50,
}

v2_1_rewards_min_max, v2_1_actions_min_max, v2_1_states_min_max = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv2_1", options=options,
)
in_target_v2(v2_1_states_min_max)

In [None]:
# Display labels for the discrete actions, keyed by action index
action_space = dict(enumerate(["Do Nothing", "Heater Switch", "Cooler Switch"]))

plot_ra(
    max_steps=288, rewards=v2_1_rewards_min_max, actions=v2_1_actions_min_max,
    action_space=action_space, version="V2.1",
)

random_episodes_v2_3(state_list=v2_1_states_min_max, episodes=1000, max_steps=288)

### Version 2.1 with Different Outside Temperature Curves

In [None]:
# Evaluation run on V2.1 with a random outside temperature curve of 576 uniform draws
env = ht.house_temp_v2_1()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_2, state_dim=7, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

options = {
    "outside_temp_curve": [np.random.uniform(0, 31) for _ in range(576)],
}

v2_1_rewards_outside_1, v2_1_actions_outside_1, v2_1_states_outside_1 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv2_1", options=options,
)

in_target_v2(v2_1_states_outside_1)

In [None]:
# Display labels for the discrete actions, keyed by action index
action_space = dict(enumerate(["Do Nothing", "Heater Switch", "Cooler Switch"]))

plot_ra(
    max_steps=288, rewards=v2_1_rewards_outside_1, actions=v2_1_actions_outside_1,
    action_space=action_space, version="V2.1",
)

random_episodes_v2_3(state_list=v2_1_states_outside_1, episodes=1000, max_steps=288)

In [None]:
# Evaluation run on V2.1 with a sinusoidal outside temperature curve built from env.x
env = ht.house_temp_v2_1()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_2, state_dim=7, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

options = {
    "outside_temp_curve": 15 * np.sin(10 * env.x) + 15,
}

v2_1_rewards_outside_2, v2_1_actions_outside_2, v2_1_states_outside_2 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv2_1", options=options,
)

in_target_v2(v2_1_states_outside_2)

In [None]:
# Display labels for the discrete actions, keyed by action index
action_space = dict(enumerate(["Do Nothing", "Heater Switch", "Cooler Switch"]))

plot_ra(
    max_steps=288, rewards=v2_1_rewards_outside_2, actions=v2_1_actions_outside_2,
    action_space=action_space, version="V2.1",
)

random_episodes_v2_3(state_list=v2_1_states_outside_2, episodes=1000, max_steps=288)

# House Temperature Version 3.0 Tests

In [None]:
# Evaluation run on V3.0 with the default environment settings
env = ht.house_temp_v3_0()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=8, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

ddqn_mse_rewards_v3_0, ddqn_mse_actions_v3_0, ddqn_mse_states_v3_0 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv3_0",
)

in_target_v3(ddqn_mse_states_v3_0)

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = dict(enumerate([
    "All Off", "Heater On", "Cooler On", "Window On",
    "Heater/Cooler On", "Heater/Window On", "Cooler/Window On", "All On",
]))

plot_ra(
    max_steps=288, rewards=ddqn_mse_rewards_v3_0, actions=ddqn_mse_actions_v3_0,
    action_space=action_space, version="V3.0",
)

random_episodes_v2_3(state_list=ddqn_mse_states_v3_0, episodes=1000, max_steps=288)

# House Temperature Version 3.1 Tests

In [None]:
# Evaluation run on V3.1 with the default environment settings
env = ht.house_temp_v3_1()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=8, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

ddqn_mse_rewards_v3_1, ddqn_mse_actions_v3_1, ddqn_mse_states_v3_1 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="ddqn_mse_htv3_1",
)

in_target_v3(ddqn_mse_states_v3_1)

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = dict(enumerate([
    "All Off", "Heater On", "Cooler On", "Window On",
    "Heater/Cooler On", "Heater/Window On", "Cooler/Window On", "All On",
]))

plot_ra(
    max_steps=288, rewards=ddqn_mse_rewards_v3_1, actions=ddqn_mse_actions_v3_1,
    action_space=action_space, version="V3.1",
)

random_episodes_v2_3(state_list=ddqn_mse_states_v3_1, episodes=1000, max_steps=288)

# House Temperature Version 4.0 Tests

In [None]:
# Evaluation run on V4.0 with the default environment settings
env = ht.house_temp_v4_0()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=9, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

ddqn_mse_rewards_v4_0, ddqn_mse_actions_v4_0, ddqn_mse_states_v4_0 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="htv4_0/7_htv4_0",
)

in_target_v4(ddqn_mse_states_v4_0)

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = dict(enumerate([
    "All Off", "Heater On", "Cooler On", "Window On",
    "Heater/Cooler On", "Heater/Window On", "Cooler/Window On", "All On",
]))

plot_ra(
    max_steps=288, rewards=ddqn_mse_rewards_v4_0, actions=ddqn_mse_actions_v4_0,
    action_space=action_space, version="V4.0",
)

random_episodes_v4(state_list=ddqn_mse_states_v4_0, episodes=1000, max_steps=288)

## 4.0 Switch Values

In [None]:
# Evaluation run on V4.0 with switch_value 12; the same value is passed to the metric
options = {
    "switch_value": 12,
}
env = ht.house_temp_v4_0()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=9, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

sv_12_rewards_v4_0, sv_12_actions_v4_0, sv_12_states_v4_0 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="htv4_0/7_htv4_0", options=options,
)

in_target_v4(sv_12_states_v4_0, switch_value=options["switch_value"])

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = dict(enumerate([
    "All Off", "Heater On", "Cooler On", "Window On",
    "Heater/Cooler On", "Heater/Window On", "Cooler/Window On", "All On",
]))

plot_ra(
    max_steps=288, rewards=sv_12_rewards_v4_0, actions=sv_12_actions_v4_0,
    action_space=action_space, version="Switch: 12 | V4.0",
)

random_episodes_v4(state_list=sv_12_states_v4_0, episodes=1000, max_steps=288)

In [None]:
# Evaluation run on V4.0 with switch_value 144; the same value is passed to the metric
options = {
    "switch_value": 144,
}
env = ht.house_temp_v4_0()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=9, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

sv_144_rewards_v4_0, sv_144_actions_v4_0, sv_144_states_v4_0 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="htv4_0/7_htv4_0", options=options,
)

in_target_v4(sv_144_states_v4_0, switch_value=options["switch_value"])

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = dict(enumerate([
    "All Off", "Heater On", "Cooler On", "Window On",
    "Heater/Cooler On", "Heater/Window On", "Cooler/Window On", "All On",
]))

plot_ra(
    max_steps=288, rewards=sv_144_rewards_v4_0, actions=sv_144_actions_v4_0,
    action_space=action_space, version="Switch: 144 | V4.0",
)

random_episodes_v4(state_list=sv_144_states_v4_0, episodes=1000, max_steps=288)

## 4.0 Different Planet? 

In [None]:
# Evaluation run on V4.0 with a much hotter seasonal profile and starting inside temperature
options = {
    "seasonal_variation": {
        "mean": 50.0,
        "fluctuation": 5.0,
        "noise": (-3, 3),
        "temp_min": 40,
        "temp_max": 60,
    },
    "inside_temp": 50.0,
}
env = ht.house_temp_v4_0()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=9, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

dp_rewards_v4_0, dp_actions_v4_0, dp_states_v4_0 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="htv4_0/7_htv4_0", options=options,
)

in_target_v4(dp_states_v4_0)

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = dict(enumerate([
    "All Off", "Heater On", "Cooler On", "Window On",
    "Heater/Cooler On", "Heater/Window On", "Cooler/Window On", "All On",
]))

plot_ra(
    max_steps=288, rewards=dp_rewards_v4_0, actions=dp_actions_v4_0,
    action_space=action_space, version="Different Planet | V4.0",
)

random_episodes_v4(state_list=dp_states_v4_0, episodes=1000, max_steps=288)

## 4.0 Temperature Distance

In [None]:
# Evaluation run on V4.0 with low_change/high_change overridden through the reset options
env = ht.house_temp_v4_0()
options = {
    "low_change": 0,
    "high_change": 4,
}
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=9, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

td_rewards_v4_0, td_actions_v4_0, td_states_v4_0 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="htv4_0/7_htv4_0", options=options,
)

in_target_v4(td_states_v4_0)

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = dict(enumerate([
    "All Off", "Heater On", "Cooler On", "Window On",
    "Heater/Cooler On", "Heater/Window On", "Cooler/Window On", "All On",
]))

plot_ra(
    max_steps=288, rewards=td_rewards_v4_0, actions=td_actions_v4_0,
    action_space=action_space, version="Temperature Distance | V4.0",
)

random_episodes_v4(state_list=td_states_v4_0, episodes=1000, max_steps=288)

## 4.0 Switch with Temperature Distance

In [None]:
# Evaluation run on V4.0 combining a switch_value of 8 with low_change/high_change overrides
env = ht.house_temp_v4_0()
options = {
    "low_change": 0,
    "high_change": 2,
    "switch_value": 8,
}
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=9, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

swtd_rewards_v4_0, swtd_actions_v4_0, swtd_states_v4_0 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="htv4_0/7_htv4_0", options=options,
)

in_target_v4(swtd_states_v4_0, switch_value=options["switch_value"])

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = {0: "All Off", 1: "Heater On", 2: "Cooler On", 3: "Window On", 
                4: "Heater/Cooler On", 5: "Heater/Window On", 6: "Cooler/Window On", 7: "All On"}

# These results come from the V4.0 environment and checkpoint, so the title must say V4.0
plot_ra(max_steps = 288, rewards = swtd_rewards_v4_0, actions = swtd_actions_v4_0, action_space = action_space, version = "V4.0")

random_episodes_v4(state_list = swtd_states_v4_0, episodes = 1000, max_steps = 288)

# House Temperature Version 4.1 Tests

In [None]:
# Evaluation run on V4.1 with the default environment settings
env = ht.house_temp_v4_1()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=9, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

ddqn_mse_rewards_v4_1, ddqn_mse_actions_v4_1, ddqn_mse_states_v4_1 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="htv4_1/6_htv4_1",
)

in_target_v4(ddqn_mse_states_v4_1)

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = dict(enumerate([
    "All Off", "Heater On", "Cooler On", "Window On",
    "Heater/Cooler On", "Heater/Window On", "Cooler/Window On", "All On",
]))

plot_ra(
    max_steps=288, rewards=ddqn_mse_rewards_v4_1, actions=ddqn_mse_actions_v4_1,
    action_space=action_space, version="V4.1",
)

random_episodes_v4(state_list=ddqn_mse_states_v4_1, episodes=1000, max_steps=288)

## 4.1 Switch Values

In [None]:
# Evaluation run on V4.1 with switch_value 12; the same value is passed to the metric
options = {
    "switch_value": 12,
}
env = ht.house_temp_v4_1()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=9, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

sv_12_rewards_v4_1, sv_12_actions_v4_1, sv_12_states_v4_1 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="htv4_1/6_htv4_1", options=options,
)

in_target_v4(sv_12_states_v4_1, switch_value=options["switch_value"])

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = dict(enumerate([
    "All Off", "Heater On", "Cooler On", "Window On",
    "Heater/Cooler On", "Heater/Window On", "Cooler/Window On", "All On",
]))

plot_ra(
    max_steps=288, rewards=sv_12_rewards_v4_1, actions=sv_12_actions_v4_1,
    action_space=action_space, version="Switch: 12 | V4.1",
)

random_episodes_v4(state_list=sv_12_states_v4_1, episodes=1000, max_steps=288)

In [None]:
# Evaluation run on V4.1 with switch_value 144; the same value is passed to the metric
options = {
    "switch_value": 144,
}
env = ht.house_temp_v4_1()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3, state_dim=9, action_dim=env.action_space.n,
    gamma=0, lr=0, epsilon=0, epsilon_min=0, decay_steps=1,
    buffer_size=0, batch_size=0, device=device,
)

sv_144_rewards_v4_1, sv_144_actions_v4_1, sv_144_states_v4_1 = testing(
    env=env, agent=agent, n_episodes=1000, file_name="htv4_1/6_htv4_1", options=options,
)

in_target_v4(sv_144_states_v4_1, switch_value=options["switch_value"])

In [None]:
# Display labels for the eight discrete actions, keyed by action index
action_space = dict(enumerate([
    "All Off", "Heater On", "Cooler On", "Window On",
    "Heater/Cooler On", "Heater/Window On", "Cooler/Window On", "All On",
]))

plot_ra(
    max_steps=288, rewards=sv_144_rewards_v4_1, actions=sv_144_actions_v4_1,
    action_space=action_space, version="Switch: 144 | V4.1",
)

random_episodes_v4(state_list=sv_144_states_v4_1, episodes=1000, max_steps=288)

## 4.1 Different Planet? 

In [None]:
# "Different planet" evaluation: override the seasonal-variation settings and
# the inside temperature through `options`, all centred around 50.
options = {
    "seasonal_variation": {
        "mean": 50.0,
        "fluctuation": 10.0,
        "noise": (-3, 3),
        "temp_min": 40,
        "temp_max": 60,
    },
    "inside_temp": 50.0,
}
env = ht.house_temp_v4_1()

# NOTE(review): gamma, lr, epsilon and the buffer/batch sizes are all zero —
# presumably because the agent is only evaluated here and its weights come
# from `file_name`; confirm against `testing`.
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3,
    state_dim=9,
    action_dim=env.action_space.n,
    gamma=0,
    lr=0,
    epsilon=0,
    epsilon_min=0,
    decay_steps=1,
    buffer_size=0,
    batch_size=0,
    device=device,
)

# `testing` hands back three values, unpacked as rewards, actions and states.
dp_rewards_v4_1, dp_actions_v4_1, dp_states_v4_1 = testing(
    env=env,
    agent=agent,
    n_episodes=1000,
    file_name="htv4_1/6_htv4_1",
    options=options,
)

in_target_v4(dp_states_v4_1)

In [None]:
# Display label for each action id (0-7); handed to plot_ra below.
action_space = {
    0: "All Off",
    1: "Heater On",
    2: "Cooler On",
    3: "Window On",
    4: "Heater/Cooler On",
    5: "Heater/Window On",
    6: "Cooler/Window On",
    7: "All On",
}

# Rewards and actions recorded by the "different planet" evaluation run.
plot_ra(
    max_steps=288,
    rewards=dp_rewards_v4_1,
    actions=dp_actions_v4_1,
    action_space=action_space,
    version="Different Planet | V4.1",
)

random_episodes_v4(state_list=dp_states_v4_1, episodes=1000, max_steps=288)

## 4.1 Temperature Distance

In [None]:
# Temperature-distance evaluation: `options` sets low_change / high_change
# (0 and 4); presumably these bound the starting temperature offset —
# TODO confirm in the environment's reset logic.
env = ht.house_temp_v4_1()
options = {"low_change": 0, "high_change": 4}

# NOTE(review): gamma, lr, epsilon and the buffer/batch sizes are all zero —
# presumably because the agent is only evaluated here and its weights come
# from `file_name`; confirm against `testing`.
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3,
    state_dim=9,
    action_dim=env.action_space.n,
    gamma=0,
    lr=0,
    epsilon=0,
    epsilon_min=0,
    decay_steps=1,
    buffer_size=0,
    batch_size=0,
    device=device,
)

# `testing` hands back three values, unpacked as rewards, actions and states.
td_rewards_v4_1, td_actions_v4_1, td_states_v4_1 = testing(
    env=env,
    agent=agent,
    n_episodes=1000,
    file_name="htv4_1/6_htv4_1",
    options=options,
)

in_target_v4(td_states_v4_1)

In [None]:
# Display label for each action id (0-7); handed to plot_ra below.
action_space = {
    0: "All Off",
    1: "Heater On",
    2: "Cooler On",
    3: "Window On",
    4: "Heater/Cooler On",
    5: "Heater/Window On",
    6: "Cooler/Window On",
    7: "All On",
}

# Rewards and actions recorded by the temperature-distance evaluation run.
plot_ra(
    max_steps=288,
    rewards=td_rewards_v4_1,
    actions=td_actions_v4_1,
    action_space=action_space,
    version="Max Starting Temperature | V4.1",
)

random_episodes_v4(state_list=td_states_v4_1, episodes=1000, max_steps=288)

## 4.1 Switch with Temperature Distance

In [None]:
# Combined evaluation: a switch_value of 8 together with low_change /
# high_change of 0 and 2, all passed through `options`.
env = ht.house_temp_v4_1()
options = {"low_change": 0, "high_change": 2, "switch_value": 8}

# NOTE(review): gamma, lr, epsilon and the buffer/batch sizes are all zero —
# presumably because the agent is only evaluated here and its weights come
# from `file_name`; confirm against `testing`.
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3,
    state_dim=9,
    action_dim=env.action_space.n,
    gamma=0,
    lr=0,
    epsilon=0,
    epsilon_min=0,
    decay_steps=1,
    buffer_size=0,
    batch_size=0,
    device=device,
)

# `testing` hands back three values, unpacked as rewards, actions and states.
swtd_rewards_v4_1, swtd_actions_v4_1, swtd_states_v4_1 = testing(
    env=env,
    agent=agent,
    n_episodes=1000,
    file_name="htv4_1/6_htv4_1",
    options=options,
)

in_target_v4(swtd_states_v4_1, switch_value=options["switch_value"])

In [None]:
# Display label for each action id (0-7); handed to plot_ra below.
action_space = {
    0: "All Off",
    1: "Heater On",
    2: "Cooler On",
    3: "Window On",
    4: "Heater/Cooler On",
    5: "Heater/Window On",
    6: "Cooler/Window On",
    7: "All On",
}

# Rewards and actions recorded by the switch + temperature-distance run.
# The label names the tested variation (switch_value 8 plus the temperature
# distance options), in the same "<variation> | V4.1" form the other
# variation plots in this notebook use, so the figure is not mistaken for a
# plain V4.1 run.
plot_ra(
    max_steps=288,
    rewards=swtd_rewards_v4_1,
    actions=swtd_actions_v4_1,
    action_space=action_space,
    version="Switch: 8 and Temperature Distance | V4.1",
)

random_episodes_v4(state_list=swtd_states_v4_1, episodes=1000, max_steps=288)

# House Temperature Version 4.2 Tests

In [None]:
# Evaluation run on house_temp_v4_2. No `options` are passed here, so
# `testing` falls back to whatever default it declares for that argument.
# NOTE(review): gamma, lr, epsilon and the buffer/batch sizes are all zero —
# presumably because the agent is only evaluated here and its weights come
# from `file_name`; confirm against `testing`.
env = ht.house_temp_v4_2()
agent = ddqn_agent.ddqn_agent_mse(
    model=house_temp_model_3,
    state_dim=9,
    action_dim=env.action_space.n,
    gamma=0,
    lr=0,
    epsilon=0,
    epsilon_min=0,
    decay_steps=1,
    buffer_size=0,
    batch_size=0,
    device=device,
)

# `testing` hands back three values, unpacked as rewards, actions and states.
ddqn_mse_rewards_v4_2, ddqn_mse_actions_v4_2, ddqn_mse_states_v4_2 = testing(
    env=env,
    agent=agent,
    n_episodes=1000,
    file_name="trial_runs/ddqn_mse_htv4_2",
)

in_target_v4(ddqn_mse_states_v4_2)

In [None]:
# Display label for each action id (0-7); handed to plot_ra below.
action_space = {
    0: "All Off",
    1: "Heater On",
    2: "Cooler On",
    3: "Window On",
    4: "Heater/Cooler On",
    5: "Heater/Window On",
    6: "Cooler/Window On",
    7: "All On",
}

# Rewards and actions recorded by the V4.2 evaluation run.
plot_ra(
    max_steps=288,
    rewards=ddqn_mse_rewards_v4_2,
    actions=ddqn_mse_actions_v4_2,
    action_space=action_space,
    version="V4.2",
)

random_episodes_v4(state_list=ddqn_mse_states_v4_2, episodes=1000, max_steps=288)