# Import dependencies

In [1]:
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import Discrete, Box

import numpy as np
import random
import os

from stable_baselines3 import DQN,A2C,PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

2024-05-01 19:23:44.533725: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Objectives:

The objective of the project is to design, train and evaluate a framework for an agent in a low-energy environment. It seeks to efficiently manage the heat and air in the room to reduce energy consumption and improve occupant comfort. 

Leveraging reinforcement learning, the goal is to train agents to dynamically adjust controls based on factors such as occupancy, solar radiation, energy availability, and so on. 

Ultimately, the project aims to help achieve optimal energy without compromising comfort, as likewise sustainable building design.

# The Environment

This environment is designed to be able to simulate the house of a consumer/user and train an agent specific to all the rooms in the house to utilize energy as efficiently as possible.

In [2]:
import numpy as np
import random
import gym
import gym.spaces as spaces

class EnergySavingEnv(gym.Env):
    def __init__(self, num_bedrooms=3, num_people_per_bedroom=[1,2,1], num_people_in_living_room=2):
        self.num_bedrooms = num_bedrooms
        self.num_people_per_bedroom = num_people_per_bedroom if num_people_per_bedroom is not None else [2] * num_bedrooms
        self.num_people_in_living_room = num_people_in_living_room

        # Define the action space
        self.action_space = spaces.Discrete(28)  # Total number of actions: 7 temperature adjustments * 4 airflow adjustments
        
        # Define the observation space with normalized ranges
        num_rooms = num_bedrooms + 1  # Include living room as well
        self.observation_space = spaces.Box(low=-1, high=1, shape=(num_rooms+1,1), dtype=np.float32)

        # Define the initial temperature (random between 24 and 26 degrees Celsius) for each room
        self.initial_temperatures = np.random.uniform(24, 26, size=(num_rooms,))

        # Set the current temperature to the initial temperature
        self.temperatures = self.initial_temperatures.copy()

        # Initialize energy level for the whole house
        self.energy_level = np.array([120])

        # Define the ideal temperature for each room
        self.ideal_temperatures = [22] * num_rooms  # Assuming ideal temperature of 22 for all rooms

        # Define the resource manager properties
        self.energy_capacity = 250
        self.energy_threshold = 50  # Energy threshold for energy-saving mode
        self.energy_usage_factor = 2  # Factor to control energy usage for temperature adjustment
        self.base_airflow = 0  # Base airflow without energy consumption
        self.max_airflow = 3  # Maximum airflow that can be achieved with energy consumption
        self.airflow_energy_consumption = 0.5  # Energy consumption per unit increase in airflow

        # Define the number of time steps for the resource manager
        self.time_steps = 100

        # Initialize the current time step
        self.current_step = 0

        self.cleaning_energy_consumption = 1  # Energy consumed during home cleaning

        # Define the frequency of home cleaning (once a day/episode)
        self.cleaning_frequency = 1

        # Add solar energy parameters
        self.solar_panel_efficiency = 0.2  # Efficiency of solar panels
        self.solar_panel_area = 100  # Area covered by solar panels in square meters

        # Define outside electricity cost
        self.base_electricity_cost = 1  # Base cost per unit of electricity

        # Initialize solar radiation parameters
        self.solar_radiation_variation = 100  # Maximum variation in solar radiation
        self.solar_radiation_mean = 700  # Mean solar radiation during the day
        self.randomness_factor = 0.1  # Factor to control randomness

        # Define the maximum number of people in each room
        self.max_people_per_room = 3
        self.num_people_in_rooms = self.num_people_per_bedroom.copy() + [0] * (num_bedrooms + 1)  # Initialize with specified people in bedrooms
        self.num_people_in_rooms[-1] = num_people_in_living_room  # Set the number of people in the living room

        # Define the frequency of adding/removing people (every 20 time steps)
        self.add_remove_frequency = 25

        # Exploration parameters
        self.epsilon = 0.1 # Exploration rate
        self.min_epsilon = 0.01  # Minimum exploration rate
        self.epsilon_decay = 0.99  # Exploration decay rate

    def get_solar_radiation(self, time_of_day):
        # Simulate time-dependent solar radiation
        # For simplicity, we'll assume a pattern with variations based on time of day

        # Define solar radiation ranges for different times of the day
        if time_of_day < 6:  # Night
            solar_radiation = np.random.uniform(0, 100)
        elif time_of_day < 9:  # Early morning
            solar_radiation = np.random.uniform(500, 700)
        elif time_of_day < 15:  # Afternoon
            solar_radiation = np.random.uniform(700, 1000)
        elif time_of_day < 19:  # Evening
            solar_radiation = np.random.uniform(500, 700)
        else:  # Night
            solar_radiation = 0

        # Introduce randomness with occasional complete randomness
        if random.random() < self.randomness_factor:
            solar_radiation = np.random.uniform(self.solar_radiation_mean - self.solar_radiation_variation,
                                                 self.solar_radiation_mean + self.solar_radiation_variation)

        return solar_radiation

    def step(self, action):
        temperature_action = action // 4 + 1  # Temperature adjustment
        airflow_action = action % 4  # Airflow adjustment

        # Determine the temperature change based on the action
        temp_changes = np.zeros_like(self.temperatures)
        temp_change = 0
        if temperature_action == 1:  # Decrease temperature by 1 degree
            temp_change = -1
        elif temperature_action == 2:  # Keep temperature (no change)
            temp_change = 0
        elif temperature_action == 3:  # Increase temperature by 1 degree
            temp_change = 1
        elif temperature_action == 4:  # Decrease temperature by 2 degrees
            temp_change = -2
        elif temperature_action == 5:  # Increase temperature by 2 degrees
            temp_change = 2
        elif temperature_action == 6:  # Decrease temperature by 3 degrees
            temp_change = -3
        elif temperature_action == 7:  # Increase temperature by 3 degrees
            temp_change = 3

        # Apply the temperature change to all rooms
        temp_changes[:self.num_bedrooms] = temp_change

        # Clip temperature within the valid range
        self.temperatures += temp_changes
        self.temperatures = np.clip(self.temperatures, 16, 33)

        # Calculate energy produced by solar panels
        solar_radiation = self.get_solar_radiation(self.current_step / self.time_steps * 24)
        solar_energy = self.solar_panel_area * self.solar_panel_efficiency * solar_radiation

        # Calculate energy usage for temperature adjustment
        energy_usage = np.abs(temp_changes) * self.energy_usage_factor

        # Adjust airflow based on the action and number of people in the room
        airflow = self.base_airflow + airflow_action * (self.max_airflow / (self.action_space.n // 4 - 1))
        airflow_energy_usage = airflow * self.airflow_energy_consumption  # Initialize airflow energy usage
        
        # Adjust airflow energy consumption based on the number of people in each room
        for room_idx, num_people in enumerate(self.num_people_in_rooms):
            if num_people > 0:
                airflow_energy_usage += self.airflow_energy_consumption * num_people

        # Calculate net energy (energy usage - solar energy)
        self.energy_level = min(self.energy_level + solar_energy, self.energy_capacity)
        self.energy_level -= (np.sum(energy_usage) + airflow_energy_usage)

        # Clip energy level within the valid range
        self.energy_level = np.clip(self.energy_level, 0, self.energy_capacity)

        # Calculate reward based on energy-saving state and temperature
        # temperature_rewards = -0.5 * (np.abs(self.temperatures - self.ideal_temperatures) ** 2)
        eps = 1e-6 
        temperature_rewards = -np.abs(self.temperatures - self.ideal_temperatures) * np.log(np.abs(self.temperatures - self.ideal_temperatures + eps))
        temperature_rewards_sum = 0.25*np.sum(temperature_rewards)

        energy_reward = 1 if self.energy_level >= self.energy_threshold else -1

        rewards = temperature_rewards_sum + energy_reward

        # Check if the episode is done
        done = self.current_step >= self.time_steps

        # Increment the current time step
        self.current_step += 1

        # Add or remove people from the room every add_remove_frequency time steps
        if self.current_step % self.add_remove_frequency == 0:
            for i in range(self.num_bedrooms):
                if self.num_people_in_rooms[i] < self.max_people_per_room:
                    self.num_people_in_rooms[i] += 1
                    break

        # Exploration decay
        self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)

        # Choose action with ε-greedy exploration
        if random.random() < self.epsilon:
            action = self.action_space.sample()

        # Additional info can be an empty dictionary
        info = {}

        # Calculate normalized observation
        normalized_observation = np.hstack(((self.temperatures - 16) / 17, (self.energy_level - 0) / self.energy_capacity))
        normalized_observation = normalized_observation.reshape((-1, 1))

        # Return the next state, reward, whether the episode is done, and additional info
        return normalized_observation, rewards, done, info

    def reset(self):
        # Reset the current time step
        self.current_step = 0
        
        # Check if it's time for home cleaning
        if self.current_step % self.cleaning_frequency == 0:
            # Consume energy for home cleaning
            self.energy_level -= self.cleaning_energy_consumption

            # Clip energy level within the valid range
            self.energy_level = max(0, min(self.energy_capacity, self.energy_level))

        # Normalize observation space
        normalized_observation = np.hstack(((self.temperatures - 16) / 17, (self.energy_level - 0) / self.energy_capacity))
        normalized_observation = normalized_observation.reshape((-1, 1))

        # Return the initial state
        return normalized_observation

# Test the Environment

In [3]:
env_energy_saving = EnergySavingEnv()
env_energy_saving.observation_space.sample()

array([[ 0.5925787 ],
       [ 0.34205627],
       [ 0.8047423 ],
       [ 0.93458307],
       [-0.2202779 ]], dtype=float32)

In [4]:
env_energy_saving.reset()

array([[0.55417734],
       [0.48281269],
       [0.50782215],
       [0.58078679],
       [0.476     ]])

In [5]:
episodes = 2
for episode in range(1, episodes+1):
  obs = env_energy_saving.reset()
  done = False
  score = 0

  while not done:
    # env_energy_saving.render()
    action = env_energy_saving.action_space.sample()
    obs, reward, done, info = env_energy_saving.step(action)
    score += reward

  print('Episode: {} Score {}'.format(episode, score))

Episode: 1 Score -401.864256101493
Episode: 2 Score -990.6116630135333


# Train model

We are going to train the model with different agents/models and compare the results obtained.

In [6]:
log_path = os.path.join('Training', 'Logs')

### DQN

In [7]:
energy_saving_model = DQN('MlpPolicy', env_energy_saving, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [8]:
energy_saving_model.learn(total_timesteps=250000)

Logging to Training/Logs/DQN_3
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 101      |
|    ep_rew_mean      | -926     |
|    exploration_rate | 0.985    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 5234     |
|    time_elapsed     | 0        |
|    total_timesteps  | 404      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 101      |
|    ep_rew_mean      | -978     |
|    exploration_rate | 0.969    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 5641     |
|    time_elapsed     | 0        |
|    total_timesteps  | 808      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 101      |
|    ep_rew_mean      | -950     |
|    exploration_rate | 0.954    |
| time/               | 

Saving the model:

In [None]:
path = os.path.join('Training', 'Saved Models', f'DQN_250k_env_house')
energy_saving_model.save(path)
del energy_saving_model

In [None]:
energy_model = DQN.load(path, env_energy_saving)

### Evaluation:

In [None]:
mean_reward, std_reward = evaluate_policy(energy_model, env_energy_saving, n_eval_episodes=10, render=True)

print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}")

In [None]:
episodes = 5

for episode in range(1, episodes+1):
    obs = env_energy_saving.reset() 
    done = False
    score = 0

    while not done:
        # obs = obs.reshape((1, -1))
        action, _ = energy_model.predict(obs)  
        obs, reward, done, info = env_energy_saving.step(action)
        score += reward

    print('Episode: {} Score {}'.format(episode, score))

### Action-Critic Model

In [None]:
energy_saving_model = A2C('MlpPolicy', env_energy_saving, verbose=1, tensorboard_log=log_path)

In [None]:
energy_saving_model.learn(total_timesteps=250000)

In [None]:
path = os.path.join('Training', 'Saved Models', f'A2C_250k_env_house')
energy_saving_model.save(path)

del energy_saving_model

In [None]:
energy_model = A2C.load(path, env_energy_saving)

#### Evalutaion

In [None]:
mean_reward, std_reward = evaluate_policy(energy_model, env_energy_saving, n_eval_episodes=10, render=True)

print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}")

In [None]:
episodes = 5

for episode in range(1, episodes+1):
    obs = env_energy_saving.reset() 
    done = False
    score = 0

    while not done:
        # obs = obs.reshape((1, -1))
        action, _ = energy_model.predict(obs)  
        obs, reward, done, info = env_energy_saving.step(action)
        score += reward

    print('Episode: {} Score {}'.format(episode, score))

### PPO

In [None]:
energy_saving_model = PPO('MlpPolicy', env_energy_saving, verbose=1, tensorboard_log=log_path)

In [None]:
energy_saving_model.learn(total_timesteps=250000)

In [None]:
path = os.path.join('Training', 'Saved Models', f'PPO_250k_env_house')
energy_saving_model.save(path)

del energy_saving_model

In [None]:
energy_model = PPO.load(path, env_energy_saving)

#### Evalutation:

In [None]:
mean_reward, std_reward = evaluate_policy(energy_model, env_energy_saving, n_eval_episodes=10, render=True)

print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}")

In [None]:
episodes = 5

for episode in range(1, episodes+1):
    obs = env_energy_saving.reset() 
    done = False
    score = 0

    while not done:
        # obs = obs.reshape((1, -1))
        action, _ = energy_model.predict(obs)  
        obs, reward, done, info = env_energy_saving.step(action)
        score += reward

    print('Episode: {} Score {}'.format(episode, score))