## Decentralized Multi-Agent Reinforcement Learning Approach to Reduce the Electricity Consumption and Billing on the Demand Side
### Dissertation Project Code
#### 2556613S
#### MSc in Data Science

In [None]:
# Import area
import os
import torch
import gym
import pandas as pd
import re
import itertools
import datetime
import numpy as np
import math
from sklearn.preprocessing import OrdinalEncoder
import json
import itertools 
import random


import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from itertools import count
from collections import namedtuple, deque

import matplotlib.pyplot as plt
import seaborn as sns

import copy
from sklearn import preprocessing 
import pickle
import json


# From the local copy to solve error
# AttributeError: Can't pickle local object 'S.<locals>.f'
#  when using pickle to save the object
import fuzzylogic_local.classes
import fuzzylogic_local.functions

import warnings

### Standalone functions

In [None]:
# Seeds
def call_seeds(seed_:int):
    """Seed the random number generators used across the project.

    Seeds NumPy, Python's ``random`` and PyTorch (CPU and CUDA), sets the
    ``PYTHONHASHSEED`` environment variable and configures cuDNN to be
    deterministic with benchmarking disabled.

    Args:
        seed_ (int): Seed to be used
    """
    # These seeders all take the same single integer argument
    for seed_fn in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed_)
    os.environ['PYTHONHASHSEED'] = str(seed_)
    torch.cuda.manual_seed_all(seed_)
    # Trade cuDNN auto-tuning for reproducible results
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

# General seed for the project
SEED = 1993

# Seed everything once with the project seed
call_seeds(SEED)

In [None]:
def get_encode_temp(temp:float):
    """Encode a temperature as the index of the fuzzy set it belongs to most.

    Three fuzzy sets are defined on the [-15, 70] domain and evaluated at
    ``temp``; the position of the largest membership value in the fixed
    order (cold, medium, hot) is the encoding. On ties ``np.argmax`` returns
    the first maximal position.

    Args:
        temp (float): Temperature to be converted

    Returns:
        int: Encoding of the given temperature (0 = cold, 1 = medium, 2 = hot)
    """
    fuzzy_fn = fuzzylogic_local.functions

    # Fuzzy variable and its three states
    temperature = fuzzylogic_local.classes.Domain("temperature", -15, 70)
    temperature.cold = fuzzy_fn.S(-15, 20)
    temperature.medium = fuzzy_fn.triangular(15,30, c=23)
    temperature.hot = fuzzy_fn.R(25,70)

    # Membership value of the temperature in each state, in encoding order
    memberships = [
        fuzzy_set(temp)
        for fuzzy_set in (temperature.cold, temperature.medium, temperature.hot)
    ]
    # The state with the highest membership wins
    return np.argmax(memberships)


def get_encode_wind(wind_temp:float):
    """Encode a wind speed as the index of the fuzzy set it belongs to most.

    Three fuzzy sets are defined on the [0, 100] domain and evaluated at
    ``wind_temp``; the position of the largest membership value in the fixed
    order (light, moderate, strong) is the encoding. On ties ``np.argmax``
    returns the first maximal position.

    Args:
        wind_temp (float): Wind speed to be converted

    Returns:
        int: Encoding of the given wind speed (0 = light, 1 = moderate, 2 = strong)
    """
    fuzzy_fn = fuzzylogic_local.functions

    # Fuzzy variable and its three states
    wind = fuzzylogic_local.classes.Domain("wind", 0, 100)
    wind.light = fuzzy_fn.S(0,24)
    wind.moderate = fuzzy_fn.triangular(13,30,c=19)
    wind.strong = fuzzy_fn.R(24,100)

    # Membership value of the wind speed in each state, in encoding order
    memberships = [
        fuzzy_set(wind_temp)
        for fuzzy_set in (wind.light, wind.moderate, wind.strong)
    ]
    # The state with the highest membership wins
    return np.argmax(memberships)


In [None]:
# Function to log a message with datetime
def log_message(message:str):
    """Print a message to the console prefixed with the current date and time.

    The output has the form ``[YYYY-MM-DD HH:MM:SS] message``.

    Args:
        message (str): Message to be printed
    """
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # str.join keeps the str-only contract of plain concatenation
    print("".join(("[", stamp, "] ", message)))

In [None]:
# Code taken from: 
#   Greenstick [internet]. Text Progress Bar in the Console. 2017. [cited 2021 November 20]. Available from: https://stackoverflow.com/a/34325723/2394370
# Print iterations progress.
def printProgressBar (iteration:int, total:int, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
    """Render a progress bar on the console.

    The bar is written on a single line that starts with a carriage return,
    so consecutive calls overwrite each other. A final newline is printed
    once ``iteration == total``.

    Args:
        iteration (int): Current iteration
        total (int): Total number of iterations. A total of 0 (nothing to
            iterate) is rendered as a completed bar instead of raising
            ZeroDivisionError.
        prefix (str, optional): Text printed before the bar. Defaults to ''.
        suffix (str, optional): Text printed after the percentage. Defaults to ''.
        decimals (int, optional): Number of decimals shown in the percentage. Defaults to 1.
        length (int, optional): Length of the progress bar in characters. Defaults to 100.
        fill (str, optional): Character used for the completed part of the bar. Defaults to '█'.
        printEnd (str, optional): Character printed at the end of the line. Defaults to "\r".
    """
    if total:
        fraction = iteration / float(total)
        filledLength = int(length * iteration // total)
    else:
        # Nothing to process: show the bar as already complete
        fraction = 1.0
        filledLength = length
    percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
    bar = fill * filledLength + '-' * (length - filledLength)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
    # Print New Line on Complete
    if iteration == total: 
        print()

In [None]:
def create_states(dataframe, list_appliances:list, list_weather:list, list_price_meta=None, is_training=False):
    """Create one State per row of a dataframe.

    For every row an appliance object is instantiated per entry of
    ``list_appliances`` (via ``instance_object_appliance``). Appliances that
    are ``IntermittentAppliance`` instances additionally receive the mean
    consumption of that appliance for the row's (month, hour) pair, computed
    over the whole dataframe.

    Args:
        dataframe (DataFrame): Dataframe with the information to create each state. A state per row.
            Must contain the columns 'month', 'day', 'hour', 'minute' and 'Price'
            plus every column named in the list arguments.
        list_appliances (list): List with the name of appliances to consider
        list_weather (list): List with the weather column name
        list_price_meta (list, optional): List with the column name of price information.
            Defaults to None, which is treated as an empty list.
        is_training (bool, optional): Kept for interface compatibility; not used by this function.
            Defaults to False.

    Returns:
        list: List of States 
    """
    # Avoid a shared mutable default argument
    list_price_meta = [] if list_price_meta is None else list_price_meta

    list_states = []
    total_rows = len(dataframe)
    # Start the progress bar
    log_message("Starting to create states")
    printProgressBar(0, total_rows, prefix = 'Progress:', suffix = 'Complete', length = 50)

    # Average consumption per (month, hour, appliance); only the appliance
    #   columns are averaged, the other columns are not needed here
    df_avg = dataframe.groupby(['month','hour'])[list_appliances].mean().reset_index()
    avg_consumption_ = {}
    for _, avg_row in df_avg.iterrows():
        for appliance_ in list_appliances:
            avg_consumption_[(avg_row['month'], avg_row['hour'], appliance_)] = float(avg_row[appliance_])

    # Iterate over the dataframe, one state per row
    for position, (_, row) in enumerate(dataframe.iterrows(), start=1):
        # Save the appliances as dictionary
        dict_state_app = {}
        for appliance in list_appliances:
            # Get the instance of the object
            obj = instance_object_appliance(appliance, row[appliance])
            # If the obj is Intermittent then add the avg consumption
            if isinstance(obj, IntermittentAppliance):
                obj.avg_consumption = avg_consumption_[(row['month'], row['hour'], appliance)]
            dict_state_app[appliance] = obj
        # Weather and price metadata of the row
        dict_state_weather = {weather: row[weather] for weather in list_weather}
        dict_price_metadata = {price_meta: row[price_meta] for price_meta in list_price_meta}
        # Add the created State
        list_states.append(
            State(
                row['month'],
                row['day'],
                row['hour'],
                row['minute'],
                row['Price'],
                dict_state_weather,
                dict_state_app,
                dict_price_metadata = dict_price_metadata
            )
        )
        printProgressBar(position, total_rows, prefix = 'Progress:', suffix = 'Complete', length = 50)
    log_message("Finishing to create states")
    return list_states

In [None]:
# Method to get the object given a column name
def instance_object_appliance(name:str, numeric_value:float):
    """Instantiate the appliance object that corresponds to a column name.

    The value is passed as ``consumption`` for every appliance except
    'solar', where it is passed as ``generation``. Unknown names are reported
    on the console and yield None.

    Args:
        name (str): Name of the column of the appliance
        numeric_value (float): Value to be set

    Returns:
        object: The instanced object, or None when the name is not known
    """
    # Column name -> lazy constructor; nothing is built until the lookup succeeds
    builders = {
        'air1': lambda: AirCompressor(name, consumption = numeric_value),
        'bathroom1': lambda: Room(name, type_room="bathroom", consumption = numeric_value),
        'car1': lambda: Car(name, consumption = numeric_value),
        'clotheswasher1': lambda: ClothesWasher(name, consumption = numeric_value),
        'dishwasher1': lambda: DishWasher(name, consumption = numeric_value),
        'disposal1': lambda: GarbageDisposal(name, consumption = numeric_value),
        'drye1': lambda: Dryer(name, type_dryer="electric", consumption = numeric_value),
        'dryg1': lambda: Dryer(name, type_dryer="gas", consumption = numeric_value),
        'kitchenapp1': lambda: Room(name, type_room="kitchen", consumption = numeric_value),
        'kitchenapp2': lambda: Room(name, type_room="kitchen", consumption = numeric_value),
        'microwave1': lambda: Microwave(name, consumption = numeric_value),
        'oven1': lambda: Oven(name, consumption = numeric_value),
        'refrigerator1': lambda: Refrigerator(name, consumption = numeric_value),
        'solar': lambda: SolarPV(name, generation = numeric_value),
        'livingroom1': lambda: Room(name, type_room="livingroom", consumption = numeric_value),
        'diningroom1': lambda: Room(name, type_room="dinningroom", consumption = numeric_value),
        'freezer': lambda: Freezer(name, consumption = numeric_value),
        'water_heater': lambda: WaterHeater(name, consumption = numeric_value),
        'grid': lambda: Grid(name, consumption = numeric_value),
    }
    build = builders.get(name)
    if build is None:
        print("ERROR: None Found, check instance_object_appliance method. Param: ",name)
        return None
    return build()

In [None]:
# Encode columns
def get_encode_columns(dataframe, list_columns:list):
    """Label-encode the given columns of a dataframe.

    A separate ``LabelEncoder`` is fitted per column and the column is
    overwritten with its encoded values. The passed dataframe itself is
    modified and returned.

    Args:
        dataframe (DataFrame): Dataframe to get the information to create the encoder
        list_columns (list): Columns from which get the encoder

    Returns:
        tuple: Dictionary of fitted encoders keyed by column name, and the altered dataframe
    """
    dictionary_encoders = {}
    for column_ in list_columns:
        raw_values = dataframe[column_].tolist()
        # Fit the encoder on the column values (fit returns the encoder itself)
        column_encoder = preprocessing.LabelEncoder().fit(raw_values)
        # Replace the column with its encoded version
        dataframe[column_] = column_encoder.transform(raw_values)
        # Keep the encoder so the values can be decoded later
        dictionary_encoders[column_] = column_encoder

    return dictionary_encoders, dataframe

In [None]:
def get_type_of_date(date_):
    """Classify a date as weekend or weekday.

    Args:
        date_: Date-like object exposing a ``dayofweek`` attribute
            (Monday = 0 ... Sunday = 6), e.g. a pandas Timestamp

    Returns:
        str: "Weekends" for Saturday and Sunday, "Weekdays" otherwise
    """
    # Day numbers above 4 (Friday) are the weekend
    return "Weekends" if date_.dayofweek > 4 else "Weekdays"

def create_pivot_df(start_date_=datetime.date(2018, 1, 1), end_date_=datetime.date(2019, 1, 1)):
    """Generate a dataframe with continuous date every 15 minutes to serve
    as a pivot and complete the missing data

    The range is half-open: it starts at ``start_date_`` and contains every
    15-minute slot strictly before ``end_date_``. With the defaults this is
    the whole of 2018, ending at 2018-12-31 23:45.

    Args:
        start_date_ (date or datetime, optional): First slot (inclusive). Defaults to datetime.date(2018, 1, 1).
        end_date_ (date or datetime, optional): End of the range (exclusive). Defaults to datetime.date(2019, 1, 1).

    Returns:
        DataFrame: Columns 'datetime_pivot', 'year_pivot', 'month_pivot',
            'day_pivot', 'hour_pivot' and 'minute_pivot', one row per slot
    """
    start_ts = pd.Timestamp(start_date_)
    end_ts = pd.Timestamp(end_date_)
    # Filter on the end explicitly. Subtracting minutes from a datetime.date
    #   is a no-op (date arithmetic only uses whole days), so the result must
    #   not depend on that subtraction.
    list_dates = pd.date_range(start_ts, end_ts, freq='15min')
    list_dates = list_dates[list_dates < end_ts]
    # Create the dataframe
    pivot_df = pd.DataFrame({"datetime_pivot": list_dates})
    # Set the datetime columns as numeric ones
    stamps = pivot_df['datetime_pivot'].dt
    pivot_df['year_pivot'] = stamps.year
    pivot_df['month_pivot'] = stamps.month
    pivot_df['day_pivot'] = stamps.day
    pivot_df['hour_pivot'] = stamps.hour
    pivot_df['minute_pivot'] = stamps.minute
    return pivot_df

def _expand_season_prices(df_prices, calendar_df, season, months):
    """Attach the tariff rows of one season to every calendar hour of its months."""
    season_prices = df_prices[df_prices["Season"] == season]
    # Copy so the new columns are not written onto a slice of calendar_df
    season_calendar = calendar_df[calendar_df['month'].isin(months)].copy()
    # Add the season and day type columns used as merge keys
    season_calendar["type"] = season
    # Same rule as get_type_of_date: day-of-week above 4 (Friday) is weekend
    season_calendar["daytype"] = np.where(
        season_calendar['date'].dt.dayofweek > 4, "Weekends", "Weekdays"
    )
    # Right merge: one output row per calendar hour (per matching tariff row)
    return pd.merge(
        season_prices,
        season_calendar,
        how='right',
        left_on=['Season', 'DayType', 'Hour'],
        right_on=['type', 'daytype', 'hour']
    )


def load_price_file(path_price, start_date=datetime.date(2018, 1, 1), end_date=datetime.date(2019, 1, 1)):
    """Load the price file and expand it to one row per calendar hour.

    The CSV is expected to contain at least the columns 'Season' (with the
    values "Summer" / "Non-Summer"), 'DayType' ("Weekdays" / "Weekends") and
    'Hour'. Every hour between ``start_date`` and ``end_date`` is matched
    with the tariff row of its season (June to September are summer), day
    type and hour of day.

    Args:
        path_price (str): Path to the csv file
        start_date (date, optional): First day to expand. Defaults to datetime.date(2018, 1, 1).
        end_date (date, optional): End of the range; the last generated hourly
            timestamp is dropped, so with a midnight end this day is excluded.
            Defaults to datetime.date(2019, 1, 1).

    Returns:
        DataFrame: Price columns of the CSV plus 'date', 'month', 'day', 'hour',
            'type' and 'daytype'. Summer rows come first, then the non-summer
            rows; each part keeps its own 0-based index.
    """
    # Read the file of prices
    df_prices = pd.read_csv(path_price)
    # Months per season
    months_summer = [6, 7, 8, 9]
    months_no_summer = [1, 2, 3, 4, 5, 10, 11, 12]

    # One row per hour of the requested range
    list_dates = pd.date_range(start_date, end_date, freq='h')[:-1]
    calendar_df = pd.DataFrame({"date": list_dates})
    calendar_df['month'] = calendar_df['date'].dt.month
    calendar_df['day'] = calendar_df['date'].dt.day
    calendar_df['hour'] = calendar_df['date'].dt.hour

    # Expand each season separately
    df_price_summer = _expand_season_prices(df_prices, calendar_df, "Summer", months_summer)
    df_price_no_summer = _expand_season_prices(df_prices, calendar_df, "Non-Summer", months_no_summer)

    # Stack both seasons (DataFrame.append was removed in pandas 2.0)
    return pd.concat([df_price_summer, df_price_no_summer])

In [None]:
def init_loader(
    path_data:str, 
    path_appliances:str, 
    path_metadata:str, 
    path_prices:str,
    path_weather:str,
    building_ids:list, 
    city:str,
    months_to_load:list,
    is_training = False,
    n_rows_episode = 30,
    encode_columns_price = ['Type', 'DayType', 'Season'],
    encode_columns_weather = ['weather'],
    minute_weather_filter = 51, # 51 was for Texas
    copy_env=None
):
    """Load the information from a CSV to create buildings and their information

    For every building listed in ``building_ids`` the consumption data is
    aligned on a continuous 15-minute grid, missing values are filled with
    the (month, hour) mean, weather and price information is joined in, and
    a list of states is created from the resulting rows.

    Args:
        path_data (str): Path to the main data (the Pecan Street Dataset)
        path_appliances (str): Path to a csv containing the information of the appliance to use
        path_metadata (str): Path to the building metadata
        path_prices (str): Path to the CSV with the price information
        path_weather (str): Path to the CSV with the weather information
        building_ids (list): List of the building IDs
        city (str): Name of the city to be used
        months_to_load (list): List of the months as int to be used
        is_training (bool, optional): Control the flow of training and test. Defaults to False.
        n_rows_episode (int, optional): Count of the rows to be used. Defaults to 30.
        encode_columns_price (list, optional): List of the columns to encode from price data. Defaults to ['Type', 'DayType', 'Season'].
        encode_columns_weather (list, optional): List of the columns to encode from weather data. Defaults to ['weather'].
        minute_weather_filter (int, optional): To filter the data to avoid more than observation every 15 minutes. Defaults to 51.
        copy_env (object, optional): Environment to take the state bounds from. When given,
            ``state_mins`` / ``state_maxs`` of each building are copied from
            ``copy_env.buildings`` and the state/action spaces are not set here. Defaults to None.

    Returns:
        tuple: ``(buildings, observation_spaces, action_spaces, None, None,
            list_appliances, encoder_used_price)`` where ``buildings`` is a
            dictionary keyed by building id
    """
    # The representation of buldings is as json objects
    buildings, observation_spaces, action_spaces = {},[],[]
    # Read the files of the building in the city
    metadata = pd.read_csv(path_metadata)
    # Remove the first row
    metadata = metadata.iloc[1:]
    metadata = metadata[metadata['dataid'].isin(building_ids)]
    # Load price dataframe
    prices_data = load_price_file(path_prices)
    # Encode the columns price
    encoder_used_price, prices_data = get_encode_columns(prices_data, encode_columns_price)
    # List of columns on weather dataframe
    list_weather_columns = ['temperature[C]', 'weather', 'wind[mph]', 'humidity[%]', 'barameter[mbar]', 'visibility[km]']
    # List columns on price dataframe
    list_price_columns = ['Price', 'Type', 'DayType', 'Season']
    # Price columns stored as metadata of each state
    list_price_metadata = ['DayType', 'Type', 'Season']
    # Metadata columns copied from the metadata file for every building
    metadata_fields = (
        'building_type', 'city', 'state',
        'egauge_1min_data_availability', 'egauge_1s_data_availability',
        'water_data_availability', 'gas_data_availability',
        'indoor_temp_min_time', 'indoor_temp_max_time', 'indoor_temp_data_availability',
        'house_construction_year', 'total_square_footage',
        'first_floor_square_footage', 'second_floor_square_footage', 'third_floor_square_footage',
        'pv', 'pv_panel_direction', 'total_amount_of_pv',
        'amount_of_south_facing_pv', 'amount_of_west_facing_pv', 'amount_of_east_facing_pv',
    )
    # Removes a bracketed suffix such as "[C]" from a column name
    bracket_pattern = re.compile(r"[\[].*?[\]]")
    # Weather column names without the bracketed suffix
    list_weather = [bracket_pattern.sub("", weather_column) for weather_column in list_weather_columns]
    # Get pivot of date
    dataframe_date_pivot = create_pivot_df()

    # First read the files
    df_city_general = pd.read_csv(path_data)
    # Copy values when nan of dryers (gas to electric)
    df_city_general['drye1'] = df_city_general['drye1'].fillna(df_city_general['dryg1'])
    # Read appliances
    df_appliances = pd.read_csv(path_appliances)
    # Get the list of appliances to use
    list_appliances = list(df_appliances[city])
    # Clean NaNs
    list_appliances = [ap for ap in list_appliances if str(ap) != 'nan']
    # Filter the columns (copy so the grid column can be rewritten safely)
    df_city_general = df_city_general[["dataid", "local_15min"]+list_appliances].copy()
    # If there is grid
    if 'grid' in list_appliances:
        temp_lst_app = list_appliances.copy()
        temp_lst_app.remove("grid")
        # We need to get the real value of the rest of appliances
        df_city_general['grid']  = df_city_general['grid'] - df_city_general[temp_lst_app].sum(axis=1)
        # And make sure there is no negative numbers
        df_city_general['grid'] = df_city_general['grid'].apply(lambda x: x if x > 0 else 0)
    # Read weather file
    df_weather = pd.read_csv(path_weather)
    df_weather = df_weather[df_weather['minute'] == minute_weather_filter].copy()
    # Fill nans
    df_weather[['temperature[C]', 'wind[mph]']] = df_weather[['temperature[C]', 'wind[mph]']].fillna(value=df_weather[['temperature[C]', 'wind[mph]']].mean())
    # Encode the columns weather
    encoder_used_weather, df_weather = get_encode_columns(df_weather, encode_columns_weather)
    # For each building get the important metadata for this project
    for _, row in metadata.iterrows():
        building_data = {}
        building_data["id_building"] = row["dataid"]
        building_metadata = {field: row[field] for field in metadata_fields}
        # Attach the collected metadata (not the building dictionary itself,
        #   which would create a self-referencing dictionary)
        building_data["metadata"] = building_metadata
        # Add the basic information
        building_data["type_building"] = "house"
        # Create object for appliances
        building_data['air_compressor'] = AirCompressor()
        building_data['bathroom'] = Room(type_room="bathroom")
        building_data['car'] = Car()
        building_data['washer'] = ClothesWasher()
        building_data['dish_washer'] = DishWasher()
        building_data['garbage_disposal'] = GarbageDisposal()
        building_data['dryer'] = Dryer(type_dryer="electric")
        building_data['kitchen'] = Room(type_room="kitchen")
        building_data['microwave'] = Microwave()
        building_data['oven'] = Oven()
        building_data['refrigerator'] = Refrigerator()
        building_data['solar_pv'] = SolarPV()
        building_data['livingroom'] = Room(type_room="livingroom")
        building_data['dinningroom'] = Room(type_room="dinningroom")
        building_data['freezer'] = Freezer()
        building_data['water_heater'] = WaterHeater()
        building_data['list_appliances'] = list_appliances
    
        # Initialize the bulding object
        building = Building(**building_data)
        if copy_env is not None:
            # Reuse the state bounds of the environment to copy from
            building.simulation = {}
            building.state_mins = copy_env.buildings[row["dataid"]].state_mins
            building.state_maxs = copy_env.buildings[row["dataid"]].state_maxs
        
        # Load data into the simulation property
        # NOTE: The data per appliance should be in an array form 
        #       and each appliance has its own array
        # Filter rows by building id
        df_city = df_city_general[df_city_general['dataid'] == int(row["dataid"])].copy(deep=True)

        # Add the time and date attributes in the simulation 
        #   (month, day, hour, minute)
        df_city['dateTime'] = pd.to_datetime(df_city['local_15min'], utc=True).dt.tz_convert('US/Central')
        df_city['year'] = pd.DatetimeIndex(df_city['dateTime']).year
        df_city['month'] = pd.DatetimeIndex(df_city['dateTime']).month
        df_city['day'] = pd.DatetimeIndex(df_city['dateTime']).day
        df_city['hour'] = pd.DatetimeIndex(df_city['dateTime']).hour
        df_city['minute'] = pd.DatetimeIndex(df_city['dateTime']).minute

        # Use the pivot dataframe to ensure there is no step lost
        #   when there is missed data, add the avg consumption 
        df_city = pd.merge(
            dataframe_date_pivot,
            df_city,
            how='left',
            left_on=['year_pivot', 'month_pivot', 'day_pivot', 'hour_pivot', 'minute_pivot'],
            right_on=['year', 'month', 'day', 'hour', 'minute']
        )

        # Fill NaN approach based from:
        #   jpp [internet]. Pandas: filling missing values by mean in each group. 2021. [cited 2021 November 20]. Available from: https://stackoverflow.com/a/53339320/2394370
        df_city[list_appliances] = df_city[list_appliances].fillna(
            df_city.groupby(['month_pivot', 'hour_pivot'])[list_appliances].transform('mean')
        )

        # Select the columns
        df_city = df_city[["dataid", "datetime_pivot"] + ['year_pivot', 'month_pivot', 'day_pivot', 'hour_pivot', 'minute_pivot'] + list_appliances]
        # Rename the columns
        df_city.columns = ['dataid', 'local_15min', 'year', 'month', 'day', 'hour', 'minute']+ list_appliances

        # Filter those months that were specified
        df_city = df_city[df_city['month'].isin(months_to_load)]

        # Join with the city dataframe and weather
        df_city = pd.merge(
            df_city, 
            df_weather,  
            how='left', 
            left_on=['year', 'month', 'day', 'hour'], 
            right_on=['year', 'month', 'day', 'hour']
        )
        # Replace with mean
        df_city[list_weather_columns] = df_city[list_weather_columns].fillna(
            df_city.groupby(['month', 'hour'])[list_weather_columns].transform('mean')
        )
        # Select columns ('minute_x' is the minute of the consumption data,
        #   the weather frame has its own 'minute' column)
        df_city = df_city[["dataid", "local_15min"] + ['year', 'month', 'day', 'hour', 'minute_x'] + list_appliances + list_weather_columns]
        # Rename columns
        df_city.columns = ['dataid', 'local_15min', 'year', 'month', 'day', 'hour', 'minute']+ list_appliances + list_weather_columns

        # Join with electricity prices
        df_city = pd.merge(
            df_city, 
            prices_data,  
            how='left', 
            left_on=['month', 'day', 'hour'], 
            right_on = ['month', 'day', 'Hour']
        )
        df_city = df_city[["dataid", "local_15min"] + ['year', 'month', 'day', 'hour_x', 'minute'] + list_appliances+list_weather_columns + list_price_columns]
        df_city.columns = ["dataid", "local_15min"] + ['year', 'month', 'day', 'hour', 'minute'] + list_appliances+list_weather_columns + list_price_columns

        # Clean brackets from the column names
        df_city.columns = [bracket_pattern.sub("", column_name) for column_name in df_city.columns]

        if not is_training:
            # Sort by local_15min
            df_city = df_city.sort_values(by=['local_15min'])
        # Set columns to positive values on the consumption ones
        appl_tmp = list_appliances.copy()
        # Remove solar from the tmp
        if 'solar' in appl_tmp:
            appl_tmp.remove('solar')
        # Set columns to positive ones
        df_city[appl_tmp] = df_city[appl_tmp].abs()
        # ----------------- END LOADING DATAFRAME -----------------
        
        # Load the weather columns
        for weather_col_ in list_weather:
            building.simulation[weather_col_] = list(df_city[weather_col_])
        
        # Add the datetime information
        building.simulation['month'] = list(df_city['month'])
        building.simulation['day'] = list(df_city['day'])
        building.simulation['hour'] = list(df_city['hour'])
        building.simulation['minute'] = list(df_city['minute'])

        # Load the consumption per appliance
        for appliance in list_appliances:
            building.simulation[appliance] = list(df_city[appliance])

        # Load the prices files
        building.simulation['price'] = list(df_city['Price'])
        building.simulation['type'] = list(df_city['Type'])
        building.simulation['day_type'] = list(df_city['DayType'])
        building.simulation['season'] = list(df_city['Season'])
        
        # Remove local_15min so there is no date column
        df_city = df_city[[column_name for column_name in df_city.columns if column_name != 'local_15min']]
        
        # Get the max and mins of each column to conform the state box
        if copy_env is None:
            building.state_mins = df_city.min().to_frame().T
            building.state_maxs = df_city.max().to_frame().T
        
        # Check if training to add N more rows
        if is_training:
            # Get the difference of rows needed
            needed_rows = n_rows_episode - len(df_city)
            # If truly need more rows
            if needed_rows > 0:
                # Random sample
                df_add = df_city.sample(n=needed_rows, replace=True)
                # Add at the end of the original dataframe
                df_city = pd.concat([df_city, df_add], ignore_index=True)

            # Ensuring correct distribution: 10% of the episode rows with
            #   encoded Type 1 and 90% with encoded Type 0
            sample_on = df_city[df_city['Type'] == 1].copy(deep=True)
            sample_off = df_city[df_city['Type'] == 0].copy(deep=True)
            sample_on = sample_on.sample(n=int(0.1*n_rows_episode), replace=True)
            sample_off = sample_off.sample(n=int(0.9*n_rows_episode), replace=True)

            df_city = pd.concat([sample_on, sample_off])

            # Shuffle the dataframe
            df_city = df_city.sample(frac=1).reset_index(drop=True)
                 
        # Get max and min values of each appliance
        state_low = building.get_mins()
        state_high = building.get_maxs()

        # Actions 
        action_low, action_high = [], []
        # Create a list of states
        log_message("Working with: "+str(building.id_building))
        building.list_states = create_states(df_city, building.get_list_appliances(), list_weather, list_price_meta=list_price_metadata,is_training=is_training)

        # For each list_states in buidling set the intermittent applianaces
        build_intermittent_app = building.get_intermittedent_appliances()
        for state_ in building.list_states:
            state_.intermittent_appliances = build_intermittent_app
        # From the first state get those appliances that can be altered
        appliances_dictionary_state = building.list_states[0].dictionary_of_appliances
        list_intermittent_appliances = [
            appliance_object
            for appliance_object in appliances_dictionary_state.values()
            if isinstance(appliance_object, IntermittentAppliance)
        ]
        
        # One action per intermittent appliance, bounded by 0.0 and 1.0
        for _ in list_intermittent_appliances:
            action_low.append(0.0)
            action_high.append(1.0)
        
        # Set the state and action spaces only when there is no
        #   environment to copy from
        if copy_env is None:
            building.set_state_space(np.array(state_high), np.array(state_low))
            building.set_action_space(np.array(action_high), np.array(action_low))
        
        # Add information to the corresponding list
        observation_spaces.append(building.observation_space)
        action_spaces.append(building.action_space)
        building.init_vars()

        buildings[row["dataid"]] = building
        # Delete the dataframe from memory
        del df_city     

    return buildings, observation_spaces, action_spaces, None, None, list_appliances, encoder_used_price  

### Classes 

In [None]:
class Human:
    """Simulated resident that accepts or rejects the suggestions of the agent.

    The age selects a pair of accept/reject probabilities and a fuzzy rule
    system over temperature, wind speed and hour of the day yields an
    acceptance level. ``get_decision`` combines both into 1.0/0.0 decisions.
    """

    def __init__(self, age:int):
        """Class constructor

        Args:
            age (int): Age to be considered in the simulated behaviour
        """
        self.age = age
        self.generation, self.weights = self.get_generation()

    def get_generation(self):
        """Return the generation label (informative only) and the weights
        ``[P(accept), P(reject)]`` used when sampling the final decision.

        The thresholds are checked from the oldest group downwards so that
        non-integer ages (e.g. 40.5) land in a group instead of falling
        between two inclusive integer ranges.

        Returns:
            tuple: Generation (str) and weights (list of two floats adding to 1)
        """
        age = self.age
        if age >= 76:
            return 'PrevBoom', [0.1, 0.9]
        if age >= 57:
            return 'Boomer', [0.3, 0.7]
        if age >= 41:
            return 'genX', [0.7, 0.3]
        if age >= 25:
            return 'millenial', [0.9, 0.1]
        # Under 25: the previous fallback returned a bare two-element list,
        #   which unpacked into a scalar weight and made np.random.choice
        #   fail later on. A valid (label, weights) pair is returned instead.
        # NOTE(review): [1.0, 0.0] extrapolates the "younger accepts more"
        #   trend of the other groups - confirm the intended probabilities.
        return 'genZ', [1.0, 0.0]

    def evaluate_fuzzy(self, tp:float, w:float, tm:float, plot=False):
        """Get the evaluation of the fuzzy system.
        If plot is true, then it will create the graphs of the fuzzy variables.

        Args:
            tp (float): Temperature to be considered (domain -15 to 70)
            w (float): Wind speed to be considered (domain 0 to 100)
            tm (float): Hour of the day to be considered (domain 0 to 23)
            plot (bool, optional): If True, plots the variables. Defaults to False.

        Returns:
            float: Level of acceptance of the simulated human as inferred by
            the rule set over the acceptance domain (0 to 100)
        """
        # Define a fuzzy system
        temperature = fuzzylogic_local.classes.Domain("temperature", -15, 70)
        wind = fuzzylogic_local.classes.Domain("wind", 0, 100)
        time = fuzzylogic_local.classes.Domain("time", 0, 23)
        acceptance = fuzzylogic_local.classes.Domain("acceptance", 0, 100)

        # Define states for temperature
        temperature.cold = fuzzylogic_local.functions.S(-15, 20)
        temperature.medium = fuzzylogic_local.functions.triangular(15,30, c=23)
        temperature.hot = fuzzylogic_local.functions.R(25,70)

        # Define states for wind speed
        wind.light = fuzzylogic_local.functions.S(0,24)
        wind.moderate = fuzzylogic_local.functions.triangular(13,30,c=19)
        wind.strong = fuzzylogic_local.functions.R(24,100)

        # Define states for time
        time.early_morning = fuzzylogic_local.functions.S(6,7)
        time.morning = fuzzylogic_local.functions.trapezoid(6,7,12,13)
        time.afternoon = fuzzylogic_local.functions.trapezoid(12,13,18,19)
        time.night = fuzzylogic_local.functions.R(18,19)

        # Define states for acceptance, high is the complement of low
        acceptance.low = fuzzylogic_local.functions.S(35,50)
        acceptance.high = ~acceptance.low

        # Define fuzzy rules: one entry per (temperature, wind, time) combination
        rules = fuzzylogic_local.classes.Rule({
                (temperature.cold, wind.light, time.early_morning): acceptance.high,
                (temperature.cold, wind.moderate, time.early_morning): acceptance.high,
                (temperature.cold, wind.strong, time.early_morning): acceptance.low,

                (temperature.medium, wind.light, time.early_morning): acceptance.high,
                (temperature.medium, wind.moderate, time.early_morning): acceptance.high,
                (temperature.medium, wind.strong, time.early_morning): acceptance.low,

                (temperature.hot, wind.light, time.early_morning): acceptance.high,
                (temperature.hot, wind.moderate, time.early_morning): acceptance.low,
                (temperature.hot, wind.strong, time.early_morning): acceptance.low,

                (temperature.cold, wind.light, time.morning): acceptance.high,
                (temperature.cold, wind.moderate, time.morning): acceptance.high,
                (temperature.cold, wind.strong, time.morning): acceptance.low,

                (temperature.medium, wind.light, time.morning): acceptance.high,
                (temperature.medium, wind.moderate, time.morning): acceptance.high,
                (temperature.medium, wind.strong, time.morning): acceptance.low,

                (temperature.hot, wind.light, time.morning): acceptance.high,
                (temperature.hot, wind.moderate, time.morning): acceptance.low,
                (temperature.hot, wind.strong, time.morning): acceptance.low,

                (temperature.cold, wind.light, time.afternoon): acceptance.high,
                (temperature.cold, wind.moderate, time.afternoon): acceptance.high,
                (temperature.cold, wind.strong, time.afternoon): acceptance.low,

                (temperature.medium, wind.light, time.afternoon): acceptance.high,
                (temperature.medium, wind.moderate, time.afternoon): acceptance.low,
                (temperature.medium, wind.strong, time.afternoon): acceptance.low,

                (temperature.hot, wind.light, time.afternoon): acceptance.low,
                (temperature.hot, wind.moderate, time.afternoon): acceptance.high,
                (temperature.hot, wind.strong, time.afternoon): acceptance.low,

                (temperature.cold, wind.light, time.night): acceptance.high,
                (temperature.cold, wind.moderate, time.night): acceptance.high,
                (temperature.cold, wind.strong, time.night): acceptance.low,

                (temperature.medium, wind.light, time.night): acceptance.high,
                (temperature.medium, wind.moderate, time.night): acceptance.high,
                (temperature.medium, wind.strong, time.night): acceptance.low,

                (temperature.hot, wind.light, time.night): acceptance.high,
                (temperature.hot, wind.moderate, time.night): acceptance.low,
                (temperature.hot, wind.strong, time.night): acceptance.low,

            }
        )
        if plot:
            # Plot the membership functions of every fuzzy variable
            plt.figure(figsize=(15,10))
            plt.xlabel("Temperature Outside")
            plt.ylabel("Membership Degree")
            temperature.cold.plot()
            temperature.medium.plot()
            temperature.hot.plot()
            plt.show()

            plt.figure(figsize=(15,10))
            plt.xlabel("MPH self.wind")
            plt.ylabel("Membership Degree")
            wind.light.plot()
            wind.moderate.plot()
            wind.strong.plot()
            plt.show()

            plt.figure(figsize=(15,10))
            plt.xlabel("Time of the day")
            plt.ylabel("Membership Degree")
            time.early_morning.plot()
            time.morning.plot()
            time.afternoon.plot()
            time.night.plot()
            plt.show()

            plt.figure(figsize=(15,10))
            plt.xlabel("Acceptance")
            plt.ylabel("Membership Degree")
            acceptance.low.plot()
            acceptance.high.plot()
            plt.show()
        return rules({temperature: tp, wind:  w, time: tm})

    def get_decision(self, temperature:float, wind:float, time:float, size:int):
        """Get the final decisions after combining the evaluation of the fuzzy
        system with the age-dependent probability of acceptance

        Args:
            temperature (float): Temperature to be considered
            wind (float): Wind to be considered
            time (float): Time of the day to be considered
            size (int): Number of decisions (dependent of appliances) to return

        Returns:
            numpy.ndarray: ``size`` decisions, each 1.0 (accept) or 0.0 (reject)
        """
        value_acceptance_fuzzy = self.evaluate_fuzzy(temperature, wind, time)

        # The fuzzy output is collapsed to a binary accept/reject at level 35
        value_acceptance = 1.0 if value_acceptance_fuzzy > 35 else 0.0

        # Sample one accept(1.0)/reject(0.0) value per appliance using the
        #   weights of the generation
        prob_accept_fuzzy = np.random.choice([1.0, 0.0], size, p=self.weights)
        # Fuzzy_action * Pro(Accept Fuzzy Action)
        result = value_acceptance * prob_accept_fuzzy

        return result

In [None]:
class ContinuousAppliance:
    """Base class for the appliances that are never disconnected
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor, stores the name and the consumption of the appliance

        Args:
            name (str, optional): Name of the appliance. Defaults to "".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        # Fixed description of this kind of appliance
        self.type_appliance = "continuous"
        self.can_be_interrumpted = False
        # History of consumptions and the latest value seen
        self.electrical_consumption = list()
        self.last_consumption = None
        # Values given by the caller
        self.consumption = consumption
        self.name = name
        self.mean_consumption = 0.0

    def get_total_consumption(self):
        """Add up every consumption stored in the history

        Returns:
            float: Sum of all recorded consumptions
        """
        return sum(self.electrical_consumption)

    def energy_consumed(self, energy, action, consider_model=True, decision_h=0):
        """Energy consumed at a given step.
        The appliance cannot be altered, so the value of the data is returned
        untouched and the remaining arguments are ignored.

        Args:
            energy (float): Energy in the data
            action (int): Type of action (ignored)
            consider_model (bool, optional): Ignored. Defaults to True.
            decision_h (int, optional): Ignored. Defaults to 0.

        Returns:
            float: Consumption, identical to ``energy``
        """
        return energy

    def __repr__(self):
        """Text shown when the object is printed

        Returns:
            str: Class name followed by type, consumption and name
        """
        fields = (
            f"Type: {self.type_appliance!s}",
            f"Consumption: {self.consumption!s}",
            f"Name: {self.name!s}",
        )
        return f"{self.__class__.__name__} = " + " ; ".join(fields) + ".\n"

class IntermittentAppliance:
    """Base class for the appliances that can be disconnected
    """
    def __init__(self, name="", type_consumption = "direct", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Name of the appliance. Defaults to "".
            type_consumption (str, optional): Type of consumption. Defaults to "direct".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        # Fixed description of this kind of appliance
        self.type_appliance = "intermittent"
        self.can_be_interrumpted = True
        # History and usage bookkeeping
        self.electrical_consumption = list()
        self.number_of_usages = 0
        self.current_state = "off"
        self.type_consumption = type_consumption
        self.last_consumption = None
        self.consumption = consumption
        # Subclasses override the limit:
        #   0-3 per day - cleaning appliances
        #   1-15 per day - cooking appliances
        self.max_usages = 0
        self.name = name
        self.avg_consumption = 0

    def get_total_consumption(self):
        """Add up every consumption stored in the history

        Returns:
            float: Sum of all the consumptions recorded
        """
        return sum(self.electrical_consumption)

    def energy_consumed(self, energy, action, consider_model=True, decision_h=0):
        """Energy consumed at a given step once the action is applied.
        The action only takes effect when the model is considered and the
        human decision equals 1; otherwise the energy of the data is returned.

        Args:
            energy (float): Energy in the data
            action (int): Factor that multiplies the energy
            consider_model (bool, optional): Whether the model is in use. Defaults to True.
            decision_h (int, optional): Decision of the human, 1 means accepted. Defaults to 0.

        Returns:
            float: Consumption
        """
        # Without the model or the acceptance the data is left untouched
        if not (consider_model and decision_h == 1):
            return energy

        # A missing reading counts as no consumption
        if np.isnan(energy):
            energy = 0.0
        # Turning on an appliance that was off in the data:
        #   the average consumption stands in for the unknown energy
        if action > 0 and energy <= 0:
            energy = self.avg_consumption
        return energy * action

    def __repr__(self):
        """Text shown when the object is printed

        Returns:
            str: Class name followed by type, consumption, name and average consumption
        """
        fields = (
            f"Type: {self.type_appliance!s}",
            f"Consumption: {self.consumption!s}",
            f"Name: {self.name!s}",
            f"Avg Cons: {self.avg_consumption!s}",
        )
        return f"{self.__class__.__name__} = " + " ; ".join(fields) + ".\n"

In [None]:
# Should have a class per appliance
class ClothesWasher(IntermittentAppliance):
    """Clothes washer, an appliance that can be disconnected
    (subclass of IntermittentAppliance).
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        # 6 windows of 15 min => 90 mins avg cycle, allowed 2 times
        windows_per_cycle = 6
        allowed_cycles = 2
        self.max_usages = windows_per_cycle * allowed_cycles
        self.min_usages = 0
        self.group = "clean"
        self.min_cons = 0.01
  
class DishWasher(IntermittentAppliance):
    """Dish washer, an appliance that can be disconnected
    (subclass of IntermittentAppliance).
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        # 6 windows of 15 min => 90 mins avg cycle, allowed 2 times
        windows_per_cycle = 6
        allowed_cycles = 2
        self.max_usages = windows_per_cycle * allowed_cycles
        # 30 mins (2 windows) times 2 times
        self.min_usages = 2 * allowed_cycles
        self.group = "clean"
        self.min_cons = 0.05
        

class Dryer(IntermittentAppliance):
    """Dryer, an appliance that can be disconnected
    (subclass of IntermittentAppliance).
    """
    def __init__(self, name="", type_dryer="electric", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            type_dryer (str, optional): Distinguishes the electric dryer from the gas one. Defaults to "electric".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        self.type_dryer = type_dryer
        # 8 windows of 15 min => 120 mins avg cycle, allowed 2 times
        windows_per_cycle = 8
        allowed_cycles = 2
        self.max_usages = windows_per_cycle * allowed_cycles
        self.min_usages = 0
        self.group = "clean"
        self.min_cons = 0.05
     
class Microwave(IntermittentAppliance):
    """Microwave, an appliance that can be disconnected
    (subclass of IntermittentAppliance).
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        # Usage limits of the appliance
        self.max_usages = 5
        self.group = "cook"
        self.min_usages = 1
        # Threshold of consumption assigned to the appliance
        self.min_cons = 0.01
    
class Oven(IntermittentAppliance):
    """Oven, an appliance that can be disconnected
    (subclass of IntermittentAppliance).
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        # Usage limits of the appliance
        self.max_usages = 8 * 2
        self.group = "cook"
        self.min_usages = 3 * 3
        # Threshold of consumption assigned to the appliance
        self.min_cons = 0.01

class GarbageDisposal(IntermittentAppliance):
    """Garbage disposal, an appliance that can be disconnected
    (subclass of IntermittentAppliance).
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        # Usage limits of the appliance
        self.max_usages = 8 * 2
        self.group = "cook"
        self.min_usages = 3 * 3
        # Threshold of consumption assigned to the appliance
        self.min_cons = 0.001
    
class Refrigerator(ContinuousAppliance):
    """Refrigerator, an appliance that is never disconnected
    (subclass of ContinuousAppliance).
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        # No usage limit is set: the appliance is continuous
        self.group = "cook"

# Room is used to model the kitchen appliances
class Room(IntermittentAppliance):
    """Kitchen appliances as reported in the Pecan Street Dataset,
    treated as one appliance that can be disconnected
    (subclass of IntermittentAppliance).
    """
    def __init__(self, name="", type_room="kitcheapp", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            type_room (str, optional): Distinguishes the different rooms. Defaults to "kitcheapp".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        self.type_room = type_room
        self.type_usage = "appliance"
        # Every usage is 15 mins, so 4 make an hour => 3 hours
        usages_per_hour = 4
        allowed_hours = 3
        self.max_usages = usages_per_hour * allowed_hours
        self.group = "room"
        self.min_usages = 2
        self.min_cons = 0.07
        
class SolarPV:
    """Photovoltaic panel that generates electricity.
    """
    def __init__(self, name="", capacity=100, generation=0):
        """Class constructor

        Args:
            name (str, optional): Name of the panel. Defaults to "".
            capacity (float, optional): How much it can generate. Defaults to 100.
            generation (float, optional): How much it is generating. Defaults to 0.
        """
        self.capacity = capacity
        self.current_load = 0
        # History of generations and the latest value seen
        self.electric_generation = list()
        self.last_generation = None
        self.generation = generation
        self.name = name
        self.group = "pv"
        self.mean_consumption = 0.0

    def get_total_generation(self):
        """Add up every generation stored in the history

        Returns:
            float: Sum of generations recorded
        """
        return sum(self.electric_generation)

    def __repr__(self):
        """Text shown when the object is printed

        Returns:
            str: Class name followed by the current generation
        """
        return f"{self.__class__.__name__} = Generation: {self.generation!s}."

# Air compressor class
class AirCompressor(IntermittentAppliance):
    """Air compressor, an appliance that can be disconnected
    (subclass of IntermittentAppliance).

    No attribute is added on top of the ones of the base class.
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        
class Freezer(ContinuousAppliance):
    """Freezer, an appliance that is never disconnected
    (subclass of ContinuousAppliance).
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        # No usage limit is set: the appliance is continuous
        self.group = "cook"
    
class WaterHeater(ContinuousAppliance):
    """Water heater, an appliance that is never disconnected
    (subclass of ContinuousAppliance).
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name=name, consumption=consumption)
        # Group used to classify the appliance
        self.group = "heater"

class Car(IntermittentAppliance):
    """Class to represent the car.
    Inherits from the intermittent appliance class, with a "storage"
    type of consumption.
    """
    def __init__(self, name="", max_capacity=0, consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Appliance name. Defaults to "".
            max_capacity (float, optional): Maximum load of the car. Defaults to 0.
            consumption (int, optional): Consumption of the appliance. Defaults to 0.
        """
        super().__init__(name, type_consumption="storage",consumption=consumption)
        # Unknown until set_current_load or charge is called
        self.current_load = None
        self.max_capacity = max_capacity
        self.max_charges = 3 # Empirically might change
        self.group = "car"

    def charge(self, energy):
        """Add energy to the current load of the car.
        A load that has not been set yet (None) is treated as zero, so the
        method can be called before ``set_current_load``.

        Args:
            energy (float): How much more the car is charging
        """
        previous_load = 0 if self.current_load is None else self.current_load
        self.current_load = previous_load + energy

    def set_current_load(self, load):
        """Set the current load

        Args:
            load (float): New load
        """
        self.current_load = load

    def get_current_load(self):
        """Get the current load

        Returns:
            float: Current load, None when it has never been set or charged
        """
        return self.current_load

class Grid:
    """Electrical grid the building takes energy from
    """
    def __init__(self, name="", consumption = 0):
        """Class constructor

        Args:
            name (str, optional): Name given to the grid. Defaults to "".
            consumption (int, optional): Consumption from the grid. Defaults to 0.
        """
        self.consumption = consumption
        # History of consumptions and the latest value seen
        self.electric_consumption = list()
        self.last_consumption = None
        self.name = name
        self.mean_consumption = 0.0

    def get_total_consumption(self):
        """Add up every consumption stored in the history

        Returns:
            float: Total of all recorded consumptions
        """
        return sum(self.electric_consumption)

    def __repr__(self):
        """Text shown when the object is printed

        Returns:
            str: Class name followed by the current consumption
        """
        return f"{self.__class__.__name__} = Consumption: {self.consumption!s}."

In [None]:
class State:
    """Represent the state of the building
    """
    def __init__(
        self,
        month:float,
        day:float,
        hour:float,
        minute:float,
        price:float,
        weather:dict,
        dictionary_of_appliances:dict,
        dict_price_metadata = {}
    ):
        """Class constrcutor

        Args:
            month (float): Month that is part of the state
            day (float): Day that is part of the state
            hour (float): Hour that is part of the state
            minute (float): Minute that is part of the state
            price (float): Price in the current time window
            weather (dict): Weather information that is part of the state
            dictionary_of_appliances (dict): Appliances that are part of the state
            dict_price_metadata (dict, optional): Metadata from the price. Defaults to {}.
        """
        # Temporal state information
        self.month = month
        self.day = day
        self.hour = hour
        self.minute = minute
        # Energy price
        self.price = price
        # Current weather - dictionary in the form {column(string): float}
        self.weather = weather
        # Object per appliance
        #   the dictionary should be in the form {appliance(string): appliance(object)}
        self.dictionary_of_appliances = dictionary_of_appliances
        # To save the totals of the state
        self.total_consumption = 0.0
        self.grid_consumption = 0.0
        self.total_generation = 0.0
        self.net_demand = 0.0
        self.total_house_usage = 0.0
        self.net_cost = 0.0
        self.dict_price_metadata = dict_price_metadata
        self.intermittent_appliances = []
        self.current_usage_appliances = {}
        # Get the totals of the current state
        self.calculate_totals()
        
    def calculate_totals(self):
        """Calculate the totals of consumption
        """
        self.total_consumption, self.grid_consumption, self.total_generation = self.get_total_consumption_grid_con_generation()
        self.net_demand = self.get_net_demand()
        self.total_house_usage = self.get_total_house_usage()
        self.net_cost = self.get_net_cost()

    def get_total_consumption(self):
        """Calculate the total of consumption

        Returns:
            flaot: Total consumption of the state
        """
        total_consumption = 0.0
        # Iterate over the appliances to sum those that consume electricity
        for name, appliance in self.dictionary_of_appliances.items():
            if isinstance(appliance, ContinuousAppliance) or isinstance(appliance, IntermittentAppliance):
                total_consumption += appliance.consumption
        return total_consumption        

    def get_grid_consumption(self):
        """Calculate the consumption from the grid register on Pecan Street Data

        Returns:
            float: Consumption reported on grid
        """
        total_grid_consumption = 0.0
        # Iterate over the appliances to get the data from the grid
        for name, appliance in self.dictionary_of_appliances.items():
            if isinstance(appliance, Grid):
                total_grid_consumption += appliance.consumption
        return total_grid_consumption

    def get_total_generation(self):
        """Get the total generation of energy from PV mainly

        Returns:
            float: Total generation
        """
        total_generation = 0.0
        # Iterate over the appliances to sum those that generate electricity
        for name, appliance in self.dictionary_of_appliances.items():
            if isinstance(appliance, SolarPV):
                total_generation += appliance.generation
        return total_generation

    def get_total_consumption_grid_con_generation(self):
        """Get the consumption considering the grid, the generation and the continuous appliances

        Returns:
            tuple: Consumptions
        """
        total_consumption = 0.0
        total_grid_consumption = 0.0
        total_generation = 0.0
        # Iterate over the appliances 
        for name, appliance in self.dictionary_of_appliances.items():
            # sum those that consume electricity
            if isinstance(appliance, ContinuousAppliance) or isinstance(appliance, IntermittentAppliance):
                total_consumption += appliance.consumption
            # get the data from the grid
            elif isinstance(appliance, Grid):
                total_grid_consumption += appliance.consumption
            # sum those that generate electricity
            elif isinstance(appliance, SolarPV):
                total_generation += appliance.generation
        return total_consumption, total_grid_consumption, total_generation

    def get_net_demand(self):
        """Get the net demand considering the generation and consumption

        Returns:
            float: Net demand
        """
        # Consumption - generation - use + since the total generation is negative from the datasource
        net_demand = self.total_consumption + self.total_generation
        return net_demand

    def get_total_house_usage(self):
        """Get the total usage of electricy in the bulding

        Returns:
            float: Consumption
        """ 
        # Grid (consumption) + solar - use - here since the generation is negative
        total_house_usage = self.grid_consumption + (-self.total_generation)
        return total_house_usage

    def get_net_cost(self):
        """Get the cost considering the price and net consumption

        Returns:
            float: Cost
        """
        # Consumption * price or grid * price
        # Here, we asume that the solar generation has no cost but what is taken from the grid
        demand_ = self.net_demand
        if np.isnan(self.net_demand):
            demand_ = 0
        cost_consumption = self.price * demand_
        cost_consumption_gird = self.price * self.grid_consumption

        return cost_consumption

    def get_encode_temp(self, temp:float):
        """Encode a temperature as the position of the fuzzy state
        (cold, medium, hot) where it has the highest membership.

        Args:
            temp (float): Temperature to be converted

        Returns:
            int: Position of the state: 0 cold, 1 medium, 2 hot
        """
        # Fuzzy variable and its three states
        temperature = fuzzylogic_local.classes.Domain("temperature", -15, 70)
        temperature.cold = fuzzylogic_local.functions.S(-15, 20)
        temperature.medium = fuzzylogic_local.functions.triangular(15,30, c=23)
        temperature.hot = fuzzylogic_local.functions.R(25,70)
        # Membership of the temperature in every state, in a fixed order
        states = (temperature.cold, temperature.medium, temperature.hot)
        memberships = [fuzzy_state(temp) for fuzzy_state in states]
        # The position of the highest membership is the encoding
        return np.argmax(memberships)

    def get_encode_wind(self, wind_temp:float):
        """Encode a wind speed as the position of the fuzzy state
        (light, moderate, strong) where it has the highest membership.

        Args:
            wind_temp (float): Wind speed to be converted

        Returns:
            int: Position of the state: 0 light, 1 moderate, 2 strong
        """
        # Fuzzy variable and its three states
        wind = fuzzylogic_local.classes.Domain("wind", 0, 100)
        wind.light = fuzzylogic_local.functions.S(0,24)
        wind.moderate = fuzzylogic_local.functions.triangular(13,30,c=19)
        wind.strong = fuzzylogic_local.functions.R(24,100)
        # Membership of the wind speed in every state, in a fixed order
        states = (wind.light, wind.moderate, wind.strong)
        memberships = [fuzzy_state(wind_temp) for fuzzy_state in states]
        # The position of the highest membership is the encoding
        return np.argmax(memberships)

    def get_element_for_dql(self, tuple_):
        """Build the DQL state entries contributed by a single element

        Args:
            tuple_ (tuple): Triple made of the element name, the threshold used to
            binarise appliance readings and a list with extra decisions (or None)

        Returns:
            dict: Maps every generated variable name to its value. An appliance can
            produce several entries (``<name>_status``, ``<name>``, ``usage_<name>``),
            an unknown name produces a single 0.0 entry
        """
        element_, threshold, list_decisions = tuple_

        # Case 1: the element is one of the appliances held by this state
        if element_ in self.dictionary_of_appliances:
            device = self.dictionary_of_appliances[element_]
            encoded = {}
            if isinstance(device, SolarPV):
                # A solar pv is described by its generation instead of its consumption
                encoded[element_+"_status"] = 1 if device.generation < threshold else 0
                encoded[element_] = device.generation
            elif isinstance(device, (IntermittentAppliance, ContinuousAppliance)):
                encoded[element_+"_status"] = 1 if device.consumption > threshold else 0
                encoded[element_] = device.consumption
                if isinstance(device, IntermittentAppliance):
                    # Remaining usages; without a recorded usage all of them are available
                    if element_ in self.current_usage_appliances:
                        remaining = device.max_usages - self.current_usage_appliances[element_]
                    else:
                        remaining = device.max_usages
                    encoded["usage_" + str(element_)] = remaining
            elif isinstance(device, Grid):
                encoded[element_] = 1 if device.consumption > threshold else 0
            # Any other kind of appliance contributes nothing
            return encoded

        # Case 2: price metadata is stored already encoded
        if element_ in self.dict_price_metadata:
            return {element_: self.dict_price_metadata[element_]}

        # Case 3: weather; temperature and wind go through the fuzzy encoders
        if element_ in self.weather:
            reading = self.weather[element_]
            if element_ == 'temperature':
                return {element_: self.get_encode_temp(reading)}
            if element_ == 'wind':
                return {element_: self.get_encode_wind(reading)}
            return {element_: reading}

        # Case 4: date and time fields of the state
        if element_ in ('month', 'day', 'hour', 'minute'):
            return {element_: getattr(self, element_)}

        # Case 5: values derived from previous decisions, only when decisions are given
        if list_decisions is not None:
            if element_ == 'cost_p':
                # Compare the freshly computed net demand against the stored one
                return {element_: 1 if self.get_net_demand() > self.net_demand else 0}
            if element_ == 'decision_p':
                # Share of positive decisions, -1.0 when there are no decisions yet
                if len(list_decisions) > 0:
                    share = round(sum(list_decisions)*1.0/len(list_decisions), 2)
                else:
                    share = -1.0
                return {element_: share}

        # Fallback: probably an appliance that does not exist in this state
        return {element_: 0.0}

    def get_list_dql(self, list_elements_param:list, list_decisions:list=None):
        """Returns the values and names that represent the state for DQL

        The resulting state has the form
          [ month, day, temperature, weather, wind, Type(price){0,1}, DayType(price){0,1},
            Season(price){0,1}, Decision%, Cost%, SolarPV{0,1}, SolarPV,
            InterContApp_i{0,1}, InterContApp_i, InterApp_i_usage ]
        depending on which elements are requested.

        Args:
            list_elements_param (list): List of elements to consider in the State
            list_decisions (list, optional): Extra decisions to consider. Defaults to None.

        Returns:
            tuple: Float array with the values and a list with the name of each value
        """
        threshold = 0.0
        merged = {}
        for requested in list_elements_param:
            # Names containing 'status_' or 'usage_' are skipped.
            # NOTE(review): get_element_for_dql emits status keys as '<name>_status',
            #   which this substring test does not match - confirm whether those
            #   names are expected to be filtered here as well.
            if 'status_' in requested or 'usage_' in requested:
                continue
            # Each element can contribute several entries; a repeated name keeps
            #   its first position and takes the latest value
            merged.update(self.get_element_for_dql((requested, threshold, list_decisions)))
        values = np.array(list(merged.values()), dtype=float)
        return values, list(merged.keys())
 
    # This is to get states
    def get_list_qlearning(self, list_elements_param:list, list_decisions:list=None):
        """Generate a list to represent the state for the Q-table approach

        The state has the form
          [ SolarPV{0,1}, InterApp_i{0,1}, Type(Price){0,1}, Decision%{0,1}, Cost%{0,1},
            InterApp_i_usages{0,1} ]
        where the usage flags are appended at the end to keep a stable order.

        Args:
            list_elements_param (list): Elements to be consider as part of the state
            list_decisions (list, optional): Extra decisions to be consider. Defaults to None,
                which is handled like an empty list of decisions.

        Returns:
            tuple: Float array with the state values and a list with the name of each value
        """
        threshold = 0.0
        decision_threshold = 0.7
        # A missing decision history is treated like an empty one instead of
        #   failing on len(None)
        decisions = [] if list_decisions is None else list_decisions

        list_elements = []
        list_names = []
        for element_ in list_elements_param:
            if element_ in self.dictionary_of_appliances:
                appliance = self.dictionary_of_appliances[element_]
                if isinstance(appliance, SolarPV):
                    value_ = 1 if appliance.generation < threshold else 0
                elif isinstance(appliance, IntermittentAppliance):
                    value_ = 1 if appliance.consumption > threshold else 0
                else:
                    # Other kinds of appliances are not part of the Q-table state
                    continue
            elif element_ in self.dict_price_metadata:
                # The price metadata is already encoded
                value_ = self.dict_price_metadata[element_]
            elif element_ == 'cost_p':
                # Compare the freshly computed net demand against the stored one
                value_ = 1 if self.get_net_demand() > self.net_demand else 0
            elif element_ == 'decision_p':
                if len(decisions) == 0:
                    value_ = 0
                else:
                    percentage = sum(decisions)*1.0/len(decisions)
                    # NOTE(review): the flag is 1 when the percentage is BELOW the
                    #   threshold - confirm this direction is the intended one
                    value_ = 1 if decision_threshold > percentage else 0
            else:
                value_ = 0
            list_elements.append(value_)
            list_names.append(element_)

        # Discretised usage of every intermittent appliance, appended at the end
        for element_ in list_elements_param:
            if element_ not in self.dictionary_of_appliances:
                continue
            appliance = self.dictionary_of_appliances[element_]
            if not isinstance(appliance, IntermittentAppliance):
                continue
            # An appliance without a recorded usage has not been used yet
            if element_ in self.current_usage_appliances:
                current_usage = self.current_usage_appliances[element_]
            else:
                current_usage = 0
            list_elements.append(0 if current_usage < appliance.max_usages else 1)
            list_names.append("usage of " + str(element_))

        # Convert element to float
        return np.array(list_elements, dtype=float), list_names

    # This is to get states
    def get_as_list(self):
        """Get a flat representation of the whole state

        The values are ordered as
          month, day, hour, minute, price, [appliances], [weather]
        where a solar pv contributes its generation and any other appliance
        its consumption.

        Returns:
            tuple: Float array with the values and a list with the name of each value
        """
        # Fixed header of the state; the names double as the attribute names
        list_names = ['month', 'day', 'hour', 'minute', 'price']
        list_elements = [getattr(self, field) for field in list_names]

        # One reading per appliance
        for name, appliance in self.dictionary_of_appliances.items():
            if isinstance(appliance, SolarPV):
                reading = appliance.generation
            else:
                reading = appliance.consumption
            list_elements.append(reading)
            list_names.append(name)

        # Raw weather values, in the order they are stored
        for weather_name, weather_value in self.weather.items():
            list_names.append(weather_name)
            list_elements.append(weather_value)

        return np.array(list_elements, dtype=float), list_names

    def __repr__(self):
        """Text shown when the state is printed or inspected

        Every field is rendered on its own line as ``<label><value> `` between a
        header and a footer banner; the weather is dumped as indented JSON.

        Returns:
            str: Text containing the representation of the class
        """
        fields = [
            ("Month: ", self.month),
            ("Day: ", self.day),
            ("Hour: ", self.hour),
            ("Minute: ", self.minute),
            ("Price: ", self.price),
            ("Weather: ", json.dumps(self.weather, indent=2)),
            ("Appliances: ", self.dictionary_of_appliances),
            ("Total Consumption: ", self.total_consumption),
            ("Grid Consumption: ", self.grid_consumption),
            ("Total Generation: ", self.total_generation),
            ("Net Demand: ", self.net_demand),
            ("House Usage: ", self.total_house_usage),
            ("Net Cost: ", self.net_cost),
        ]
        body = "".join(label + str(value) + " \n" for label, value in fields)
        return "********** STATE ***********\n" + body + "********** **** ***********\n"

In [None]:
class StateEnv:
    """Class to represent the environment state of one building

    It wraps the appliance-level state object together with the aggregated
    demand, generation and cost values of the building.
    """
    def __init__(
        self, 
        bid,
        state,
        demand_intert,
        demand_contin,
        generation,
        total_demand,
        net_demand,
        net_cost,
        total_grid_demand,
        current_price,
        element_list=None,
        building=None
    ):
        """Class constructor

        Args:
            bid: Building ID
            state: Appliance-level state object; it must provide get_as_list,
                get_list_dql and get_list_qlearning
            demand_intert: Demand of the intermittent appliances
            demand_contin: Demand of the continuous appliances
            generation: Generation of the building
            total_demand: Continuous plus intermittent demand
            net_demand: Net demand of the building
            net_cost: Net cost of the building
            total_grid_demand: Total demand from the grid
            current_price: Current price
            element_list (list, optional): Names of the elements used by get_as_list.
                Defaults to None, which means an empty list (export everything).
            building (optional): Building that owns the state; it is required by
                get_list_qlearning and get_list_dql. Defaults to None.
        """
        self.bid = bid
        self.state_appliances = state
        self.demand_intert = demand_intert
        self.demand_contin = demand_contin
        self.generation = generation
        self.total_demand = total_demand
        self.net_demand = net_demand
        self.net_cost = net_cost
        self.total_grid_demand = total_grid_demand
        # A fresh list per instance: a literal [] default would be shared by
        #   every StateEnv created without an explicit element_list
        self.element_list = [] if element_list is None else element_list
        self.building = building
        self.current_price = current_price

    def get_as_list(self):
        """Get the elements on the state as a flat array

        With an empty element_list the full appliance state is returned followed by
        demand_intert, demand_contin, generation, total_demand, net_demand, net_cost
        and total_grid_demand. Otherwise the requested appliance-level elements are
        encoded through get_list_dql and the requested environment-level values
        (demand_intert, demand_contin, generation, total_demand, net_demand,
        net_cost) are appended after them in the order they were requested.

        Returns:
            numpy.ndarray: elements of the state
        """
        # If the element_list is empty, then return everything
        if len(self.element_list) == 0:
            list_state, _ = self.state_appliances.get_as_list()
            return np.append(list_state, [
                self.demand_intert,
                self.demand_contin,
                self.generation,
                self.total_demand,
                self.net_demand,
                self.net_cost,
                self.total_grid_demand
            ])

        # Values owned by the environment state rather than by the appliances
        env_values = {
            'demand_intert': self.demand_intert,
            'demand_contin': self.demand_contin,
            'generation': self.generation,
            'total_demand': self.total_demand,
            'net_demand': self.net_demand,
            'net_cost': self.net_cost,
        }
        # Only the remaining names are sent to the appliance state, otherwise
        #   each environment-level name would also add a 0.0 placeholder there
        appliance_elements = [ele for ele in self.element_list if ele not in env_values]
        list_state, _ = self.state_appliances.get_list_dql(appliance_elements)
        extra_values = [env_values[ele] for ele in self.element_list if ele in env_values]
        # get_list_dql returns a numpy array, which has no append method
        return np.append(list_state, extra_values)


    def get_list_qlearning(self, element_list_:list):
        """Get the representation for Q-Table

        The human decisions stored on the building are forwarded as extra
        decisions, so the building must be set.

        Args:
            element_list_ (list): Elements to be considered

        Returns:
            tuple: elements and names of the values 
        """
        values, names = self.state_appliances.get_list_qlearning(
            element_list_, self.building.array_decisions_human
        )
        return values, names

    def get_list_dql(self, element_list_:list):
        """Get the representation for DQL

        The human decisions stored on the building are forwarded as extra
        decisions, so the building must be set.

        Args:
            element_list_ (list): Elements to be considered

        Returns:
            tuple: Elements and names of the values 
        """
        return self.state_appliances.get_list_dql(
            element_list_, list_decisions=self.building.array_decisions_human
        )

    def __repr__(self):
        """Override of the representation of the class
        when using print() over the class

        Returns:
            str: Text containing the representation of the class
        """
        fields = [
            ("Build ID: ", self.bid),
            ("Demand Intermittent: ", self.demand_intert),
            ("Demand Continuous: ", self.demand_contin),
            ("Generation: ", self.generation),
            ("Cont+Intert Demand: ", self.total_demand),
            ("Grid Total Demand: ", self.total_grid_demand),
            ("Net Demand: ", self.net_demand),
            ("State Appliances: ", self.state_appliances),
        ]
        body = "".join(label + str(value) + "\n" for label, value in fields)
        return "************ ENV STATE ************\n" + body

In [None]:
class InformationStage:
    """Representation of extra information at each step in the iteration

    It keeps the visited states, the real and estimated consumption and cost
    histories and a usage counter per appliance.
    """
    def __init__(self, bid:int):
        """Class constructor

        Args:
            bid (int): Building ID
        """
        self.states = []
        # Form: {appliance (str): count (int)}
        self.count_usage_appliances = {}

        self.consume_approx_history = [] # Estimated demands - electricity consumption
        self.consume_history = [] #"real"
        
        self.cost_approx_history = [] 
        self.cost_history = [] 

        self.consumption_last_appliances_max = []
        self.consumption_last_appliances_max_approx = []

        # -1 means that no iteration has been registered yet
        self.current_day = -1
        self.current_month = -1

        # Number of the last step registered through add_iteration
        self.step = 0

        self.bid = bid

    def start_definition(
        self,
        state
    ):
        """Init the definition of the Stage

        Args:
            state (State): State to be consider
        """
        # Set the usage of appliances to zero
        for name in state.dictionary_of_appliances:
            self.count_usage_appliances[name] = 0

    @staticmethod
    def _was_used(name, appliance, consumptions, use_min_cons=True):
        """Tell if the last consumption of an appliance counts as a usage.

        Args:
            name (str): Name of the appliance
            appliance: Appliance object; its min_cons is only read when needed
            consumptions (dict): Last consumption per appliance name
            use_min_cons (bool, optional): Compare against appliance.min_cons
                instead of zero. Defaults to True.

        Returns:
            bool: True when the appliance has a consumption above the floor
        """
        if name not in consumptions:
            return False
        floor = appliance.min_cons if use_min_cons else 0
        return consumptions[name] > floor

    def _reset_usage(self, state, consumptions, use_min_cons=True):
        """Restart every usage counter, leaving at 1 the appliances used in this step."""
        for name, appliance in state.dictionary_of_appliances.items():
            used = self._was_used(name, appliance, consumptions, use_min_cons)
            self.count_usage_appliances[name] = 1 if used else 0

    def add_iteration(
        self, 
        state:"State", 
        consumption_approx:float,
        dict_last_consumption_with_actions:dict,
        is_training=False,
        step_ = 0
    ):
        """Add information of an iteration and change the
        one that corresponds

        Args:
            state (State): State to be used in the iteration
            consumption_approx (float): Consumption from the model
            dict_last_consumption_with_actions (dict): Information of the last actions;
                'list_' holds the consumptions and 'dict_' the consumption per appliance name
            is_training (bool, optional): Flag to control the flow when is training or not. Defaults to False.
            step_ (int, optional): Number of the current step. Defaults to 0.
        """
        self.states.append(state)
        self.step = step_
        consumptions = dict_last_consumption_with_actions['dict_']

        # Max of the estimated consumptions; 0 when there is none to avoid max() failing
        max_approx = max(dict_last_consumption_with_actions['list_'], default=0)
        self.consumption_last_appliances_max_approx.append(max_approx)

        # Get max intermittent consumption from state
        max_inter = 0
        for appliance in state.dictionary_of_appliances.values():
            if isinstance(appliance, IntermittentAppliance) and max_inter < appliance.consumption:
                max_inter = appliance.consumption
        self.consumption_last_appliances_max.append(max_inter)

        # Calculate the "real" consumption
        self.consume_history.append(state.get_net_demand())
        self.consume_approx_history.append(consumption_approx)

        # Calculate the costs
        self.cost_history.append(state.get_net_cost())
        self.cost_approx_history.append(consumption_approx * state.price)

        if not is_training:
            # Any change of date starts a new counting period. Comparing with
            #   "!=" also covers a month rollover (day 31 -> day 1), which a
            #   strictly increasing day never detects
            new_day = state.day != self.current_day
            new_month = state.month != self.current_month
            if new_day or new_month:
                # Update day and month on stage
                self.current_day = state.day
                self.current_month = state.month
                # When both day and month change a consumption above zero counts
                #   as a usage, otherwise it must exceed the appliance min_cons
                #   (distinction kept from the previous implementation)
                self._reset_usage(state, consumptions, use_min_cons=not (new_day and new_month))
        elif step_ % (24*4) == 0:
            # When it is training just check if it was reach a (24*4) step
            self._reset_usage(state, consumptions)

        # Update usages
        # NOTE(review): an appliance marked as used by a reset above (count 1) is
        #   incremented again here, so that step counts twice - confirm this is intended
        for name, appliance in state.dictionary_of_appliances.items():
            # If there is a consumption on the appliance that means it was used
            used = self._was_used(name, appliance, consumptions)
            if name in self.count_usage_appliances:
                if used:
                    self.count_usage_appliances[name] += 1
            else:
                self.count_usage_appliances[name] = 1 if used else 0

    def __repr__(self):
        """Override of the representation of the class
        when using print() over the class

        Returns:
            str: Text containing the representation of the class
        """
        str_representation = str(self.states) +"\n"
        str_representation += "Usage Appli: " + str(self.count_usage_appliances) +"\n"
        str_representation += "Consume Hist: " + str(self.consume_history) +"\n"
        str_representation += "Date: "+ str(self.current_day)+ "/"+ str(self.current_month)+". \n"
        return str_representation

In [None]:
class Environment(gym.Env): 
    """Class to represent the environment

    Args:
        gym (gym.Env): Parent class
    """
    def __init__(self, 
                 path_data:str, 
                 path_appliances:str,
                 path_metadata:str,
                 path_prices:str,
                 path_weather:str,
                 houses_id:list, 
                 time_window:list, 
                 city:str, 
                 buildings_number:int,
                 months_to_load: list,
                 model_name="Random",
                 consider_human = False,
                 copy_env = None,
                 consider_model = True,
                 is_training = False,
                 episodes = 1
        ):
        """Class constructor for the environment class

        Stores the configuration, initialises the bookkeeping attributes, loads
        the buildings through init_loader and finally configures the human
        decision of every loaded building.

        Args:
            path_data (str): Path to the main data
            path_appliances (str): Path to the appliance csv
            path_metadata (str): Path to the buildings metadata
            path_prices (str): Path to the prices data
            path_weather (str): Path to the weather data
            houses_id (list): List of the buildings ID to be used
            time_window (list): Lower and upper limit of the time in environment 
            city (str): Name of the city to be used
            buildings_number (int): Number of buildings part of the environment
            months_to_load (list): Months to be loaded
            model_name (str, optional): Name of the model to be used. Defaults to "Random".
            consider_human (bool, optional): To control if it should consider the human behaviour. Defaults to False.
            copy_env (Environment, optional): A environment to copy information from. Defaults to None.
            consider_model (bool, optional): If the environment should consider the model. Defaults to True.
            is_training (bool, optional): To control testing and training flows. Defaults to False.
            episodes (int, optional): How many episodes will be running. Defaults to 1.

        Raises:
            KeyError: If a loaded building ID has no entry in the hard-coded
                ages dictionary defined at the end of this constructor.
        """
        # Assign the variable to the class
        self.path_data = path_data 
        self.path_appliances = path_appliances
        self.path_metadata = path_metadata
        self.path_prices = path_prices
        self.path_weather = path_weather
        self.houses_id = houses_id
        self.time_window = time_window
        self.city = city
        self.buildings_number = buildings_number
        self.months_to_load = months_to_load
        # Only remembers whether an environment to copy from was given;
        #   the environment itself is handed to the loader below
        self.copy_env_exist = copy_env is not None
        self.episodes = episodes 
        # Get how many rows should exist in the dataframe when it is training
        #   since an episode are 30 days (*24h *4time window)
        self.n_rows_in_training_df = self.episodes * 30 * 24 * 4
        self.is_training = is_training
        self.consider_model = consider_model
        self.model_name = model_name
        
        # Bookkeeping attributes, all starting empty
        self.state = []
        self.state_objects = []
        self.dictionary_history_state_objects = {}
        
        self.buildings_net_electricity_demand = []
        self.centralized = False
        self.buildings_states_actions = None
        self.loss = []
        self.cost_rbc = None
        self.cumulated_reward_episode = 0.0
        self.net_electric_consumption = []
        # This should be like {'appliance': list}
        self.historyelectric_demand_per_appliance = {}
        self.stage_info_per_building_dictionary = {}
        self.stage_info_per_building_list = []
        self.list_elements_state = []
        # Placeholder, replaced by the value returned by init_loader below
        self.encoder_used_price = None
        
        # Params for the loader
        params_loaders = {
            'path_data': self.path_data,
            'path_appliances': self.path_appliances,
            'path_metadata': self.path_metadata,
            'path_prices': self.path_prices,
            'path_weather': self.path_weather,
            'building_ids': self.houses_id,
            'city': self.city,
            'months_to_load': self.months_to_load,
            'copy_env': copy_env,
            'is_training': self.is_training,
            'n_rows_episode': self.n_rows_in_training_df
        }
        
        # State = Observation
        # The fourth and fifth values returned by the loader are discarded
        self.buildings, self.observation_spaces, self.action_spaces, _, _, self.list_appliances, self.encoder_used_price = init_loader(**params_loaders)
        # NOTE(review): init_vars is called with its default is_training=False
        #   even when this environment is created with is_training=True - confirm
        self.init_vars()
        # Placeholder, get_object_list_appliances() overwrites it with the
        #   names of the intermittent appliances
        self.list_name_appliances_intert = []
        self.list_obj_appliances = self.get_object_list_appliances()


        # Set the human decision variables
        # Ages are hard-coded per building ID (as string); a building whose ID
        #   is not listed here makes the lookup below raise KeyError
        ages_ = {
            '4373': 27,
            '7719': 43,
            '8156': 35
        }
        self.human_ages_ = ages_
        for bid,building in self.buildings.items():
            building.set_human_decision(consider_human=consider_human, age_habitant=ages_[str(bid)])
    
    def get_buildings_info(self):
        """Get the information of the buildings that are part of the environment

        For every building the result holds a copy of its metadata, its monthly
        totals (rounded to 3 decimals) and, per other building, the correlation of
        the total, generation, constant and intermittent series.

        Returns:
            dict: Dictionary with the information of the building, keyed by building ID
        """
        def _correlation(series_a, series_b):
            # Both series are cut to the length of the shorter one so that
            #   np.corrcoef receives inputs of the same size
            common = min(len(series_a), len(series_b))
            return round(np.corrcoef(series_a[0:common], series_b[0:common])[0][1], 3)

        # Name of the entry in the result -> attribute of the building holding the series
        correlated_series = (
            ('Correlations_Consumption', 'total_consumption'),
            ('Correlations_Generation', 'electric_generation'),
            ('Correlations_Constant', 'constant_consumption'),
            ('Correlations_Intermittent', 'intermittent_consumption'),
        )
        # Length of the time window expressed in blocks of 2880 steps
        months = (self.time_window[1] - self.time_window[0] + 1)/2880

        # Calculate the total information of all the buildings
        #   NOTE: This first pass was done to avoid overload 
        for building in self.buildings.values():
            building.get_consumptions_for_information()

        info = {}
        for bid, building in self.buildings.items():
            summary = {key: value for key, value in building.metadata.items()}
            summary["Total_Consumption_Monthly"] = round(building.total_consumption_/months, 3)
            summary["Electric_Generation_Monthly"] = round(building.electric_generation_/months, 3)
            summary["Constant_Consumption_Monthly"] = round(building.consumption_constant_/months, 3)
            summary["Intermittent_Consumption_Monthly"] = round(building.consumption_intermitent_/months, 3)
            # TODO: Consider add consumption by room

            # Get correlation between this building and every other one
            others = [
                (other_bid, other)
                for other_bid, other in self.buildings.items()
                if other_bid != bid
            ]
            for entry_name, attribute in correlated_series:
                own_series = getattr(building, attribute)
                summary[entry_name] = {
                    other_bid: _correlation(own_series, getattr(other, attribute))
                    for other_bid, other in others
                }
            info[bid] = summary
        return info
    
    def get_object_list_appliances(self):
        """Instantiate every configured appliance and record the intermittent ones.

        Each entry of ``self.list_appliances`` is turned into an object with
        ``instance_object_appliance(entry, 0.0)``. As a side effect, the entries
        whose object is an ``IntermittentAppliance`` are stored, in iteration
        order, in ``self.list_name_appliances_intert``.

        Returns:
            list: The appliance objects, in the order of ``self.list_appliances``.
        """
        appliance_objects = []
        intermittent_names = []
        for appliance in self.list_appliances:
            obj = instance_object_appliance(appliance, 0.0)
            appliance_objects.append(obj)
            # Only the entry itself is remembered for intermittent appliances;
            # the object stays reachable through the returned list.
            if isinstance(obj, IntermittentAppliance):
                intermittent_names.append(appliance)
        self.list_name_appliances_intert = intermittent_names
        return appliance_objects
            
    def init_vars(self, is_training=False):
        """Reset the episode bookkeeping and build the first state of every building.

        Args:
            is_training (bool, optional): When False, the time-step iterator is
                rebuilt from ``self.time_window`` and advanced once before the
                states are created. When True, the iterator and
                ``self.time_step`` are left as they are. Defaults to False.

        Returns:
            tuple: ``(self.state_dictinary, self.state_obj_dictionary)``; both map
            a building id to the ``StateEnv`` created for that building.
        """
        if is_training == False:
            first_minute, last_minute = self.time_window[0], self.time_window[1]
            self.minutes_step = iter(range(first_minute, last_minute + 1))
            self.next_time_step()

        # Consumption / generation accumulators
        self.total_electric_consumption = []
        self.total_electric_generation = []
        self.electric_consumption_appliances = []
        self.stage_info_per_building_dictionary = {}

        # Reward tracking
        self.reward_accumulated_episode = 0
        self.reward_function = Reward(len(self.houses_id), [], self.encoder_used_price)

        # State containers, filled building by building below
        self.state = []
        self.state_objects = []
        self.state_dictinary = {}
        self.state_obj_dictionary = {}

        for bid, building in self.buildings.items():
            building.init_vars()
            appliances_state = building.list_states[self.time_step]

            # Initial consumption is queried with a 1.0 action for every
            # intermittent appliance of the building.
            all_ones = [1.0] * len(building.list_intermittent_appliances)
            consumption = building.get_consumption(
                self.time_step,
                all_ones,
                is_init=True,
                consider_model=self.consider_model
            )
            total_consump, continious, intermitend, generation, grid_con = consumption
            total_consump += grid_con
            building.current_net_electricity_demand = round(total_consump + generation, 4)
            net_demand = building.current_net_electricity_demand
            price = appliances_state.price

            env_state = StateEnv(
                bid,
                appliances_state,
                intermitend,
                continious,
                generation,
                total_consump,
                net_demand,
                net_demand*price,
                grid_con,
                price,
                element_list=self.list_elements_state,
                building=building
            )

            # One entry per building in each container
            self.state.append(np.array(env_state.get_as_list()))
            self.state_dictinary[bid] = env_state
            self.state_objects.append(env_state)
            self.state_obj_dictionary[bid] = env_state
            # The history is appended to, never reset, by this method
            self.dictionary_history_state_objects.setdefault(bid, []).append(env_state)

            # Start the stage information of the building from its first state
            stage_info = InformationStage(bid)
            stage_info.start_definition(appliances_state)
            self.stage_info_per_building_dictionary[bid] = stage_info
            appliances_state.current_usage_appliances = stage_info.count_usage_appliances

        # Store the collected states as NumPy arrays
        self.state = np.array(self.state, dtype='object')
        self.state_objects = np.array(self.state_objects)
        return self.state_dictinary, self.state_obj_dictionary
    
    def next_time_step(self):
        """Advance the environment and all its buildings to the next time step.

        The next value is pulled from ``self.minutes_step``; exhausting that
        iterator raises ``StopIteration``.
        """
        self.time_step = next(self.minutes_step)
        # Keep every building aligned with the environment clock
        for building in self.buildings.values():
            building.time_step = self.time_step
            
    def step(self, actions:dict, is_deep=False):
        """Apply one set of actions per building and compute states and rewards.

        Args:
            actions (dict): Maps every building id to the set of actions
                (combination of on and off) handed to that building's
                ``get_consumption``. Must contain exactly
                ``self.buildings_number`` entries.
            is_deep (bool, optional): When True the environment is not moved to
                the next time step by this call. Defaults to False.

        Returns:
            tuple: ``(state_dictinary, state_obj_dictionary, dict_rewards,
            terminal, {})`` where ``state_obj_dictionary`` maps a building id to
            its new ``StateEnv``, ``dict_rewards`` is the second value returned
            by the reward function, and ``terminal`` is the result of
            ``self._terminal()``.

        Raises:
            AssertionError: If the number of action sets does not match
                ``self.buildings_number``.
        """
        self.buildings_net_electricity_demand = []
        self.stage_info_per_building_list = []
        assert len(actions) == self.buildings_number, "The length of the list of actions should match the length of the list of buildings."

        self.state = []
        self.state_obj = []
        self.state_dictinary = {}
        self.state_obj_dictionary = {}

        for bid, building in self.buildings.items():
            # Consumption of this building under its own action set
            total_consump, continious, intermitend, generation, grid_con = building.get_consumption(
                self.time_step,
                actions[bid],
                consider_model=self.consider_model
            )
            total_consump += grid_con

            # Net demand must use THIS building's generation (as init_vars
            # does), not the total accumulated over the buildings processed so
            # far. Generation is added because, per the original author's note,
            # it is reported as a negative value.
            building.current_net_electricity_demand = round(total_consump + generation, 4)
            net_demand = building.current_net_electricity_demand
            self.buildings_net_electricity_demand.append(net_demand)

            current_state = building.list_states[self.time_step]
            current_env_state = StateEnv(
                bid,
                current_state,
                intermitend,
                continious,
                generation,
                total_consump,
                net_demand,
                net_demand*current_state.price,
                grid_con,
                current_state.price,
                element_list=self.list_elements_state,
                building=building
            )

            self.state.append(np.array(current_env_state.get_as_list()))
            self.state_obj.append(current_env_state)
            # NOTE(review): every building id is mapped to the same shared list
            # holding the states of ALL buildings, not to that building's own
            # entry. Kept unchanged because callers may index into it; confirm
            # the intended shape.
            self.state_dictinary[bid] = self.state
            self.state_obj_dictionary[bid] = current_env_state
            self.dictionary_history_state_objects.setdefault(bid, []).append(current_env_state)

            # Create the stage information on first use, then record this step
            stage_info = self.stage_info_per_building_dictionary.get(bid)
            if stage_info is None:
                stage_info = InformationStage(bid)
                self.stage_info_per_building_dictionary[bid] = stage_info
            stage_info.add_iteration(
                current_state,
                total_consump, # approx total electricity consumption
                building.consumptions_intermittents[self.time_step],
                is_training=self.is_training,
                step_ = self.time_step
            )
            # Expose the appliance usage counters on the appliances state
            current_state.current_usage_appliances = stage_info.count_usage_appliances

            # Collected for the reward computation below
            self.stage_info_per_building_list.append(stage_info)

        # Move to the next time step unless the caller handles it (is_deep)
        if not is_deep:
            self.next_time_step()

        self.state = np.array(self.state, dtype='object')
        self.state_obj = np.array(self.state_obj)

        rewards, dict_rewards = self.reward_function.get_rewards(self.stage_info_per_building_list)
        self.cumulated_reward_episode += sum(rewards)
        # NOTE(review): nothing in this method ever computes the value stored
        # here, so the history receives 0.0 on every step (unchanged from the
        # previous behaviour). Confirm whether the summed net demand was meant.
        self.net_electric_consumption.append(0.0)
        terminal = self._terminal()

        return (self.state_dictinary, self.state_obj_dictionary, dict_rewards, terminal, {})
    
    def _terminal(self):
        """Tell whether the time window is exhausted and, if so, close the buildings.

        The state is terminal once ``self.time_step`` has reached the upper
        bound ``self.time_window[1]``. In that case ``terminate()`` is called on
        every building before returning.

        Returns:
            bool: True when the current time step is the last one (or beyond).
        """
        is_terminal = bool(self.time_step >= self.time_window[1])
        if not is_terminal:
            return False
        for building in self.buildings.values():
            building.terminate()
        # self.net_electric_consumption is deliberately left as a plain list;
        # the previous self-assignment here did nothing.
        return True
    
    def get_current_state(self):
        """Return the state most recently stored on the environment.

        Returns:
            The object held in ``self.state``. After ``init_vars`` or ``step``
            this is a NumPy object array with one entry per building.
        """
        return self.state
    

    def helper_method_graph(
        self, 
        df, 
        x_:str, 
        y_:str, 
        title:str, 
        xlabel:str, 
        ylabel:str, 
        type_cycle="",
        format_=None, 
        show_in_notebook=False,
        bid=None, 
        hue='type_group', 
        estimator='mean',
        graph_std=False
    ):
        """Draw a seaborn line plot of ``df`` and save it under ``graphs/``.

        The figure is written to ``graphs/<model>/[<bid>/]<type_cycle>/<title>``,
        where ``<model>`` is ``self.model_name`` (``"Random"`` is stored under
        ``"Normal"``). The target directory is created when missing. With
        ``graph_std`` a second figure, including the standard-deviation band, is
        saved next to it as ``<title>_shadow``.

        Args:
            df (dataframe): Dataframe with the information to graph
            x_ (str): Name of the column for X axis
            y_ (str): Name of the column for Y axis
            title (str): Name of the graph, also used as file name
            xlabel (str): X axis label
            ylabel (str): Y axis label
            type_cycle (str, optional): To separate train and test phases. Defaults to "".
            format_ (str, optional): Date format applied to the X axis ticks. Defaults to None.
            show_in_notebook (bool, optional): Selects the ``inline`` matplotlib
                backend when True and ``auto`` otherwise (only when running
                under IPython). Defaults to False.
            bid (str, optional): Building ID, used as sub-folder. Defaults to None.
            hue (str, optional): Used when grouping graphs. Defaults to 'type_group'.
            estimator (str, optional): Aggregation function to be used. Defaults to 'mean'.
            graph_std (bool, optional): If True, also saves the version with the std band. Defaults to False.
        """
        from matplotlib.dates import DateFormatter

        # Define font size
        sns.set(font_scale=3.2)

        # Plain-Python equivalent of the `%matplotlib inline` / `%matplotlib auto`
        # notebook magics; skipped when not running under IPython.
        try:
            shell = get_ipython()
        except NameError:
            shell = None
        if shell is not None:
            shell.run_line_magic('matplotlib', 'inline' if show_in_notebook else 'auto')

        # Folder where the figures are saved
        current_mn = "Normal" if self.model_name == "Random" else self.model_name
        path_save_file = 'graphs/'+current_mn+'/'
        if bid is not None:
            path_save_file += bid+"/"
        path_save_file += type_cycle+'/'
        # savefig does not create missing folders
        os.makedirs(path_save_file, exist_ok=True)

        def _render(ci, file_name):
            """Draw one line plot with the given ``ci`` setting and save it."""
            fig = plt.figure(figsize=(15,10))
            palette_current = sns.color_palette("colorblind", 3).as_hex()
            # NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of
            # `errorbar`; left untouched because the installed version is unknown.
            ax = sns.lineplot(data=df, x=x_, y=y_, hue=hue, palette=palette_current, ci=ci, estimator=estimator)
            # The legend only exists when the data is grouped
            if hue is not None:
                ax.legend_.set_title(None)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            if format_ is not None:
                ax.xaxis.set_major_formatter(DateFormatter(format_))
            fig.savefig(path_save_file+file_name, dpi=fig.dpi)
            plt.ioff()

        # Plain version
        _render(None, title)
        # Std shadow version
        if graph_std:
            _render('sd', title+"_shadow")

    def helper_group_data_graph(self, dataframes_:dict, data_rewards:dict, type_cycle:str):
        """Group the data per building and overall, graph it and summarise it.

        For every building the intermittent and total consumption are plotted
        (split into All / Summer / Not-Summer) and, when ``type_cycle`` is
        ``"train"``, the average reward per pseudo-month. The same consumption
        graphs are then produced for all buildings together. Generation (A1),
        net-demand (A4) and the remaining reward graphs are currently disabled.

        Side effect: a ``type_group`` column is written into every dataframe of
        ``dataframes_``.

        Args:
            dataframes_ (dict): Building id -> dataframe with the information to
                graph. The columns ``month``, ``day``, ``time``, ``net_cost``,
                ``intermittent``, ``total_demand`` and ``net_demand`` are read.
            data_rewards (dict): Building id -> iterable of reward sequences.
            type_cycle (str): Separates training and testing flows.

        Returns:
            tuple: List with one summary dictionary per building plus a final
            ``'general'`` one, dataframe with the consumption information of all
            buildings and dataframe with the reward information of all buildings.

        Raises:
            ValueError: If ``dataframes_`` is empty.
        """
        if not dataframes_:
            raise ValueError("dataframes_ must contain at least one building")

        summer_months = [6.0, 7.0, 8.0, 9.0]
        not_summer_months = [1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 11.0, 12.0]
        # Rewards carry no calendar, so pseudo days/months are derived from the
        # position: 96 steps per day (presumably 15-minute steps - TODO confirm)
        # and 2880 = 30 * 96 steps per month.
        steps_per_day = 96
        steps_per_month = 2880

        def _with_seasons(frame, whole_label):
            """Label ``frame`` in place and return it stacked with its seasonal subsets."""
            summer = frame[frame['month'].isin(summer_months)].copy(deep=True)
            not_summer = frame[frame['month'].isin(not_summer_months)].copy(deep=True)
            frame['type_group'] = whole_label
            summer['type_group'] = 'Summer'
            not_summer['type_group'] = 'Not-Summer'
            return pd.concat([frame, summer, not_summer])

        def _consumption_stats(frame):
            """Mean/std of the monthly and daily sums of the consumption columns."""
            by_month = frame.groupby('month')
            by_day = frame.groupby(['month', 'day'])
            columns = (
                ('net_cost', 'cost_monthly', 'cost_daily'),
                ('intermittent', 'intermittent_monthly', 'intermittent_daily'),
                ('total_demand', 'total_consumption_monthly', 'total_consumption_daily'),
                # "montly" (sic): spelling kept so existing consumers of the key still work
                ('net_demand', 'net_consumption_montly', 'net_consumption_daily'),
            )
            stats = {}
            for column, monthly_key, daily_key in columns:
                # Select the column before summing so that non-numeric columns
                # (dates, labels) are never aggregated.
                monthly = by_month[column].sum()
                daily = by_day[column].sum()
                stats[monthly_key+'_avg'] = round(monthly.mean(), 4)
                stats[monthly_key+'_std'] = round(monthly.std(), 4)
                stats[daily_key+'_avg'] = round(daily.mean(), 4)
                stats[daily_key+'_std'] = round(daily.std(), 4)
            return stats

        def _reward_stats(rewards):
            """Mean/std of the reward summed per pseudo-month and per pseudo-day."""
            monthly = rewards.groupby('month')['reward'].sum()
            daily = rewards.groupby('day')['reward'].sum()
            return {
                'reward_monthly_avg': round(monthly.mean(), 4),
                'reward_monthly_std': round(monthly.std(), 4),
                'reward_daily_avg': round(daily.mean(), 4),
                'reward_daily_std': round(daily.std(), 4),
            }

        list_table_info = []
        building_frames = []
        reward_frames = []
        # Iterate over each building with its own dataframe
        for bid, df_ in dataframes_.items():
            building_frames.append(df_)
            df_appened = _with_seasons(df_, 'All')

            log_message("=="*20)
            log_message("Working with house "+str(bid))

            # Intermittent consumption graphs
            self.helper_method_graph(df_appened, "time", "intermittent", 'A2-Avg Consumption Per Hour-Minute from Intermittent Appliances - '+str(bid)+' - '+str(type_cycle), "Time", "Consumption [kW]", format_="%H:%M", bid=str(bid),type_cycle=type_cycle)

            # Total consumption graphs
            self.helper_method_graph(df_appened, "time", "total_demand", 'A3-Avg Total Consumption Per Hour-Minute - '+str(bid)+' - '+str(type_cycle), "Time", "Consumption [kW]", format_="%H:%M", bid=str(bid),type_cycle=type_cycle)

            log_message("Done graph training graph over time")

            # Information for the tables
            dictionary_table_info = {'bid': bid, **_consumption_stats(df_)}

            log_message("Done computing information for table")

            # Flatten the reward sequences, remembering the position of each
            # reward inside its own sequence
            list_r_indv = []
            list_r_time_window = []
            for l_r in data_rewards[bid]:
                list_r_indv.extend(l_r)
                list_r_time_window.extend(range(1, len(l_r)+1))

            total_steps = len(list_r_indv)
            df_indiv_r = pd.DataFrame({
                'time_window': list_r_time_window,
                'day': [position // steps_per_day + 1 for position in range(total_steps)],
                'month': [position // steps_per_month + 1 for position in range(total_steps)],
                'reward': list_r_indv
            })
            df_indiv_r['bid'] = bid
            reward_frames.append(df_indiv_r)

            dictionary_table_info.update(_reward_stats(df_indiv_r))

            # Only graph when training
            if type_cycle == "train":
                self.helper_method_graph(df_indiv_r, "month", "reward", 'C5-Avg Reward Per Episode - '+str(bid)+' - '+str(type_cycle), "Episode", "Reward", bid=str(bid), hue=None,type_cycle=type_cycle)

            list_table_info.append(dictionary_table_info)
            log_message("Done reward per day")

        # With a single building its dataframe is used as-is (not copied), as
        # before; several buildings are stacked in one pass.
        if len(building_frames) == 1:
            general_dataframe = building_frames[0]
        else:
            general_dataframe = pd.concat(building_frames)
        general_dataframe_rewards = pd.concat(reward_frames)

        # Graph the general ones
        df_appened = _with_seasons(general_dataframe, 'Total')

        log_message("=="*20)
        log_message("Working with general")

        # Intermittent consumption graphs
        self.helper_method_graph(df_appened, "time", "intermittent", 'A2-Avg Consumption Per Hour-Minute from Intermittent Appliances'+type_cycle, "Time", "Consumption [kW]", format_="%H:%M",type_cycle=type_cycle)

        # Total consumption graphs
        self.helper_method_graph(df_appened, "time", "total_demand", 'A3-Avg Total Consumption Per Hour-Minute - '+str(type_cycle), "Time", "Consumption [kW]", format_="%H:%M",type_cycle=type_cycle)

        log_message("Done graph training graph over time")

        # Information for the tables
        dictionary_table_info = {
            'bid': 'general',
            **_consumption_stats(general_dataframe),
            **_reward_stats(general_dataframe_rewards),
        }

        log_message("Done computing information for table")

        list_table_info.append(dictionary_table_info)

        return list_table_info, general_dataframe, general_dataframe_rewards

    def graph_performance(self, specific_bid=None, is_training=False, data_rewards=None):
        """Collect the recorded states per building into dataframes and build the summary

        The dataframes are handed to ``helper_group_data_graph``; the table
        information it returns is stored on the instance and saved as JSON.

        Args:
            specific_bid (str, optional): Not read by this method at the moment. Defaults to None.
            is_training (bool, optional): Selects the training ("train") or testing ("test") flow. Defaults to False.
            data_rewards (dict, optional): Reward information per period, forwarded to the helper. Defaults to None.
        """
        dataframes_ = {}
        for bid, state_l in self.dictionary_history_state_objects.items():
            # One list per dataframe column; the key order is the column order
            columns_ = {
                'datetime': [], 'date': [], 'time': [],
                'month': [], 'day': [], 'hour': [], 'minute': [],
                'intermittent': [], 'continous': [], 'generation': [],
                'total_demand': [], 'net_demand': [], 'net_cost': [],
                'grid': [], 'price': [],
            }
            for state_ in state_l:
                clock_ = state_.state_appliances
                day_, month_ = int(clock_.day), int(clock_.month)
                hour_, minute_ = int(clock_.hour), int(clock_.minute)

                # The states carry no year, 2018 is used for every timestamp
                columns_['datetime'].append(datetime.datetime(2018, month_, day_, hour_, minute_))
                columns_['date'].append(datetime.datetime(2018, month_, day_))
                # Time of day without a date, anchored on 1900-01-01 as strptime("%H:%M") does
                columns_['time'].append(datetime.datetime(1900, 1, 1, hour_, minute_))

                columns_['month'].append(clock_.month)
                columns_['day'].append(clock_.day)
                columns_['hour'].append(clock_.hour)
                columns_['minute'].append(clock_.minute)

                columns_['intermittent'].append(state_.demand_intert)
                columns_['continous'].append(state_.demand_contin)
                columns_['generation'].append(state_.generation)
                columns_['total_demand'].append(state_.total_demand)
                columns_['net_demand'].append(state_.net_demand)
                columns_['net_cost'].append(state_.net_cost)

                columns_['grid'].append(state_.total_grid_demand)
                columns_['price'].append(state_.current_price)

            # Add dataframe to dictionary
            dataframes_[bid] = pd.DataFrame(columns_)

        # Set style and palette for seaborn
        sns.set_style("darkgrid")
        # set_palette applies the palette; color_palette alone only returns it
        sns.set_palette("colorblind")

        # Keep the dataframes of the cycle before grouping them
        if is_training:
            type_cycle = "train"
            self.dataframes_graphs_training = dataframes_
        else:
            type_cycle = "test"
            self.dataframes_graphs_test = dataframes_

        list_table_info, dataframe_general, dataframe_rewards = self.helper_group_data_graph(
            dataframes_, data_rewards, type_cycle
        )

        # Attribute names differ between the two cycles and are kept as they are
        if is_training:
            self.list_table_info_training = list_table_info
            self.dataframe_general_training = dataframe_general
            self.general_dataframe_rewards_training = dataframe_rewards
        else:
            self.list_table_info_test = list_table_info
            self.dataframe_general_test = dataframe_general
            self.general_dataframe_rewards_testing = dataframe_rewards

        # Save the dictionary for the table; the "Random" model is stored in the "Normal" folder
        current_mn = "Normal" if self.model_name == "Random" else self.model_name
        with open('graphs/'+current_mn+'/table results '+self.model_name+' '+type_cycle+'.json', 'w') as data_json:
            json.dump(list_table_info, data_json, indent=3)

In [None]:
class Reward:
    """Compute the reward signal of the buildings
    """
    def __init__(self, agents_number:int, building, encoder_used_price):
        """Store the context used by the reward calculation

        Args:
            agents_number (int): Number of agents in use
            building (object): Current building
            encoder_used_price (object): Encoders of the price metadata, indexed by column name
        """
        # Only encoder_used_price is read by the methods of this class
        self.encoder_used_price = encoder_used_price
        self.building = building
        self.agents_number = agents_number

    def get_rewards(self, info_stages_array: list):
        """Compute the reward of every building in the given list

        Args:
            info_stages_array (list): InformationStage objects, one per building

        Returns:
            tuple: Rewards in input order (list) and rewards keyed by building id (dict)
        """
        rewards_by_bid = {}
        rewards_in_order = []
        for building_stage in info_stages_array:
            # Same value goes to both structures
            reward_value = self.get_reward(building_stage)
            rewards_in_order.append(reward_value)
            rewards_by_bid[building_stage.bid] = reward_value
        return rewards_in_order, rewards_by_bid

    def get_reward(self, info_stage: InformationStage):
        """Compute the reward of one building as the sum of four terms:
        cost control, energy consumption, load shifting and usage control

        Args:
            info_stage (InformationStage): Information of the building

        Returns:
            float: Total reward of the building
        """
        # The reward is evaluated on the most recent state
        latest_state = info_stage.states[-1]

        # Weights of the reward definition
        bonus_w = 2.5
        penalty_w = 1.5
        energy_w = 1.75
        usage_div = 20.0

        # ---- Cost control: reward when the approximated cost is below the real one
        approx_cost = info_stage.cost_approx_history[-1]
        real_cost = info_stage.cost_history[-1]
        if approx_cost < real_cost:
            cost_term = 4.0*bonus_w*abs(approx_cost - real_cost)
        else:
            cost_term = -4.0*penalty_w*abs(approx_cost - real_cost)

        # ---- Energy consumption: compare the last approximated max with the real one
        approx_peak = info_stage.consumption_last_appliances_max_approx[-1]
        real_peak = info_stage.consumption_last_appliances_max[-1]
        if approx_peak < real_peak:
            energy_term = energy_w*abs(approx_peak - real_peak)
        else:
            energy_term = -penalty_w

        # ---- Load shifting: penalise consumption in the on-peak window
        price_type = latest_state.dict_price_metadata['Type']
        type_encoder = self.encoder_used_price['Type']
        on_peak_code = type_encoder.transform(["On-Peak"]).astype(int).tolist()[0]
        if price_type == on_peak_code:
            shift_term = -2.0*(bonus_w*penalty_w)
        else:
            shift_term = 2.0*bonus_w

        # ---- Usage control: compare the usage count of each appliance with its limits
        usage_term = 0.0
        for name, appliance in latest_state.dictionary_of_appliances.items():
            times_used = info_stage.count_usage_appliances[name]

            if isinstance(appliance, Car):
                # Electric car, limited by the number of charges
                limit = appliance.max_charges
                if times_used > limit:
                    usage_term += (-1.0*times_used)/usage_div
                elif times_used < limit:
                    usage_term += abs(times_used - limit)/usage_div
                else:
                    usage_term += times_used/usage_div
            elif getattr(appliance, 'type_usage', None) == 'light':
                # Lights use a scaled version of the same rule
                limit = appliance.max_usages
                if times_used > limit:
                    usage_term += (-1.0*times_used*15.0)/(usage_div*7.0)
                elif times_used < limit:
                    usage_term += (abs(times_used - limit)*15.0)/(usage_div*7.0)
                else:
                    usage_term += times_used*15.0/(usage_div*7.0)
            elif isinstance(appliance, IntermittentAppliance):
                # Rest of the appliances - cleaning and kitchen
                limit = appliance.max_usages
                if times_used > limit:
                    usage_term += (-1.0*times_used)/usage_div
                elif appliance.min_usages < times_used < limit:
                    usage_term += (abs(times_used - limit))/usage_div
                elif times_used == limit:
                    usage_term += times_used/usage_div
                elif times_used < appliance.min_usages:
                    usage_term += (-1.0*times_used)/usage_div
                elif times_used == appliance.min_usages:
                    usage_term += times_used/(1.5*usage_div)

        total_reward_building = cost_term + energy_term + shift_term + usage_term
        # Dump the terms when the total looks suspiciously high
        if total_reward_building > 100:
            for label, value in (
                ("r_cost_control", cost_term),
                ("r_energy_consumption", energy_term),
                ("r_shift_load", shift_term),
                ("r_usage", usage_term),
                ("total_reward_building", total_reward_building),
            ):
                print(label, value)
            print("*"*20)
        return total_reward_building

In [None]:
class Building:
    """Class to represent the building
    """
    # Constructor
    def __init__(
        self, 
        id_building,
        metadata,
        air_compressor, 
        bathroom, 
        car, 
        washer, 
        dish_washer, 
        garbage_disposal, 
        dryer, 
        kitchen,
        microwave, 
        oven, 
        refrigerator,
        solar_pv,
        livingroom,
        dinningroom,
        freezer,
        water_heater,
        list_appliances = [],
        type_building = "house"
    ):
        """Class constructor

        Args:
            id_building (bool): Differentiate when it is a home or building
            metadata (dict): Metadata of the building
            air_compressor (object): Object representing the appliance air
            bathroom (object):  Object representing a room + bathroom
            car (object):  Object representing the car
            washer (object): Object representing the washer
            dish_washer (object):  Object representing the  dish washer
            garbage_disposal (object):  Object representing the  garbage disposal
            dryer (object):  Object representing the  Object representing the  dryer
            kitchen (object):  Object representing the kitchen
            microwave (object):  Object representing the microwave
            oven (object):  Object representing the oven
            refrigerator (object):  Object representing the refrigerator
            solar_pv (object):  Object representing the PV
            livingroom (object):  Object representing the room - livingroom
            dinningroom (object):  Object representing the room - dinningroom
            freezer (object):  Object representing the freezer
            water_heater (object):  Object representing the  water heater
            list_appliances (list, optional): Name of the columns of appliance to consider. Defaults to [].
            type_building (str, optional): Type of the building. Defaults to "house".
        """
        # Metadata
        self.type_building = type_building
        self.id_building = id_building
        self.metadata = metadata
        
        # Appliances
        self.air_compressor = air_compressor
        self.bathroom = bathroom
        self.car = car
        self.washer = washer
        self.dish_washer = dish_washer
        self.garbage_disposal = garbage_disposal
        self.dryer = dryer
        self.kitchen = kitchen
        self.microwave = microwave
        self.oven = oven
        self.refrigerator = refrigerator
        self.solar_pv = solar_pv
        self.livingroom = livingroom
        self.dinningroom = dinningroom
        self.freezer = freezer
        self.water_heater = water_heater
        # Simulated human behaviour
        self.humans_in_house = []
        
        # Appliances and consumption variables
        self.list_appliances = list_appliances
        self.time_step = 0
        self.total_electric_consumption = 0.0
        self.total_electric_consumption_detail = []
        self.observation_space = None
        self.action_space = None
        self.time_action = 0
        self.simulation = {}
        
        # Division of consumption
        self.electric_consumption_continous_appliances = []
        self.electric_consumption_intermittent_appliances = []
        self.electric_generation_from_pv = []
        
        # Set the min and max of states
        self.state_mins = None
        self.state_maxs = None
        self.list_states = []
        self.str_status = "Init"
        
        # Init variables for grouped consumption
        self.current_net_electricity_demand = 0.0
        self.current_consumption = 0.0
        self.current_constant_consumption = 0.0
        self.current_intermittent_consumption = 0.0
        self.current_electric_generation = 0.0
        self.current_grid_consumption = 0.0
        self.consider_human = False
        self.list_intermittent_appliances = []
        self.consumptions_intermittents = {}

        # List to manage appliances and human behaviour
        self.clean_list_appliances = []
        self.array_decisions_human = []

    def set_human_decision(self, consider_human = False, age_habitant = 25):
        """Configure the simulated human of the building

        Args:
            consider_human (bool, optional): Whether the human behaviour has to be considered. Defaults to False.
            age_habitant (int, optional): Age given to the simulated human. Defaults to 25.
        """
        # The flag is stored first, the human object is always created
        self.consider_human = consider_human
        simulated_human = Human(age=age_habitant)
        self.human_decision = simulated_human
        
    def get_list_appliances(self):
        """Get the list of appliances that are actually in the house

        Returns:
            list: Clean list of the appliances in the house
        """
        # Execute when the clean version is empty
        if len(self.clean_list_appliances) == 0:
            clean_list = []
            # Check when there is dat about the appliance
            for appliance in self.list_appliances:
                min_ = min(self.simulation[appliance])
                max_ = max(self.simulation[appliance])
                # if min and max are nan then the list is full of nans
                if (math.isnan(min_) and math.isnan(max_)) == False:
                    clean_list.append(appliance)
            # Set as property of the class
            self.clean_list_appliances = clean_list
            return clean_list
        else:
            return self.clean_list_appliances
        
    def set_state_space(self, high_state:int, low_state:int):
        """Create the box of states and keep it as the observation space.
        The state space of a building is based on the data observed in the past

        Args:
            high_state (int): Upper bound of the box
            low_state (int): Lower bound of the box
        """
        state_box = gym.spaces.Box(low=low_state, high=high_state, dtype=float)
        self.observation_space = state_box
    
    def set_action_space(self, action_high:int, action_low:int):
        """Create the box of actions and keep it as the action space.
        The action of a building is 1 to turn on the appliance or 0 to turn it off

        Args:
            action_high (int): Upper bound of the box
            action_low (int): Lower bound of the box
        """
        action_box = gym.spaces.Box(low=action_low, high=action_high, dtype=float)
        self.action_space = action_box
        
    def init_vars(self):
        """Reset the variables of the building
        """
        # Reset current grouped variables
        self.current_net_electricity_demand = 0
        self.current_consumption = 0.0
        self.current_constant_consumption = 0.0
        self.current_intermittent_consumption = 0.0
        self.current_electric_generation = 0.0
        self.current_grid_consumption = 0.0
        # total load - solar genearation
        self.total_electric_consumption = 0.0

        # Reset lists
        self.total_electric_consumption_detail = []
        self.electric_consumption_continous_appliances = []
        self.electric_consumption_intermittent_appliances = []
        self.electric_generation_from_pv = []
        
        self.total_consumption = []
        self.electric_generation = []
        self.constant_consumption = []
        self.intermittent_consumption = []
        self.net_demand = []
        
        # Reset lists of totals
        self.total_consumption_ = 0.0
        self.electric_generation_ = 0.0
        self.consumption_constant_ = 0.0
        self.consumption_intermitent_ = 0.0
        self.net_demand_ = 0.0
        # Reset dictionaries
        self.consumptions_intermittents  = {}
        # Get the intermittent appliances
        self.get_intermittedent_appliances()
        
    def terminate(self):
        """Set the last actions when the iteration are over
        """
        self.str_status = "End"
    
    def get_mins(self):
        """Get the minium value per appliance

        Returns:
            list: Min values of each appliance
        """
        # Copy the dataframe
        sub_df = self.state_mins.copy(deep=True)
        # Set consumption of the appliances to zero
        for appliance in self.list_appliances:
            sub_df[appliance] = 0
        # For all the columns where the column is not data id, get the first element and add it to a list
        return list(sub_df.loc[:, self.state_mins.columns != 'dataid'].iloc[0])
    
    def get_maxs(self):
        """Get the max value per appliance

        Returns:
            list: Max values of each appliance
        """
        # For all the columns where the column is not data id, get the first element and add it to a list
        return list(self.state_maxs.loc[:, self.state_maxs.columns != 'dataid'].iloc[0])
    
    def get_object_list_appliances(self):
        """Get the object list of the appliances.
        This considers only those on the clean list

        Returns:
            tuple: List of object and the name of the objects
        """
        # Get the clean list
        clean_list_appliances = self.get_list_appliances()
        list_object_appliances = []
        # For each appliance in the clean list
        for appliance in clean_list_appliances:
            # Get the instance of the object
            obj = instance_object_appliance(appliance, 0.0)
            list_object_appliances.append(obj)
        return list_object_appliances, clean_list_appliances

    def get_intermittedent_appliances(self):
        """Get the name of the appliances that are intermmitent

        Returns:
            list: Name of the appliances
        """
        list_app = []
        self.list_intermittent_appliances = []
        # Get the objects and names of the appliances
        list_object_appliances, clean_list_appliances = self.get_object_list_appliances()
        for index, appl in enumerate(list_object_appliances):
            # Select those that are intermittent
            if isinstance(appl, IntermittentAppliance):
                list_app.append(appl.__class__.__name__)
                self.list_intermittent_appliances.append(appl.name)        
        return list_app
    
    def get_consumptions_for_information(self):
        """Summary of the consumption of the building
        """
        # Init list
        total_consumption = []
        electric_generation = []
        constant_consumption = []
        intermittent_consumption = []
        net_demand = []
        
        # Init the grouped sums
        total_consumption_ = 0.0
        electric_generation_ = 0.0
        consumption_constant_ = 0.0
        consumption_intermitent_ = 0.0
        net_demand_ = 0.0
        
        # Get the appliances
        list_object_appliances, clean_list_appliances = self.get_object_list_appliances()
        # Get the consumption grouped from the appliances
        for index, appl in enumerate(list_object_appliances):
            # Check if it is the solar generation 
            if isinstance(appl, SolarPV):
                electric_generation_ += sum(self.simulation[clean_list_appliances[index]])
                # If it is not the first iteration add
                if len(electric_generation) > 0:
                    electric_generation = np.add(
                        electric_generation, 
                        self.simulation[clean_list_appliances[index]]
                    )
                else:
                    electric_generation = self.simulation[clean_list_appliances[index]]
                continue
            # If it is the grid just skip since we cannot work over it
            elif isinstance(appl, Grid):
                continue
            # Check continuos or intermittent
            if isinstance(appl, ContinuousAppliance):
                consumption_constant_ += sum(self.simulation[clean_list_appliances[index]])
                # If it is not the first iteration add
                if len(constant_consumption) > 0:
                    constant_consumption = np.add(
                        constant_consumption, 
                        self.simulation[clean_list_appliances[index]]
                    )
                else:
                    constant_consumption = self.simulation[clean_list_appliances[index]]
            elif isinstance(appl, IntermittentAppliance):
                consumption_intermitent_ += sum(self.simulation[clean_list_appliances[index]])
                # If it is not the first iteration add
                if len(intermittent_consumption) > 0:
                    intermittent_consumption = np.add(
                        intermittent_consumption, 
                        self.simulation[clean_list_appliances[index]]
                    )
                else:
                    intermittent_consumption = self.simulation[clean_list_appliances[index]]
        # Get total consumption
        total_consumption_ = consumption_constant_ + consumption_intermitent_
        total_consumption = np.add(constant_consumption, intermittent_consumption)
        
        # Get net demand - use - since generation is negative
        net_demand_ = total_consumption_ + electric_generation_
        # Substract the solar generation when possible
        if len(electric_generation) > 0:
            net_demand =  np.subtract(total_consumption, electric_generation)
        else:
            net_demand = total_consumption
        
        # Set the values
        self.total_consumption = total_consumption
        self.electric_generation = electric_generation
        self.constant_consumption = constant_consumption
        self.intermittent_consumption = intermittent_consumption
        self.net_demand = net_demand
        # Set the numeric values too
        self.total_consumption_ = total_consumption_
        self.electric_generation_ = electric_generation_
        self.consumption_constant_ = consumption_constant_
        self.consumption_intermitent_ = consumption_intermitent_
        self.net_demand_ = net_demand_
                
    def get_consumption(self, step, actions, is_init=False, consider_model=True):
        """Get the consumption in a step considering an action

        Args:
            step (int): Current step
            actions (int): Type of action
            is_init (bool, optional): When is the first step. Defaults to False.
            consider_model (bool, optional): Separate flows when the model is considered. Defaults to True.

        Returns:
            tuple: Values of the consumptions, constant consumptions, intermittent consumptions, PV and grid
        """
        # Get the current state
        try:
            current_state = self.list_states[step]
        except:
            print("There was an error. Please stop the execution")
            print("bid", self.id_building)
            print("self.list_states", len(self.list_states))
            print("step", step)

        # Init variables
        consumption_ = 0.0
        consumption_constant = 0.0
        consumption_intermitent = 0.0
        electric_generation = 0.0
        grid_consumption = 0.0
        

        # Check that the number of action is the same as the list of intermittent appliances
        assert len(self.list_intermittent_appliances) == len(actions), "Actions must be of the same length of intermmittent appliances "+str(len(self.list_intermittent_appliances))+", "+str(len(actions))

        # Consider the simulated human decision
        if self.consider_human == True and is_init == False:
            # Get the decision array per action
            array_decisions = self.human_decision.get_decision(
                current_state.weather['temperature'], 
                current_state.weather['wind'],
                current_state.hour,
                len(actions)
            )
            self.array_decisions_human = array_decisions
        else:
            self.array_decisions_human = np.zeros(len(actions))
        
        # Use the actions over the intermittent appliances only
        consumptions_intermittents_ = []
        consumptions_intermittents_dict_ = {}
        for action, name_appliance_intermmitent, decision_human in zip(actions, self.list_intermittent_appliances, self.array_decisions_human ):
            # Get the appliance from the current state
            appliance = current_state.dictionary_of_appliances[name_appliance_intermmitent]
            assert isinstance(appliance, IntermittentAppliance), "Appliace found as not intermmittent "+str(name_appliance_intermmitent)+" "
            
            appliance_consumption_current = appliance.energy_consumed(appliance.consumption, action, consider_model=consider_model, decision_h=decision_human)
            # Update the consumption on the appliance int he state
            appliance.consumption = appliance_consumption_current
            consumptions_intermittents_.append(appliance_consumption_current)
            consumptions_intermittents_dict_[name_appliance_intermmitent] = appliance_consumption_current
            consumption_intermitent += appliance_consumption_current
            consumption_ += appliance_consumption_current
        self.consumptions_intermittents[step] = {
            'list_': consumptions_intermittents_,
            'dict_': consumptions_intermittents_dict_
        }

        # Get the consumption and generation of the rest of appliances
        for appliance_name, appliance in current_state.dictionary_of_appliances.items():
            # Check if it is the solar generation 
            if isinstance(appliance, SolarPV):
                generation_ = appliance.generation
                if (appliance.generation > 0):
                    generation_ = -1.0*generation_
                electric_generation += generation_#appliance.generation
                continue
            # If it is the grid just skip since we cannot work over it
            elif isinstance(appliance, Grid):
                grid_consumption += appliance.consumption
                continue
            
            # Since it's  a continuos appliance it will always on {1.0}
            appliance_consumption_current = appliance.energy_consumed(appliance.consumption, 1.0, consider_model=consider_model, decision_h=0)
            if isinstance(appliance, ContinuousAppliance):
                consumption_constant += appliance_consumption_current
                consumption_ += appliance_consumption_current
        
        # Set class consumptions
        self.current_consumption = consumption_
        self.current_constant_consumption = consumption_constant
        self.current_intermittent_consumption = consumption_intermitent
        self.current_electric_generation = electric_generation
        self.current_grid_consumption = grid_consumption

        return consumption_, consumption_constant, consumption_intermitent, electric_generation, grid_consumption

In [None]:
class RandomAgent:
    """Agent that picks its actions at random from a fixed list of actions
    """
    def __init__(self, actions_spaces:list):
        """Class constructor

        Args:
            actions_spaces (list): Actions the agent can choose from
        """
        # -1 until the owner of the agent sets the number of actions to draw
        self.size_actions = -1
        self.action_tracker = []
        self.action_spaces = actions_spaces

    def select_action(self, states:list, states_obj:list):
        """Draw ``size_actions`` random actions. The arguments are not read

        Args:
            states (list): List of States
            states_obj (list): List of States objects

        Returns:
            tuple: Array of actions and None as placeholder of the control variables
        """
        drawn_actions = np.random.choice(self.action_spaces, size=self.size_actions)
        return drawn_actions, None

    def select_action_one(self, state_obj):
        """Draw one random action per intermittent appliance of the given state

        Args:
            state_obj (State): Current state of the building

        Returns:
            list: Actions
        """
        how_many = len(state_obj.state_appliances.intermittent_appliances)
        return np.random.choice(self.action_spaces, size=how_many)

# 
class RandomCoordinator:
    """Coordinator that drives one ``RandomAgent`` per building.

    The agents never learn; the coordinator only rolls the environment
    forward with random actions and records the rewards, which serves as a
    baseline for the learning agents.
    """
    def __init__(self, building_list_dictionary:dict, env:Environment, epocs:int):
        """Class constructor

        Args:
            building_list_dictionary (dict): Maps building id to the building object
            env (Environment): Environment in use
            epocs (int): Number of epocs to consider (stored, not used by this class)
        """
        self.agents = {}
        self.enviroment = env
        # One random agent per building, sized to its intermittent appliances
        for bid, building in building_list_dictionary.items():
            random_agent = RandomAgent([0.0, 1.0])
            random_agent.size_actions = len(building.get_intermittedent_appliances())
            self.agents[bid] = random_agent
        self.list_reward = []
        self.epocs = epocs

    def call_agents(self):
        """Run the environment until it reports ``done`` using random actions.

        Side effects:
            ``self.list_reward`` holds the reward dictionary of every step.
            ``self.dictionary_list_reward`` maps each building id to a list
            containing a single list with that building's rewards (the same
            nested layout the learning coordinators produce per episode).
        """
        # Reset the history up front so a failed run never exposes stale data
        self.dictionary_list_reward = {}
        rewards_per_building = {}
        list_reward = []
        count_control = 0
        done = False
        # Only the object form of the state is needed to pick actions
        _, state_obj = self.enviroment.init_vars()
        while not done:
            # Ask every building's agent for its actions
            actions = {}
            for bid, state_env in state_obj.items():
                action, _ = self.agents[bid].select_action(None, state_env)
                actions[bid] = action
            # Apply the actions and move on to the state the environment returns
            _, state_obj, reward, done, _ = self.enviroment.step(actions)

            # Add reward history
            for bid, r in reward.items():
                rewards_per_building.setdefault(bid, []).append(r)
            list_reward.append(reward)
            count_control += 1
            if count_control % 50 == 0:
                log_message("Count: "+str(count_control))

        self.list_reward = list_reward
        # Wrap each building's rewards in an outer list (one entry per run)
        for key, value_list in rewards_per_building.items():
            self.dictionary_list_reward.setdefault(key, []).append(value_list)
        log_message("Out of loop")

    # To create the graphs from the environment
    def create_graphs(self, is_training=True):
        """Generate the graphs from the rewards recorded by ``call_agents``.

        Args:
            is_training (bool, optional): Forwarded unchanged to the environment;
                this class has no separate training/testing flow. Defaults to True.
        """
        self.enviroment.graph_performance(is_training=is_training, data_rewards=self.dictionary_list_reward)

In [None]:
# Code based on:
#   Simonini, Thomas. [internet]. Q* Learning with FrozenLake. 2019. [cited 2021 November 20]. 
#   Available from: https://github.com/simoninithomas/Deep_reinforcement_learning_Course/blob/master/Q%20learning/FrozenLake/Q%20Learning%20with%20FrozenLake.ipynb
class QLearningAgent:
    """Tabular Q-Learning agent that controls the intermittent appliances of one building.

    Every combination of the values in ``actions`` over the state vector is a
    row of the Q-table and every combination over the controllable appliances
    is a column, so the table has ``len(actions)**m`` rows and
    ``len(actions)**n`` columns (``m`` and ``n`` as computed in ``__init__``).
    """
    def __init__(self, actions:list, list_state:list, list_elements:list, building_of_appliances:list):
        """Class constructor for the agent

        Args:
            actions (list): Values every state/action position can take (e.g. ``[1.0, 0.0]``)
            list_state (list): Elements that compose a state
            list_elements (list): Appliances over which the agent can act (intermittent appliances)
            building_of_appliances (list): All the appliances in the building
        """
        self.actions = actions
        self.list_elements = list_elements
        self.list_state = list_state
        n = len(self.list_elements)
        m = len(self.list_state) + len(self.list_elements)

        # The possible states are all the possible combinations of the actions on/off
        #   over all the positions of the state vector
        self.possible_states = list(map(list, itertools.product(actions, repeat=m)))
        # Dictionary version of the possible states (state tuple -> row index)
        self.possible_states_dictionary = {
            tuple(state_possible): index_ap
            for index_ap, state_possible in enumerate(self.possible_states)
        }
        # The actions, as well as the states, are the combinations of turning on/off
        #   the corresponding appliances
        self.possible_actions = list(map(list, itertools.product(actions, repeat=n)))
        # Dictionary version of the possible actions (action tuple -> column index)
        self.possible_actions_dictionary = {
            tuple(action_possible): index_ap
            for index_ap, action_possible in enumerate(self.possible_actions)
        }
        self.state_size = len(self.possible_states)
        self.action_size = len(self.possible_actions)
        # Create the qtable
        print("Possible shape zeros", self.state_size, self.action_size)
        # NOTE(review): float16 keeps the table small but saturates to inf
        #   quickly; update_qtable reports when that happens.
        self.qtable = np.zeros((self.state_size, self.action_size),dtype='float16')
        # Hyperparameters
        # Total episodes
        self.total_episodes = 15000
        # Learning rate: 0 = agent considers only prior knowledge, 1 = only most recent
        self.learning_rate = 0.1
        # Max steps per episode
        self.max_steps = 99
        # Discounting factor: 0 = immediate priority, 1 = evaluate on future
        self.gamma = 0.95

        # Exploration params
        # Exploration rate
        self.epsilon = 1.0
        # Exploration prob at start
        self.max_epsilon = 1.0
        # Min exploration prob
        self.min_epsilon = 0.01
        # Exponential decay rate for exploration prob
        self.decay_rate = 0.01

        # Time constant (in calls to select_action) of the epsilon decay
        self.epsilon_decay = 200000
        self.steps_done = 0

        # This is to ensure the usage of the same appliances as when training
        self.building_of_appliances = building_of_appliances

        self.history_suggestions_train = []
        self.history_suggestions_test = []

    def select_action(self, state_obj, step = 1, is_test=False):
        """Select an action with an epsilon-greedy policy over the Q-table

        Args:
            state_obj (State): Current state
            step (int, optional): Current step (not used). Defaults to 1.
            is_test (bool, optional): When True epsilon is fixed to ``min_epsilon``. Defaults to False.

        Returns:
            list: Actions to be taken on building

        Raises:
            KeyError, TypeError: If the discretised state is not a row of the
                Q-table; the diagnostics are printed before re-raising.
        """
        # Get a discrete version of the state columns
        state_discrt, xx = state_obj.get_list_qlearning(self.list_state)
        state_discrt = state_discrt.astype(int).tolist()
        # Get Index of the state in the QTable
        try:
            index_state = self.possible_states_dictionary[tuple(state_discrt)]
        except (KeyError, TypeError):
            print("len", len(state_discrt), len(xx))
            for t1, t2 in zip(xx, state_discrt):
                print(t1, t2)
            print("self.list_state", len(self.list_state), self.list_state)
            print("table", len(self.qtable), len(self.qtable[0]))
            # Without a valid row the greedy branch cannot work; surface the
            #   real cause instead of a later UnboundLocalError
            raise
        # Calculate the var to compare with threshold
        random_val = np.random.uniform(0.0, 1.0)

        # Exponential decay from max_epsilon towards min_epsilon
        epsilon = self.min_epsilon + (self.max_epsilon - self.min_epsilon) * \
            math.exp(-1.0 * self.steps_done / self.epsilon_decay)
        if is_test:
            # Testing keeps only the minimum amount of exploration
            epsilon = self.min_epsilon

        self.steps_done += 1
        # If random greater than epsilon then exploitation
        if random_val > epsilon:
            # Get the highest Q from table for the state
            action_index = np.argmax(self.qtable[index_state,:])
            action = self.possible_actions[action_index]
        else:
            # Else, exploration: get a random action
            index_rc = np.random.choice(len(self.possible_actions), replace=False)
            action = self.possible_actions[index_rc]
            # NOTE(review): only exploratory actions are recorded in the
            #   history lists; confirm that this is intended.
            if is_test:
                self.history_suggestions_test.append(action)
            else:
                self.history_suggestions_train.append(action)
        return action

    def get_index_element(self, list_:list, look_for_list:list):
        """Get the index of an element in a list (not used)

        Args:
            list_ (list): List to look in
            look_for_list (list): Element to find in the list

        Returns:
            int: The index
        """
        value_ = list(list_).index(list(look_for_list))
        return value_

    def _print_update_diagnostics(self, header, value_in_cell, reward, value_max_table_row,
                                  index_state, index_actions, update_value):
        """Print every operand of a Q-table update under the given header."""
        print(header)
        print("value_in_cell", value_in_cell)
        print("self.learning_rate", self.learning_rate)
        print("reward", reward)
        print("self.gamma", self.gamma)
        print("value_max_table_row", value_max_table_row)
        print("self.qtable[index_state, index_actions]", self.qtable[index_state, index_actions])
        print("update_value", update_value)
        print("+++++")

    def update_qtable(self,action:list, reward:float, state:StateEnv, new_state:StateEnv):
        """Update the Q-table according to the algorithm

        Applies ``Q <- Q + learning_rate * (reward + gamma * max(Q') - Q)`` to
        the cell of (``state``, ``action``).

        Args:
            action (list): List of actions over appliances
            reward (float): Current reward obtained
            state (StateEnv): Current state
            new_state (StateEnv): Following State
        """
        # Get the index of the actions and of both states
        index_actions = self.possible_actions_dictionary[tuple(action)]
        state_obj_, _ = state.get_list_qlearning(self.list_state)
        index_state = self.possible_states_dictionary[tuple(state_obj_.tolist())]
        new_state_obj_, _ = new_state.get_list_qlearning(self.list_state)
        index_new_state = self.possible_states_dictionary[tuple(new_state_obj_.tolist())]

        # Best value reachable from the new state, ignoring saturated (+inf) cells
        table_row = self.qtable[index_new_state, :]
        value_max_table_row = np.max(table_row[table_row != np.inf])
        value_in_cell = self.qtable[index_state, index_actions]
        # NOTE(review): when the arithmetic below raises a warning the cell is
        #   overwritten with this 0; confirm that resetting is the intent.
        update_value = 0
        # Promote numeric warnings (e.g. overflow) to exceptions so they can be reported
        with warnings.catch_warnings():
            warnings.filterwarnings('error')
            try:
                update_value = value_in_cell + \
                    self.learning_rate * (reward + self.gamma * value_max_table_row - value_in_cell)
            except Warning:
                self._print_update_diagnostics(
                    "RUNTIMEWARNING!", value_in_cell, reward, value_max_table_row,
                    index_state, index_actions, update_value
                )

        # Update the value in the table
        self.qtable[index_state, index_actions] = update_value
        if np.isnan(update_value):
            self._print_update_diagnostics(
                "NAN IN VALUE", value_in_cell, reward, value_max_table_row,
                index_state, index_actions, update_value
            )

        if np.isinf(self.qtable[index_state, index_actions]):
            self._print_update_diagnostics(
                "INF IN TABLE", value_in_cell, reward, value_max_table_row,
                index_state, index_actions, update_value
            )


class QLCoordinator:
    """Class to coordinate the Q-table agents (one ``QLearningAgent`` per building)
    """
    def __init__(self, list_column_states:list, building_list_dictionary:dict, env:Environment, episodes:int):
        """Class constructor for the coordinator

        Args:
            list_column_states (list): Elements that compose the state given to every agent
            building_list_dictionary (dict): Maps building id to the building object
            env (Environment): Environment to be considered
            episodes (int): Number of episodes
        """
        self.agents = {}
        self.enviroment = env
        # Assign a new agent per building
        for bid, building in building_list_dictionary.items():
            appliances_action = building.get_intermittedent_appliances()
            print("=="*20)
            print("Current BID:", bid)
            print("appliances_action", appliances_action)
            print("Lenght Appliances with Actions", len(appliances_action))
            print("list_intermittent_appliances", building.list_intermittent_appliances)
            self.agents[bid] = QLearningAgent(
                [1.0,0.0],
                list_column_states,
                appliances_action,
                building.get_list_appliances()
            )
        self.list_reward = []
        self.episodes = episodes

    def train_agents(self):
        """Train the agents using Q-Learning algorithm

        Side effects:
            ``self.dictionary_list_reward`` maps each building id to a list
            with one reward list per episode; ``self.list_reward`` keeps the
            step rewards of the last episode.
        """
        self.dictionary_list_reward = {}
        # An episode is cut after this many steps (30 * 24 * 4)
        n_days_per_episode = 30*24*4
        for current_episode in range(self.episodes):
            # Init the vars and get the first state
            _, state_obj = self.enviroment.init_vars(is_training=True)

            done = False
            list_reward = []
            dictionary_list_reward = {}
            count_control = 0
            bids_considered = []

            while not done:
                # Get the Actions from each agent
                actions = {}
                for bid, state_env in state_obj.items():
                    actions[bid] = self.agents[bid].select_action(state_env)
                    # Register the building ids until all of them are known
                    if len(bids_considered) != len(state_obj):
                        bids_considered.append(bid)

                # Get the result from applying the actions to the env
                _, next_state_obj, reward, done, _ = self.enviroment.step(actions)

                # Update QTable of every registered building
                for bid_ in bids_considered:
                    self.agents[bid_].update_qtable(
                        actions[bid_],
                        reward[bid_],
                        state_obj[bid_],
                        next_state_obj[bid_]
                    )

                # Change variables from prev to current state
                state_obj = next_state_obj
                # Add reward history
                for bid, r in reward.items():
                    dictionary_list_reward.setdefault(bid, []).append(r)
                list_reward.append(reward)

                count_control += 1
                if count_control % 1000 == 0:
                    log_message("Count: "+str(count_control))
                if count_control % 960 == 0:
                    log_message("Done day "+str( count_control/(24*4)  ))
                # Check if done or end of episode
                if count_control % n_days_per_episode == 0:
                    log_message("End of episode: "+str(current_episode+1))
                    break

            # Save rewards in the general dictionary
            self.list_reward = list_reward
            # Append this episode's rewards to the historical ones
            for key, value_list in dictionary_list_reward.items():
                self.dictionary_list_reward.setdefault(key, []).append(value_list)

    # To create the graphs from the environment
    def create_graphs(self, new_env=None, is_training=False):
        """Create graphs

        Nothing is drawn when ``is_training`` is not True and ``new_env`` is None.

        Args:
            new_env (Environment, optional): Environment used for testing. Defaults to None.
            is_training (bool, optional): Separate flows of training and testing. Defaults to False.
        """
        if is_training == True:
            self.enviroment.graph_performance(is_training=is_training, data_rewards=self.dictionary_list_reward)
        elif new_env is not None:
            new_env.graph_performance(is_training=is_training, data_rewards=self.dictionary_list_reward_test)

    # To test the model
    def test_agents(self, env_test:Environment):
        """To test the agents

        The Q-tables are not updated here; only rewards are recorded in
        ``self.dictionary_list_reward_test`` and ``self.list_reward``.

        Args:
            env_test (Environment): Environment to use
        """
        self.dictionary_list_reward_test = {}
        # Init the vars and get the first state
        _, state_obj = env_test.init_vars()

        done = False
        list_reward = []
        dictionary_list_reward = {}
        count_control = 0
        while not done:
            # Get the Actions from each agent
            actions = {}
            for bid, state_env in state_obj.items():
                action = self.agents[bid].select_action(state_env, is_test=True)
                # NOTE(review): keyed by state_env.bid here but by the dict key
                #   in train_agents; presumably both are the same id.
                actions[state_env.bid] = action

            # Get the result from applying the actions to the env
            _, state_obj, reward, done, _ = env_test.step(actions)

            # Add reward history
            for bid, r in reward.items():
                dictionary_list_reward.setdefault(bid, []).append(r)
            list_reward.append(reward)
            count_control += 1
            if count_control % 50 == 0:
                log_message("Count: "+str(count_control))
        self.list_reward = list_reward
        # Append the rewards of this run to the test history
        for key, value_list in dictionary_list_reward.items():
            self.dictionary_list_reward_test.setdefault(key, []).append(value_list)
        log_message("Out of loop")

In [None]:
# Code taken from:
#   PyTorch [internet]. Reinforcement Learning (DQN) Tutorial. 2021. [cited 2021 November 20]. Available from: https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html
# Record of a single experience as stored in the replay memory
Transition = namedtuple(
    'Transition',
    ['state', 'action', 'next_state', 'reward'],
)

class ReplayMemory(object):
    """Bounded buffer of ``Transition`` records used for experience replay.

    Once ``capacity`` entries are stored, appending a new one discards the
    oldest entry.
    """
    def __init__(self, capacity):
        # A deque with maxlen drops items from the left when it is full
        self.memory = deque(maxlen=capacity)

    def push(self, *args):
        """Build a ``Transition`` from the arguments and store it."""
        record = Transition(*args)
        self.memory.append(record)

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored transitions chosen at random."""
        stored = len(self.memory)
        # Sampling positions without replacement guarantees no duplicates
        positions = np.random.choice(stored, batch_size, replace=False)
        return [self.memory[position] for position in positions]

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

# Code based on:
#   PyTorch [internet]. Reinforcement Learning (DQN) Tutorial. 2021. [cited 2021 November 20]. Available from: https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html
# Deep Q-Learning Network to be used 
class DQN(nn.Module):
    """Fully connected network that maps a state vector to one value per action.

    Only the layers assigned under "Final version" in ``__init__`` take part in
    ``forward``: ``head``, ``linear_1`` .. ``linear_4`` and ``output``.
    """

    def __init__(self, state_size:int, action_size:int, device:str):
        """Class constructor for the neural network

        Args:
            state_size (int): Number of elements in the state
            action_size (int): Number of possible combinations of the actionable appliances
            device (str): The device to be used
        """
        super(DQN, self).__init__()
        # Keep the device for reference; the layers are not moved here
        self.device = device

        # NOTE(review): the "Attempt" blocks below are earlier architectures.
        #   Each one reassigns the same attribute names, so only the last
        #   assignment of every name survives. Attributes that the final
        #   version does not reassign (hidden, sigmoid, softmax, dropout_1,
        #   dropout_2, linear_2_5, linear_2_75) stay on the module although
        #   forward() never calls them; presumably they are still registered
        #   submodules and therefore show up in parameters()/state_dict().
        #   Every nn.Linear created here also draws from the torch random
        #   generator, so removing or reordering these lines would change the
        #   seeded initial weights - confirm before cleaning up.

        # Attempt 0
        self.hidden = nn.Linear(state_size, int(state_size/2))
        self.output = nn.Linear(int(state_size/2), action_size)
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

        # Attempt 1
        self.head = nn.Linear(state_size, 100)
        self.linear_1 = nn.Linear(100, 150)
        self.dropout_1 = nn.Dropout(p=0.2)
        self.linear_2 = nn.Linear(150,100)
        self.dropout_2 = nn.Dropout(p=0.2)
        self.linear_3 = nn.Linear(100, action_size)
        self.output = nn.Linear(action_size, action_size)

        # Attempt 2
        self.head = nn.Linear(state_size, state_size*2)
        self.linear_1 = nn.Linear(state_size*2, state_size*3)
        self.linear_2 = nn.Linear(state_size*3, action_size*2)
        self.linear_3 = nn.Linear(action_size*2, action_size)
        self.output = nn.Linear(action_size, action_size)

        # Attempt 3
        self.head = nn.Linear(state_size, state_size*2)
        self.linear_1 = nn.Linear(state_size*2, state_size*3)
        self.linear_2 = nn.Linear(state_size*3, action_size*3)
        self.linear_2_5 = nn.Linear(action_size*3, action_size*3)
        self.linear_3 = nn.Linear(action_size*3, action_size*2)
        self.linear_4 = nn.Linear(action_size*2, action_size)
        self.output = nn.Linear(action_size, action_size)

        # Attempt 4
        self.head = nn.Linear(state_size, state_size*2)
        self.linear_1 = nn.Linear(state_size*2, state_size*4)
        self.linear_2 = nn.Linear(state_size*4, state_size*4)
        self.linear_2_5 = nn.Linear(state_size*4, action_size*4)
        self.linear_3 = nn.Linear(action_size*4, action_size*2)
        self.linear_4 = nn.Linear(action_size*2, action_size)
        self.output = nn.Linear(action_size, action_size)

        # Attempt 5
        self.head = nn.Linear(state_size, state_size*2)
        self.linear_1 = nn.Linear(state_size*2, state_size*4)
        self.linear_2 = nn.Linear(state_size*4, state_size*4)
        self.linear_2_5 = nn.Linear(state_size*4, action_size*4)
        self.linear_2_75 = nn.Linear(action_size*4, action_size*4) 
        self.linear_3 = nn.Linear(action_size*4, action_size*2)
        self.linear_4 = nn.Linear(action_size*2, action_size)
        self.output = nn.Linear(action_size, action_size)
        
        # Attempt 6
        self.head = nn.Linear(state_size, state_size*2)
        self.linear_1 = nn.Linear(state_size*2, state_size*3)
        self.linear_2 = nn.Linear(state_size*3, action_size*2)
        self.linear_3 = nn.Linear(action_size*2, action_size)
        self.output = nn.Linear(action_size, action_size)
        
        # Attempt 7
        self.head = nn.Linear(state_size, state_size*2)
        self.linear_1 = nn.Linear(state_size*2, state_size*3)
        self.linear_2 = nn.Linear(state_size*3, action_size*3)
        self.linear_2_5 = nn.Linear(action_size*3, action_size*3)
        self.linear_3 = nn.Linear(action_size*3, action_size)
        self.output = nn.Linear(action_size, action_size)

        # Attempt 8
        self.head = nn.Linear(state_size, state_size*2)
        self.linear_1 = nn.Linear(state_size*2, state_size*3)
        self.linear_2 = nn.Linear(state_size*3, state_size*4)
        self.linear_2_5 = nn.Linear(state_size*4, action_size*3) 
        self.linear_2_75 = nn.Linear(action_size*3, action_size*3)
        self.linear_3 = nn.Linear(action_size*3, action_size)
        self.output = nn.Linear(action_size, action_size)

        # Attempt 9
        self.head = nn.Linear(state_size, state_size*2)
        self.linear_1 = nn.Linear(state_size*2, state_size*4)
        self.linear_2 = nn.Linear(state_size*4, state_size*4)
        self.linear_2_5 = nn.Linear(state_size*4, action_size*4)
        self.linear_2_75 = nn.Linear(action_size*4, action_size*4) 
        self.linear_3 = nn.Linear(action_size*4, action_size*2)
        self.linear_4 = nn.Linear(action_size*2, action_size)
        self.output = nn.Linear(action_size, action_size)

        # Attempt 10
        self.head = nn.Linear(state_size, state_size*2)
        self.linear_1 = nn.Linear(state_size*2, state_size*3)
        self.linear_2 = nn.Linear(state_size*3, action_size*3)
        self.linear_2_5 = nn.Linear(action_size*3, action_size*3)
        self.linear_3 = nn.Linear(action_size*3, action_size*2)
        self.linear_4 = nn.Linear(action_size*2, action_size)
        self.output = nn.Linear(action_size, action_size)

        # Final version: the six layers below are the ones forward() uses.
        # Widths go state_size -> *4 -> *6 -> *8 -> action_size*8 -> *4 -> action_size
        self.head = nn.Linear(state_size, state_size*4)
        self.linear_1 = nn.Linear(state_size*4, state_size*6)
        self.linear_2 = nn.Linear(state_size*6, state_size*8)
        self.linear_3 = nn.Linear(state_size*8, action_size*8)
        self.linear_4 = nn.Linear(action_size*8, action_size*4)
        self.output = nn.Linear(action_size*4, action_size)


    def forward(self, x):
        """Forward call on PyTorch nn.Module

        Args:
            x (Tensor): Tensor that enters the network; its last dimension
                must be ``state_size``

        Returns:
            Tensor: Raw (unnormalised) output of the last layer, with
                ``action_size`` values in its last dimension
        """

        # No activation is applied between head and linear_1
        x = self.head(x)
        x = F.leaky_relu(self.linear_1(x), negative_slope=0.001)
        x = F.leaky_relu(self.linear_2(x), negative_slope=0.001)
        x = F.leaky_relu(self.linear_3(x), negative_slope=0.001)
        x = F.leaky_relu(self.linear_4(x), negative_slope=0.001)
        # The output layer is left linear (no sigmoid/softmax)
        x = self.output(x)

        return x

# Deep Q-Learning Agent
class DQLearningAgent:
    """Agent that runs Q-learning with a neural network (DQN) for one building.

    A policy network chooses the actions and is trained from a replay memory
    against a separate target network.
    """
    def __init__(
        self, 
        state_size_, # Deprecated
        action_size, # Number of appliances over which we can take actions
        state_size, # Number of columns that are considered as part of the state
        list_elements, # Elements that form a State
        random_agent, # Random agent
        device, # Device to be used
        bid
    ):
        """Constructor for the agent

        Args:
            state_size_ (int): Size of the states (not used)
            action_size (int): Number of intermittent appliances
            state_size (int): Number of columns that are considered as part of the state
            list_elements (list): Elements of a state
            random_agent (RandomAgent): To make random decision when needed
            device (str): Device to be used
            bid (int): Building ID
        """
        # Set values
        self.bid = bid
        self.state_size = state_size
        self.action_size = action_size
        self.random_agent = random_agent
        self.list_elements = list_elements
        self.device = device

        # Considering the action_size we generate all the possible values of it
        self.possible_actions = list(map(list, itertools.product([0.0,1.0], repeat=action_size)))
        # Dictionary (action tuple -> output index) to accelerate the lookups
        self.possible_actions_dictionary = {
            tuple(action_possible): index_ap
            for index_ap, action_possible in enumerate(self.possible_actions)
        }

        # Convert list and send it to device
        self.possible_actions = torch.FloatTensor(self.possible_actions)
        self.possible_actions = self.possible_actions.to(self.device)

        self.size_possible_actions = len(self.possible_actions )
        print("size_possible_actions", self.size_possible_actions)

        # Discount factor: 0 = immediate priority, 1 = evaluate on future
        self.gamma = 0.95

        # Only epsilon_min is read by this class (exploration rate while testing)
        self.epsilon_max = 1.0
        self.epsilon = self.epsilon_max
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.75

        # Exponential epsilon schedule used by select_action while training
        self.eps_start = 1.0
        self.eps_end = 0.01
        self.eps_decay = 250000

        self.batch_size = 128
        self.target_update = 5 # Every N will update the target net

        # Counters of random vs. greedy decisions
        self.count_random = 0
        self.count_max = 0

        # Step size of the optimizer
        self.learning_rate = 0.01

        # Init networks to be used
        print("appliances_size", state_size)
        print("action_size", action_size)
        print("possible actions", self.size_possible_actions)
        # Instance base and target net, and send them to the device
        self.policy_net = DQN(state_size, self.size_possible_actions, self.device).to(self.device)
        self.target_net = DQN(state_size, self.size_possible_actions, self.device).to(self.device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()

        # Optimizer and memory for replay
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=self.learning_rate)
        self.memory = ReplayMemory(10000)

        self.steps_done = 0
        self.episode_durations = []

        self.num_episodes = 30
        self.max_steps = 100

    # Method to select an action
    def select_action(self, state_obj:StateEnv, is_test=False):
        """Select an action with an epsilon-greedy policy over the policy network

        Args:
            state_obj (StateEnv): Current state
            is_test (bool, optional): When True epsilon is fixed to ``epsilon_min``. Defaults to False.

        Returns:
            numpy.ndarray or Tensor: One value per intermittent appliance. The
                random branch returns what ``random_agent.select_action_one``
                produces, the greedy branch a row of ``self.possible_actions``.
        """
        # Get the state as array to be used
        state, _ = state_obj.get_list_dql(self.list_elements)
        state = torch.FloatTensor(state).to(self.device)

        sample = np.random.random()
        # Exponential epsilon decay per action taken
        epsilon = self.eps_end + (self.eps_start - self.eps_end) * \
            math.exp(-1.0 * self.steps_done / self.eps_decay)
        if is_test:
            epsilon = self.epsilon_min

        self.steps_done += 1
        # If random is less than the epsilon then explore
        if sample < epsilon:
            self.count_random += 1
            return self.random_agent.select_action_one(state_obj)

        # Else exploit: take the action whose predicted value is the highest
        self.count_max += 1
        with torch.no_grad():
            actions_ = self.policy_net(state)
            action = torch.argmax(actions_).item()
            return self.possible_actions[action]

    # Optimize model
    def optimize_model(self):
        """Run one optimisation step of the policy network on a replay batch.

        Does nothing until the memory holds at least ``batch_size``
        transitions. The loss is the MSE between ``Q(s, a)`` from the policy
        network and ``reward + gamma * max_a' Q_target(s', a')``, where the
        second term is 0 for transitions whose ``next_state`` is None.
        """
        if len(self.memory) < self.batch_size:
            return
        transitions = self.memory.sample(self.batch_size)
        # Transpose batch: list of Transitions -> Transition of tuples
        batch = Transition(*zip(*transitions))

        # Mask and list of the transitions that do have a following state
        non_final_mask = torch.tensor(
            tuple(next_s is not None for next_s in batch.next_state),
            device=self.device, dtype=torch.bool
        )
        non_final_next = [next_s for next_s in batch.next_state if next_s is not None]

        # Get information from batch
        state_batch = torch.stack(list(batch.state), dim=0).to(self.device)
        action_batch = torch.stack(list(batch.action), dim=0).to(self.device)
        reward_batch = torch.cat(batch.reward).to(self.device)

        # Q(s, .) for every state of the batch
        q_values = self.policy_net(state_batch)

        # Column of the action that was actually taken in every transition
        action_indices = torch.tensor(
            [self.possible_actions_dictionary[tuple(row)] for row in action_batch.tolist()],
            device=self.device, dtype=torch.long
        )
        # gather keeps the values attached to the graph of policy_net, so the
        #   loss gradients reach the network parameters
        state_action_values = q_values.gather(1, action_indices.unsqueeze(1)).squeeze(1)

        # V(s') from the target network; final states keep the value 0
        next_state_values = torch.zeros(self.batch_size, device=self.device)
        if non_final_next:
            non_final_next_states = torch.stack(non_final_next, dim=0).to(self.device)
            with torch.no_grad():
                next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0]
        # Expected Q value
        expected_state_action_values = reward_batch + (self.gamma * next_state_values)

        # Get the loss
        criterios = nn.MSELoss()
        loss = criterios(state_action_values.unsqueeze(1), expected_state_action_values.unsqueeze(1))
        # Optimize steps
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

class CoordinatorAgent:
    """Coordinator agent for DQL
    """
    def __init__(self, building_list_dictionary:list, env:Environment, episodes:int): 
        """Class constructor for the coordinator

        Args:
            building_list_dictionary (list): List of building to be used
            env (Environment): Environment to be used
            episodes (int): Number of episodes
        """
        self.agents = {}
        self.enviroment = env
        # Create an agent per building
        for bid, building in building_list_dictionary.items():
            log_message("=="*20)
            log_message("Started Current BID: "+str(bid))
            random_agent = RandomAgent([0.0,1.0])

            # Get the list of elements to consider a state
            list_elements_ = building.get_list_appliances()
            list_elements_status = [e+"_status" for e in list_elements_]
            list_elements_usage = ["usage_"+e for e in list_elements_]
            list_elements_final = []

            for el,s,u in zip(list_elements_,list_elements_status,list_elements_usage):
                list_elements_final.append(s)
                list_elements_final.append(el)
                # Avoid add refrigerator since this is a continuous appliance
                #    also avoid solar since this does not have a count of usages 
                if 'refri' not in u and 'solar' not in u and 'grid' not in u:
                    list_elements_final.append(u)

            # Add the rest of elements since the list from building are
            #   just the appliances
            #  'day', 'minute' , 'humidy', 'barameter', 'visibility'
            list_elements_ = ['month', 'hour',] + \
                ['temperature', 'wind'] + \
                ['Type', 'DayType', 'Season'] + \
                ['decision_p', 'cost_p'] + \
                    list_elements_final
                    #list_elements_
            #print(list_elements_)
            log_message("Size of state: "+str(len(list_elements_)))
            # Get the list of appliances over we can do an action
            appliances_action = building.get_intermittedent_appliances()
            log_message("Appliances with actions"+str(len(appliances_action)))
            print(appliances_action)
            log_message("=="*20)
            self.device = torch.device("cpu")
            # torch.device("cpu")  # torch.device("cuda" if torch.cuda.is_available() else "cpu")
            log_message("Using device: "+str(self.device))

            dql_agent = DQLearningAgent(
                0, # Not used
                len(appliances_action), # Number of appliances over which we can take an action
                len(list_elements_), # Number of columns that are consider as part of the state
                list_elements_, # Elements that form a State
                random_agent, # Random agent when episolon correspond
                self.device, # The device to use
                bid
            )
            # Add the agent to the dictionary
            self.agents[bid] = dql_agent
        # Set the rest of properties
        self.list_elements_ = list_elements_
        self.num_episodes = 50
        self.target_update = 10
        self.list_rewards = []
        self.episodes = episodes
        self.building_list_dictionary = building_list_dictionary

    def convert_action(self, action_):
        """Method to conver actions from tensor to numpy array

        Args:
            action_ (Tensor): The tensor to be converted

        Returns:
            array: Numpy array resulted
        """
        if torch.is_tensor(action_): 
            action_ = action_.detach().clone().numpy()
        return action_

    def add_to_list(self, list_, element):
        """To add to a list (not used)

        Args:
            list_ (list): List to be used
            element (int): Element to be added

        Returns:
            list: List with the new element added
        """
        list_.append(element)
        return list_

    
    def train_models(self):
        """To train the model using the Q-learning algorithm
        """
        # Set the list of elements in state at env
        self.enviroment.list_elements_state = self.list_elements_
        self.dictionary_list_reward = dict(
            (
                str(bid), []
            )
            for bid in self.building_list_dictionary.keys()
        )
        n_days_per_episode = 30*24*4
        for current_episode in range(self.episodes):
            # Call the environment to start
            # Init the vars and get the first state
            state, state_obj = self.enviroment.init_vars()

            # Start the training loop
            done = False
            list_reward = []
            dictionary_list_reward = dict(
                (
                    str(bid), []
                )
                for bid in self.building_list_dictionary.keys()
            )
            count_control = 0
            #bids_considered = []
            while not done:
                # Get the Actions from each agent
                actions = {}
                for bid, state_env in state_obj.items():
                    # Get the agent
                    #current_agent = self.agents[bid]
                    action = self.agents[bid].select_action(state_env)
                    if torch.is_tensor(action): #and action.is_cuda:
                        #action = action.cpu().detach().numpy()
                        action = action.detach().clone().numpy()
                    actions[bid] = action

                # Move to next state
                next_state, next_state_obj, reward, done, _ = self.enviroment.step(actions)

                # Add reward history
                for bid, r in reward.items():
                    dictionary_list_reward[bid].append(r)

                # Remember part 
                for mbid, model in self.agents.items():
                    # Save the state from the returned elements
                    model.memory.push(
                        torch.FloatTensor(state_obj[mbid].get_list_dql(model.list_elements)[0]), 
                        torch.FloatTensor(actions[mbid]), 
                        torch.FloatTensor(next_state_obj[mbid].get_list_dql(model.list_elements)[0]), 
                        torch.FloatTensor([reward[mbid]])
                    )
                
                # Optimization
                for model in self.agents.values():
                    model.optimize_model()

                # Change state
                state_obj = next_state_obj
                state = next_state
                
                # Log event - steps so far
                count_control += 1
                if count_control % 1000 == 0:
                    log_message("Count: "+str(count_control))
                # Log event of days
                if count_control % 960 == 0:
                    log_message("Done day "+str( count_control/(24*4)  ))
                # Check if done or end of episode
                if count_control % n_days_per_episode == 0:
                    log_message("End of episode: "+str(current_episode+1))
                    if done:
                        #for mid, model in self.agents.items():
                        #    model.episode_durations.append(t + 1)
                        break
                    break
            
            # Update the target network of the model from the policy one
            if current_episode % self.target_update == 0:
                for mbid, model in self.agents.items():
                    model.target_net.load_state_dict(model.policy_net.state_dict())

            # Save rewards in the general dictionary
            self.list_reward  = list_reward
            # Add value of reward to dictionary general
            for key, value_list in dictionary_list_reward.items():
                # Append the new list to the historical one
                self.dictionary_list_reward[key].append(value_list)

        for v in self.agents.values():
            print("EVALUATE DECISIONS")
            print("Agent", v.bid, "Random:",v.count_random/(v.count_random+v.count_max), "Max:", v.count_max/(v.count_random+v.count_max))

    # To create the graphs from the environment
    def create_graphs(self, new_env=None, is_training=False):
        """Create graphs

        Args:
            new_env (Environment, optional): Environment used. Defaults to None.
            is_training (bool, optional): To separate flows when training and testing. Defaults to False.
        """
        if is_training == True:
            self.enviroment.graph_performance(is_training=is_training, data_rewards=self.dictionary_list_reward)
        elif new_env is not None:
            new_env.graph_performance(is_training=is_training, data_rewards=self.dictionary_list_reward_test)
            
    def test_agents(self, env_test):
        """Test the agents

        Runs the environment once until it reports done. Each agent selects
        its action with is_test=True and no memory or optimization step is
        done.

        Results are left in:
            self.dictionary_list_reward_test: per building, a list holding
                the list of rewards of this run
            self.list_reward: the reward dictionaries of every step
            episode_durations of every agent: the number of steps is appended

        Args:
            env_test (Environment): Enviroment to be used
        """
        self.dictionary_list_reward_test = {}
        # Call the environment to start and get the first state
        _, state_obj = env_test.init_vars()

        done = False
        list_reward = []
        dictionary_list_reward = {}
        steps = 0

        while not done:
            actions = {}
            for bid, state_env in state_obj.items():
                action = self.agents[bid].select_action(state_env, is_test=True)
                if torch.is_tensor(action):
                    action = action.detach().cpu().numpy()
                actions[bid] = action

            # Apply actions
            _, next_state_obj, reward, done, _ = env_test.step(actions)

            # Add reward history
            for bid, r in reward.items():
                dictionary_list_reward.setdefault(bid, []).append(r)
            list_reward.append(reward)

            # Change state
            state_obj = next_state_obj
            steps += 1

            if done:
                # steps already counts this last step, so it is the duration
                for model in self.agents.values():
                    model.episode_durations.append(steps)
                break
            if steps % 100 == 0:
                log_message("Count: "+str(steps))

        # Save rewards
        self.list_reward = list_reward
        for key, value_list in dictionary_list_reward.items():
            # Append the new list to the historical one, creating it if needed
            self.dictionary_list_reward_test.setdefault(key, []).append(value_list)
        log_message("Out of loop")

### General Execution

##### Training

In [None]:
# TRAIN ENV

# Files needed to load the environment, all under the same folder
path_files = "datasets/use/"
path_houses = f"{path_files}houses.csv"
path_appliances = f"{path_files}appliances.csv"
path_data = f"{path_files}15mins/15minute_data_austin/15minute_data_austin.csv"
path_weather = f"{path_files}weather_austin_2018.csv"
path_price = f"{path_files}AustinPrices.csv"
path_metadata = f"{path_files}15mins/15minute_data_austin/metadata.csv"
epocs_to_consider = 10  # This was replaced by episodes
houses_ids = ['4373', '7719', '8156']

# Episodes to run; each one takes 30 days of data
episodes = 2
days_amount_to_use = 30 * episodes

# For getting the bases set both to false
consider_human_decision = False
consider_model = False

# Building used in the file names when the notebook is run once per bid,
#   since the state representation overloads the memory
current_bid = str(houses_ids[0])

# Rows needed: one row is 15 minutes, so 4 per hour and 24 hours per day
rows_to_use = 4 * 24 * days_amount_to_use
log_message(f"Rows to use: {rows_to_use}")
# Amount of time to consider, as [first row, last row]
time_window_env = [0, rows_to_use]
city_env = 'Austin'

# Stage of the model ("Train" or "Test") and the months that belong to it
type_mode = "Train"
months_dist_mode = {
    "Train": [1, 2, 3, 4, 5, 6, 7, 11, 12],
    # August is not complete so it is ignored
    "Test": [9, 10],
}
# Set the months to be used here
months_to_use = months_dist_mode[type_mode]

# Agent to run: "Random", "DeepQLearning" or "QLearning"
# Random model with variables consider_human_decision=False and consider_model=False
#   is used to get the information from the raw data
agent_use = "Random"

# Training environment: overwrite flag and file
overwrite_env_train = True
env_train_file_name = f'training_environment-{agent_use} {current_bid}.pickle'

# Test environment: overwrite flag and file
overwrite_env_test = True
env_test_file_name = f'test_environment-{agent_use} {current_bid}.pickle'

# Random model: overwrite flag and files
overwrite_random_model = True
random_model_name = 'random_model.pickle'
random_model_name_2 = 'random_model_2.pickle'

# Q Learning model: overwrite flag and file
overwrite_qlearning_model = False
qlearning_model_name = f'qlearning_model_{current_bid}.pickle'

# Deep Q Learning model: overwrite flag and file
overwrite_deepqlearning_model = False
deepqlearning_model_name = f'deep_qlearning_model_{current_bid}.pickle'

# Arguments handed to the Environment constructor
params_env = {
    "path_data": path_data,
    "path_appliances": path_appliances,
    "path_metadata": path_metadata,
    "path_prices": path_price,
    "path_weather": path_weather,
    "houses_id": houses_ids,
    "time_window": time_window_env,
    "city": city_env,
    "buildings_number": len(houses_ids),
    "consider_human": consider_human_decision,
    "months_to_load": months_to_use,
    "consider_model": consider_model,
    "model_name": agent_use,
    "episodes": episodes,
    "is_training": True,  # When using Random or Normal set to False
}


# To save and load in memory
# The dump of the training environment was disabled in this notebook, but the
#   file was still opened in 'wb' mode, which emptied any environment saved
#   before. The file is now touched only when the dump is really done.
save_env_train = False

if overwrite_env_train:
    enviroment = Environment(**params_env)
    if save_env_train:
        log_message("Writing pickle file")
        # The context manager closes the file even if the dump fails
        with open(env_train_file_name, 'wb') as file_env:
            pickle.dump(enviroment, file_env)
        log_message("Done pickle file")
else:
    log_message("Reading pickle file")
    # NOTE: pickle.load can run arbitrary code, load only files written by
    #   this notebook
    with open(env_train_file_name, 'rb') as file_env:
        enviroment = pickle.load(file_env)
    log_message("Done pickle file")


In [None]:
# When it is random
if agent_use == "Random":
    # To save and load in memory
    if overwrite_random_model:
        random_coordinator = RandomCoordinator(enviroment.buildings, enviroment, episodes)
        # call the agents
        random_coordinator.call_agents()
        # Store the coordinator; the context manager closes the file even
        #   if the dump fails
        with open(random_model_name, 'wb') as file_model_random:
            pickle.dump(random_coordinator, file_model_random)
    else:
        # Load the model stored before
        # NOTE: pickle.load can run arbitrary code, load only files written
        #   by this notebook
        with open(random_model_name, 'rb') as file_model_random:
            random_coordinator = pickle.load(file_model_random)
        random_coordinator.call_agents()

In [None]:
# When it is using the Q-Table
if agent_use == "QLearning":
    # Get the elements to determinate the state and action
    list_appls = enviroment.list_name_appliances_intert
    list_elements = list_appls
    # Alternative columns kept for reference: + ['DayType', 'Type', 'Season']
    list_column_state = list_appls + ['Type', 'cost_p', 'decision_p']

    # Only the agents are stored in the pickle file, so the coordinator is
    #   always created here. Before, the loaded agents were assigned to
    #   qlearning_coordinator itself, which left it without the
    #   create_graphs and test_agents methods used afterwards
    qlearning_coordinator = QLCoordinator(list_column_state, enviroment.buildings, enviroment, episodes)

    # To save and load in memory
    if overwrite_qlearning_model:
        # Train the agents
        qlearning_coordinator.train_agents()
        # Store the agents; the context manager closes the file even if
        #   the dump fails
        with open(qlearning_model_name, 'wb') as file_model_ql:
            pickle.dump(qlearning_coordinator.agents, file_model_ql)
        log_message("Done pickle file")
    else:
        # Put the stored agents back in the coordinator
        # NOTE: pickle.load can run arbitrary code, load only files written
        #   by this notebook
        with open(qlearning_model_name, 'rb') as file_model_ql:
            qlearning_coordinator.agents = pickle.load(file_model_ql)
    # NOTE(review): after loading there was no training in this session;
    #   confirm QLCoordinator.create_graphs copes with that
    qlearning_coordinator.create_graphs(is_training=True)

In [None]:
# When using DQL
if agent_use == "DeepQLearning":

    # To save and load in memory
    if overwrite_deepqlearning_model:
        dql_coordinator = CoordinatorAgent(enviroment.buildings, enviroment, episodes)
        # Train the agents
        dql_coordinator.train_models()
        # Store the coordinator; the context manager closes the file even
        #   if the dump fails
        with open(deepqlearning_model_name, 'wb') as file_model_dql:
            pickle.dump(dql_coordinator, file_model_dql)
    else:
        # Load the coordinator stored before
        # NOTE: pickle.load can run arbitrary code, load only files written
        #   by this notebook
        with open(deepqlearning_model_name, 'rb') as file_model_dql:
            dql_coordinator = pickle.load(file_model_dql)

    dql_coordinator.create_graphs(is_training=True)

In [None]:
# Sound to alert the end of training
# winsound only exists on Windows, so the import is optional and the cell
#   also runs on other systems
try:
    import winsound
except ImportError:
    winsound = None

filename = 'sounds/guitar.wav'
# To play the sound (Windows only) uncomment the next line
#winsound.PlaySound(filename, winsound.SND_FILENAME)

##### Testing

In [None]:
# TEST ENV

# The test uses a single episode of 61 days
episodes = 1
days_amount_to_use = 61
# Rows needed: one row is 15 minutes, so 4 per hour and 24 hours per day
rows_to_use = 4 * 24 * days_amount_to_use
# Amount of time to consider, as [first row, last row]
time_window_env = [0, rows_to_use]
city_env = 'Austin'

# Stage of the model ("Train" or "Test") and its months
type_mode = "Test"
months_to_use = months_dist_mode[type_mode]
print(months_to_use)

# Arguments of the new environment used to test the agents; the training
#   environment is handed over as 'copy_env'
params_env = {
    "path_data": path_data,
    "path_appliances": path_appliances,
    "path_metadata": path_metadata,
    "path_prices": path_price,
    "path_weather": path_weather,
    "houses_id": houses_ids,
    "time_window": time_window_env,
    "city": city_env,
    "buildings_number": len(houses_ids),
    "consider_human": consider_human_decision,
    "months_to_load": months_to_use,
    "consider_model": consider_model,
    "copy_env": enviroment,
    "model_name": agent_use,
    "episodes": episodes,
    "is_training": False,
}

# To save and load in memory
if overwrite_env_test:
    enviroment_test = Environment(**params_env)
    # Store the environment; the context manager closes the file even if
    #   the dump fails
    with open(env_test_file_name, 'wb') as file_env_test:
        pickle.dump(enviroment_test, file_env_test)
else:
    # Load the environment stored before
    # NOTE: pickle.load can run arbitrary code, load only files written by
    #   this notebook
    with open(env_test_file_name, 'rb') as file_env_test:
        enviroment_test = pickle.load(file_env_test)

In [None]:
# When using random
if agent_use == "Random":
    # To save and load in memory
    if overwrite_random_model:
        # NOTE(review): epocs_to_consider is still used here although its
        #   definition says it was replaced by episodes; confirm which value
        #   the random baseline should receive
        random_coordinator = RandomCoordinator(enviroment_test.buildings, enviroment_test, epocs_to_consider)
        # call the agents
        random_coordinator.call_agents()
        # Store the coordinator; the context manager closes the file even
        #   if the dump fails
        with open(random_model_name_2, 'wb') as file_model_random:
            pickle.dump(random_coordinator, file_model_random)
    else:
        # Load the model stored before
        # NOTE: pickle.load can run arbitrary code, load only files written
        #   by this notebook
        with open(random_model_name_2, 'rb') as file_model_random:
            random_coordinator = pickle.load(file_model_random)
        random_coordinator.call_agents()

    random_coordinator.create_graphs(is_training=False)


In [None]:
# When using an approach with Q-Table
if agent_use == "QLearning":
    # First the agents are tested and then the graphs are created, both
    #   with the test environment
    for qlearning_step in (qlearning_coordinator.test_agents, qlearning_coordinator.create_graphs):
        qlearning_step(enviroment_test)

In [None]:
# When using DQL
if agent_use == "DeepQLearning":
    # Run the agents over the test environment
    dql_coordinator.test_agents(env_test=enviroment_test)
    # Graph the test rewards with that same environment
    dql_coordinator.create_graphs(new_env=enviroment_test)

In [None]:
# Sound to alert the end of training
# winsound only exists on Windows, so the import is optional and the cell
#   also runs on other systems
try:
    import winsound
except ImportError:
    winsound = None

filename = 'sounds/guitar.wav'
# To play the sound (Windows only) uncomment the next line
#winsound.PlaySound(filename, winsound.SND_FILENAME)