In [1]:
from gymnasium import Env
from gymnasium.spaces import Box
from typing import Any, SupportsFloat
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdt
import random

from stable_baselines3 import A2C

from grid import Microgrid

In [2]:
# Number of samples a day must contain to be used as an episode; days with
# missing rows (after dropna) are skipped.
# NOTE(review): 288 presumably corresponds to 5-minute resolution -- confirm.
STEPS_PER_DAY = 288

df = pd.read_parquet('./data/power_price.parquet')
df = df.dropna()

# Calendar date of every row, computed once and reused for grouping.
row_dates = df['time'].dt.date
days = row_dates.unique()

# One dict per complete day, holding that day's 'power' and 'price' arrays.
dataset = []

# A single groupby pass replaces re-filtering the whole frame once per day.
# sort=False keeps the days in order of first appearance, matching `days`.
for day, part in df.groupby(row_dates, sort=False):
    if len(part) == STEPS_PER_DAY:
        dataset.append({'power' : part['power'].to_numpy(),
                        'price' : part['price'].to_numpy()})

env = Microgrid()
# Hand the per-day dataset to the environment; the returned (observation, info)
# pair is left as the last expression so the notebook displays it.
env.reset(dataset)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


(array([0. , 0.2, 1. , 0. , 0. ]), {})

In [3]:
# Fixed seed so that training (and therefore the evaluation scores reported
# further down) can be reproduced after a kernel restart.
TRAIN_SEED = 0
# Total number of environment steps used for training.
# NOTE(review): 28800 looks like 100 episodes of 288 steps -- confirm.
TOTAL_TIMESTEPS = 28800

model = A2C('MlpPolicy', env, verbose=0, seed=TRAIN_SEED)
# `learn` trains in place; its return value is kept under the original name `m`.
m = model.learn(total_timesteps=TOTAL_TIMESTEPS)

In [20]:
def simpleAlgo(state):
    """Hand-written baseline policy used as a reference next to the RL model.

    Args:
        state: sequence of five numbers, unpacked as
            ``(t, batt, price, sol, hydro)``. Only ``t``, ``price`` and
            ``hydro`` influence the decision.

    Returns:
        ``np.ndarray`` of two floats, each within ``[0, 1]``:
        * ``price < 0``  -> ``[1, 0]``
        * ``hydro == 1`` -> ``[0, 1]``
        * otherwise      -> ``[clip(hydro_need - price), clip((price + 1) / 2)]``
          where ``hydro_need = (1 - hydro) / (1 - t) ** 2``.
    """
    t, batt, price, sol, hydro = state
    if price < 0:
        return np.array([1.0, 0.0])
    if hydro == 1:
        return np.array([0.0, 1.0])

    remaining = 1 - t
    if remaining <= 0:
        # No time left: the need is unbounded. Handling this explicitly avoids
        # a division by zero (ZeroDivisionError for plain floats, a
        # RuntimeWarning + inf for NumPy scalars); the clip below maps it to 1.
        hydro_need = float('inf')
    else:
        hydro_need = (1 - hydro) / remaining ** 2

    first = max(0, min(1, hydro_need - price))
    second = max(0, min(1, (price + 1) / 2))
    # Explicit float dtype so every branch returns the same kind of array.
    return np.array([first, second], dtype=float)


In [21]:
def run_episode(policy, seed):
    """Play one episode on the notebook-level `env` and return the summed reward.

    Args:
        policy: callable taking the current observation and returning an action.
        seed: value forwarded to ``env.reset(seed=...)`` so that different
            policies can be compared on the same seeded episode.

    Returns:
        Sum of the rewards returned by ``env.step`` over the episode.
    """
    obs, _ = env.reset(seed=seed)
    total = 0
    finished = False
    while not finished:
        obs, reward, terminated, truncated, _ = env.step(policy(obs))
        total += reward
        # An episode is over when it terminates OR is truncated; checking only
        # the first flag would loop forever on a truncation.
        finished = terminated or truncated
    return total


episodes = 10

for ep in range(episodes):
    # Integer bound: random.randint rejects float arguments such as 1e9 on
    # recent Python versions.
    seed = random.randint(0, 10**9)

    # Same seed for all three policies so the scores are comparable.
    score_rand = run_episode(lambda obs: env.action_space.sample(), seed)
    # model.predict returns (action, recurrent_state); only the action is used.
    score_model = run_episode(lambda obs: model.predict(obs)[0], seed)
    score_algo = run_episode(simpleAlgo, seed)

    print(f'Episode {ep+1}, score_rand {score_rand}, score_model {score_model}, score_algo {score_algo}')

Episode 1, score_rand 36.36077298756197, score_model 65.79224650033446, score_algo 27.56899682051445
Episode 2, score_rand 34.11146804349788, score_model 58.812264837497395, score_algo 24.231402548843523
Episode 3, score_rand 85.36221083458527, score_model 118.08778193599608, score_algo 33.65831086114392
Episode 4, score_rand 31.698423284836185, score_model 54.80922515892134, score_algo 20.702630357529102
Episode 5, score_rand 49.06900295020333, score_model 94.50293970664492, score_algo -22.149031021531094
Episode 6, score_rand 66.74981343471129, score_model 76.23856401433711, score_algo 29.767128023055907
Episode 7, score_rand 52.563448833130636, score_model 71.7663952295051, score_algo -5.554633145142667
Episode 8, score_rand 42.68279018805088, score_model 49.997308187810994, score_algo 19.74258348958621
Episode 9, score_rand 14.807817819755599, score_model 52.64982318545105, score_algo -20.77103934436588
Episode 10, score_rand 33.027643737402144, score_model 49.84398837916909, score