In [1]:
from gymnasium import Env
from gymnasium.spaces import Box
from typing import Any, SupportsFloat
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdt
import random

from stable_baselines3 import A2C

from grid import Microgrid

In [2]:
# Build the per-day dataset handed to the environment.
# Only days with exactly SAMPLES_PER_DAY rows are kept; shorter or longer days
# are skipped.
# NOTE(review): 288 rows/day presumably means 5-minute sampling — confirm.
SAMPLES_PER_DAY = 288

df = pd.read_parquet('./data/power_price.parquet').dropna()

# Calendar date of every row, computed once instead of once per day.
row_dates = df['time'].dt.date
days = row_dates.unique()

dataset = []

# One pass over the frame: sort=False yields the groups in order of first
# appearance, which is the same order `days` (from unique()) has.
for day, part in df.groupby(row_dates.to_numpy(), sort=False):
    if len(part) == SAMPLES_PER_DAY:
        dataset.append({'power': part['power'].to_numpy(),
                        'price': part['price'].to_numpy()})

env = Microgrid()
# Left as the last expression so the cell displays what reset() returns.
env.reset(dataset)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


(array([0. , 0.2, 1. , 0. , 0. ]), {})

In [3]:
# Train an A2C agent with the 'MlpPolicy' policy on the environment built above.
model = A2C(
    policy='MlpPolicy',
    env=env,
    verbose=0,
)
# The value returned by learn() is kept under `m`.
m = model.learn(total_timesteps=28800)

In [8]:
def simpleAlgo(state):
    """Hand-written baseline policy driven only by the price entry of the state.

    Args:
        state: Iterable of exactly five values; the third one is read as the
            price. The other four are ignored.

    Returns:
        A two-element numpy array ``[10, 1 - price]``.

    Raises:
        ValueError: If ``state`` does not unpack into exactly five values.
    """
    # Unpack all five slots so a wrongly sized state still fails loudly.
    _, _, price, _, _ = state
    second_component = 1 - price
    return np.array([10, second_component])


In [9]:
def _run_episode(env, choose_action, seed):
    """Play one episode on ``env`` and return the summed reward.

    Args:
        env: Environment exposing ``reset(seed=...)`` and a ``step`` that
            returns five values (state, reward, terminated, truncated, info).
        choose_action: Callable mapping the current state to an action.
        seed: Value forwarded to ``env.reset(seed=...)``.

    Returns:
        Sum of the rewards returned by ``env.step`` until the episode ends.
    """
    state, _ = env.reset(seed=seed)
    score = 0
    done = False
    while not done:
        state, reward, terminated, truncated, _ = env.step(choose_action(state))
        score += reward
        # End on truncation too; looping on `terminated` alone never stops
        # for an episode that is cut off without terminating.
        done = terminated or truncated
    return score


episodes = 10

for ep in range(episodes):
    # Integer bound: random.randint no longer accepts float arguments such as
    # 1e9 on Python 3.12+ (TypeError) and warns about them from 3.10.
    seed = random.randint(0, 10**9)

    # The same seed is passed to reset() for all three policies.
    score_rand = _run_episode(env, lambda _state: env.action_space.sample(), seed)
    # predict() returns a pair; only its first element (the action) is used,
    # so the observation is no longer overwritten by the second element.
    score_model = _run_episode(env, lambda state: model.predict(state)[0], seed)
    score_algo = _run_episode(env, simpleAlgo, seed)
    print(f'Episode {ep+1}, score_rand {score_rand}, score_model {score_model}, score_algo {score_algo}')

Episode 1, score_rand 36.84997893683129, score_model 42.07986713612437, score_algo 17.825047920406288
Episode 2, score_rand 49.06437652582723, score_model 64.5666877982593, score_algo 31.217656503843727
Episode 3, score_rand 45.91596470935793, score_model 60.0210271217841, score_algo 44.47793877213418
Episode 4, score_rand 42.2657585234344, score_model 56.406177506913785, score_algo 53.86313023287289
Episode 5, score_rand 45.01136508275018, score_model 67.31955324628302, score_algo 72.598650573009
Episode 6, score_rand 227.27283980232917, score_model 299.07693159534836, score_algo 590.6147747767817
Episode 7, score_rand 39.45224514447795, score_model 53.8215437979712, score_algo 46.32821986443562
Episode 8, score_rand 34.62737732656973, score_model 72.62580479634846, score_algo 88.79857199957699
Episode 9, score_rand 55.87064318485029, score_model 78.19725611445702, score_algo 82.41103608310574
Episode 10, score_rand 63.30731426578388, score_model 94.24801172127533, score_algo 85.25487