qlearn_agent.py
import random

import numpy as np

from agent import Agent


class q_learning_agent(Agent):
    """Tabular Q-learning agent with an epsilon-greedy policy."""

    def __init__(self, nb_states, nb_actions, gamma, demand, h_demand, price_penalty, n_epochs):
        self.nb_states = nb_states
        self.nb_actions = nb_actions
        self.demand = demand
        self.h_demand = h_demand
        self.price_penalty = price_penalty
        self.Q = np.zeros((nb_states, nb_actions))  # tabular Q-value estimates
        self.gamma = gamma  # discount factor
        self.alpha = 0.2    # learning rate
        self.eps = 0.1      # exploration probability for epsilon-greedy action selection
        self.n_epochs = n_epochs

    def update(self, s, a, r, sp):
        # One-step Q-learning update: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        self.Q[s, a] += self.alpha * (r + self.gamma * np.max(self.Q[sp, :]) - self.Q[s, a])

    def step(self, state, action, reward, next_state):
        # Flatten the (hour, battery level) state into a single index: hr * 400 + bat_lvl
        s = int(state[0] * 400 + state[1])
        ns = int(next_state[0] * 400 + next_state[1])
        self.update(s, action, reward, ns)

    def act(self, hr, bat_lvl, power_supplied, power_cap, energy_cap, demand):
        bat_lvl = int(bat_lvl)
        s = hr * 400 + bat_lvl  # same flat state encoding as in step()
        # Epsilon-greedy: explore with probability eps, otherwise pick the greedy action.
        if random.uniform(0, 1) < self.eps:
            a = random.randint(0, self.nb_actions - 1)
        else:
            a = np.argmax(self.Q[s])
        # Optional clipping of the action to the battery's power/energy limits:
        # if bat_lvl + (a - power_cap) + power_supplied - demand < 0:
        #     a = power_cap - bat_lvl - power_supplied + demand
        # elif bat_lvl + (a - power_cap) + power_supplied - demand > energy_cap:
        #     a = energy_cap + power_cap - bat_lvl - power_supplied + demand
        return a
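

# Hypothetical usage sketch (not from this repository): illustrates how act() and
# step() might be wired together in a training loop, and how the (hr, bat_lvl)
# pair maps onto the flat state index hr * 400 + bat_lvl. The environment
# dynamics, the reward, and every numeric value below (nb_actions, power_cap,
# energy_cap, the placeholder demand/supply figures) are illustrative
# assumptions, not values taken from the original code.
if __name__ == "__main__":
    agent = q_learning_agent(nb_states=24 * 400, nb_actions=11, gamma=0.99,
                             demand=None, h_demand=None, price_penalty=None,
                             n_epochs=100)
    bat_lvl = 0
    for epoch in range(agent.n_epochs):
        for hr in range(24):
            # Placeholder inputs; a real environment would supply these each hour.
            power_supplied, power_cap, energy_cap, demand = 4.0, 5, 399, 3.0
            a = agent.act(hr, bat_lvl, power_supplied, power_cap, energy_cap, demand)
            # Assumed transition: action a in [0, nb_actions) maps to a charge of (a - power_cap).
            next_bat_lvl = int(min(max(bat_lvl + (a - power_cap), 0), energy_cap))
            reward = -abs(demand - power_supplied)  # stand-in reward signal
            agent.step((hr, bat_lvl), a, reward, ((hr + 1) % 24, next_bat_lvl))
            bat_lvl = next_bat_lvl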