# 0. Install Dependencies

In [1]:
# Install Dependencies
!pip install keras-rl2

Collecting keras-rl2
  Downloading keras_rl2-1.0.5-py3-none-any.whl (52 kB)
[?25l[K     |██████▎                         | 10 kB 35.4 MB/s eta 0:00:01[K     |████████████▋                   | 20 kB 8.9 MB/s eta 0:00:01[K     |██████████████████▉             | 30 kB 7.8 MB/s eta 0:00:01[K     |█████████████████████████▏      | 40 kB 7.4 MB/s eta 0:00:01[K     |███████████████████████████████▍| 51 kB 4.1 MB/s eta 0:00:01[K     |████████████████████████████████| 52 kB 936 kB/s 
Collecting tf-estimator-nightly==2.8.0.dev2021122109
  Downloading tf_estimator_nightly-2.8.0.dev2021122109-py2.py3-none-any.whl (462 kB)
[K     |████████████████████████████████| 462 kB 7.6 MB/s 
Installing collected packages: tf-estimator-nightly, keras-rl2
Successfully installed keras-rl2-1.0.5 tf-estimator-nightly-2.8.0.dev2021122109


In [2]:
# Import dependencies
import math
import cmath
import random
import itertools
import tensorflow
import numpy as np

from gym import Env
from matplotlib import cm
from scipy.constants import *
from matplotlib import colors
from rl.agents import DQNAgent

import matplotlib.pyplot as plt

from gym.spaces import Discrete, Box
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from mpl_toolkits.mplot3d import Axes3D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten

# 1. Test Random Environment with OpenAI Gym

In [8]:
class ShowerEnv(Env):
    """Minimal Gym environment with one integer state and three discrete actions.

    Every step shifts the state by ``action - 1`` (so -1, 0 or +1) and pays a
    reward of +1 while the state lies in [37, 39], otherwise -1. An episode
    ends after 60 steps.

    NOTE(review): the state is never clipped, so over 60 steps it can leave the
    declared observation range [0, 100] (e.g. 35 - 60 = -25). Confirm whether
    that is intended before relying on ``observation_space`` bounds.
    """

    def __init__(self):
        # Actions 0, 1, 2 map to state deltas -1, 0, +1 (see step()).
        self.action_space = Discrete(3)
        # Declared range of the single observed value.
        self.observation_space = Box(low=np.array([0]), high=np.array([100]))
        # Initial state and step budget are exactly what reset() sets up.
        self.reset()

    def step(self, action):
        """Apply ``action`` and advance the episode by one step.

        Args:
            action: Integer in {0, 1, 2}; the state changes by ``action - 1``.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``state`` is the
            updated scalar state, ``reward`` is +1 or -1, ``done`` is True
            once the step budget is used up, and ``info`` is an empty dict.
        """
        self.state += action - 1
        self.shower_length -= 1

        # Reward staying inside the target band, penalise everything else.
        reward = 1 if 37 <= self.state <= 39 else -1

        # The episode is over when no steps remain.
        done = self.shower_length <= 0

        info = {}
        return self.state, reward, done, info

    def render(self, mode='human'):
        """No-op renderer.

        Accepts ``mode`` so callers following the Gym ``Env.render`` API
        (which pass ``mode='human'``) do not fail with a TypeError.
        """
        pass

    def reset(self):
        """Start a new episode and return the initial state.

        The state is drawn uniformly from 35..41 (38 plus a random offset in
        [-3, 3]) and the step budget is restored to 60.
        """
        self.state = 38 + random.randint(-3, 3)
        self.shower_length = 60
        return self.state
    

In [9]:
# Create the environment instance used by the cells below
env = ShowerEnv()



In [10]:
# Action space: its repr, the number of discrete actions, and its Python type
print(env.action_space, env.action_space.n, type(env.action_space), sep='\n')

# Observation space: its repr and its Python type
print(env.observation_space, type(env.observation_space), sep='\n')

# Current state held by the environment
print(env.state)

Discrete(3)
3
<class 'gym.spaces.discrete.Discrete'>
Box(0.0, 100.0, (1,), float32)
<class 'gym.spaces.box.Box'>
40


In [16]:
episodes = 10
# The observation shape does not change between episodes, so look it up once
states = env.observation_space.shape
for episode in range(1, episodes + 1):
    # Begin a fresh episode and view its first state with the observation shape
    state = np.reshape(env.reset(), states)
    done, score = False, 0

    print(states)
    print(state)

    # Take uniformly random actions until the environment reports the episode is over
    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
    print(f'Episode:{episode} Score:{score}')

(1,)
[0]
Episode:1 Score:-60
(1,)
[0]
Episode:2 Score:-60
(1,)
[0]
Episode:3 Score:-60
(1,)
[0]
Episode:4 Score:-60
(1,)
[0]
Episode:5 Score:-60
(1,)
[0]
Episode:6 Score:-60
(1,)
[0]
Episode:7 Score:-60
(1,)
[0]
Episode:8 Score:-60
(1,)
[0]
Episode:9 Score:-60
(1,)
[0]
Episode:10 Score:-60


In [12]:
# Observation shape and number of discrete actions; both are reused to size the model
states, actions = env.observation_space.shape, env.action_space.n

# View the state left over from the random-rollout cell with the observation shape
state = np.reshape(state, states)

print(states, state, sep='\n')
print(env.observation_space, actions, sep='\n')

(1,)
[0]
Box(0.0, 100.0, (1,), float32)
3


In [13]:
#  model = tensorflow.keras.Sequential() 
def build_model(states, actions):
    """Return a small fully connected Keras network.

    Args:
        states: Input shape handed to the first layer as ``input_shape``.
        actions: Number of units in the linear output layer.

    Returns:
        A ``tensorflow.keras.Sequential`` model made of two 24-unit ReLU
        layers followed by a linear layer with ``actions`` units.
    """
    layers = [
        Dense(24, activation='relu', input_shape=states),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return tensorflow.keras.Sequential(layers)

In [14]:
# Build the network from the observation shape and action count computed above
model = build_model(states, actions)

In [15]:
# Print the layer-by-layer summary, then the output shape next to the action count
model.summary()
print(model.output_shape, actions, sep='\n')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 24)                48        
                                                                 
 dense_1 (Dense)             (None, 24)                600       
                                                                 
 dense_2 (Dense)             (None, 3)                 75        
                                                                 
Total params: 723
Trainable params: 723
Non-trainable params: 0
_________________________________________________________________
(None, 3)
3


# 3. Build Agent with Keras-RL

In [17]:
def build_agent(model, actions):
    """Wrap ``model`` in a keras-rl ``DQNAgent``.

    Args:
        model: Keras model passed to the agent as ``model``.
        actions: Number of actions, passed to the agent as ``nb_actions``.

    Returns:
        A ``DQNAgent`` configured with a ``BoltzmannQPolicy``, a
        ``SequentialMemory`` of 50000 entries (window length 1), 10 warm-up
        steps and ``target_model_update=1e-2``.
    """
    return DQNAgent(
        model=model,
        nb_actions=actions,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [None]:
# Assemble the agent, compile it with Adam (tracking MAE), then train for 10000 steps
dqn = build_agent(model=model, actions=actions)
dqn.compile(optimizer=Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env=env, nb_steps=10000, verbose=1, visualize=False)

In [None]:
# Run 100 test episodes and print the mean reward per episode
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.asarray(scores.history['episode_reward']).mean())

In [None]:
# Run 15 more test episodes; the returned history is discarded
_ = dqn.test(env, nb_episodes=15, visualize=False)

# 4. Reloading Agent from Memory

In [None]:
# Write the agent's weights to 'dqn_weights.h5f', overwriting an existing file
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [None]:
# Remove the model, agent and environment names from the notebook namespace
del model, dqn, env

In [None]:
# The previous cell deleted `env`, `model` and `dqn`, so they must be rebuilt
# before weights can be loaded; otherwise this cell fails with a NameError on
# a fresh Restart & Run All. `states` and `actions` were not deleted.
env = ShowerEnv()
model = build_model(states, actions)
dqn = build_agent(model, actions)
# Compile with the same optimizer settings used for training before loading weights.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.load_weights('dqn_weights.h5f')

In [None]:
# Run 5 test episodes with the agent whose weights were loaded from disk
_ = dqn.test(env, nb_episodes=5, visualize=False)

Testing for 5 episodes ...
Episode 1: reward: -60.000, steps: 60
Episode 2: reward: -54.000, steps: 60
Episode 3: reward: -60.000, steps: 60
Episode 4: reward: -58.000, steps: 60
Episode 5: reward: -54.000, steps: 60
