# 0. Install Dependencies

In [None]:
!pip install tensorflow==2.3.0
!pip install gym
!pip install keras
!pip install keras-rl2

# 1. Test Random Environment with OpenAI Gym

In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
# import cv2
import matplotlib.pyplot as plt

In [32]:
class StaircaseEnv(Env):
    """One-dimensional walking task: move an integer position onto a target.

    The position starts at ``START_STATE`` and each action shifts it by an
    amount in -3..+3. The target is drawn once, when the environment is
    constructed, from the integers 0..20 and is kept across ``reset()`` calls.
    An episode ends as soon as the position equals the target.

    Observations are returned as the bare position value (not wrapped in an
    array), and the position is clamped to ``[MIN_STATE, MAX_STATE]`` so it
    never leaves the bounds declared in ``observation_space``.
    """

    # Bounds and start position of the walk
    MIN_STATE = 0
    MAX_STATE = 20
    START_STATE = 10

    def __init__(self):
        # Actions we can take: 7 choices, mapped in step() to moves -3..+3
        self.action_space = Discrete(7)
        # Observation array
        self.observation_space = Box(
            low=np.array([self.MIN_STATE]), high=np.array([self.MAX_STATE])
        )
        # Set start point
        self.state = self.START_STATE
        # Number of steps taken in the current episode
        self.num_step = 0

        # Target position, fixed for the lifetime of this instance
        self.target = 10 + random.randint(-10, 10)
        # Distance bookkeeping used by the "did we get closer?" reward term
        self.previous_dist = abs(self.state - self.target)
        self.current_dist = self.previous_dist

    def step(self, action):
        """Apply one action and return ``(state, reward, done, info)``.

        Reward per step: a time penalty of ``0.1 * num_step``, plus 3 if the
        distance to the target strictly decreased (otherwise minus 1), plus 5
        on reaching the target.
        """
        # Apply action (0, +/-1, +/-2, +/-3), clamped to the declared bounds
        moved = self.state + (action - 3)
        self.state = min(max(moved, self.MIN_STATE), self.MAX_STATE)
        self.num_step += 1
        reward = -(0.1 * self.num_step)

        # Calculate reward
        self.current_dist = abs(self.state - self.target)
        if self.current_dist < self.previous_dist:
            reward += 3
        else:
            reward -= 1

        self.previous_dist = self.current_dist

        # Check if the episode is done
        done = bool(self.state == self.target)
        if done:
            reward += 5

        # Set placeholder for info
        info = {}

        # Return step information
        return self.state, reward, done, info

    def render(self, mode='human'):
        """No visualisation is implemented; ``mode`` is accepted and ignored."""
        pass

    def reset(self):
        """Start a new episode and return the initial position."""
        # Reset state
        self.state = self.START_STATE
        # Reset num_step
        self.num_step = 0
        # Re-derive the distance baseline; otherwise the value left over from
        # the previous episode would be compared against on the first step.
        self.previous_dist = abs(self.state - self.target)
        self.current_dist = self.previous_dist
        return self.state

In [33]:
# Environment instance shared by the random-walk baseline and the training cell
env = StaircaseEnv()

In [40]:
## Naive walking
# Random-action baseline: keep sampling actions until each episode terminates,
# then report the episode length and the accumulated reward.
episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    score = 0
    done = False
    while True:
        #env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score = score + reward
        if done:
            break
    print(f'{env.num_step} steps taken')
    print('Episode:{} Score:{}'.format(episode, score))

2 steps taken
Episode:1 Score:6.699999999999999
2 steps taken
Episode:2 Score:6.699999999999999
1 steps taken
Episode:3 Score:3.9
2 steps taken
Episode:4 Score:6.699999999999999
1 steps taken
Episode:5 Score:3.9
15 steps taken
Episode:6 Score:1.9999999999999991
2 steps taken
Episode:7 Score:6.699999999999999
327 steps taken
Episode:8 Score:-5108.8
56 steps taken
Episode:9 Score:-106.6
7 steps taken
Episode:10 Score:7.2


# 2. Create a Deep Learning Model with Keras

In [39]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, LayerNormalization
from tensorflow.keras.optimizers import Adam

In [44]:
# Observation shape and number of discrete actions, read from the environment
states, actions = env.observation_space.shape, env.action_space.n
print(states)

(1,)


In [45]:
def build_model(states, actions):
    """Build the feed-forward network used as the Q-function.

    Args:
        states: shape tuple passed as ``input_shape`` of the first layer.
        actions: number of units in the linear output layer.

    Returns:
        An uncompiled ``Sequential`` model: Dense(32, relu) ->
        LayerNormalization -> Dense(32, relu) -> Dropout(0.2) ->
        Dense(actions, linear).
    """
    # Imported locally so the function does not depend on a global `tf` name
    # that is only bound by a later cell.
    from tensorflow.keras.initializers import HeNormal

    # Seeded initialiser shared by both hidden layers
    initialiser = HeNormal(seed=42)
    model = Sequential()
    model.add(Dense(32, activation='relu', kernel_initializer=initialiser, input_shape=states))
    model.add(LayerNormalization())
    model.add(Dense(32, activation='relu', kernel_initializer=initialiser))
    model.add(Dropout(0.2))
    model.add(Dense(actions, activation='linear'))
    return model

# 3. Build Agent with Keras-RL

In [35]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [46]:
def build_agent(model, actions):
    """Wrap ``model`` in a double-DQN agent and return it (not yet compiled).

    Args:
        model: network passed to the agent as ``model``.
        actions: value passed to the agent as ``nb_actions``.
    """
    # A single Boltzmann policy instance serves both training and testing
    boltzmann = BoltzmannQPolicy()
    agent_kwargs = dict(
        model=model,
        nb_actions=actions,
        memory=SequentialMemory(limit=500000, window_length=1),
        policy=boltzmann,
        test_policy=boltzmann,
        enable_double_dqn=True,
        nb_steps_warmup=1000,
        target_model_update=1e-2,
    )
    return DQNAgent(**agent_kwargs)

In [47]:
import tensorflow as tf
# Build a fresh model and agent, then train the agent on `env`
with tf.device('/device:GPU:0'):
    tf.keras.backend.clear_session()
    model = build_model(states, actions)
    dqn = build_agent(model, actions)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
    history = dqn.fit(env, nb_steps=500000, visualize=False, verbose=1)

  super(Adam, self).__init__(name, **kwargs)


Training for 500000 steps ...
Interval 1 (0 steps performed)


  updates=self.state_updates,


 1843/10000 [====>.........................] - ETA: 1:07 - reward: -90.9541done, took 17.983 seconds


In [None]:
# Run the agent on `env` for 20 test episodes without rendering;
# the returned value is assigned to `_` and not used afterwards.
with tf.device('/device:GPU:0'):
    _ = dqn.test(env, nb_episodes=20, visualize=False)

# 4. Saving and Reloading Agent Weights from Disk

In [23]:
# Write the agent's weights to disk, overwriting any existing file at this path
dqn.save_weights('models/StaircaseEnv_dqn_weights.h5f', overwrite=True)

In [52]:
# Rebuild the environment, model and agent from scratch, then restore the
# previously saved weights into the new agent.
# NOTE(review): StaircaseEnv draws its target at construction time, so this
# ENV may have a different target than the environment used for training.
ENV = StaircaseEnv()
actions = ENV.action_space.n
states = ENV.observation_space.shape
with tf.device('/device:GPU:0'):
    tf.keras.backend.clear_session()
    MODEL = build_model(states, actions)
    DQN = build_agent(MODEL, actions)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    DQN.compile(Adam(learning_rate=1e-3), metrics=['mae'])
    DQN.load_weights('models/StaircaseEnv_dqn_weights.h5f')

In [59]:
# Evaluate the reloaded agent on ENV for 1000 episodes (no rendering) and keep
# the returned history object in `scores` for the summary below.
with tf.device('/device:GPU:0'):
    scores = DQN.test(ENV, nb_episodes=1000, visualize=False, verbose=0)

In [60]:
# Summarise the evaluation run: mean episode reward, then mean episode length,
# each centred in a 100-character line padded with '-'.
mean_reward = np.mean(scores.history['episode_reward'])
print(f"Mean episode reward = {mean_reward}".center(100,'-'))
mean_steps = np.mean(scores.history['nb_steps'])
print(f"Average steps taken per episode = {mean_steps}".center(100,'-'))

-----------------------------Mean episode reward = -11.428900000000002------------------------------
------------------------------Average steps taken per episode = 19.963------------------------------
