# 0. Install Dependencies

In [146]:
!pip install tensorflow==2.3.0
!pip install gym
!pip install keras
!pip install keras-rl2

ERROR: Could not find a version that satisfies the requirement tensorflow==2.3.0 (from versions: 2.8.0rc1, 2.8.0, 2.8.1, 2.8.2, 2.8.3, 2.8.4, 2.9.0rc0, 2.9.0rc1, 2.9.0rc2, 2.9.0, 2.9.1, 2.9.2, 2.9.3, 2.10.0rc0, 2.10.0rc1, 2.10.0rc2, 2.10.0rc3, 2.10.0, 2.10.1, 2.11.0rc0, 2.11.0rc1, 2.11.0rc2, 2.11.0)
ERROR: No matching distribution found for tensorflow==2.3.0




# 1. Test Random Environment with OpenAI Gym

In [147]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random

In [148]:
class ShowerEnv(Env):
    def __init__(self):
        # Actions we can take, down, stay, up
        self.action_space = Discrete(4)
        # Temperature array
        self.observation_space = Box(low=np.array([0,0]), high=np.array([100,100]),dtype=int)
        # Set start temp
        self.state = np.array([38,38])
        # Set shower length
        self.shower_length = 60
        self._action_to_direction = {
            0: np.array([1, 0],dtype=int),
            1: np.array([0, 1],dtype=int),
            2: np.array([-1, 0],dtype=int),
            3: np.array([0, -1],dtype=int),
            #4: np.array([0,0])
        }
        
    def step(self, action):
        # Apply action
        # 0 -1 = -1 temperature
        # 1 -1 = 0 
        # 2 -1 = 1 temperature 
        
        self.state += self._action_to_direction[action]
        # Reduce shower length by 1 second
        self.shower_length -= 1 
        
        # Calculate reward
        if self.state[0] >=self.state[0] and self.state[0] <=self.state[0]: 
            reward =1 
        else: 
            reward = -1 
        
        # Check if shower is done
        if self.shower_length <= 0: 
            done = True
        else:
            done = False
        
        # Apply temperature noise
        #self.state += random.randint(-1,1)
        # Set placeholder for info
        info = {}
        
        # Return step information
        return self.state, reward, done, info

    def render(self):
        # Implement viz
        pass
    
    def reset(self):
        # Reset shower temperature
        self.state = np.array([38,38])
        # Reset shower time
        self.shower_length = 60 
        return self.state
    

In [149]:
env = ShowerEnv()

In [150]:
env.observation_space.sample()

array([69,  1])

In [151]:
episodes = 10
for episode in range(1, episodes+1):
    print(env.state)
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        #env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

[38 38]
Episode:1 Score:60
[40 36]
Episode:2 Score:60
[46 32]
Episode:3 Score:60
[41 41]
Episode:4 Score:60
[33 27]
Episode:5 Score:60
[40 36]
Episode:6 Score:60
[41 41]
Episode:7 Score:60
[37 41]
Episode:8 Score:60
[40 38]
Episode:9 Score:60
[41 39]
Episode:10 Score:60


# 2. Create a Deep Learning Model with Keras

In [152]:
import numpy as np


from tensorflow import keras
from keras import layers
from keras.optimizers import Adam
from keras import Sequential
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy

In [153]:
states = env.observation_space.shape
actions = env.action_space.n

In [154]:
actions

4

In [162]:
def build_model(states, actions):
    model = Sequential()    
    model.add(layers.Dense(24, activation='relu', input_shape=(1,2)))
    model.add(layers.Flatten()) 
    model.add(layers.Dense(24, activation='relu'))
    model.add(layers.Dense(actions, activation='linear'))
    return model

In [163]:
del model 

In [164]:
model = build_model(states, actions)

In [165]:
model.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_42 (Dense)            (None, 1, 24)             72        
                                                                 
 flatten_10 (Flatten)        (None, 24)                0         
                                                                 
 dense_43 (Dense)            (None, 24)                600       
                                                                 
 dense_44 (Dense)            (None, 4)                 100       
                                                                 
Total params: 772
Trainable params: 772
Non-trainable params: 0
_________________________________________________________________


# 3. Build Agent with Keras-RL

In [159]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [166]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [167]:
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 1:07:30 - reward: 1.0000





<keras.callbacks.History at 0x2d8443548e0>

In [None]:
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: 60.000, steps: 60
Episode 2: reward: 60.000, steps: 60
Episode 3: reward: 58.000, steps: 60
Episode 4: reward: 60.000, steps: 60
Episode 5: reward: 58.000, steps: 60
Episode 6: reward: 58.000, steps: 60
Episode 7: reward: 60.000, steps: 60
Episode 8: reward: 60.000, steps: 60
Episode 9: reward: 58.000, steps: 60
Episode 10: reward: 60.000, steps: 60
Episode 11: reward: 58.000, steps: 60
Episode 12: reward: 60.000, steps: 60
Episode 13: reward: 58.000, steps: 60
Episode 14: reward: 60.000, steps: 60
Episode 15: reward: 60.000, steps: 60
Episode 16: reward: 60.000, steps: 60
Episode 17: reward: 60.000, steps: 60
Episode 18: reward: 60.000, steps: 60
Episode 19: reward: 60.000, steps: 60
Episode 20: reward: 60.000, steps: 60
Episode 21: reward: 60.000, steps: 60
Episode 22: reward: 60.000, steps: 60
Episode 23: reward: 60.000, steps: 60
Episode 24: reward: 60.000, steps: 60
Episode 25: reward: 60.000, steps: 60
Episode 26: reward: 58.000, st

In [None]:
_ = dqn.test(env, nb_episodes=15, visualize=True)

Testing for 15 episodes ...


TypeError: ShowerEnv.render() got an unexpected keyword argument 'mode'

# 4. Reloading Agent from Memory

In [None]:
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [None]:
del model
del dqn
del env

In [None]:
env = gym.make('CartPole-v0')
actions = env.action_space.n
states = env.observation_space.shape[0]
model = build_model(states, actions)
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

In [None]:
dqn.load_weights('dqn_weights.h5f')

In [None]:
_ = dqn.test(env, nb_episodes=5, visualize=True)