## Installing Dependencies

In [1]:
!pip install --upgrade ipykernel
#!pip install tensorflow-gpu 
!pip install tensorflow==2.3.0
!pip install keras
!pip install gym
!pip install keras-rl2 
#!apt-get install build-essential python-dev swig python-pygame
#!pip install box2d-py



In [19]:
import tensorflow as tf
import gym 
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

## Testing Baseline with Random Movements 

In [20]:
# Upper bound on steps per episode used for this experiment.
MAX_EPISODE_STEPS = 2500

# Create the CartPole environment and override its episode step limit.
# NOTE(review): `_max_episode_steps` is a private attribute, presumably read by
# gym's time-limit wrapper; confirm it still exists in the installed gym version.
environment = gym.make('CartPole-v1')
environment._max_episode_steps = MAX_EPISODE_STEPS

# Sizes used later to shape the network input and output.
states = environment.observation_space.shape[0]
actions = environment.action_space.n
print(f'No. of States: {states}, No. of Actions: {actions}')

No. of States: 4, No. of Actions: 2


In [21]:
# Baseline: play a few episodes with uniformly random actions and print the
# total reward of each, as a reference point for the trained agent.
episodes = 10
try:
    for episode in range(1, episodes + 1):
        state = environment.reset()
        score = 0
        done = False

        while not done:
            environment.render()
            # Sample from the environment's own action space rather than
            # hard-coding the number of actions.
            action = environment.action_space.sample()
            # Classic gym step API: (observation, reward, done, info).
            state, reward, done, info = environment.step(action)
            score += reward
        print(f'Episode: {episode}, Reward: {score}')
finally:
    # Release the render window even if the loop is interrupted.
    environment.close()

Episode: 1, Reward: 12.0
Episode: 2, Reward: 21.0
Episode: 3, Reward: 19.0
Episode: 4, Reward: 13.0
Episode: 5, Reward: 12.0
Episode: 6, Reward: 16.0
Episode: 7, Reward: 31.0
Episode: 8, Reward: 32.0
Episode: 9, Reward: 15.0
Episode: 10, Reward: 42.0


## Building our Deep Learning Model

In [22]:
def build_model(states, actions):
    """Assemble the fully connected network used by the agent.

    Args:
        states: Second dimension of the input; the model expects inputs of
            shape ``(1, states)``, which are flattened first.
        actions: Number of units in the final linear layer.

    Returns:
        An uncompiled ``Sequential`` model: Flatten, two 32-unit ReLU layers,
        and a linear output layer with ``actions`` units.
    """
    network = Sequential()
    network.add(Flatten(input_shape=(1, states)))
    # Two identical hidden layers.
    for _ in range(2):
        network.add(Dense(32, activation='relu'))
    network.add(Dense(actions, activation='linear'))
    return network

In [23]:
# Build the network for this environment's dimensions and show its layers.
model = build_model(states=states, actions=actions)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_6 (Dense)              (None, 32)                160       
_________________________________________________________________
dense_7 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 66        
Total params: 1,282
Trainable params: 1,282
Non-trainable params: 0
_________________________________________________________________


## Defining RL Agent

In [24]:
from rl.policy import BoltzmannQPolicy
from rl.agents import DQNAgent
from rl.memory import SequentialMemory

In [25]:
def build_agent(model, actions):
    """Wrap ``model`` in a ``DQNAgent``.

    Args:
        model: The network the agent will use.
        actions: Number of actions, passed to the agent as ``nb_actions``.

    Returns:
        An uncompiled ``DQNAgent`` using a ``SequentialMemory`` (limit 50000,
        window length 1) and a ``BoltzmannQPolicy``.
    """
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    exploration_policy = BoltzmannQPolicy()
    return DQNAgent(
        model=model,
        policy=exploration_policy,
        memory=replay_memory,
        nb_actions=actions,
        nb_steps_warmup=5,
        target_model_update=1e-2,
    )

## Train and Test

In [26]:
# Build the agent, compile it with Adam and train it on the environment.
dqn = build_agent(model, actions)
# `lr` is a deprecated alias; `learning_rate` is the supported argument name.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# Keep the returned History object instead of letting the cell echo its repr.
train_history = dqn.fit(environment, nb_steps=50000, verbose=1, visualize=False)

Training for 50000 steps ...
Interval 1 (0 steps performed)
92 episodes - episode_reward: 107.870 [12.000, 266.000] - loss: 1.657 - mae: 19.460 - mean_q: 39.466

Interval 2 (10000 steps performed)
47 episodes - episode_reward: 209.617 [157.000, 305.000] - loss: 2.416 - mae: 39.375 - mean_q: 79.733

Interval 3 (20000 steps performed)
41 episodes - episode_reward: 241.878 [188.000, 363.000] - loss: 1.590 - mae: 41.206 - mean_q: 83.162

Interval 4 (30000 steps performed)
8 episodes - episode_reward: 1266.125 [329.000, 2500.000] - loss: 1.192 - mae: 42.689 - mean_q: 85.980

Interval 5 (40000 steps performed)
done, took 206.418 seconds


<tensorflow.python.keras.callbacks.History at 0x21b326a5e20>

In [28]:
# Run the trained agent for one rendered episode and report the mean reward.
scores = dqn.test(environment, nb_episodes=1, visualize=True)
episode_rewards = scores.history['episode_reward']
print(np.mean(episode_rewards))

Testing for 1 episodes ...
Episode 1: reward: 2500.000, steps: 2500
2500.0
