## Installing Dependencies

In [10]:
!pip install --upgrade ipykernel
#!pip install tensorflow-gpu 
!pip install tensorflow==2.3.0
!pip install keras
!pip install gym
!pip install keras-rl2 
#!apt-get install build-essential python-dev swig python-pygame
#!pip install box2d-py



In [11]:
import tensorflow as tf
import gym 
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

## Testing Baseline with Random Movements 

In [12]:
# Create the CartPole-v0 environment that the baseline, training and test cells share.
environment = gym.make('CartPole-v0')
# Overrides a private attribute to set the step limit to 2500.
# NOTE(review): presumably this is the per-episode step cap on the object
# returned by gym.make -- confirm it still exists in the installed gym version.
environment._max_episode_steps = 2500
# First dimension of the observation shape and the number of discrete actions;
# both are passed to build_model to size the network.
states = environment.observation_space.shape[0]
actions = environment.action_space.n
print(f'No. of States: {states}, No. of Actions: {actions}')

No. of States: 4, No. of Actions: 2


In [13]:
# Baseline: play `episodes` episodes with uniformly random actions and print
# the total reward of each one.
episodes = 10
try:
    for episode in range(1, episodes + 1):
        observation = environment.reset()
        score = 0
        done = False

        while not done:
            environment.render()
            # Sample from the environment's own action space rather than
            # hard-coding the number of actions.
            action = environment.action_space.sample()
            observation, reward, done, info = environment.step(action)
            score += reward
        print(f'Episode: {episode}, Reward: {score}')
finally:
    # Release the render window even if an episode raises part-way through.
    environment.close()

Episode: 1, Reward: 14.0
Episode: 2, Reward: 13.0
Episode: 3, Reward: 54.0
Episode: 4, Reward: 29.0
Episode: 5, Reward: 16.0
Episode: 6, Reward: 12.0
Episode: 7, Reward: 51.0
Episode: 8, Reward: 35.0
Episode: 9, Reward: 10.0
Episode: 10, Reward: 13.0


## Building our Deep Learning Model

In [14]:
def build_model(states, actions):
    """Assemble the feed-forward network used by the agent.

    The input of shape ``(1, states)`` is flattened, passed through three
    ReLU dense layers (32, 48 and 32 units) and mapped to ``actions`` linear
    outputs.

    Args:
        states: Size of the second input dimension.
        actions: Number of units in the linear output layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    network = Sequential()
    network.add(Flatten(input_shape=(1, states)))
    for width in (32, 48, 32):
        network.add(Dense(width, activation='relu'))
    network.add(Dense(actions, activation='linear'))
    return network

In [15]:
# Build the network from the environment's dimensions and print its layer
# and parameter summary.
model = build_model(states, actions)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_4 (Dense)              (None, 32)                160       
_________________________________________________________________
dense_5 (Dense)              (None, 48)                1584      
_________________________________________________________________
dense_6 (Dense)              (None, 32)                1568      
_________________________________________________________________
dense_7 (Dense)              (None, 2)                 66        
Total params: 3,378
Trainable params: 3,378
Non-trainable params: 0
_________________________________________________________________


## Defining RL Agent

In [16]:
from rl.policy import BoltzmannQPolicy
from rl.agents import DQNAgent
from rl.memory import SequentialMemory

In [17]:
def build_agent(model, actions, memory_limit=50000, window_length=1,
                nb_steps_warmup=5, target_model_update=1e-2):
    """Create a DQN agent around ``model`` with a Boltzmann Q policy.

    The defaults reproduce the values that were previously hard-coded, so
    ``build_agent(model, actions)`` behaves exactly as before.

    Args:
        model: Keras model the agent uses; passed to ``DQNAgent`` as ``model``.
        actions: Number of actions; passed to ``DQNAgent`` as ``nb_actions``.
        memory_limit: ``limit`` of the ``SequentialMemory`` replay buffer.
        window_length: ``window_length`` of the ``SequentialMemory``. The
            model's leading input dimension should match this value
            (``build_model`` uses 1).
        nb_steps_warmup: Forwarded to ``DQNAgent`` as ``nb_steps_warmup``.
        target_model_update: Forwarded to ``DQNAgent`` as
            ``target_model_update``.

    Returns:
        The uncompiled ``DQNAgent``.
    """
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(
        model=model,
        policy=policy,
        memory=memory,
        nb_actions=actions,
        nb_steps_warmup=nb_steps_warmup,
        target_model_update=target_model_update,
    )
    return dqn

## Train and Test

In [18]:
# Build the agent, compile it with Adam and train for 50,000 environment steps.
dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# Keep the returned History object so it can be inspected later, instead of
# leaving its bare repr as the cell output.
training_history = dqn.fit(environment, nb_steps=50000, verbose=1, visualize=False)

Training for 50000 steps ...
Interval 1 (0 steps performed)
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
    1/10000 [..............................] - ETA: 6:08 - reward: 1.0000

85 episodes - episode_reward: 115.118 [9.000, 335.000] - loss: 1.168 - mae: 19.198 - mean_q: 39.060

Interval 2 (10000 steps performed)
48 episodes - episode_reward: 212.042 [157.000, 377.000] - loss: 5.842 - mae: 43.119 - mean_q: 87.019

Interval 3 (20000 steps performed)
43 episodes - episode_reward: 233.093 [153.000, 486.000] - loss: 1.446 - mae: 44.349 - mean_q: 89.422

Interval 4 (30000 steps performed)
12 episodes - episode_reward: 807.583 [191.000, 2500.000] - loss: 1.162 - mae: 42.846 - mean_q: 86.170

Interval 5 (40000 steps performed)
done, took 199.292 seconds


<tensorflow.python.keras.callbacks.History at 0x28aa5a4c9a0>

In [20]:
# Run the trained agent for one rendered episode and print the mean of the
# recorded episode rewards.
scores = dqn.test(environment, nb_episodes=1, visualize=True)
episode_rewards = scores.history['episode_reward']
print(np.mean(episode_rewards))

Testing for 1 episodes ...
Episode 1: reward: 2500.000, steps: 2500
2500.0


In [19]:
# Write the agent's weights to dqn_weights.h5 in the working directory,
# replacing any existing file of that name.
dqn.save_weights('dqn_weights.h5', overwrite= True)

In [28]:
# Load the weights stored in dqn_weights.h5 back into the agent.
# Requires `dqn` to exist already (built and compiled in an earlier cell).
dqn.load_weights('dqn_weights.h5')