In [None]:
!pip install keras-rl2
!pip3 install Box2D
!pip3 install box2d-py
!pip3 install gym[all]
!pip3 install gym[Box_2D]
!apt-get install swig

In [6]:
import os

import gym
import numpy as np
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [3]:
# Create the Gym environment the agent is trained and evaluated on.
ENV_ID = 'LunarLander-v2'
env = gym.make(ENV_ID)

In [4]:
# Size of one observation vector and number of discrete actions, read from the
# environment's declared spaces.
states, actions = env.observation_space.shape[0], env.action_space.n

In [5]:
# Display (observation size, number of actions) as the cell output.
(states, actions)

(8, 4)

In [14]:
def build_model(states, actions, hidden_units=(128, 64, 32), window_length=1):
  """Build the fully connected Q-network used by the DQN agent.

  Args:
    states: Length of a single observation vector.
    actions: Number of discrete actions; the network emits one linear output
      per action.
    hidden_units: Widths of the ReLU hidden layers, in order. The default
      reproduces the original 128-64-32 stack.
    window_length: Number of observations stacked into one network input.
      The default of 1 matches the `window_length=1` replay memory created in
      `build_agent`; keep the two in sync if either is changed.

  Returns:
    An uncompiled `Sequential` model mapping a `(window_length, states)` input
    to `actions` output values.
  """
  model = Sequential()
  # Flatten the (window_length, states) input into a single feature vector.
  model.add(Flatten(input_shape=(window_length, states)))
  for units in hidden_units:
    model.add(Dense(units, activation='relu'))
  # Linear head: one unbounded value per action.
  model.add(Dense(actions, activation='linear'))
  return model

In [10]:
def build_agent(model,actions):
  """Wrap a Q-network in a compiled keras-rl DQN agent.

  Args:
    model: Keras model that outputs one value per action.
    actions: Number of discrete actions the agent can choose from.

  Returns:
    A `DQNAgent` using a Boltzmann exploration policy and a sequential replay
    memory, compiled with Adam and the `mae` metric.
  """
  replay_memory = SequentialMemory(limit=50000, window_length=1)
  exploration_policy = BoltzmannQPolicy()
  agent = DQNAgent(
      model=model,
      nb_actions=actions,
      memory=replay_memory,
      policy=exploration_policy,
      nb_steps_warmup=10,
      target_model_update=1e-2,
  )
  agent.compile(Adam(learning_rate=1e-3), metrics=['mae'])
  return agent

In [11]:
# Instantiate the Q-network, then hand it to the agent factory.
model = build_model(states=states, actions=actions)
dqn = build_agent(model=model, actions=actions)

In [15]:
# Train the agent on the environment with rendering disabled.
TRAINING_STEPS = 50000
dqn.fit(env, nb_steps=TRAINING_STEPS, visualize=False)

Training for 50000 steps ...
Interval 1 (0 steps performed)
10 episodes - episode_reward: 84.677 [-20.254, 122.766] - loss: 3.697 - mae: 39.001 - mean_q: 51.996

Interval 2 (10000 steps performed)
10 episodes - episode_reward: 87.508 [54.180, 121.609] - loss: 2.348 - mae: 37.484 - mean_q: 50.637

Interval 3 (20000 steps performed)
10 episodes - episode_reward: 120.768 [71.326, 159.506] - loss: 1.729 - mae: 34.501 - mean_q: 46.708

Interval 4 (30000 steps performed)
11 episodes - episode_reward: 110.292 [10.438, 152.745] - loss: 1.194 - mae: 32.021 - mean_q: 43.405

Interval 5 (40000 steps performed)
done, took 567.330 seconds


<keras.callbacks.History at 0x7fef84a7fe50>

In [16]:
# Run the trained agent for 100 evaluation episodes with rendering disabled.
# `scores.history['episode_reward']` holds the reward of each episode.
scores = dqn.test(env, nb_episodes=100, visualize=False)

# numpy (`np`) is imported in the notebook's import cell.
print("AVERAGE REWARD :", np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: 131.668, steps: 1000
Episode 2: reward: 224.043, steps: 380
Episode 3: reward: -72.863, steps: 487
Episode 4: reward: 278.314, steps: 404
Episode 5: reward: -31.983, steps: 415
Episode 6: reward: 270.683, steps: 378
Episode 7: reward: 222.725, steps: 385
Episode 8: reward: 157.604, steps: 398
Episode 9: reward: 253.646, steps: 338
Episode 10: reward: 229.508, steps: 359
Episode 11: reward: 217.636, steps: 325
Episode 12: reward: 178.921, steps: 418
Episode 13: reward: -70.447, steps: 555
Episode 14: reward: 243.639, steps: 323
Episode 15: reward: 212.534, steps: 449
Episode 16: reward: 250.793, steps: 371
Episode 17: reward: 257.666, steps: 371
Episode 18: reward: 275.288, steps: 379
Episode 19: reward: 203.359, steps: 341
Episode 20: reward: 275.678, steps: 368
Episode 21: reward: 238.674, steps: 423
Episode 22: reward: 240.769, steps: 466
Episode 23: reward: 260.134, steps: 419
Episode 24: reward: 250.384, steps: 338
Episode 25: reward:

In [17]:
# Create the output directory for saved weights. `exist_ok=True` keeps the cell
# re-runnable: a plain `os.mkdir` raises FileExistsError on the second run.
os.makedirs('Models', exist_ok=True)
model_path = os.path.join('Models','lunarlanderv2_kerasrl')

In [18]:
# Persist the trained network weights. `overwrite=True` replaces weights left by
# a previous run instead of stopping at an interactive overwrite prompt, so the
# notebook can be re-run from top to bottom.
dqn.save_weights(model_path, overwrite=True)