# 0. Install Dependencies





In [1]:
#!pip install rl-agents==0.1.1
#!pip install tensorflow
#!pip install gym
#!pip install keras
#!pip install keras-rl2
#!pip install pygame
#conda install swig -> this should be installed in the Anaconda command line
#conda install -c conda-forge box2d-py ->  this should be installed in the Anaconda command line

# 1. Test Random Environment with OpenAI Gym




In [2]:
#import tensorflow as tf
#print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

import gym
import random

In [3]:
# Environment under study.
env = gym.make('LunarLander-v2')  # docs: https://gymnasium.farama.org/environments/box2d/lunar_lander/

# Size of the first observation dimension (length of the state vector).
observation_shape = env.observation_space.shape
states = observation_shape[0]

# The action space object itself; the number of actions is read later as `actions.n`.
actions = env.action_space

In [4]:
# Show the observation size and the number of actions, one per line.
print(states, actions.n, sep='\n')


8
4


In [5]:
# Baseline: play a few episodes with uniformly sampled random actions and
# print the total reward collected in each one.
episodes = 10
try:
    for episode in range(1, episodes + 1):
        env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # step() is unpacked as a 4-tuple (observation, reward, done, info),
            # so this cell needs a gym version that returns exactly four values.
            _obs, reward, done, _info = env.step(env.action_space.sample())
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the environment/render window, even when the loop is
    # interrupted or an episode raises.
    env.close()

Episode:1 Score:-87.22762958512209
Episode:2 Score:-410.6787038564532
Episode:3 Score:-138.90818251647892
Episode:4 Score:-172.8068527250178
Episode:5 Score:-173.8826573272541
Episode:6 Score:-299.6772528406517
Episode:7 Score:-120.2283145199923
Episode:8 Score:-99.33083343746699
Episode:9 Score:-122.7421905798728
Episode:10 Score:-113.5958499647744


# 2. Create a Deep Learning Model with Keras



In [6]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten
from tensorflow.keras.optimizers.legacy import Adam

# Copy the standalone keras version string onto tf.keras.
# NOTE(review): presumably a compatibility shim so the keras-rl imports below
# can read tf.keras.__version__ — confirm it is still required.
from keras import __version__
tf.keras.__version__ = __version__

#https://www.tensorflow.org/agents/api_docs/python/tf_agents/agents/DdpgAgent?hl=en
#from rl.agents import SARSAAgent #check different agents -> https://keras-rl.readthedocs.io/en/latest/
from rl.agents import DQNAgent #check different agents -> https://keras-rl.readthedocs.io/en/latest/
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory





In [7]:
def build_model(states, actions):
    """Build the fully connected network that maps an observation to one value per action.

    Args:
        states: Size of the observation. Either an int (length of a flat
            observation vector) or a shape tuple/list for multi-dimensional
            observations.
        actions: Number of discrete actions; sets the width of the output layer.

    Returns:
        An uncompiled ``Sequential`` model:
        Flatten -> Dense(200, relu) -> Dense(200, relu) -> Dense(actions, linear).
    """
    # Derive the input shape from the argument instead of the global `env`, so
    # the function honours its own parameter and works without a live
    # environment in scope.
    if isinstance(states, (tuple, list)):
        observation_shape = tuple(states)
    else:
        observation_shape = (states,)

    model = Sequential()
    # The leading 1 is the observation-window dimension; it matches the
    # window_length=1 used for the agent's SequentialMemory in this notebook.
    model.add(Flatten(input_shape=(1,) + observation_shape))
    model.add(Dense(200, activation='relu'))
    model.add(Dense(200, activation='relu'))
    # Linear output: one unbounded value per action.
    model.add(Dense(actions, activation='linear'))
    return model

In [8]:
#del model #-> Uncomment if a Sequential error appears after building the agent

In [9]:
# Instantiate the Q-network for this environment's observation size and action count.
model = build_model(states=states, actions=actions.n)





In [10]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 8)                 0         
                                                                 
 dense (Dense)               (None, 200)               1800      
                                                                 
 dense_1 (Dense)             (None, 200)               40200     
                                                                 
 dense_2 (Dense)             (None, 4)                 804       
                                                                 
Total params: 42804 (167.20 KB)
Trainable params: 42804 (167.20 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# 3. Build Agent with Keras Neural Network




In [11]:
def build_agent(model, actions):
    """Wrap a Keras network in a keras-rl DQN agent.

    Args:
        model: Keras model to use as the Q-network; expected to emit one value
            per action, as ``build_model`` produces.
        actions: Number of discrete actions, forwarded as ``nb_actions``.

    Returns:
        An uncompiled ``DQNAgent`` configured with an ``EpsGreedyQPolicy``
        (library defaults), a ``SequentialMemory`` holding up to 50000 entries
        with ``window_length=1``, 1000 warm-up steps and
        ``target_model_update=.02``.
    """
    # NOTE(review): in keras-rl a target_model_update below 1 is understood to
    # be a soft-update rate rather than a step interval — confirm in the docs.
    return DQNAgent(
        model=model,
        nb_actions=actions,
        policy=EpsGreedyQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_steps_warmup=1000,
        target_model_update=.02,
    )

In [None]:
 #Adam._name = 'hey' ## use in case of error mentioning this parameter as null-
# Training hyperparameters.
lr = .0001
training_steps = 1000000

# Build the agent around the Q-network and compile it with Adam, tracking MAE.
dqn = build_agent(model, actions.n)
dqn.compile(Adam(learning_rate=lr), metrics=['mae'])

# Train without rendering; verbose=1 prints interval progress.
dqn.fit(env, nb_steps=training_steps, visualize=False, verbose=1)


Training for 2000000 steps ...
Interval 1 (0 steps performed)


  243/10000 [..............................] - ETA: 6s - reward: -4.4510

  updates=self.state_updates,


66 episodes - episode_reward: -227.677 [-1317.158, 270.371] - loss: 20.257 - mae: 14.597 - mean_q: -16.980

Interval 2 (10000 steps performed)
41 episodes - episode_reward: -112.587 [-207.230, 199.116] - loss: 15.823 - mae: 16.921 - mean_q: -5.913

Interval 3 (20000 steps performed)
27 episodes - episode_reward: -148.252 [-298.076, 210.489] - loss: 9.883 - mae: 24.083 - mean_q: -0.509

Interval 4 (30000 steps performed)
11 episodes - episode_reward: -116.290 [-284.742, -48.945] - loss: 7.375 - mae: 27.945 - mean_q: 16.325

Interval 5 (40000 steps performed)
10 episodes - episode_reward: -29.039 [-97.411, 161.585] - loss: 6.071 - mae: 29.228 - mean_q: 25.087

Interval 6 (50000 steps performed)
10 episodes - episode_reward: -49.468 [-95.597, 1.592] - loss: 3.402 - mae: 25.861 - mean_q: 25.156

Interval 7 (60000 steps performed)
11 episodes - episode_reward: -74.742 [-131.813, 16.006] - loss: 2.497 - mae: 24.860 - mean_q: 29.744

Interval 8 (70000 steps performed)
15 episodes - episode_re

In [None]:
# Evaluate the trained agent over 100 episodes without rendering and report
# the mean episode reward.
scores = dqn.test(env, nb_episodes=100, visualize=False)
episode_rewards = scores.history['episode_reward']
print(np.mean(episode_rewards))

In [None]:
# Watch the trained agent play 15 more episodes with rendering enabled.
try:
    _ = dqn.test(env, nb_episodes=15, visualize=True)
finally:
    # Close the environment (and its render window) even if the run is
    # interrupted or raises part-way through.
    env.close()

In [None]:
# Persist the trained agent's weights; overwrite=True replaces an existing
# file of the same name.
dqn.save_weights('LunarLander-v2_weights.h5f',overwrite=True)

In [None]:
#Amazing