In [2]:
# Reference implementation on GitHub:
# https://github.com/shivaverma/OpenAIGym/blob/master/lunar-lander/discrete/lunar_lander.py
import gym
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense

In [3]:
class DQN(tf.keras.Model):
    """Q-value network: two 64-unit ReLU layers feeding an output layer.

    The output layer has ``action_size`` units and no activation, so it
    emits one raw value per action. ``state_size`` is accepted but is not
    used by this class.
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        # NOTE(review): the attribute names fc1/fc2/out are presumably what a
        # saved checkpoint is matched against -- keep them stable.
        self.fc1 = Dense(64, activation='relu')
        self.fc2 = Dense(64, activation='relu')
        # Output kernel starts from small uniform values in [-1e-3, 1e-3].
        tiny_uniform = tf.keras.initializers.RandomUniform(-1e-3, 1e-3)
        self.out = Dense(action_size, kernel_initializer=tiny_uniform)

    def call(self, x):
        """Run ``x`` through both hidden layers and return the output layer's values."""
        hidden = self.fc2(self.fc1(x))
        return self.out(hidden)

In [4]:
class DQNAgent:
    """Greedy agent that acts with a DQN whose weights are loaded from disk.

    Args:
        state_size: Length of the flat state vector fed to the network.
        action_size: Number of network outputs (one per action).
        weights_path: Path handed to ``model.load_weights``. Defaults to the
            checkpoint location this notebook originally hard-coded, so
            existing ``DQNAgent(state_size, action_size)`` calls are unchanged.
    """

    def __init__(self, state_size, action_size,
                 weights_path="./save_model/LunarLanderv2_dqn_TF"):
        self.state_size = state_size
        self.action_size = action_size

        self.model = DQN(self.state_size, self.action_size)
        self.model.load_weights(weights_path)

    def get_action(self, state):
        """Return the index of the largest model output for ``state``.

        ``state`` is reshaped to a single-row batch of width ``state_size``
        before being passed to the model; only the first output row is used.
        """
        state = np.reshape(state, [1, self.state_size])
        q_value = self.model(state)
        return np.argmax(q_value[0])

In [5]:
%matplotlib tk

ENV_NAME = 'LunarLander-v2'
EPISODES = 10


def run_episode(env, agent, render=True):
    """Play one episode with ``agent`` and return the summed reward.

    Args:
        env: Object exposing ``reset()``, ``render()`` and a ``step(action)``
            that returns ``(next_state, reward, done, info)``.
        agent: Object exposing ``get_action(state)``.
        render: When true, ``env.render()`` is called before every step.

    Returns:
        The sum of all rewards received until ``done`` became true.
    """
    state = env.reset()
    score = 0
    done = False
    while not done:
        if render:
            env.render()
        action = agent.get_action(state)
        # The info entry returned by step() is not needed here.
        state, reward, done, _ = env.step(action)
        score += reward
    return score


if __name__ == "__main__":
    env = gym.make(ENV_NAME)
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n

    agent = DQNAgent(state_size, action_size)
    print('Env Name : ',ENV_NAME)
    print('States {}, Actions {}'
            .format(state_size, action_size))

    # Collect scores in a plain list; averaging happens once at the end.
    scores = []
    for e in range(EPISODES):
        score = run_episode(env, agent)
        print('epi: {:3d} | score {:3.2f}'.format(e+1, score))
        scores.append(score)
    print('Avg. score {:4.2f}'.format(np.mean(scores)))

Env Name :  LunarLander-v2
States 8, Actions 4
epi:   1 | score 256.56
epi:   2 | score 285.96
epi:   3 | score 263.00
epi:   4 | score 271.77
epi:   5 | score -39.32
epi:   6 | score -24.88
epi:   7 | score 192.51
epi:   8 | score 275.12
epi:   9 | score 251.50
epi:  10 | score 197.37
Avg. score 192.96


In [None]:
# Close the environment that the evaluation cell created with gym.make.
env.close()

  self.func()
