In [1]:
import numpy as np
from keras import backend as K
from keras.layers import Dense, Input
from keras.models import Model
from keras.optimizers import Adam
import gym
#from utils import PlotLearning

Using TensorFlow backend.


In [2]:
class Agent(object):
    """One-step (TD(0)) actor-critic agent with a shared two-layer trunk.

    Three Keras models are built on top of the same hidden layers:

    * ``actor``  - takes ``[state, delta]`` and is trained with a policy-gradient
      loss weighted by the TD error ``delta``;
    * ``critic`` - takes ``state`` and regresses the state value with MSE;
    * ``policy`` - takes ``state`` and outputs action probabilities; it is only
      used for prediction, so it is never compiled.

    Parameters
    ----------
    alpha : float
        Learning rate of the actor.
    beta : float
        Learning rate of the critic.
    gamma : float
        Discount factor applied to the value of the next state.
    n_actions : int
        Number of discrete actions (size of the softmax output).
    layer1_size, layer2_size : int
        Widths of the two shared hidden layers (these need tuning).
    input_dims : int
        Length of the observation vector.
    """

    def __init__(self, alpha, beta, gamma=0.99, n_actions=4, layer1_size=1024, layer2_size=512, input_dims=8):
        self.gamma = gamma
        self.alpha = alpha
        self.beta = beta
        self.input_dims = input_dims
        self.fc1_dims = layer1_size
        self.fc2_dims = layer2_size
        self.n_actions = n_actions

        self.actor, self.critic, self.policy = self.build_actor_critic_network()
        # Discrete action indices that choose_action samples from.
        self.action_space = list(range(self.n_actions))

    def build_actor_critic_network(self):
        """Build the actor, critic and policy models.

        Returns
        -------
        tuple
            ``(actor, critic, policy)``; all three share ``dense1``/``dense2``.
        """
        # 1-tuple shape: one observation vector per sample (batch axis is implicit).
        inpt = Input(shape=(self.input_dims,))
        # Extra input that carries the TD error into the actor's loss function.
        delta = Input(shape=[1])
        # These two layers are shared by both the actor and the critic heads.
        dense1 = Dense(self.fc1_dims, activation='relu')(inpt)
        dense2 = Dense(self.fc2_dims, activation='relu')(dense1)
        probs = Dense(self.n_actions, activation='softmax')(dense2)
        values = Dense(1, activation='linear')(dense2)

        def custom_loss(y_true, y_pred):
            """Policy-gradient loss: ``-sum(delta * y_true * log(y_pred))``.

            y_true is the one-hot encoding of the action the agent took and
            y_pred is the action-probability output of the network.
            """
            # Clip so that we never take the log of exactly 0 (or of 1, whose
            # log is 0 and would produce a vanishing contribution).
            out = K.clip(y_pred, 1e-8, 1-1e-8)
            log_likelihood = y_true*K.log(out)
            # The log-likelihood must be *scaled* by the TD error. Subtracting
            # delta would only add a term that is constant w.r.t. the weights,
            # so the advantage signal would never reach the gradient.
            return K.sum(-log_likelihood*delta)

        # `inputs`/`outputs` are the Keras 2 keyword names (the singular
        # `input`/`output` spellings are legacy Keras 1 names).
        actor = Model(inputs=[inpt, delta], outputs=[probs])
        actor.compile(optimizer=Adam(lr=self.alpha), loss=custom_loss)

        critic = Model(inputs=[inpt], outputs=[values])
        critic.compile(optimizer=Adam(lr=self.beta), loss='mean_squared_error')

        policy = Model(inputs=[inpt], outputs=[probs])

        return actor, critic, policy

    def choose_action(self, observation):
        """Sample an action index from the current policy for one observation."""
        state = observation[np.newaxis, :]  # prepend a batch axis of length 1
        probabilities = self.policy.predict(state)[0]
        action = np.random.choice(self.action_space, p=probabilities)
        return action

    def learn(self, state, action, reward, state_t, done):
        """Run one TD(0) update of the actor and the critic.

        ``state``/``state_t`` are the observations before and after taking
        ``action``; ``done`` tells whether ``state_t`` is terminal.
        """
        state = state[np.newaxis, :]
        state_t = state_t[np.newaxis, :]

        critic_value_t = self.critic.predict(state_t)
        critic_value = self.critic.predict(state)

        # (1 - int(done)) drops the bootstrap term when the episode has ended:
        # a terminal state has no future value to predict.
        target = reward + self.gamma*critic_value_t*(1-int(done))
        delta = target - critic_value

        # One-hot encoding of the action that was actually taken.
        actions = np.zeros([1, self.n_actions])
        actions[0, action] = 1.0

        self.actor.fit([state, delta], actions, verbose=0)
        self.critic.fit(state, target, verbose=0)

In [3]:
if __name__ == '__main__':
    # Train the actor-critic agent on LunarLander and report a running average.
    agent = Agent(alpha=0.00001, beta=0.00005)

    # NOTE(review): LunarLander presumably needs gym's Box2D extras installed;
    # confirm the environment is available before a long run.
    env = gym.make('LunarLander-v2')
    score_history = []
    num_episodes = 2000

    try:
        for i in range(num_episodes):
            done = False
            score = 0
            observation = env.reset()

            while not done:
                action = agent.choose_action(observation)
                observation_t, reward, done, info = env.step(action)
                # Online update: learn from every single transition.
                agent.learn(observation, action, reward, observation_t, done)
                observation = observation_t
                score += reward

            score_history.append(score)
            avg_score = np.mean(score_history[-100:])  # mean over the last 100 games
            print('episode ', i, 'score %.2f average score %.2f' % (score, avg_score))
    finally:
        # Release the simulator even if training is interrupted or fails.
        env.close()

    # Target file for the learning curve; currently unused because the
    # PlotLearning import at the top of the notebook is commented out.
    filename = 'lunar-lander-actor-critic.png'



AttributeError: module 'gym.envs.box2d' has no attribute 'LunarLander'