In [None]:
import gym
import tensorflow as tf
import numpy as np

In [None]:
class NESLunar():
    """Evolution-strategy trainer for a small Keras policy on LunarLander-v2.

    One set of "parent" weights is kept. Every epoch each of the ``npop``
    models receives the parent weights plus its own Gaussian noise, is scored
    by playing one episode, and the parent weights are then moved along the
    reward-weighted sum of the noise. Biases are copied from the first model
    at start-up and are never perturbed or updated.
    """

    def __init__(self):
        self.env = gym.make('LunarLander-v2')
        self.epochs = 1000   # number of evolution steps performed by run()
        self.npop = 50       # population size (models evaluated per epoch)
        self.alpha = 0.001   # step size of the weight update
        self.sigma = 0.3     # standard deviation of the weight noise

    def _evaluate(self, _model):
        """Play one episode greedily with ``_model`` and return the summed reward.

        Both gym calling conventions are accepted: the older one where
        ``reset()`` returns the observation and ``step()`` a 4-tuple, and the
        newer one where ``reset()`` returns ``(observation, info)`` and
        ``step()`` a 5-tuple with separate terminated/truncated flags.
        """
        reset_result = self.env.reset()
        if isinstance(reset_result, tuple):
            # Newer gym: (observation, info)
            reset_result = reset_result[0]
        state = np.reshape(reset_result, [1, 8])

        total_reward = 0.0
        done = False

        while not done:
            # Greedy policy: pick the action with the highest network output.
            action = np.argmax(_model.predict(state)[0])
            step_result = self.env.step(action)

            if len(step_result) == 5:
                new_state, reward, terminated, truncated, _ = step_result
                done = bool(terminated or truncated)
            else:
                new_state, reward, done, _ = step_result

            total_reward += reward
            state = np.reshape(new_state, [1, 8])

        return total_reward

    def _generateModels(self):
        """Build one policy network: 8 inputs -> 124 -> 124 -> 64 -> 4 softmax outputs."""
        input_layer = tf.keras.Input(shape=[8])
        dense_one = tf.keras.layers.Dense(124, activation='relu')(input_layer)
        dense_two = tf.keras.layers.Dense(124, activation='relu')(dense_one)
        dense_three = tf.keras.layers.Dense(64, activation='relu')(dense_two)
        output_layer = tf.keras.layers.Dense(4, activation="softmax")(dense_three)

        return tf.keras.models.Model(inputs=input_layer, outputs=output_layer)

    def _getWeightsBiases(self, _model):
        """Return ``(kernels, biases)`` lists for every layer after the input layer."""
        rvw = []
        rvb = []

        # layers[0] is the Input layer and carries no weights.
        for layer in _model.layers[1:]:
            layer_params = layer.get_weights()
            rvw.append(layer_params[0])
            rvb.append(layer_params[1])

        return rvw, rvb

    def _getNoises(self, _models):
        """Draw fresh noise for every weighted layer.

        Returns a list with one array per layer of shape
        ``(npop, rows, cols)``, already scaled by ``sigma``. Layer shapes are
        read from the first model.
        """
        rvn = []

        for layer in _models[0].layers[1:]:
            kernel_shape = np.shape(layer.get_weights()[0])
            rvn.append(
                np.random.randn(self.npop, kernel_shape[0], kernel_shape[1]) * self.sigma
            )

        return rvn

    def _addNoises(self, _models, _noise, _weights, _biases):
        """Load ``parent weights + noise`` into each population member.

        Biases are written back unchanged. The models are modified in place
        and the same list is returned.
        """
        layer_count = len(_models[0].layers)

        for gen in range(self.npop):
            for idx in range(1, layer_count):
                perturbed = _weights[idx - 1] + _noise[idx - 1][gen]
                _models[gen].layers[idx].set_weights([perturbed, _biases[idx - 1]])

        return _models

    def _evolve(self, _models, _noise, _weights, _rewards):
        """Move the parent weights along the reward-weighted noise.

        ``_weights`` is updated in place and also returned.
        """
        # The normalised rewards are the same for every layer: compute once.
        advantages = self._calcStd(_rewards)
        scale = self.alpha / (self.sigma * self.npop)

        for idx in range(1, len(_models[0].layers)):
            # (npop, rows, cols) -> (rows, cols, npop), then contract over npop.
            weighted_noise = np.dot(_noise[idx - 1].transpose(1, 2, 0), advantages)
            _weights[idx - 1] += scale * weighted_noise

        return _weights

    def _calcStd(self, _rewards):
        """Standardise rewards to zero mean and unit standard deviation.

        When every reward is identical the standard deviation is zero; zeros
        are returned in that case so the weight update becomes a no-op instead
        of filling the weights with NaN.
        """
        rewards = np.asarray(_rewards, dtype=float)
        centered = rewards - np.mean(rewards)
        spread = np.std(rewards)

        if spread == 0:
            return np.zeros_like(centered)

        return centered / spread

    def run(self):
        """Run the full training loop.

        Returns:
            A list with one ``(epoch, average_reward)`` tuple per epoch.

        Side effects:
            Prints the average reward every 10th epoch and saves the best
            model of an epoch under ``./models/`` when its confirmation
            episode scores above 200.
        """
        import os

        models = [self._generateModels() for _ in range(self.npop)]
        weights, biases = self._getWeightsBiases(models[0])

        rv = []  # (epoch, average reward) history

        for eps in range(self.epochs):
            noise = self._getNoises(models)
            models = self._addNoises(models, noise, weights, biases)

            rewards = np.array([self._evaluate(model) for model in models], dtype=float)

            # Record and report each epoch exactly once.
            avg_reward = float(np.mean(rewards))
            rv.append((eps, avg_reward))

            if eps % 10 == 0:
                print ("Episode: ", eps,"Avarage Reward: ", avg_reward)

            # Re-play the best candidate once more before deciding to keep it.
            best_idx = int(np.argmax(rewards))
            top_performer = self._evaluate(models[best_idx])

            if top_performer > 200:
                os.makedirs("./models", exist_ok=True)
                # The population entries are Keras models themselves; they
                # have no ``.model`` attribute.
                models[best_idx].save(f"./models/eps_{eps}.hp5")

            weights = self._evolve(models, noise, weights, rewards)

        return rv


In [None]:
# Build the trainer (its constructor creates the LunarLander-v2 environment)
# and run the training loop. `analytics` receives the list of
# (epoch, average reward) tuples returned by run().
Lunar = NESLunar()
analytics  = Lunar.run()

In [None]:
# Persist the training history to disk as JSON: the raw (epoch, reward)
# pairs plus the two columns split out for easy plotting.
import json

data = {
    "data": analytics,
    "eps": [epoch for (epoch, _) in analytics],
    "rewards": [score for (_, score) in analytics],
}

with open('analytics_data.json', 'w') as out_file:
    json.dump(data, out_file)