In [None]:
import gym
import numpy as np
from gymclass import Notebook
%matplotlib inline


In [None]:
# Create the LunarLander-v2 environment and keep its `.unwrapped` attribute
# (presumably the bare environment without gym's wrappers - confirm against
# the installed gym version).
env = gym.make("LunarLander-v2").unwrapped

# Seed the environment with a fixed value. Left as the cell's last expression
# so the notebook still echoes whatever `seed` returns.
env.seed(1)

In [None]:
%%time
import time
from dqn_agent import DQNAgent


agent = DQNAgent(state_size=8, action_size=4, seed=0, hidden_layer1=64, hidden_layer2=108)


# Load checkpoint, valid checkpoints 200, 400, 800, 1600, 3200, 6400

load_path = "output/LunarLander-ROBIN.ckpt"
agent.load(load_path)


steps = 1000
episode_rewards = [] 
episodes = 100

observation = env.reset()
for ep in Notebook.log_progress(range(episodes)):
    episode_reward = 0.0
    observation = env.reset()
    tic = time.perf_counter() 
    for t in range(steps):
        action = agent.act(observation)
        observation, reward, done, info = env.step(action)
        episode_reward += reward

        # Oops Crashed or flew away, stops early 
        if episode_reward<-250:
            done = True
        if done:
            break
    episode_rewards.append(episode_reward)
    
    
overall_average_reward = np.mean(episode_rewards)
overall_std_reward = np.std(episode_rewards)
print("100 Tries Average Reward=", overall_average_reward)
print("100 Tries Reward Standard Deviation=", overall_std_reward)
print("100 Tries Fitness Percent =", agent.fitness(overall_average_reward))

In [None]:
import matplotlib.pyplot as plt

# Line plot of the total reward of each evaluation try, in the order the
# tries were run.
fig, ax = plt.subplots()
ax.plot(np.arange(len(episode_rewards)), episode_rewards)
ax.set_xlabel('Tries')
ax.set_ylabel('Reward')

# Save the figure to disk, then render it in the notebook.
fig.savefig("Fitness.png")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Reward threshold reported below.
SOLVED_REWARD = 200

# Same rewards as collected by the evaluation cell, in ascending order.
sorted_rewards = sorted(episode_rewards)

plt.plot(np.arange(len(sorted_rewards)), sorted_rewards)
plt.ylabel('Reward')
plt.xlabel('Sorted Tries')
# Saved under its own name: the unsorted-reward cell writes "Fitness.png",
# and reusing that name here would silently overwrite that figure.
plt.savefig("FitnessSorted.png")
plt.show()

# Locate the first sorted try that reaches the threshold.
n_tries = len(sorted_rewards)
n_below = sum(1 for r in sorted_rewards if r < SOLVED_REWARD)
# 1-based rank of the first try at or above the threshold
# (n_tries + 1 when no try reaches it).
count = n_below + 1

if n_below == n_tries:
    # Reported explicitly; otherwise this case would look like
    # "only the very best try reached the threshold".
    print("No try reached a reward of", SOLVED_REWARD)
else:
    # Scale the rank by the number of tries so the figure is a percentile
    # for any number of tries, not only for exactly 100.
    percentile = 100.0 * count / n_tries
    print("Above {} reward from the ".format(SOLVED_REWARD), format(percentile, "g"), " percentile")

In [None]:
# Stack the per-try rewards into a single array for the histogram.
a = np.hstack(episode_rewards)

# Histogram of the rewards; the bin edges are chosen by the 'auto' rule
# (the `bins` argument is passed on to np.histogram).
fig, ax = plt.subplots()
ax.hist(a, bins='auto')
ax.set_title("Histogram with 'auto' bins")
plt.show()