In [None]:
%%javascript

// Register `r` as a command-mode shortcut that executes every cell in the
// notebook.
// NOTE(review): `r` is, by default, the classic-notebook command-mode key for
// converting a cell to raw; this registration replaces it — confirm intended.
Jupyter.keyboard_manager.command_shortcuts.add_shortcut('r', {
    help : 'run all cells',
    help_index : 'zz',
    handler : function(event) {
        // Go through the same `Jupyter` global used for the registration
        // instead of the legacy `IPython` alias.
        Jupyter.notebook.execute_all_cells();
        return false;
    }
});

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import time
import gym
import numpy as np

import Logger
import utils
import CartPole_config as config


In [None]:
# Step budget: the rollout loop stops once this many environment steps
# ("frames") have been taken.
max_train_frame = 2e6

# Optional fixed name for this run. When left as None a default name is
# generated further down; to pin one, assign a string here, e.g.
# experiment_name = 'my_run'
experiment_name = None


In [None]:
# An explicit experiment_name takes precedence; otherwise build a default
# name from utils.time_str() (presumably a timestamp string — see utils).
if experiment_name:
    run_name = experiment_name
else:
    run_name = 'RNG_' + utils.time_str()

# Log directory layout: ./logdir/<env name>/RNG/<run name>
logdir = '/'.join(['./logdir', config.env_name, 'RNG', run_name])
print('logdir\t\t', logdir)

# Seed NumPy's global RNG from the wall clock and print the seed so the
# NumPy random stream of this run can be reproduced later.
random_seed = int(time.time())
np.random.seed(random_seed)
print('random seed\t', random_seed)


In [None]:
def run():
    """Roll out a uniformly random policy and collect per-episode returns.

    Creates the environment named by ``config.env_name`` and steps it with
    actions drawn from ``np.random.randint(config.num_action)`` until
    ``max_train_frame`` steps have been taken or the kernel is interrupted.
    Whenever an episode ends, its length, raw return, clipped return and the
    running episode count are passed to ``logger.log_scalar`` together with
    the current frame count, and the environment is reset.

    Returns:
        list: the unclipped return of every completed episode, in the order
            the episodes finished. An episode still in progress when the
            loop stops is not included. The list is empty if the run is
            interrupted before the first episode ends.
    """
    # Bound before the ``try`` so that an interrupt during environment or
    # logger construction still returns a list instead of raising
    # UnboundLocalError at ``return rewards``.
    rewards = []
    env = None
    try:
        env = gym.make(config.env_name)
        # NOTE(review): no close/flush is called on the logger here —
        # confirm Logger.Logger persists its data without one.
        logger = Logger.Logger(logdir)
        frame = 0
        episode = 0

        env.reset()
        ep_t = 0        # steps taken in the current episode
        ep_r = 0        # sum of raw rewards in the current episode
        ep_r_clip = 0   # sum of rewards clipped to [-1, 1]
        while frame < max_train_frame:
            frame += 1
            ep_t += 1

            # Observations are ignored: the agent acts uniformly at random.
            action = np.random.randint(config.num_action)
            _, reward, done, _ = env.step(action)
            ep_r += reward
            reward = np.clip(reward, -1, 1)
            ep_r_clip += reward

            if done:
                episode += 1
                rewards.append(ep_r)
                logger.log_scalar('performance/episode_len', ep_t, frame)
                logger.log_scalar('performance/reward',      ep_r, frame)
                logger.log_scalar('performance/reward_clip', ep_r_clip, frame)
                logger.log_scalar('performance/episodes',    episode, frame)

                # Start the next episode with fresh per-episode counters.
                env.reset()
                ep_t = 0
                ep_r = 0
                ep_r_clip = 0
                if episode % 5000 == 0:
                    print('episode:', episode, '\tframe', frame)
    except KeyboardInterrupt:
        # A manual interrupt is a normal way to stop early; keep what was
        # collected so far.
        print('KeyboardInterrupt')
    finally:
        # Release environment resources on every exit path.
        if env is not None:
            env.close()
    print('Done')
    return rewards

# Execute the rollout; `rewards` (one return per completed episode) is read
# by the summary-statistics and plotting cells that follow.
rewards = run()

In [None]:
# Summary of the collected episode returns: count first, then one line per
# NumPy reduction.
print('len', len(rewards))
for stat_name, stat_fn in (('min', np.min), ('max', np.max),
                           ('mean', np.mean), ('sd', np.std)):
    print(stat_name, stat_fn(rewards))

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Ascending copy of the episode returns. Plotting this shows the returns in
# rank order, not in the chronological order the episodes were played.
rewards_s = np.sort(rewards)

def make_plot(val_list, ax):
    """Draw a series with its mean and a one-standard-deviation band.

    Four lines are added to ``ax``: the values themselves, a horizontal
    line at their mean (black), and horizontal lines one standard
    deviation above and below the mean (green). A legend is then drawn.

    Args:
        val_list: sequence of numeric values to plot.
        ax: object providing ``plot`` and ``legend`` (e.g. a matplotlib
            Axes); it is modified in place.
    """
    spread = np.std(val_list)
    # Constant series at the mean, with the same shape as the input.
    center = np.mean(val_list) + np.zeros_like(val_list)
    upper = center + spread
    lower = center - spread
    positions = range(len(val_list))

    ax.plot(val_list, label='Rewards')
    ax.plot(positions, center, c='k', label='Mean')
    # Only the upper band carries a label so the legend lists it once.
    ax.plot(positions, upper, c='g', label='Standard deviation')
    ax.plot(positions, lower, c='g')
    ax.legend()

    
# Single-panel figure: sorted episode returns with mean / std overlays.
# NOTE(review): rewards_s is sorted, so the x position is a rank rather than
# a chronological episode index — confirm the 'Episode' label is intended.
fig, ax = plt.subplots()
make_plot(rewards_s, ax)
ax.set(title='Random agent', xlabel='Episode', ylabel='Reward')

fig.tight_layout()
plt.show()
