In [1]:
import itertools
import imageio

def run_episode(env, model, render=False, record=False, gif_path='cartpole.gif', fps=29):
    """Play a single episode of ``env`` with ``model`` choosing every action.

    Args:
        env: Environment object. This function calls its ``reset()``,
            ``step(action)``, ``render(...)`` and ``close()`` methods;
            ``step`` must return the 4-tuple ``(obs, reward, done, info)``.
        model: Object whose ``act(obs)`` returns the action to take.
        render: If true, call ``env.render()`` before every step.
        record: If true, grab an ``rgb_array`` frame before every step and,
            once the episode is over, write every second frame to ``gif_path``.
        gif_path: Output file of the recording (only used when ``record``).
        fps: Frame rate handed to ``imageio.mimsave``.

    Returns:
        None. The number of steps taken is printed as ``"Score: <steps>"``.

    Side effects:
        Closes ``env`` when the episode ends.
    """
    images = []
    obs = env.reset()
    for t in itertools.count():
        if record:
            images.append(env.render(mode='rgb_array'))
        if render:
            env.render()
        action = model.act(obs)
        obs, reward, done, info = env.step(action)
        if done:
            # t is zero-based, so the episode lasted t + 1 steps.
            print("Score: " + str(t + 1))
            env.close()
            break

    if record:
        # numpy is not imported in the visible notebook cells; the previous
        # code referenced an undefined `np` here and raised NameError after
        # the whole episode had already been played.
        import numpy as np

        # Keep every second frame (indices 0, 2, 4, ...) to halve the GIF size.
        frames = [np.array(img) for img in images[::2]]
        imageio.mimsave(gif_path, frames, fps=fps)

In [2]:
import gym

# Create the CartPole-v1 environment and seed it with 0.
# env.seed(0) is the cell's last expression, so its return value is shown as the cell output.
env = gym.make("CartPole-v1")
env.seed(0)

[0]

In [3]:
from actor_critic import ActorCritic

# Build the agent from the environment. ActorCritic is imported from the
# actor_critic module, whose source is not part of this notebook.
actor = ActorCritic(env)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [4]:
def train(env, actor, episodes=100, window=10, target_score=500):
    """Train ``actor`` on ``env`` step by step and return the episode scores.

    After every environment step the transition is handed to
    ``actor.remember(obs, action, reward, next_obs, done)`` and
    ``actor.train()`` is called once.

    Args:
        env: Environment object providing ``reset()`` and ``step(action)``;
            ``step`` must return the 4-tuple ``(obs, reward, done, info)``.
        actor: Agent providing ``act(obs)``, ``remember(...)`` and ``train()``.
        episodes: Maximum number of episodes to run.
        window: Number of episodes per progress report; the mean score of the
            last ``window`` episodes is printed every ``window`` episodes.
        target_score: Training stops early as soon as a reported mean score
            reaches this value.

    Returns:
        List with the summed reward of every completed episode, in order.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    returns = []
    for episode in range(1, episodes + 1):
        obs = env.reset()
        score = 0
        done = False
        while not done:
            action = actor.act(obs)
            obs2, reward, done, _ = env.step(action)
            actor.remember(obs, action, reward, obs2, done)
            actor.train()
            obs = obs2
            score += reward
        returns.append(score)

        if episode % window == 0:
            # Average the last `window` scores in one go instead of keeping a
            # running float accumulator that is reset after every report.
            mean = sum(returns[-window:]) / window
            print("Episode: " + str(episode) + ", Mean Score: " + str(mean))
            # `>=` rather than `==`: exact equality on a float mean is fragile.
            if mean >= target_score:
                return returns

    return returns

In [None]:
# Train for at most 200 episodes; train() returns early once a 10-episode mean score of 500 is reached.
# NOTE(review): the saved output below ends each line with ", e: 0.01", which the train() defined
# in this notebook does not print — presumably left over from an earlier version; re-run to refresh.
returns = train(env, actor, episodes=200)

Episode: 10, Mean Score: 13.899999999999999, e: 0.01
Episode: 20, Mean Score: 9.2, e: 0.01
Episode: 30, Mean Score: 9.2, e: 0.01
Episode: 40, Mean Score: 10.0, e: 0.01
Episode: 50, Mean Score: 17.2, e: 0.01
Episode: 60, Mean Score: 9.8, e: 0.01
Episode: 70, Mean Score: 13.1, e: 0.01
Episode: 80, Mean Score: 25.8, e: 0.01
Episode: 90, Mean Score: 48.699999999999996, e: 0.01
Episode: 100, Mean Score: 51.49999999999999, e: 0.01
Episode: 110, Mean Score: 100.7, e: 0.01
Episode: 120, Mean Score: 77.69999999999999, e: 0.01
Episode: 130, Mean Score: 42.99999999999999, e: 0.01
Episode: 140, Mean Score: 38.300000000000004, e: 0.01
Episode: 150, Mean Score: 100.19999999999999, e: 0.01
Episode: 160, Mean Score: 376.79999999999995, e: 0.01
Episode: 170, Mean Score: 446.3, e: 0.01


In [None]:
# Play one on-screen episode with `actor`; run_episode prints the score and closes env when the episode ends.
run_episode(env, actor, render=True)