In [2]:
from editor import Editor
from Box2D.b2 import pi

# Edit your model by running this cell.
# Press 's' while editing to save the model.
# NOTE(review): this note originally said the file is saved to model.json, but the
# recorded output of this cell reports "File saved as: bisected.json" — confirm
# which file Editor actually writes to.

editor = Editor()  # Editor is provided by the project-local `editor` module
editor.load('bisected.json')  # open the existing model file for editing
editor.run()  # presumably starts the interactive editor and blocks until it is closed — TODO confirm

File saved as: bisected.json


In [None]:
%matplotlib inline
from matplotlib import pyplot as plt

import numpy as np
from IPython.display import clear_output

import pygame
from pygame.locals import (KEYDOWN, K_RIGHT, K_LEFT, K_r, K_s)

from Box2D.b2 import pi

from agent import Agent
from engine import Engine

from time import sleep

# train your model to (hopefully) walk by running this cell

# Physics engine and window.
# LIMIT switches joint angle limits on or off; the two limit angles are passed to
# the engine either way, and ANGLE_RANGE is the span used to normalise angles.
LIMIT = False
LOWER_LIM = -0.4 * np.pi
UPPER_LIM = 0.4 * np.pi
if LIMIT:
    ANGLE_RANGE = UPPER_LIM - LOWER_LIM
else:
    ANGLE_RANGE = 2.0 * np.pi  # full turn when joints are unconstrained

engine = Engine(
    caption='Train',
    width=1280,
    height=720,
    gravity=(0, -20),
    joint_limit=LIMIT,
    upper_angle=UPPER_LIM,
    lower_angle=LOWER_LIM,
    linear_damping=0.0,
    angular_damping=0,
)

# Model to train.
engine.load('star.json')

# Add floor: a static body as wide as the window, placed 10 units short of
# engine.height (presumably screen coordinates, i.e. the bottom edge — TODO confirm).
engine.add_static_body(
    p=(engine.width/2, engine.height-10),
    size=(engine.width, 10),
)

# Features
# Number of one-hot buckets for encoding angles. Not referenced by the currently
# active feature code, which emits one scaled value per body instead.
ENCODING_SIZE = 32

def features():
    """Build the state array describing the model's current pose.

    Every body in ``engine.world.bodies`` whose ``userData`` is a non-empty
    dict contributes exactly one value: its angle wrapped with
    ``% ANGLE_RANGE`` and then rescaled linearly so that 0 maps to -1.0 and
    ``ANGLE_RANGE`` would map to +1.0.

    Returns:
        np.ndarray: array of shape ``(1, n)``, ``n`` being the number of
        contributing bodies (``(1, 0)`` when there are none).
    """
    scaled_angles = [
        (body.angle % ANGLE_RANGE) / ANGLE_RANGE * 2.0 - 1.0
        for body in engine.world.bodies
        # same filter as the rest of the pipeline expects: tagged with a non-empty dict
        if isinstance(body.userData, dict) and body.userData
    ]
    # Wrap in an outer list so the result is a single-row 2-D array.
    return np.array([scaled_angles])

def act(action):
    """Apply a discrete action by setting the motor speed of one joint.

    With ``n`` joints in ``engine.world.joints``, an action in ``0 .. n-1``
    drives joint ``action`` at +4.0, and an action above ``n-1`` drives joint
    ``action - n`` at -4.0. Motor speeds of all other joints are left as they
    are (nothing is reset between actions).

    Args:
        action: integer action index.
    """
    joints = engine.world.joints
    n_joints = len(joints)
    base_speed = 4.0

    # The upper half of the action space mirrors the lower half with reversed direction.
    speed, index = (
        (-base_speed, action - n_joints)
        if action > n_joints - 1
        else (base_speed, action)
    )
    joints[index].motorSpeed = speed
    
def body_position():
    """Return the mean position of the model's bodies, passed through ``engine.to_screen``.

    Only bodies in ``engine.world.bodies`` with truthy ``userData`` take part
    in the average. If there are none, the mean is a division by zero and the
    components come out as NaN.
    """
    tagged = [body for body in engine.world.bodies if body.userData]

    total = np.zeros([2])
    for body in tagged:
        total += [body.position[0], body.position[1]]

    return engine.to_screen(total / len(tagged))

def set_position(position, zero_vel=True):
    """Translate the whole model so that its mean position lands on ``position``.

    Both the current mean (from ``body_position()``) and the target are
    converted with ``engine.to_pybox2d``; the difference is then added to the
    position of every body that has truthy ``userData``.

    Args:
        position: target point, in the same coordinates ``body_position()`` returns.
        zero_vel: when true, also clear each moved body's linear and angular velocity.
    """
    origin = engine.to_pybox2d(body_position())
    target = engine.to_pybox2d(position)
    dx, dy = target[0] - origin[0], target[1] - origin[1]

    for body in engine.world.bodies:
        if not body.userData:
            continue  # untagged bodies stay where they are
        if zero_vel:
            body.linearVelocity = (0, 0)
            body.angularVelocity = 0
        body.position = (body.position[0] + dx, body.position[1] + dy)

# Sanity-check the loaded model before building the agent.
# One body is subtracted from the total — presumably the static floor that was
# added after loading (TODO confirm it is the only non-model body).
joint_count = len(engine.world.joints)
body_count = len(engine.world.bodies) - 1

if not body_count or not joint_count:
    engine.close()
    raise ValueError("Your model doesn't have any physics bodies or joints")

# Two actions per joint (act() maps them to the two motor directions); the
# input size is read off the width of a sample features() array.
agent = Agent(
    features=features().shape[1],
    actions=joint_count*2,
    hdim=1024,
    gamma=0.9,
    learning_rate=0.0001,
    max_memory=10000,
)
agent.eps = .2

# Training loop.
#
# Keyboard controls (read from KEYDOWN events):
#   RIGHT / LEFT : raise / lower agent.eps by EPS_STEP, clamped to [0, 1]
#   r            : agent.reset()
#   s            : toggle speed-up mode (skips per-step printing and rendering)
#
# Each outer iteration is one episode: train on stored experience, move the
# model back to its start position, then step until the episode ends.

MAX_EPISODE_STEPS = 1000   # episode is cut once the step counter exceeds this
EDGE_MARGIN = 50.0         # episode ends when the model gets this close to a side of the window
EPS_STEP = .1              # change applied to agent.eps per arrow-key press
REWARD_SMOOTHING = .001    # weight of the newest reward in the running average

rs = []                                  # running-average reward after every step (plotted)
avg_r = 0.0
avg_action = np.zeros([agent.actions])   # number of times each action has been taken

speedup = False
running = True
try:
    while running:

        # train on whatever experience has been collected so far
        if len(agent.M) > 0:
            clear_output(wait=True)
            #print('train. avg_r: {}'.format(avg_r))
            plt.plot(rs, 'b')
            plt.plot([0, len(rs)], [0, 0], 'r')   # zero line for reference
            plt.show()
            if len(rs) > 5000:
                rs = rs[2500:]                    # keep the plotted history bounded
            agent.train(iters=min(len(agent.M), 500), batches=4)

        t = 0

        set_position((engine.width/2.0, engine.height/2.0 + 200))

        last_x = body_position()[0]

        s1 = features()
        q1 = agent.q_approx(s1)
        a1 = agent.sample_action(q1)

        reset = False
        while not reset:
            for event in engine.events():
                if engine.quit_event(event):
                    running = False
                    reset = True
                if event.type == KEYDOWN:
                    # Use the key carried by the event itself; the global keyboard
                    # state can attribute a press to another key that is being held.
                    if event.key == K_RIGHT:
                        # eps is presumably an exploration probability, so keep it in [0, 1]
                        agent.eps = min(1.0, agent.eps + EPS_STEP)
                    elif event.key == K_LEFT:
                        agent.eps = max(0.0, agent.eps - EPS_STEP)
                    elif event.key == K_r:
                        agent.reset()
                    elif event.key == K_s:
                        speedup = not speedup

            if not running:
                # quit requested: do not simulate or memorize another step
                break

            # build xp tuple
            xp = {'s1': s1.copy(), 'a1': a1}

            # act a1
            act(a1)

            # keep track
            avg_action[a1] += 1

            # advance the physics engine
            engine.step_physics(1)

            # reward: horizontal displacement since the previous step
            reward = body_position()[0] - last_x

            # reward height
            #reward += (engine.height - body_position()[1])/engine.height

            xp['r'] = reward

            avg_r += REWARD_SMOOTHING * (reward - avg_r)
            rs.append(avg_r)

            # state2
            s2 = features()
            q2 = agent.q_approx(s2)
            a2 = agent.sample_action(q2)

            xp['s2'] = s2.copy()
            xp['a2'] = a2

            agent.memorize(xp)
            if t > MAX_EPISODE_STEPS:
                reset = True

            if not speedup:
                clear_output(wait=True)
                print(reward)
                print('s1: {} avg_r: {} eps: {} memories: {}\nqs: {}\navg_action: {}'\
                      .format(s1, np.round(avg_r, 1), agent.eps, len(agent.M), q2, avg_action))
                engine.render()
                engine.clock_tick()

            # prepare for next rounds
            s1 = s2
            a1 = a2
            last_x = body_position()[0]
            t += 1

            # end the episode when the model reaches either side of the window
            if last_x > engine.width-EDGE_MARGIN or last_x < EDGE_MARGIN:
                reset = True
                engine.render()
                engine.clock_tick()
finally:
    # Always release the window, including on errors and kernel interrupts.
    engine.close()

0.237596299913
s1: [[ 0.0645169  -0.44395837  0.33231292  0.52781859  0.12029492 -0.16582741
   0.9508208  -0.12812418 -0.62319344]] avg_r: 0.0 eps: 0.6 memories: 10000
qs: [[ 0.01136299  0.0144719   0.01207203  0.00921699  0.01685199  0.00887069
   0.00080969  0.15513938  0.00891549  0.01073637  0.01193487  0.00632068
   0.01619203  0.01247106  0.01085461  0.01424596]]
avg_action: [  1563.   1558.   1483.   1467.   1435.   1501.   1473.  24554.   1484.
   1489.   1512.   1467.   1494.   1557.   1524.   1501.]
