In [None]:
from editor import Editor
from Box2D.b2 import pi

# edit your model by running this cell
# to add a body part hit b and it will spawn under your mouse
# to move body parts drag them with the mouse
# to connect body parts, hit p to pin two bodies at the mouse location
# to change a body part's size, click and hold, then use the arrow keys to modify width/height
# hit 's' to save while editing to model.json

editor = Editor()
editor.load('creatures/pole.json')
editor.run()

In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
from IPython.display import clear_output
import pygame
from pygame.locals import (KEYDOWN, K_RIGHT, K_LEFT, K_UP, K_DOWN, K_r, K_s, K_q)
from Box2D.b2 import pi
from engine import Engine
from time import sleep

# train your model to (hopefully) walk by running this cell

# hit s to speed up training, and s again to watch the best set of weights
# use the up and down arrow keys to introduce more or less randomness in weight changes


FILE = 'creatures/two_legs.json'
WEIGHT_FILE = None

TIMESTEPS = 240 # timesteps per episode
HIDDEN = 64

# Physics engine and window
LOWER_LIM = -.2 * np.pi
UPPER_LIM = .2 * np.pi
LIMIT = True
ANGLE_RANGE = UPPER_LIM - LOWER_LIM if LIMIT else np.pi*2.0

engine = Engine(caption='Train', width=1280, height=720, gravity=(0, -50), joint_limit=LIMIT, \
               upper_angle=UPPER_LIM, lower_angle=LOWER_LIM, linear_damping=0.0, angular_damping=0.0)

# Add floor
engine.add_static_body(p=(engine.width/2, engine.height-10), size=(engine.width * 10, 10))

# Load creature
engine.load(FILE)

body_count = len(engine.world.bodies) - 1 # subtract ground body
joint_count = len(engine.world.joints)

if body_count == 0 or joint_count == 0:
    engine.close()
    raise RuntimeError("Your model doesn't have any physics bodies or joints")

def features(time_step=0):
    f = []
    
    for b in engine.world.bodies:
        if b.userData and isinstance(b.userData, dict):
            a = b.angle % ANGLE_RANGE
            a /= ANGLE_RANGE
            a = a * 2.0 - 1.0
            f += [a]
            f += [b.linearVelocity[0]/100]
            f += [b.linearVelocity[1]/100]
            
            #p = engine.to_screen(b.position)
            #f += [p[0]/engine.width]
            #f += [p[1]/engine.height]
            #f += [np.sin(b.angle)]
            #f += [np.cos(b.angle)]
    
    #f += [np.sin(time_step / 2.0)] # Creature's heartbeat? :-)
    
    return np.array([f])
    
def body_position():
    avg = np.zeros([2])
    for b in engine.world.bodies:
        if b.userData:
            avg += [b.position[0], b.position[1]]
    avg /= body_count
    return engine.to_screen(avg)

def set_position(position, zero_vel=True):
    current = engine.to_pybox2d(body_position())
    position = engine.to_pybox2d(position)
    shift = (position[0] - current[0], position[1] - current[1])
    for b in engine.world.bodies:
        if b.userData:
            if zero_vel:
                b.linearVelocity = (0, 0)
                b.angularVelocity = 0
            b.position = (b.position[0] + shift[0], b.position[1] + shift[1])
                
def relu(x):
    return x * (x > 0)

def act(torques):
    for i, j in enumerate(engine.world.joints):
        j.motorSpeed = torques[0, i] * 4
        
class Stream(object):
    def __init__(self, xdim, hdim, ydim):
        self.W1 = np.random.randn(xdim, hdim) * .5
        self.W2 = np.random.randn(hdim, ydim) * .5
        self.x_trace = np.zeros([1, xdim])
        self.h_trace = np.zeros([1, hdim])
        self.y_trace = np.zeros([1, ydim])
        
    def act(self, x):
        return np.tanh(x)
        
    def ff(self, features):
        z1 = features.dot(self.W1)
        h1 = self.act(z1)
        z2 = h1.dot(self.W2)
        h2 = self.act(z2)
        
        decay = 0.8
        self.x_trace = self.x_trace * decay + features
        self.h_trace = self.h_trace * decay + h1
        self.y_trace = self.y_trace * decay + h2
        
        return h2
    
    def train(self, reward, learning_rate=0.1):
        # update weights, either strengthen or decrease connections based on traces
        W1_update = np.multiply(self.x_trace.T, self.h_trace)
        W2_update = np.multiply(self.h_trace.T, self.y_trace)
        
        trace = np.concatenate((self.x_trace.ravel(), self.h_trace.ravel(), self.y_trace.ravel()))
        if np.sum(np.abs(trace)) > 3:
            self.W1 *= 0.998
            self.W2 *= 0.998
        else:
            self.W1 += np.random.randn(self.W1.shape[0], self.W1.shape[1]) * 0.1
            self.W2 += np.random.randn(self.W2.shape[0], self.W2.shape[1]) * 0.1
        
        self.W1 += W1_update * reward * learning_rate
        self.W2 += W2_update * reward * learning_rate
        

stream_model = Stream(features(0).shape[1], HIDDEN, joint_count)

speedup = False
running = True

rs = []
episode = 0

while running:
    
    if episode % 50 == 0:
        clear_output(wait=True)
        if len(rs) > 100:
            rs = rs[-100:]
        fig = plt.plot(rs, 'b')
        plt.plot([0, len(rs)], [0, 0], 'r')
        plt.show()
        
        print('episode: {}'.format(episode))

    # new episode
    engine.clear_all_but_ground()
    engine.load(FILE)
    
    set_position((engine.width/2, engine.height/2))
    
    for j in engine.world.joints:
        j.motorSpeed = np.random.randn()
    
    engine.step_physics(50)
    for b in engine.world.bodies:
        b.linearVelocity = (0, 0)
        b.angularVelocity = 0
    
    t = 0
    episode += 1
    
    reset = False
    while not reset:
        # keyboard input
        for event in engine.events():
            if engine.quit_event(event):
                running = False
                reset = True
            if event.type == KEYDOWN:
                k = pygame.key.get_pressed()
                if k[K_q]:
                    running = False
                elif k[K_s]:
                    speedup = not speedup
                    if not speedup:
                        reset = True
        
        # act
        p0 = body_position()
        f = features(t)
        torques = stream_model.ff(f)
        clear_output(wait=True)
        print(torques)
        act(torques)
        
        engine.step_physics(1)

        # reward
        p1 = body_position()
        reward = p1[0] - p0[0]
        
        # train
        stream_model.train(reward, learning_rate=0.01)
        
        rs += [reward]
        #new_p = body_position()
        #reward = np.average([b.position[1] for b in engine.world.bodies])
        #reward += (new_p[0] - last_p[0])
                                             
        #if t > TIMESTEPS:
        #    engine.render()
        #    reset = True
        
        if not speedup:
            engine.render()
            engine.clock_tick()

        # prepare for next rounds
        t += 1

engine.close()

[[ 1. -1.  1. -1.]]


In [8]:
stream_model.W1

array([[  1.28960407e+01,   1.11120226e+00,  -1.30159012e+01,
          1.97877497e+00,   3.42501186e-01,  -1.44959569e+01,
          1.42569463e+00,  -1.51703240e+01,   1.19298456e+01,
         -1.83829681e+01,  -6.61023008e-01,  -7.51234761e-01,
          1.30268652e+01,   1.37013104e+01,  -8.12597830e+00,
          1.41668945e+01,   7.98188427e-01,  -1.42571791e+01,
          1.05813763e+00,  -1.14134811e+01,  -1.40112452e+01,
         -1.63145551e-01,  -3.41900368e+00,   1.83953641e+00,
          1.25618834e+01,  -1.64820426e+01,   1.82073225e+01,
          1.62814579e+01,   2.03053619e+01,  -3.76944074e-01,
          1.37526327e+01,   9.21369972e+00,  -1.09291064e+00,
         -1.08098370e+00,  -7.05264989e-01,  -1.31172335e+01,
         -1.09662597e+00,  -1.52260578e+01,   1.17600755e+01,
          2.36511393e+00,   1.37072923e+01,  -1.31230008e+01,
          8.66914841e-02,   1.36202205e+00,  -4.12261829e-01,
         -1.87863632e+01,  -1.80192336e+01,  -1.99643915e+01,
        