In [44]:
from aitk.robots.cart import CartWorld

In [45]:
# Create the cart simulation world. quiet=True is forwarded to CartWorld
# (presumably it suppresses console messages -- confirm against the aitk docs).
world = CartWorld(quiet=True)

In [46]:
# Display the world in the notebook; the recorded cell output is an Image widget.
world.watch()

Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x08\x06\x0…

In [47]:
# Grab the first robot in the world; a later cell reads its observation
# to feed the network display.
robot = world.robots[0]
# Open the robot's own watch view. show_robot=False is passed through to
# watch() (presumably hides the robot picture -- confirm).
robot.watch(show_robot=False)

HTML(value='<style>img.pixelated {image-rendering: pixelated;}</style>')

VBox(children=(Image(value=b'', layout="Layout(display='none', height='auto', width='-webkit-fill-available')"…

In [48]:
import random
import math
# Angle limit checked by balance(): 12 degrees converted to radians.
theta_max = 12 * math.pi / 180
print("theta_max is", theta_max)

def balance(robot):
    """Hand-written controller: push the cart toward the side theta leans to.

    Each call adds 1 to ``robot.reward`` and reads the four-value observation
    ``(x, x_v, theta, theta_v)``.

    Returns:
        ``robot.reward`` when the episode is over, i.e. when x is outside
        (-2.4, 2.4), theta is outside (-theta_max, theta_max), or the reward
        has reached 200. Otherwise the robot is moved and None is returned.
    """
    robot.reward += 1
    pos, _pos_rate, angle, _angle_rate = robot.get_observation()

    # Guard clauses, checked in the same order as the original conjunction.
    if not (-2.4 < pos < 2.4):
        return robot.reward  # x out of range
    if not (-theta_max < angle < theta_max):
        return robot.reward  # theta out of range
    if not (robot.reward < 200):
        return robot.reward  # step cap reached

    # Still in bounds: move +1 for a positive theta, -1 otherwise.
    robot.move(1 if angle > 0 else -1)

theta_max is 0.20943951023931953


In [49]:
# Reset the world, then run the hand-written controller in real time.
# run() is given a list holding one controller function; the value it
# returns is printed as the reward (38 in the recorded output).
world.reset()
reward = world.run([balance], real_time=True)
print("Reward:", reward)

0it [00:00, ?it/s]

Reward: 38


# Finding a Winning Policy

In [50]:
from aitk.algorithms import GeneticAlgorithm
from aitk.networks import SimpleNetwork

In [105]:
class GA(GeneticAlgorithm):
    """Genetic algorithm that evolves SimpleNetwork weights to balance the cart.

    Each chromosome is the flat weight vector of ``self.network``. The fitness
    of a chromosome is the value returned by one run of ``self.world`` with
    ``self.balance`` as the controller.

    The angle limit is read from the notebook-level ``theta_max`` so that this
    controller and the hand-written ``balance`` function share one limit.
    """

    # Step cap for one episode. ``balance`` ends the run once the reward
    # reaches this value and ``is_done`` treats it as the winning score, so
    # both read this single constant instead of repeating the literal.
    MAX_REWARD = 200
    # The episode ends when x leaves the open interval (-X_LIMIT, X_LIMIT).
    X_LIMIT = 2.4

    def __init__(self, popSize, world):
        """Build the controller network and size the chromosome to match it.

        Args:
            popSize: population size, forwarded to GeneticAlgorithm.
            world: world used to evaluate chromosomes; ``fitness`` calls its
                ``reset()`` and ``run()`` methods.
        """
        # Layer sizes 4, 24, 24, 2: four observation values in, two outputs
        # out (one per action).
        self.network = SimpleNetwork(
            4,
            24,
            24,
            2,
            activation="linear"
        )
        self.world = world
        # One gene per network weight.
        length = len(self.network.get_weights(flat=True))
        super().__init__(length, popSize)

    def balance(self, robot):
        """Controller driven by the network's two outputs.

        Adds 1 to ``robot.reward`` on every call. While the observation is in
        bounds and the step cap has not been reached, the robot is moved and
        None is returned; otherwise ``robot.reward`` is returned.
        """
        robot.reward += 1
        x, x_v, theta, theta_v = robot.get_observation()
        ok = ((-self.X_LIMIT < x < self.X_LIMIT) and
              (-theta_max < theta < theta_max) and
              (robot.reward < self.MAX_REWARD))
        if ok:
            outputs = self.network.propagate([x, x_v, theta, theta_v])
            # The larger output picks the action: index 1 -> +1, else -1.
            index = outputs.argmax()
            action = 1 if index == 1 else -1
            robot.move(action)
        else:
            return robot.reward # out of bounds

    def fitness(self, chromosome,
                real_time=False,
                show_progress=False):
        """Score a chromosome by running one episode with it as the weights.

        Args:
            chromosome: flat weight vector loaded into ``self.network``.
            real_time: forwarded to ``world.run``.
            show_progress: forwarded to ``world.run``.

        Returns:
            Whatever ``world.run`` returns for the ``balance`` controller.
        """
        self.network.set_weights(chromosome)
        self.world.reset()
        reward = self.world.run([self.balance], real_time=real_time,
                                show_progress=show_progress, quiet=True,
                                interrupt=True)
        return reward

    def make_random_gene(self):
        """Return a random initial weight in the interval (-1.0, 1.0]."""
        return 1.0 - random.random() * 2.0

    def mutate_gene(self, gene):
        """Return ``gene`` shifted by a random offset in (-0.5, 0.5]."""
        return gene + 0.5 - random.random() * 1.0

    def is_done(self):
        """Return True once the best score so far equals the step cap."""
        return self.bestEverScore == self.MAX_REWARD

In [106]:
# Population of 50 chromosomes, evaluated in the `world` created earlier.
ga = GA(50, world)

Genetic algorithm
  Chromosome length: 770
  Population size: 50


In [107]:
# Call the network's display() with the robot's current observation as input.
ga.network.display(robot.get_observation())

In [109]:
# Evolve for at most 10 generations (the recorded log reports this maximum).
# The returned chromosome is scored again in the next cell -- presumably it
# is the best one found; confirm against the GeneticAlgorithm docs.
best = ga.evolve(10)

  Maximum number of generations: 10
  Crossover rate: 0.7
  Mutation rate: 0.001
  Elite percentage 0.0
  Elite count: 0
Solution found


In [111]:
# Re-run the evolved chromosome in real time; the recorded result is 200.
ga.fitness(best, real_time=True)

200