In [277]:
import pygame, sys, random
import numpy as np
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam
import math
import matplotlib.pyplot as plt
import pylab
from fractions import Fraction

# ---- Run configuration -------------------------------------------------
num_episodes = 1  # number of episodes played by the simulation cell

# Radii (pixels) of the obstacle and agent discs.
obstacleRadius = 10
agentRadius = 10

# Rectangular-boundary settings; only referenced from commented-out code.
boundaryPos = [100, 100]
boundaryLength = [70,70]

# Radius (pixels) of the circle around the agent: obstacles inside it are
# taken into account (rangeFinder) and the temporary goal is placed on it
# (goalFinder).
boundaryRadius = 40

dispSize = [1280, 960]  # pygame window size in pixels

# Floor division keeps the start position integral on Python 3 as well
# ("/" would produce floats there); on Python 2 the result is unchanged.
initPosAgentStandard = [dispSize[0] // 2, dispSize[1] // 2]
initPosAgent = initPosAgentStandard
goalPos = [200, 200]
goalAngle = 0

moveObstacles = True  # jitter every obstacle by up to one pixel per step
action_size = 9       # action codes understood by takeAction (0..8)
obsNumber = 30        # number of obstacles in the scene
state_size = 2        # network input: the (dx, dy) offset built by stateGenerator



In [2]:
# A2C(Advantage Actor-Critic) agent
class A2CAgent:
    """Advantage Actor-Critic agent built from two small dense networks.

    The actor maps a state to a softmax distribution over ``action_size``
    actions; the critic maps a state to a single value estimate.

    Parameters
    ----------
    state_size : int
        Length of the state vector fed to both networks.
    action_size : int
        Number of discrete actions (width of the actor's output layer).
    load_model : bool, optional
        When True (the default, matching the previous hard-coded behaviour)
        the weights are loaded from ``./Practice004_DataSave/``. Pass False
        to start from freshly initialised networks.
    """

    def __init__(self, state_size, action_size, load_model=True):
        self.load_model = load_model

        # get size of state and action
        self.state_size = state_size
        self.action_size = action_size
        self.value_size = 1

        # These are hyper parameters for the Policy Gradient
        self.discount_factor = 0.99
        self.actor_lr = 0.00002
        self.critic_lr = 0.00005

        # create model for policy network
        self.actor = self.build_actor()
        self.critic = self.build_critic()

        if self.load_model:
            self.actor.load_weights("./Practice004_DataSave/Actor_Rev.h5")
            self.critic.load_weights("./Practice004_DataSave/Critic_Rev.h5")

    # approximate policy and value using Neural Network
    def build_actor(self):
        """Build and compile the actor: state in, action probabilities out."""
        actor = Sequential()
        actor.add(Dense(128, input_dim=self.state_size, activation='relu', kernel_initializer='glorot_normal'))
        actor.add(Dense(self.action_size, activation='softmax', kernel_initializer='glorot_normal'))
        actor.summary()
        # See note regarding crossentropy in cartpole_reinforce.py
        actor.compile(loss='categorical_crossentropy', optimizer=Adam(lr=self.actor_lr))
        return actor

    def build_critic(self):
        """Build and compile the critic: state in, scalar state value out."""
        critic = Sequential()
        critic.add(Dense(128, input_dim=self.state_size, activation='relu', kernel_initializer='glorot_normal'))
        critic.add(Dense(self.value_size, activation='linear', kernel_initializer='glorot_normal'))
        critic.summary()
        critic.compile(loss="mse", optimizer=Adam(lr=self.critic_lr))
        return critic

    def get_action(self, state):
        """Return the actor's flattened output for ``state``.

        Note that this returns the whole probability vector rather than a
        sampled action index; sampling is left to the caller.
        """
        policy = self.actor.predict(state, batch_size=1).flatten()
        return policy

    def train_model(self, state, action, reward, next_state, done):
        """Run one actor fit and one critic fit for a single transition.

        The critic is fit towards ``reward`` when ``done`` is true and
        towards ``reward + discount_factor * V(next_state)`` otherwise. The
        actor is fit against a vector that is zero everywhere except at
        index ``action``, which holds that target minus ``V(state)``.
        """
        target = np.zeros((1, self.value_size))
        advantages = np.zeros((1, self.action_size))

        # predict() yields a batch of shape (1, value_size); pull the scalar
        # out explicitly instead of relying on implicit array-to-scalar
        # conversion when storing into a single array slot.
        value = self.critic.predict(state)[0][0]

        if done:
            td_target = reward
        else:
            # The next-state value is only needed for non-terminal steps.
            next_value = self.critic.predict(next_state)[0][0]
            td_target = reward + self.discount_factor * next_value

        advantages[0][action] = td_target - value
        target[0][0] = td_target

        self.actor.fit(state, advantages, epochs=1, verbose=0)
        self.critic.fit(state, target, epochs=1, verbose=0)

In [4]:
def stateGenerator(obsPosition, agtPosition, idx):
    """Build the 1x2 network input: the agent's offset from a reference point.

    When ``idx`` is -1, ``obsPosition`` is itself the reference point
    ``[x, y]``; otherwise the reference is ``obsPosition[idx]``. The result
    is ``agtPosition - reference`` as an array of shape (1, 2).
    """
    reference = obsPosition if idx == -1 else obsPosition[idx]
    offset = [agtPosition[0] - reference[0], agtPosition[1] - reference[1]]
    return np.reshape(offset, [1, 2])

In [5]:
def takeAction(action):
    """Translate a discrete action code into a one-step ``[dx, dy]`` move.

    Codes 0-7 are the eight unit moves listed in the table below, code 8
    means "stay", and any other value also yields ``[0, 0]``.
    """
    # Position in this table is the action code.
    moveTable = (
        (1, 0),    # 0
        (1, 1),    # 1
        (1, -1),   # 2
        (-1, 1),   # 3
        (-1, 0),   # 4
        (-1, -1),  # 5
        (0, -1),   # 6
        (0, 1),    # 7
        (0, 0),    # 8
    )
    for code, (dx, dy) in enumerate(moveTable):
        if action == code:
            return [dx, dy]
    return [0, 0]

In [6]:
def rangeFinder(allObsPos, rangeCenter):
    """Collect the obstacles lying strictly within ``boundaryRadius`` of a point.

    Only the first ``obsNumber`` entries of ``allObsPos`` are examined.
    Returns ``[count, positions]`` where ``positions`` always has
    ``obsNumber`` entries: the in-range obstacles first (the very same list
    objects as in ``allObsPos``), followed by ``[0, 0]`` placeholders.
    """
    centerX, centerY = rangeCenter[0], rangeCenter[1]
    inRange = [
        allObsPos[k]
        for k in range(obsNumber)
        if math.sqrt((centerX - allObsPos[k][0])**2 + (centerY - allObsPos[k][1])**2) < boundaryRadius
    ]
    placeholders = [[0, 0] for _ in range(obsNumber - len(inRange))]
    return [len(inRange), inRange + placeholders]

In [7]:
def goalFinder(agtPos, goal=None, radius=None):
    """Place a temporary goal ``radius`` pixels from the agent, towards the goal.

    Parameters
    ----------
    agtPos : sequence of two numbers
        Current agent position ``[x, y]``.
    goal : sequence of two numbers, optional
        Final goal position; defaults to the module-level ``goalPos``.
    radius : number, optional
        Distance of the temporary goal from the agent; defaults to the
        module-level ``boundaryRadius``.

    Returns
    -------
    list of two ints
        The temporary goal, each coordinate floored to an integer.

    ``atan2`` is used so that the direction is correct in every quadrant and
    no division takes place when the agent and the goal share an x
    coordinate. If the agent is exactly on the goal the heading is 0, i.e.
    the temporary goal lies ``radius`` pixels along +x.
    """
    if goal is None:
        goal = goalPos
    if radius is None:
        radius = boundaryRadius

    heading = math.atan2(goal[1] - agtPos[1], goal[0] - agtPos[0])
    return [
        int(math.floor(agtPos[0] + radius * math.cos(heading))),
        int(math.floor(agtPos[1] + radius * math.sin(heading))),
    ]

In [286]:
def _spawnObstacles(center):
    """Return obsNumber obstacle positions scattered randomly around ``center``.

    Each obstacle is placed at a random whole-degree angle and at a random
    distance between ``agentRadius + obstacleRadius + 1`` and 299 pixels, so
    none of them starts in contact with an agent located at ``center``.
    """
    spawned = []
    for _ in range(obsNumber):
        obsRadius = random.randrange(agentRadius + obstacleRadius + 1, 300)
        obsAngle = random.randrange(0, 360) * math.pi / 180
        spawned.append([int(center[0] + obsRadius * math.cos(obsAngle)),
                        int(center[1] + obsRadius * math.sin(obsAngle))])
    return spawned


pygame.init()
screen = pygame.display.set_mode(dispSize)
screen.fill([200, 200, 200])

# make A2C agent
agent = A2CAgent(state_size, action_size)

rList, episodes = [], []

# Make Obstacles (obsNumber)
obstaclePos = _spawnObstacles(initPosAgent)

# NOTE: this cell only runs the agent; the train_model() call and the weight
# saving are intentionally absent here.
for e in range(num_episodes):
    # Initialize
    done = False
    score = 0
    # Pixel coordinates are kept integral for drawing.
    x = int(initPosAgent[0])
    y = int(initPosAgent[1])
    print("Episode ", e, "Starts!")

    while not done:
        # Let pygame process its internal events so the window stays responsive.
        pygame.event.pump()

        [rangeObsNumber, rangeObsPos] = rangeFinder(obstaclePos, initPosAgent)

        # Obstacle term: for every obstacle in range, query the actor with the
        # agent-obstacle offset and multiply together (1 - policy).
        tmpAction = []
        for i in range(0, rangeObsNumber):
            state = stateGenerator(rangeObsPos, [x, y], i)
            policyArr = agent.get_action(state)
            if i == 0:
                tmpAction = (1 - policyArr)
            else:
                tmpAction = tmpAction * (1 - policyArr)
        print("0: ", tmpAction)
        # Round to a hard 0/1 mask per action.
        tmpAction = [round(elem, 0) for elem in tmpAction]
        if rangeObsNumber == 0:
            tmpAction = [1.0 / action_size for _ in range(0, action_size)]

        # Goal term: actor output for the offset to the temporary goal.
        tmpGoalPos = goalFinder([x, y])
        state = stateGenerator(tmpGoalPos, [x, y], -1)
        policyArr = agent.get_action(state)

        print("1: ", tmpAction)

        # Combine both terms in float64: np.random.choice checks that p sums
        # to 1 with a tolerance tighter than float32 rounding error.
        avoidMask = np.asarray(tmpAction, dtype=np.float64)
        combined = avoidMask * np.asarray(policyArr, dtype=np.float64)
        if not combined.sum() > 0:
            # No probability mass left (or NaN): dividing would produce NaNs
            # and crash the sampler. Fall back to the obstacle mask alone,
            # and to a uniform choice if that is empty as well.
            combined = avoidMask if avoidMask.sum() > 0 else np.ones(action_size)
        tmpAction = combined / combined.sum()
        print("2: ", policyArr)
        print("3: ", tmpAction)
        action = np.random.choice(action_size, 1, p=tmpAction)[0]

        [xMove, yMove] = takeAction(action)
        x = x + xMove
        y = y + yMove

        collisionFlag = 0
        pygame.draw.circle(screen, [100, 100, 255], [x, y], 10, 0)
        initPosAgent = [x, y]

        if math.sqrt((x - goalPos[0])**2 + (y - goalPos[1])**2) <= agentRadius:
            print("Goal Reached!")
            collisionFlag = 1
            done = True

        # Every obstacle is moved and drawn even after a collision was found.
        for i in range(0, obsNumber):
            if moveObstacles:
                obstaclePos[i][0] = obstaclePos[i][0] + random.randrange(-1, 2)
                obstaclePos[i][1] = obstaclePos[i][1] + random.randrange(-1, 2)

            pygame.draw.circle(screen, [255, 50, 50], obstaclePos[i], 10, 0)
            if math.sqrt((x - obstaclePos[i][0])**2 + (y - obstaclePos[i][1])**2) < 20:
                print("Collision!")
                collisionFlag = -1
                done = True

        if not done:
            reward = 0.1
        else:
            if collisionFlag == 1:
                # The former expression 10000 * cos(atan2(y - initPosAgent[1],
                # x - initPosAgent[0])) always evaluated to exactly this,
                # because initPosAgent was set to [x, y] just above.
                reward = 10000.0
                rList.append(1)
            elif collisionFlag == -1:
                reward = -10000
                rList.append(0)

        score += reward

        pygame.draw.circle(screen, [255, 100, 100], initPosAgent, boundaryRadius, 2)

        if done:
            # Reset the scene for the next episode.
            initPosAgent = initPosAgentStandard
            obstaclePos = _spawnObstacles(initPosAgent)
            episodes.append(e)

        #circle(Surface, color, pos, radius, width=0)
        pygame.draw.circle(screen, [100, 255, 100], goalPos, 10, 2)
        pygame.draw.circle(screen, [0, 255, 200], tmpGoalPos, 5, 5)
        pygame.display.flip()
        screen.fill([220, 220, 220])
    # Function-call form works on both Python 2 and Python 3.
    print(score)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1081 (Dense)           (None, 128)               384       
_________________________________________________________________
dense_1082 (Dense)           (None, 9)                 1161      
Total params: 1,545
Trainable params: 1,545
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1083 (Dense)           (None, 128)               384       
_________________________________________________________________
dense_1084 (Dense)           (None, 1)                 129       
Total params: 513
Trainable params: 513
Non-trainable params: 0
_________________________________________________________________
('Episode ', 0, 'Starts!')
('0: ', array([ 1.        ,  0.0033237 ,  0.99680

('1: ', [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0])
('2: ', array([  2.04494760e-28,   2.67718092e-24,   5.40521200e-15,
         1.38004498e-26,   3.76596057e-04,   9.99623418e-01,
         4.26336379e-31,   8.32312309e-17,   6.92553253e-32], dtype=float32))
('3: ', array([  5.43008234e-25,   7.10889259e-21,   1.43528109e-11,
         3.66452317e-23,   1.00000000e+00,   0.00000000e+00,
         1.13207871e-27,   2.21009300e-13,   1.83898169e-28]))
('0: ', array([  1.00000000e+00,   1.00000000e+00,   9.99997914e-01,
         1.00000000e+00,   1.00000000e+00,   2.14576721e-06,
         1.00000000e+00,   1.00000000e+00,   1.00000000e+00], dtype=float32))
('1: ', [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0])
('2: ', array([  2.04494760e-28,   2.67718092e-24,   5.40521200e-15,
         1.38004498e-26,   3.76596057e-04,   9.99623418e-01,
         4.26336379e-31,   8.32312309e-17,   6.92553253e-32], dtype=float32))
('3: ', array([  5.43008234e-25,   7.10889259e-21,   1.43528109e-11,
    

        0.27567905,  1.        ,  1.        ,  1.        ], dtype=float32))
('1: ', [1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0])
('2: ', array([  6.36181304e-29,   8.27570014e-25,   5.35983857e-15,
         3.92681130e-27,   1.85217257e-04,   9.99814808e-01,
         1.22349895e-31,   1.91986089e-17,   1.77378733e-32], dtype=float32))
('3: ', array([  3.43478418e-25,   4.46810425e-21,   2.89381165e-11,
         2.12011092e-23,   1.00000000e+00,   0.00000000e+00,
         6.60575031e-28,   1.03654536e-13,   9.57679297e-29]))
('0: ', array([ 1.        ,  1.        ,  0.42458785,  1.        ,  1.        ,
        0.57541209,  1.        ,  1.        ,  1.        ], dtype=float32))
('1: ', [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('2: ', array([  6.36181304e-29,   8.27570014e-25,   5.35983857e-15,
         3.92681130e-27,   1.85217257e-04,   9.99814808e-01,
         1.22349895e-31,   1.91986089e-17,   1.77378733e-32], dtype=float32))
('3: ', array([  6.36181288e-29,   8.27569993e-25

('2: ', array([  2.04494760e-28,   2.67718092e-24,   5.40521200e-15,
         1.38004498e-26,   3.76596057e-04,   9.99623418e-01,
         4.26336379e-31,   8.32312309e-17,   6.92553253e-32], dtype=float32))
('3: ', array([  2.04494757e-28,   2.67718088e-24,   0.00000000e+00,
         1.38004496e-26,   3.76596052e-04,   9.99623404e-01,
         4.26336373e-31,   8.32312297e-17,   6.92553243e-32]))
('0: ', array([  1.00000000e+00,   9.99999344e-01,   7.15255737e-07,
         1.00000000e+00,   1.00000000e+00,   1.00000000e+00,
         1.00000000e+00,   1.00000000e+00,   1.00000000e+00], dtype=float32))
('1: ', [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('2: ', array([  2.04494760e-28,   2.67718092e-24,   5.40521200e-15,
         1.38004498e-26,   3.76596057e-04,   9.99623418e-01,
         4.26336379e-31,   8.32312309e-17,   6.92553253e-32], dtype=float32))
('3: ', array([  2.04494757e-28,   2.67718088e-24,   0.00000000e+00,
         1.38004496e-26,   3.76596052e-04,   9.99623404e-01

('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  6.74828339e-29,   1.16588554e-24,   2.07295390e-15,
         5.48326623e-27,   4.75516048e-04,   9.99524474e-01,
         1.23822284e-31,   7.26451719e-17,   2.05187491e-32], dtype=float32))
('3: ', array([  6.74828345e-29,   1.16588555e-24,   2.07295392e-15,
         5.48326628e-27,   4.75516053e-04,   9.99524484e-01,
         1.23822286e-31,   7.26451726e-17,   2.05187493e-32]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  6.74828339e-29,   1.16588554e-24,   2.07295390e-15,
         5.48326623e-27,   4.75516048e-04,   9.99524474e-01,
         1.23822284e-31,   7.26451719e-17,   2.05187491e-32], dty

('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  2.16922184e-28,   3.77487614e-24,   2.09136882e-15,
         1.92925194e-26,   9.69262852e-04,   9.99030709e-01,
         4.31593189e-31,   3.14890639e-16,   8.00062261e-32], dtype=float32))
('3: ', array([  2.16922190e-28,   3.77487624e-24,   2.09136887e-15,
         1.92925200e-26,   9.69262879e-04,   9.99030737e-01,
         4.31593201e-31,   3.14890648e-16,   8.00062284e-32]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  6.96923750e-28,   1.22160759e-23,   2.10886576e-15,
         6.78436587e-26,   1.97467511e-03,   9.98025298e-01,
         1.50359532e-30,   1.36425339e-15,   3.11793693e-31], dty

('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  7.37418642e-28,   1.71817392e-23,   8.13905489e-16,
         9.46052842e-26,   5.06955711e-03,   9.94930446e-01,
         1.51830358e-30,   5.14845811e-15,   3.59296737e-31], dtype=float32))
('3: ', array([  7.37418640e-28,   1.71817391e-23,   8.13905486e-16,
         9.46052839e-26,   5.06955710e-03,   9.94930443e-01,
         1.51830358e-30,   5.14845809e-15,   3.59296736e-31]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  7.37418642e-28,   1.71817392e-23,   8.13905489e-16,
         9.46052842e-26,   5.06955711e-03,   9.94930446e-01,
         1.51830358e-30,   5.14845811e-15,   3.59296737e-31], dtype=float32))

('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  7.58636420e-28,   2.42255350e-23,   3.25796162e-16,
         1.28493941e-25,   1.37260165e-02,   9.86273944e-01,
         1.46276523e-30,   1.97742044e-14,   4.17366181e-31], dtype=float32))
('3: ', array([  7.58636450e-28,   2.42255360e-23,   3.25796175e-16,
         1.28493946e-25,   1.37260170e-02,   9.86273983e-01,
         1.46276529e-30,   1.97742052e-14,   4.17366197e-31]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  7.58636420e-28,   2.42255350e-23,   3.25796162e-16,
         1.28493941e-25,   1.37260165e-02,   9.86273944e-01,
         1.46276523e-30,   1.97742044e-14,   4.17366181e-31], dty

('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  1.79250402e-27,   8.01203928e-23,   1.41709777e-16,
         4.26625838e-25,   7.48886168e-02,   9.25111353e-01,
         3.33483610e-30,   2.40956614e-13,   1.29530318e-30], dtype=float32))
('3: ', array([  1.79250407e-27,   8.01203952e-23,   1.41709781e-16,
         4.26625851e-25,   7.48886190e-02,   9.25111381e-01,
         3.33483620e-30,   2.40956622e-13,   1.29530322e-30]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  4.43179399e-27,   2.03105707e-22,   1.48474513e-16,
         1.15807286e-24,   1.44619584e-01,   8.55380416e-01,
         8.71257379e-30,   8.09207573e-13,   3.83975437e-30], dty

('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  1.01399033e-26,   4.75907440e-22,   1.41627151e-16,
         2.84756482e-24,   2.63726860e-01,   7.36273110e-01,
         2.03968199e-29,   2.55257577e-12,   1.01714605e-29], dtype=float32))
('3: ', array([  1.01399036e-26,   4.75907454e-22,   1.41627156e-16,
         2.84756490e-24,   2.63726868e-01,   7.36273132e-01,
         2.03968205e-29,   2.55257585e-12,   1.01714608e-29]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  1.01399033e-26,   4.75907440e-22,   1.41627151e-16,
         2.84756482e-24,   2.63726860e-01,   7.36273110e-01,
         2.03968199e-29,   2.55257577e-12,   1.01714605e-29], dty

('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  5.30943603e-27,   3.44677664e-22,   4.15744218e-17,
         1.81834608e-24,   4.87557441e-01,   5.12442529e-01,
         8.92955312e-30,   5.16366203e-12,   5.20462932e-30], dtype=float32))
('3: ', array([  5.30943618e-27,   3.44677675e-22,   4.15744231e-17,
         1.81834613e-24,   4.87557456e-01,   5.12442544e-01,
         8.92955339e-30,   5.16366219e-12,   5.20462948e-30]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  8.70232370e-27,   5.85182523e-22,   2.94153598e-17,
         3.03352201e-24,   6.62562072e-01,   3.37437928e-01,
         1.45957173e-29,   1.20442684e-11,   9.50616677e-30], dtype=float32))

('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  8.70232370e-27,   5.85182523e-22,   2.94153598e-17,
         3.03352201e-24,   6.62562072e-01,   3.37437928e-01,
         1.45957173e-29,   1.20442684e-11,   9.50616677e-30], dtype=float32))
('3: ', array([  8.70232370e-27,   5.85182523e-22,   2.94153598e-17,
         3.03352201e-24,   6.62562072e-01,   3.37437928e-01,
         1.45957173e-29,   1.20442684e-11,   9.50616677e-30]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  5.30943603e-27,   3.44677664e-22,   4.15744218e-17,
         1.81834608e-24,   4.87557441e-01,   5.12442529e-01,
         8.92955312e-30,   5.16366203e-12,   5.20462932e-30], dtype=float32))

('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  8.70232370e-27,   5.85182523e-22,   2.94153598e-17,
         3.03352201e-24,   6.62562072e-01,   3.37437928e-01,
         1.45957173e-29,   1.20442684e-11,   9.50616677e-30], dtype=float32))
('3: ', array([  8.70232370e-27,   5.85182523e-22,   2.94153598e-17,
         3.03352201e-24,   6.62562072e-01,   3.37437928e-01,
         1.45957173e-29,   1.20442684e-11,   9.50616677e-30]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  8.70232370e-27,   5.85182523e-22,   2.94153598e-17,
         3.03352201e-24,   6.62562072e-01,   3.37437928e-01,
         1.45957173e-29,   1.20442684e-11,   9.50616677e-30], dtype=float32))

('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  8.70232370e-27,   5.85182523e-22,   2.94153598e-17,
         3.03352201e-24,   6.62562072e-01,   3.37437928e-01,
         1.45957173e-29,   1.20442684e-11,   9.50616677e-30], dtype=float32))
('3: ', array([  8.70232370e-27,   5.85182523e-22,   2.94153598e-17,
         3.03352201e-24,   6.62562072e-01,   3.37437928e-01,
         1.45957173e-29,   1.20442684e-11,   9.50616677e-30]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  8.70232370e-27,   5.85182523e-22,   2.94153598e-17,
         3.03352201e-24,   6.62562072e-01,   3.37437928e-01,
         1.45957173e-29,   1.20442684e-11,   9.50616677e-30], dtype=float32))

('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  1.26818373e-26,   8.86344221e-22,   1.88172770e-17,
         4.44184423e-24,   7.98225522e-01,   2.01774508e-01,
         2.11484712e-29,   2.49144160e-11,   1.53037297e-29], dtype=float32))
('3: ', array([  1.26818369e-26,   8.86344195e-22,   1.88172764e-17,
         4.44184409e-24,   7.98225498e-01,   2.01774502e-01,
         2.11484706e-29,   2.49144153e-11,   1.53037293e-29]))
('0: ', [])
('1: ', [0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111])
('2: ', array([  1.26818373e-26,   8.86344221e-22,   1.88172770e-17,
         4.44184423e-24,   7.98225522e-01,   2.01774508e-01,
         2.11484712e-29,   2.49144160e-11,   1.53037297e-29], dty

In [None]:
# rList holds 1 for every episode that reached the goal and 0 for every
# episode that ended in a collision.
print("Percent of successful episodes: %s%%" % (100.0 * sum(rList) / num_episodes))

# plt.bar(range(len(rList)), rList, color = "Blue", width = 0.00001)
# plt.show()