In [1]:
import numpy as np
from random import randint
import random

###  creation of environnement

In [2]:
class Maze(object):
    
    # creation our maze
    def __init__(self):
        super(Maze, self).__init__()

        self.grid = [
            [0, 0, 1,0],
            [0, -10,0,0],
            [0, 0, 0,-1],
            [0, -10, -10,0],
            [0, 0, 0, 0]
           
        ]
        # Starting position
        self.y = 4
        self.x = 0

        self.actions = [
            [-1, 0], # Up
            [1, 0], #Down
            [0, -1], # Left
            [0, 1] # Right
        ]

    # return your agent to the initial position 
    def reset(self):
        """
            Reset world
        """
        self.y = 4
        self.x = 0
        return (self.y*4+self.x+1)

    # this function made to move your agent to the next cell
    def step(self, action):
        """
            Action: 0, 1, 2, 3
        """
        self.y = max(0, min(self.y + self.actions[action][0],4))
        self.x = max(0, min(self.x + self.actions[action][1],3))

        return (self.y*4+self.x+1) , self.grid[self.y][self.x]

    # here we show our grid on the screen 
    def show(self):
        print("***************************")
        y = 0
        for line in self.grid:
            x = 0
            for pt in line:
                print("%s\t" % (pt if y != self.y or x != self.x else "X"), end="")
                x += 1
            y += 1
            print("")
    # here we insur that our agent has reach to the final state.... the final state has reward of 1 
    def is_finished(self):
        return self.grid[self.y][self.x] == 1 
    
    # in case of a wall.... the wall has punichment of -100
    def is_wall(self):
        return  self.grid[self.y][self.x] == -100 


### choose an action randomly or take the argmax action of Q-table


In [3]:
# choose an action randomly or take the argmax action of Q-table
def take_action(st, Q, eps):
    # Take an action
    if random.uniform(0, 1) < eps:
        action = randint(0, 3)
    else: 
        # Or greedy action
        action = np.argmax(Q[st])
    return action


### Main Code 

In [4]:
env = Maze()
st = env.reset()
env.show()
# initialisation of Q-table or look-up matrix
# rows represent number of state
# columns represent number of actions Up Down left Right
Q = [
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0]
]

# here we take 200 cycles  of learning 
for _ in range(200):
    
    # Reset our agent 
    st = env.reset()
    while not env.is_finished():
        
        # take action 
        at = take_action(st, Q, 0.4)
        
        # save the current position
        x=env.x
        y=env.y
        
        stp1, r = env.step(at)

        # Update Q function
        atp1 = take_action(stp1, Q, 0.0)
        
        # in case of wall don't calculate the 
        if env.is_wall():
            env.x=x
            env.y=y
            continue
        # formulation of relation of Q-leanring
        Q[st][at] = Q[st][at] + 0.1*(r + 0.9*Q[stp1][atp1] - Q[st][at])
        
        # show our grid
        env.show()
        st = stp1

env.show()


***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
X	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	X	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	X	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	X	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	X	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	X	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	X	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	X	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	X	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	X	0	
0	

0	X	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	X	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	X	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	X	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
X	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	X	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
************************

0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	X	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
********************

0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
X	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
X	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	

0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	X	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1

***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
X	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	X	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
X	0	0	0	
***************************
0	0	1	0	
0	-

0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
X	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	X	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
********************

0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	X	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	X	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
************

0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
X	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
******************

0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	X	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
**********

0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	

0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	X	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	

***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	X	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
0	0	0	-1	
X	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	0	0	
X	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
X	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
X	0	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	X	1	0	
0	-10	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	X	0	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10	X	0	
0	0	0	-1	
0	-10	-10	0	
0	0	0	0	
***************************
0	0	1	0	
0	-10

In [5]:
a=env.reset()
# here
i=1
while not env.is_finished():
    print("Point ",i,"(",env.y,",",env.x,")")
    b=np.argmax(Q[a])
    a,_=env.step(b)
    i+=1
    


Point  1 ( 4 , 0 )
Point  2 ( 3 , 0 )
Point  3 ( 2 , 0 )
Point  4 ( 1 , 0 )
Point  5 ( 0 , 0 )
Point  6 ( 0 , 1 )


In [6]:
for s in range(1, 21):
    print("state : ",s,"==Q-table==>", Q[s])

state :  1 ==Q-table==> [0.7912607918444735, 0.68159273637706, 0.7441614685112159, 0.8999999755504265]
state :  2 ==Q-table==> [0.7626022485043156, -9.201688992571576, 0.7301553506491567, 0.9999999957696215]
state :  3 ==Q-table==> [0, 0, 0, 0]
state :  4 ==Q-table==> [0.0, 0.0, 0.271, 0]
state :  5 ==Q-table==> [0.809999903369986, 0.5891086144306733, 0.7021859553440031, -8.845075125213512]
state :  6 ==Q-table==> [0.33367733880155853, 0.1281946606362968, 0.7109723842932824, 0.33218448254439215]
state :  7 ==Q-table==> [0.7712320754503901, 0.013546431900000003, -2.5900395305204764, 0.0029970000000000005]
state :  8 ==Q-table==> [0.025200000000000004, 0, 0.06712320754503902, 0]
state :  9 ==Q-table==> [0.728999626927878, 0.5407260643570168, 0.6141394692854159, 0.38772380432884157]
state :  10 ==Q-table==> [-1.0, -3.965732625170215, 0.585552928120541, 0.05504836292991992]
state :  11 ==Q-table==> [0.2933274085012081, 0, 0.039467162682248494, -0.1]
state :  12 ==Q-table==> [0.0, 0.0, 0, -