# 1. Passive learning without random policy

We set a discount factor $\gamma = 0.95$ because we consider future rewards to be very important. However, we do not set $\gamma = 1$, because we want to penalize states that are far from the solution.

In [3]:
from environment import SimpleMazeObstacle
from agent import PassiveAgentTD

# Maze dimensions: 10 rows by 20 columns.
row = 10
col = 20

# Obstacle maze built with a fixed seed and an obstacle ratio of 0.5.
env = SimpleMazeObstacle(
    row,
    col,
    seed=2,
    ratio_obstacles=0.5,
)
env.reset()
env.render()  # the text grid shown below this cell is the output of this call

0    |#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|
1    |#|C|C|A|.|.|P|P|.|P|.|.|#|.|.|.|.|.|#|#|
2    |#|.|#|#|#|#|#|.|#|#|#|.|#|#|#|#|#|P|#|#|
3    |#|P|#|.|P|P|.|.|#|.|.|.|#|P|.|.|P|.|#|#|
4    |#|.|#|.|#|#|#|#|#|E|#|#|#|.|#|#|#|.|#|#|
5    |#|.|#|P|P|.|#|.|.|.|#|.|.|P|#|.|#|C|#|#|
6    |#|.|#|#|#|.|#|.|#|#|#|.|#|#|#|.|#|.|#|#|
7    |#|.|.|P|.|P|#|.|.|.|.|P|#|P|.|.|P|P|#|#|
8    |#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|
9    |#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|#|



In [None]:
# Passive agent on the maze above, with the discount factor discussed earlier.
# NOTE(review): 100 is presumably the number of learning trials - confirm
# against PassiveAgentTD's signature.
agent = PassiveAgentTD(
    env,
    100,
    gamma=0.95,
    debug=False,
)
agent.learning()
agent.print_u_table()

In [None]:
import matplotlib.pyplot as plt
# Plot one curve per visited state; utilities[k] is drawn with the label of
# visited_states[k].
utilities = agent.get_utilities()
visited_states = agent.get_visited_state()

fig = plt.figure()
ax = plt.subplot(111)
for idx, state in enumerate(visited_states):
    ax.plot(utilities[idx], label="state " + str(state))

# Narrow the axes to 80% of their width so the legend fits on the right.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

# Legend anchored just outside the right edge, vertically centred.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_xlabel("Number of trials")
plt.show()

In [None]:
# Print the last recorded utility of every visited cell as a row x col table.
# Cells that were never visited are left blank.
states = agent.get_visited_state()
utilities = agent.get_utilities()

# Header line: column indices, each left-aligned in an 8-character field.
header = "".join("{:<8}".format(str(c)) + " " for c in range(col))
print("   " + header)

for r in range(row):
    line = "{:<2}".format(str(r)) + " "
    for c in range(col):
        if [r, c] in states:
            # utilities is indexed like states; [-1] is the latest value.
            latest = utilities[states.index([r, c])][-1]
            cell = str(round(latest, 2))
        else:
            cell = ""
        line += "{:<8}".format(cell) + " "
    print(line)

# 2. Passive learning with a random policy

In [None]:
from environment import SimpleMaze
from agent import PassiveAgentTD

# Maze dimensions for the random-policy experiment: 7 rows by 12 columns.
row = 7
col = 12

env = SimpleMaze(row, col)
env.render()

In [None]:
# Passive agent run with random_policy=True and a fixed seed.
# NOTE(review): 500 is presumably the number of learning trials - confirm
# against PassiveAgentTD's signature.
agent = PassiveAgentTD(
    env,
    500,
    seed=0,
    gamma=0.95,
    random_policy=True,
    debug=False,
)
agent.learning()

In [None]:
# Scratch check: pop(0) removes the first element of the list and returns it.
a = [1, 2, 3, 8]
first = a.pop(0)
print(first)

In [None]:
# Table of the latest utility of each visited cell (blank when not visited),
# with column indices as the header and row indices as the first field.
states = agent.get_visited_state()
utilities = agent.get_utilities()

print("   ", end="")
print("".join("{:<8}".format(str(c)) + " " for c in range(col)))

for r in range(row):
    # Text of each cell in this row: rounded final utility, or empty string.
    row_cells = [
        str(round(utilities[states.index([r, c])][-1], 2)) if [r, c] in states else ""
        for c in range(col)
    ]
    print("{:<2}".format(str(r)), end=" ")
    print("".join("{:<8}".format(text) + " " for text in row_cells))

# 3. Active learning with Q-function

In [None]:
from environment import SimpleMaze
from agent import ActiveAgentQLearning

# Maze dimensions for the Q-learning experiment: 7 rows by 12 columns.
(row, col) = (7, 12)

env = SimpleMaze(row, col)
env.render()

In [None]:
# Active Q-learning agent on the maze above.
# NOTE(review): the meaning of 200, q_min and n_min is not visible here -
# check ActiveAgentQLearning before changing them.
agent = ActiveAgentQLearning(
    env,
    200,
    q_min=2000,
    n_min=20,
    gamma=0.95,
    debug=False,
)
agent.learning()

## Utility for each state

In [None]:
# Print the utility of every state as a row x col table.
# States are addressed by the flattened index r * col + c.
utilities = agent.get_utilities()

print("   " + "".join("{:<8}".format(str(c)) + " " for c in range(col)))
for r in range(row):
    line = "{:<2}".format(str(r)) + " "
    for c in range(col):
        state_index = r * col + c
        value = round(utilities[state_index].item(), 2)
        line += "{:<8}".format(str(value)) + " "
    print(line)

## State representation

In [None]:
# Print the flattened state index (r * col + c) of every cell, laid out like
# the maze, so the utility table can be read against it.
column_header = "".join("{:<8}".format(str(c)) + " " for c in range(col))
print("   " + column_header)
for r in range(row):
    indices = "".join("{:<8}".format(str(r * col + c)) + " " for c in range(col))
    print("{:<2}".format(str(r)) + " " + indices)

# Maze environment

In [None]:
from environment import Maze

# Build a Maze from the two size arguments 13 and 30 and render it.
# NOTE(review): presumably (rows, columns), matching the other environments
# in this notebook - confirm against Maze's signature.
env = Maze(13, 30)
env.render()

In [None]:
# Scratch check: two lists compare equal when their elements are equal,
# even after one of them has been mutated in place.
test = [1, 0]
a = [1, 1]
a[-1] = 0  # last element of the two-item list, i.e. index 1
lists_match = a == test
print(lists_match)

In [None]:
# The four available moves, each mapped to a 2-tuple offset.
# NOTE(review): offsets are presumably (row delta, column delta) - confirm
# against the environment that consumes them.
ACTIONS: dict = dict(
    north=(-1, 0),
    east=(0, 1),
    south=(1, 0),
    west=(0, -1),
)

In [None]:
# Action names in insertion order (iterating a dict yields its keys).
list(ACTIONS)

In [None]:
import torch
# Print the version string of the imported torch package.
print(torch.__version__)

In [None]:
from environment import SimpleMaze

# 7 x 12 maze; row and col are reused below to size a tensor.
row = 7
col = 12

env = SimpleMaze(row, col)
env.render()

In [None]:
# Random tensor with one value per maze cell (shape row x col).
random_platter_size_tensor = torch.rand((row, col))
random_platter_size_tensor

In [None]:
# Number of dimensions of the random tensor.
random_platter_size_tensor.ndim

In [None]:
# Summary statistics of the random tensor.
# The explicit cast to float32 before mean() matters only for integer
# tensors, where mean() is not supported; a tensor created with torch.rand
# is already floating point.
summary = (
    ("Minimum", random_platter_size_tensor.min()),
    ("Maximum", random_platter_size_tensor.max()),
    ("Mean", random_platter_size_tensor.type(torch.float32).mean()),
    ("Sum", random_platter_size_tensor.sum()),
)
for label, value in summary:
    print(f"{label}: {value}")

In [None]:
# Build a 3x3 grid of zeros as a list of three independent row lists.
# A comprehension is used instead of an explicit loop so that the
# notebook-level name `row` (the maze row count used by other cells) is not
# rebound to a list.
grille = [[0 for _ in range(3)] for _ in range(3)]

In [None]:
# Display the current contents of grille.
grille

In [None]:
import torch
# Same 3x3 zero grid as an integer tensor; .item() converts the single
# top-left element to a plain Python number.
GRILLE = torch.zeros(3, 3, dtype=torch.int)
GRILLE[0, 0].item()

In [None]:
# Two-element scratch list used by the indexing check in the next cell.
t = [1, 2]

In [None]:
# Second element of t (lists are zero-indexed).
t[1]

In [None]:
# Draw one random integer in [0, x): torch.randint excludes the upper bound.
x = 10
rand_int = torch.randint(low=0, high=x, size=(1,)).item()
print(rand_int)

In [None]:
# Empty 3x3 integer grid and the character's position as [row, column].
grid = torch.zeros(3, 3, dtype=torch.int)
grid[0, 0].item()  # value is discarded; kept from the original scratch cell
character_pos = [0, 0]

In [None]:
import random  # needed for random.shuffle below; not imported elsewhere in the notebook

# Place random obstacles on half of the free cells of `grid`.
free_place = []
character_pos = [0, 0]
# Put a non-zero value on the character's cell.
# NOTE(review): presumably 5 is the marker for the character - confirm.
grid[0][0] = 5

# Collect the [row, column] coordinates of every cell whose value is 0,
# skipping the character's own cell. Plain ints are used as indices so the
# comparison with character_pos is an ordinary list comparison.
for i in range(grid.shape[0]):
    for j in range(grid.shape[1]):
        if [i, j] == character_pos:
            continue
        if grid[i][j].item() == 0:
            free_place.append([i, j])

place_obstacle = []
type_obstacle = []
random.shuffle(free_place)  # shuffles the list in place

# Take half of the (shuffled) free cells and give each one an obstacle type.
for _ in range(len(free_place) // 2):
    place_obstacle.append(free_place.pop(0))  # remove the first element and select it
    # torch.randint excludes the upper bound, so (1, 3) yields 1 or 2.
    type_obstacle.append(torch.randint(1, 3, size=(1,)).item())

# Write each chosen obstacle type into its cell.
for (obstacle_row, obstacle_col), obstacle_type in zip(place_obstacle, type_obstacle):
    grid[obstacle_row][obstacle_col] = obstacle_type

print("generation finie")

In [None]:
 # Display the current contents of grid.
 grid

In [None]:


import random  # needed for random.shuffle below; not imported elsewhere in the notebook

# Stand-alone experiment: pick half of ten candidate slots at random and
# assign each picked slot an obstacle type.
free_place = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
place_obstacle = []
type_obstacle = []
random.shuffle(free_place)  # shuffles the list in place

# The count is computed once from the list itself, before any element is
# popped (the original referenced an undefined `number_free_place`).
for _ in range(len(free_place) // 2):
    place_obstacle.append(free_place.pop(0))  # remove the first element and select it
    # torch.randint excludes the upper bound, so (1, 3) yields 1 or 2.
    type_obstacle.append(torch.randint(1, 3, size=(1,)).item())


In [None]:
# Display the entries selected as obstacle positions.
place_obstacle

In [None]:
# Display the obstacle type drawn for each selected position.
type_obstacle

In [None]:
# Print each even number below 10, and "pas pair" for each odd one.
# Rebinding the loop variable inside the body would not skip an iteration
# (range supplies the next value regardless), so none is attempted here.
for i in range(10):
    is_even = i % 2 == 0
    if not is_even:
        print("pas pair")
    else:
        print(i)


In [None]:
grid = torch.zeros((3, 3), dtype=torch.int)

character_pos = [0, 0]


def cell_reward(grid, character_pos):
    """Return the reward for the cell the character is standing on.

    Args:
        grid: 2-D tensor of cell codes, indexed as grid[row, column].
        character_pos: two-item sequence [row, column].

    Returns:
        -200 when the cell holds 1, +200 when it holds 2, and None for any
        other value (the original sketch defines no other case).
    """
    cell_value = grid[character_pos[0], character_pos[1]].item()
    if cell_value == 1:
        return -200
    if cell_value == 2:
        return +200
    return None