# Task 1

In [1]:
# Creamos la clase Environment
class Environment:

    # Constructor
    def __init__(self, board, dimensions):
        self.board = board
        self.dimensions = dimensions
        self.current_state = (0,0)

    # Retorna el estado actual
    def get_current_state(self):
        return self.current_state

    # Retorna las acciones disponibles para cada estado
    def get_possible_actions(self, state):
        possible_actions = []
        x, y = state
        if x > 0 and self.board[x-1][y] != '*':
            possible_actions.append('up')
        if x < self.dimensions[0]-1 and self.board[x+1][y] != '*':
            possible_actions.append('down')
        if y > 0 and self.board[x][y-1] != '*':
            possible_actions.append('left')
        if y < self.dimensions[1]-1 and self.board[x][y+1] != '*':
            possible_actions.append('right')
        return possible_actions

    # Ejecuta la acción y retorna la recompensa y el nuevo estado
    def do_action(self, action):
        x, y = self.current_state
        if action == 'up':
            x = x - 1
        elif action == 'down':
            x = x + 1
        elif action == 'left':
            y = y - 1
        elif action == 'right':
            y = y + 1
        reward = self.board[x][y]
        self.current_state = (x, y)
        return reward, self.current_state

    # Restablece el ambiente a su estado inicial
    def reset(self):
        self.current_state = (0,0)

    # Verifica si el estado actual es terminal
    def is_terminal(self):
        x, y = self.current_state
        return self.board[x][y] == 1

In [11]:
board = [[' ',' ',' ',' '],
        [' ','*',' ',1],
        [' ',' ',' ',-1],
        [' ',' ',' ',' ']]
        
grid = Environment(board,[len(board),len(board[0])])
grid.board

[[' ', ' ', ' ', ' '],
 [' ', '*', ' ', 1],
 [' ', ' ', ' ', -1],
 [' ', ' ', ' ', ' ']]

In [12]:
current_state = grid.get_current_state()
print(current_state)
print(grid.get_possible_actions([0,0]))
print(grid.do_action('right'))
print(grid.is_terminal()'\n')

current_state = grid.get_current_state()
print(current_state)
print(grid.get_possible_actions(current_state))
print(grid.do_action('right'))
print(grid.is_terminal()'\n')

current_state = grid.get_current_state()
print(current_state)
print(grid.get_possible_actions(current_state))
print(grid.do_action('right'))
print(grid.is_terminal()'\n')

current_state = grid.get_current_state()
print(current_state)
print(grid.get_possible_actions(current_state))
print(grid.do_action('down'),'\n')

(0, 0)
['down', 'right']
(' ', (0, 1)) 

False
(0, 1)
['left', 'right']
(' ', (0, 2)) 

(0, 2)
['down', 'left', 'right']
(' ', (0, 3)) 

(0, 3)
['down', 'left']
(1, (1, 3)) 



In [None]:
class GridWorld:
    def __init__(self, dimensions):
        self.dimensions = dimensions
        self.board = [[0 for _ in range(dimensions[1])] for _ in range(dimensions[0])]
        self.board[-1][-1] = 1  # Estado objetivo
        self.board[0][0] = 0  # Estado inicial
        self.current_state = (0, 0)

    def get_current_state(self):
        return self.current_state

    def get_possible_actions(self, state):
        actions = ["up", "down", "left", "right"]
        i, j = state

        if i == 0:
            actions.remove("up")
        if i == self.dimensions[0] - 1:
            actions.remove("down")
        if j == 0:
            actions.remove("left")
        if j == self.dimensions[1] - 1:
            actions.remove("right")

        return actions

    def do_action(self, action):
        i, j = self.current_state
        reward = -1

        if action == "up":
            next_state = (i - 1, j)
        elif action == "down":
            next_state = (i + 1, j)
        elif action == "left":
            next_state = (i, j - 1)
        elif action == "right":
            next_state = (i, j + 1)

        self.current_state = next_state

        i, j = self.current_state
        reward = self.board[i][j]
        if reward == -1:
            self.current_state = (0, 0)

        return reward, self.current_state

    def reset(self):
        self.current_state = (0, 0)

    def is_terminal(self):
        return self.board[self.current_state] == 1

# Task 3

Una empresa de transporte de pasajeros por carretera, que se enfrenta al desafío de maximizar sus beneficios mientras se asegura de que sus clientes lleguen a su destino a tiempo.

- ***Estado:*** 
Número de pasajeros, Estado de la carretera, Posición de la empresa, Estado del tiempo

- ***Acción:*** 
Aumentar el número de vehículos, Reducir el número de vehículos, Ajustar los horarios, Utilizar vías alternativas, Cambiar el precio

- ***Recompensa:*** 
Máximizar los beneficios, Asegurar que los clientes lleguen a tiempo