## TASK 2

In [1]:
import numpy as np

# Transition model: the intended move succeeds with probability 0.8;
# otherwise the agent slips, with the failure mass split evenly between
# the two perpendicular directions.
successful_probability = 0.8
failure_probability = 0.2
failure_move_probability = failure_probability / 2

# Reward collected on every step
steping_reward = -0.1

# Utility grid transcribed from the image, indexed as (row, column).
# None marks a cell the agent cannot enter.
u_grid = np.array(
    [
        [7.41, 7.52, 7.65, 10, 7.54],
        [7.31, None, -10, 5.82, -10],
        [7.15, None, -10, 4.31, None],
        [6.98, 6.77, 6.44, 5.87, 6.12],
        [6.90, 6.80, 6.59, 6.51, 6.34],
    ],
    dtype=object,
)

# (row, column) coordinates of the green states to evaluate
green_states = [(1, 0), (3, 2), (4, 1)]

# function to calculate the expected utility of moving in a particular direction
def calculate_expected_utility(state, direction, grid):
    """Return the expected utility of attempting `direction` from `state`.

    The intended move is weighted by `successful_probability`; each of the
    two perpendicular moves is weighted by `failure_move_probability`.
    A move that would leave the grid, or that targets a cell holding None,
    leaves the agent in `state`, so the utility of `state` is used instead.
    `steping_reward` is added to the weighted sum.

    :parameter state: tuple (row, column) of the current state
    :parameter direction: one of 'UP', 'DOWN', 'LEFT', 'RIGHT'
    :parameter grid: 2-D utility grid exposing `.shape` and indexable by a
        (row, column) tuple; inaccessible cells hold None
    :return: the expected utility of attempting the move
    :raises ValueError: if `direction` is not one of the four actions
    """
    # The two moves the agent may slip into, in the order they are summed.
    slip_directions = {
        'UP': ('LEFT', 'RIGHT'),
        'DOWN': ('LEFT', 'RIGHT'),
        'LEFT': ('UP', 'DOWN'),
        'RIGHT': ('UP', 'DOWN'),
    }
    # Fail early with a clear message instead of hitting an unbound local
    # further down when the direction is misspelled or unsupported.
    if direction not in slip_directions:
        raise ValueError(
            "Unknown direction {!r}; expected one of {}".format(
                direction, sorted(slip_directions)
            )
        )

    row, col = state
    last_row = grid.shape[0] - 1
    last_col = grid.shape[1] - 1

    # Cell reached by each action. Clamping to the grid bounds means a move
    # off the edge resolves to the current cell.
    destinations = {
        'UP': (max(row - 1, 0), col),
        'DOWN': (min(row + 1, last_row), col),
        'LEFT': (row, max(col - 1, 0)),
        'RIGHT': (row, min(col + 1, last_col)),
    }

    def utility_after(action):
        # A None cell is inaccessible: the agent stays where it is.
        value = grid[destinations[action]]
        return grid[state] if value is None else value

    intended_utility = utility_after(direction)
    failed_utilities = [utility_after(slip) for slip in slip_directions[direction]]

    # Calculating expected utility
    expected_utility = (intended_utility * successful_probability +
                        sum(failed_utilities) * failure_move_probability +
                        steping_reward)

    return expected_utility

# function to find the optimal action for a state
def find_optimal_action(state, grid):
    """Pick the action with the highest expected utility from `state`.

    :parameter state: tuple (row, column) of the current state
    :parameter grid: the utility grid
    :return: tuple (optimal action, expected utility of that action)
    """
    candidate_actions = ['UP', 'DOWN', 'LEFT', 'RIGHT']

    # Score every candidate action in the fixed order above.
    scores = []
    for candidate in candidate_actions:
        scores.append(calculate_expected_utility(state, candidate, grid))

    # argmax returns the first index of the maximum, so ties go to the
    # action listed first.
    best_index = np.argmax(scores)
    best_score = max(scores)
    return candidate_actions[best_index], best_score

# Map each green state to its (optimal action, expected utility) pair
optimal_actions = dict.fromkeys(green_states)
for state in optimal_actions:
    optimal_actions[state] = find_optimal_action(state, u_grid)

# Display the result as the cell output
optimal_actions


{(1, 0): ('UP', 7.290000000000001),
 (3, 2): ('DOWN', 6.436000000000001),
 (4, 1): ('LEFT', 6.777000000000001)}

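   Function to calculate the expected utility -
    It calculates the expected utility of attempting a move in a given direction from a state.
    The intended move succeeds with probability 0.8; otherwise the agent slips to one of the
    two perpendicular directions (probability 0.1 each). A move that would leave the grid or
    enter an inaccessible (None) cell leaves the agent in its current state. The step reward
    of -0.1 is added to the weighted sum.
    
    :parameter state: tuple (row, column) of the current state
    :parameter direction: the direction to move in ('UP', 'DOWN', 'LEFT', 'RIGHT')
    :parameter grid: the utility grid
    It returns the expected utility of moving in the given direction.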


   
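    Function to find the optimal action for the agent in a given state.
    It evaluates the expected utility of each of the four actions and keeps the best one.
    
    :parameter state: tuple (row, column) of the current state
    :parameter grid: the utility grid
    It returns a tuple: the optimal action ('UP', 'DOWN', 'LEFT', 'RIGHT') and its expected utility.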
    
    