In [9]:
import gymnasium as gym
import numpy as np
import random

In [10]:
#setting up in the custom maze enviorment
desc=["SFFF", "FHHH", "FFFF", "HFHF", "FFGF"]

env = gym.make('FrozenLake-v1', desc=desc, map_name="5x4", is_slippery=False, render_mode="human") 
observation, info = env.reset()

In [11]:
# Define parameters
gamma = 0.5  # Discount factor
alpha = 0.9  # Learning rate
epsilon = 0.025  # Epsilon-greedy parameter
num_episodes = 10000

#this will show how many times the elf coompleted the maze and how many times it hit the wall and lake
completion_num = 0
into_wall = 0
into_lake = 0

In [12]:
# Initialize Q-values
num_states = env.observation_space.n
num_actions = env.action_space.n
Q = np.zeros((num_states, num_actions))
# Q-learning algorithm
for episode in range(num_episodes):
    state = env.reset()[0]
    done = False
    
    steps = []
    
    while not done:
        # Epsilon-greedy action selection
        if np.random.rand() < epsilon:
            action = env.action_space.sample()  # Random action
        else:
            ind = 0
            maxIndices = []
            maxNum = -1
            for num in Q[state]:
                if num > maxNum:
                    maxIndices = [ind]
                    maxNum = num
                elif num == maxNum:
                    maxIndices.append(ind)
                ind += 1
                
            maxInd = -1
            if len(maxIndices) >= 1:
                maxInd = maxIndices[random.randint(0, len(maxIndices) - 1)]
                
            if maxInd == -1:
                maxInd = env.action_space.sample()
            action = maxInd  # Greedy action
        
        # Take action and observe next state and reward
        steps.append(action)
        step = env.step(action)
        next_state = step[0]
        reward = step[1]
        done = step[2]
        
        if reward == 0 and done:
            print("Hit lake :C"+ str(episode + 1))
            reward = -1
            into_wall += 1
        elif next_state == state:
            print("ran into wall")
            reward = -1
            into_lake += 1
        
        # Update Q-value
        Q[state, action] += alpha * (reward + gamma * np.max(Q[next_state, :]) - Q[state, action])
        
        if reward == 1:
            print("Hit gift! At Episode: "+str(episode +1))
            print(Q)
            print("Steps: ")
            print(steps)
            completion_num += 1
        
        # Move to next state
        state = next_state


hit lake :C
ran into wall
ran into wall
hit lake :C
ran into wall
hit lake :C
ran into wall
ran into wall
hit lake :C
hit lake :C
hit lake :C
hit lake :C
ran into wall
Hit gift! At Episode: 8
[[-0.9   0.    0.   -0.9 ]
 [ 0.   -0.99  0.   -0.9 ]
 [ 0.   -0.9   0.    0.  ]
 [ 0.    0.    0.    0.  ]
 [-0.9   0.   -0.9   0.  ]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]
 [-0.9  -0.9   0.    0.  ]
 [ 0.    0.    0.   -0.9 ]
 [ 0.   -0.9   0.    0.  ]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]
 [ 0.   -0.9   0.9   0.  ]
 [ 0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.  ]]
Steps: 
[1, 1, 2, 0, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 2, 1, 1, 1, 2]
ran into wall
ran into wall
hit lake :C
hit lake :C
ran into wall
ran into wall
ran into wall
hit lake :C
hit lake :C
hit lake :C
Hit gift! At Episode: 14
[[-0.9    0.     0.    -0.9  ]
 [ 0.    -0.99 

KeyboardInterrupt: 

In [13]:
# Optimal policy
optimal_policy = np.argmax(Q, axis=1)

print("Optimal policy:")
print(optimal_policy)

#prints out how many time it hit the lake, wall, or has completed
print("# of episode completed:" + str(episode + 1))
print("# of times elf completed maze: " + str(completion_num))
print("# of times elf hit the wall: " + str(into_wall))
print("# of times elf went into the lake: " + str(into_lake))

Optimal policy:
[1 0 0 0 1 0 0 0 2 1 0 0 0 1 0 0 2 2 0 0]
# of episode completed:3213
# of times elf completed maze: 3098
# of times elf hit the wall: 114
# of times elf went into the lake: 106


In [14]:
env.close()