In [1]:
import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np
import random

In [2]:
# Q-learning hyperparameters, passed to train_q_learning_model by the training cell.
max_episodes = 50_000    # number of training episodes
lr = 0.1                 # step size applied to each Q-value update
discount_factor = 0.9    # weight given to the next state's best Q-value
epsilon = 1.0            # initial probability of taking a random action
interval = 1_000         # a progress line is printed every `interval` episodes

In [3]:
# Module-level environment. It supplies the observation bounds used below and
# is also read by the helper functions (action sampling, number of actions).
# The step limit is passed through gym.make's `max_episode_steps` argument
# rather than by writing the wrapper's private `_max_episode_steps` attribute.
env = gym.make('MountainCar-v0', max_episode_steps=1000)

env_low = env.observation_space.low
env_high = env.observation_space.high

# Bin edges used by get_state to discretise the two observation components:
# index 0 (position) gets 18 edges, index 1 (velocity) gets 28 edges.
pos_space = np.linspace(env_low[0], env_high[0], 18)
vel_space = np.linspace(env_low[1], env_high[1], 28)

In [4]:
def get_state(state):
    """Map a two-component observation to a pair of discrete bin indices.

    Args:
        state: A sequence of exactly two values, unpacked as (position, velocity).

    Returns:
        A tuple ``(position_bin, velocity_bin)`` holding the ``np.digitize``
        index of each component against the module-level ``pos_space`` and
        ``vel_space`` bin edges.
    """
    position, velocity = state
    return (np.digitize(position, pos_space), np.digitize(velocity, vel_space))

def max_action(q_table, state, actions=(0, 1, 2)):
    """Return the action with the highest Q-value in ``state``.

    Args:
        q_table: Mapping keyed by ``(state, action)`` that holds Q-values.
        state: Hashable state used as the first element of the key.
        actions: Candidate actions to compare. Any iterable is accepted.

    Returns:
        The element of ``actions`` whose Q-value is largest. On ties the
        earliest candidate wins, because ``np.argmax`` returns the first maximum.

    Raises:
        KeyError: If ``(state, action)`` is missing from ``q_table``.
        ValueError: If ``actions`` is empty.
    """
    candidates = list(actions)
    q_values = [q_table[state, action] for action in candidates]
    # np.argmax yields a position within `candidates`; translate it back to the
    # action itself so non-default action sets give the right answer.
    return candidates[int(np.argmax(q_values))]

def choose_action(state, q_table, epsilon):
    """Pick an action epsilon-greedily.

    One uniform draw from [0, 1] is compared with ``epsilon``: when the draw is
    smaller, a random action is sampled from the module-level ``env``'s action
    space; otherwise the greedy action from ``max_action`` is returned.

    Args:
        state: Discrete state used to index ``q_table``.
        q_table: Mapping keyed by ``(state, action)``.
        epsilon: Probability threshold for taking a random action.
    """
    explore = random.uniform(0, 1) < epsilon
    if not explore:
        return max_action(q_table, state)
    return env.action_space.sample()


def make_q_table():
    """Build a zero-initialised Q-table covering every discrete state and action.

    The table has one entry per ``((pos_idx, vel_idx), action)`` key, where
    ``pos_idx`` runs over ``range(len(pos_space) + 1)``, ``vel_idx`` over
    ``range(len(vel_space) + 1)`` and ``action`` over the module-level
    ``env``'s ``action_space.n`` actions. The extra index per axis is there
    because ``np.digitize`` can return any value from 0 to ``len(bins)``.

    Keys are built from plain Python ints rather than NumPy scalars. They hash
    and compare equal to the NumPy indices produced by ``get_state``, but their
    text form does not depend on how the installed NumPy prints scalars, which
    matters when the table is written out with ``str()``.

    Returns:
        dict mapping ``((pos_idx, vel_idx), action)`` to ``0``.
    """
    n_pos = len(pos_space) + 1
    n_vel = len(vel_space) + 1
    n_actions = int(env.action_space.n)

    # Velocity is the outer loop and position the inner one, which reproduces
    # the insertion order of the previous meshgrid-based construction.
    return {
        ((pos_idx, vel_idx), action): 0
        for vel_idx in range(n_vel)
        for pos_idx in range(n_pos)
        for action in range(n_actions)
    }

In [5]:
def train_q_learning_model(max_episodes, lr, discount_factor, epsilon, interval, max_episode_steps = 1000):
    """Train a tabular Q-learning agent on MountainCar-v0.

    Args:
        max_episodes: Number of episodes to run.
        lr: Step size for the Q-value update.
        discount_factor: Weight applied to the best Q-value of the next state.
        epsilon: Initial exploration probability. It is reduced by
            ``2 / max_episodes`` after every episode and never goes below 0.01.
        interval: A progress line is printed every ``interval`` episodes.
        max_episode_steps: Step limit passed to ``gym.make`` for each episode.

    Returns:
        ``(rewards_q, q_table)``: the list of per-episode total rewards and the
        learned Q-table keyed by ``(state, action)``.

    Note:
        Exploratory actions are sampled inside ``choose_action``, which reads
        the module-level ``env``; that environment must exist before training.
    """
    # Use the documented gym.make argument instead of writing the private
    # `_max_episode_steps` attribute on the returned wrapper.
    env = gym.make('MountainCar-v0', max_episode_steps=max_episode_steps)

    rewards_q = []
    current_reward = 0

    q_table = make_q_table()

    try:
        for ep in range(max_episodes):
            observation, info = env.reset()

            # Printed before the reset below, so 'Reward' is the total of the
            # previous episode (0 on the very first line).
            if ep % interval == 0:
                print('Game number:', ep, 'Epsilon: ',
                      epsilon, 'Reward: ', current_reward)

            current_reward = 0
            terminated, truncated = False, False
            state = get_state(observation)

            while not (terminated or truncated):
                action = choose_action(state, q_table, epsilon)

                next_observation, reward, terminated, truncated, info = env.step(action)
                current_reward += reward

                next_state = get_state(next_observation)

                if terminated:
                    # A terminal state has no future return, so do not bootstrap
                    # from it. Truncation (step limit) still bootstraps.
                    target = reward
                else:
                    next_action = max_action(q_table, next_state)
                    target = reward + discount_factor * q_table[next_state, next_action]

                q_table[state, action] += lr * (target - q_table[state, action])

                state = next_state

            # Clamp after decaying so epsilon never drops below the 0.01 floor,
            # not even for a single episode.
            epsilon = max(0.01, epsilon - 2 / max_episodes)
            rewards_q.append(current_reward)
    finally:
        # Release the environment even if training is interrupted.
        env.close()

    return rewards_q, q_table

In [6]:
import gymnasium as gym
import numpy as np

# Running history of every evaluation reward recorded in this session. Kept at
# module level for code that reads it directly; test_q_learning_model appends
# to it but no longer returns it.
test_rewards_q = []
# Assuming 'MountainCar-v0' environment


def test_q_learning_model(q_table, episodes=5):
    """Run the greedy policy from ``q_table`` in a rendered MountainCar-v0.

    Args:
        q_table: Mapping keyed by ``(state, action)`` holding Q-values.
        episodes: Number of evaluation episodes to play.

    Returns:
        A new list with the total reward of each episode played by this call.
        Earlier versions returned the shared module-level list, so repeated
        calls also returned rewards from previous calls.

    Side effects:
        Appends each episode reward to the module-level ``test_rewards_q`` and
        opens a ``render_mode='human'`` environment, which is closed on exit.
    """
    env = gym.make('MountainCar-v0', render_mode='human')
    episode_rewards = []

    try:
        for ep in range(episodes):
            observation, info = env.reset()

            test_reward = 0
            terminated, truncated = False, False
            state = get_state(observation)

            while not (terminated or truncated):
                # epsilon = 0 makes choose_action always take the greedy branch
                action = choose_action(state, q_table, epsilon = 0)

                observation, reward, terminated, truncated, info = env.step(action)
                test_reward += reward

                state = get_state(observation)

            episode_rewards.append(test_reward)
            test_rewards_q.append(test_reward)
    finally:
        # The environment is local to this function, so it must be closed here;
        # nothing outside can reach it afterwards.
        env.close()

    return episode_rewards

In [None]:
# TRAINING COMMAND

# Runs training with the hyperparameters from the configuration cell and keeps
# both the per-episode rewards and the learned Q-table.
train_rewards, q_table = train_q_learning_model(
    max_episodes=max_episodes,
    lr=lr,
    discount_factor=discount_factor,
    epsilon=epsilon,
    interval=interval,
)

In [None]:
# SAVE Q TABLE FILE

# Convert keys and values to plain Python scalars first, so the written text is
# a plain dict literal regardless of how the installed NumPy prints its scalars.
serializable_q_table = {
    ((int(pos_idx), int(vel_idx)), int(action)): float(value)
    for ((pos_idx, vel_idx), action), value in q_table.items()
}

# The context manager closes (and therefore flushes) the file before any later
# cell tries to read it back.
with open('q_table.txt', 'w') as q_table_file:
    q_table_file.write(str(serializable_q_table))

In [7]:
#TESTING COMMAND

# Read the saved table with a context manager so the handle is closed promptly.
with open('q_table.txt', 'r') as q_table_file:
    saved_q_table_text = q_table_file.read()

# SECURITY NOTE: eval() executes whatever expression the file contains. Only
# load a q_table.txt that this notebook wrote; never one from an untrusted
# source. If the file holds a plain literal, ast.literal_eval is the safer
# replacement.
q_table = eval(saved_q_table_text)
test_rewards = test_q_learning_model(q_table)

In [1]:
# Closes the module-level `env` created in the environment-setup cell. Raises
# NameError if that cell has not been run in the current kernel session.
env.close()

NameError: name 'env' is not defined

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
# Take the x-axis length from the recorded rewards themselves so x and y always
# match, even if `max_episodes` was changed after the training run.
ax.scatter(range(len(train_rewards)), train_rewards, s=0.5)
ax.set_xlabel('Episode')
ax.set_ylabel('Reward')
ax.set_title('Training Rewards')
plt.show()