# Mountain Car Q-learning Agent

Train an agent on the MountainCar-v0 environment using the Q-learning algorithm with a discretized state space.

In [None]:
# Install required packages
!pip install gym numpy matplotlib

In [None]:
import gym
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

## Environment setup and state discretization

In [None]:
env = gym.make("MountainCar-v0")

# Number of discrete bins per observation dimension: (position, velocity).
n_bins = (18, 14)
obs_space_low = env.observation_space.low
obs_space_high = env.observation_space.high

# np.digitize with k edges can return k + 1 different indices, so n bins need
# only the n - 1 *interior* edges. Build n + 1 evenly spaced boundaries over
# the observation range and drop both end points: every in-range observation
# then maps to an index in 0 .. n - 1 and all n bins are actually reachable.
# (Keeping the end points as edges would leave the lowest index unused and
# reserve the highest one for values exactly equal to the upper bound.)
obs_bins = [
    np.linspace(low, high, n + 1)[1:-1]
    for low, high, n in zip(obs_space_low, obs_space_high, n_bins)
]

def discretize_state(state):
    """Convert a continuous observation into a hashable tuple of bin indices.

    Each component of ``state`` is looked up with ``np.digitize`` against the
    edge array at the same position in ``obs_bins``; the resulting indices are
    returned as a tuple so they can be used as a Q-table key.
    """
    indices = []
    for value, edges in zip(state, obs_bins):
        indices.append(np.digitize(value, edges))
    return tuple(indices)

## Initialize Q-table and set hyperparameters

In [None]:
def _new_action_values():
    """Return a fresh all-zero action-value vector for a state not seen before."""
    return np.zeros(env.action_space.n)


# Q-table keyed by discretized state; rows are created lazily on first access.
q_table = defaultdict(_new_action_values)

# Per-episode total rewards, filled in during training.
rewards = []

# Hyperparameters
episodes = 10000        # number of training episodes
alpha = 0.1             # learning rate of the Q-value update
gamma = 0.99            # discount factor applied to the next state's value
epsilon = 1.0           # initial exploration probability
epsilon_min = 0.01      # threshold below which epsilon is no longer decayed
epsilon_decay = 0.995   # multiplicative decay applied to epsilon per episode

## Training loop

In [None]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
for episode in range(episodes):
    # reset() returns (observation, info); only the observation is needed.
    state = discretize_state(env.reset()[0])
    done = False
    total_reward = 0

    while not done:
        # Explore with probability epsilon, otherwise act greedily.
        if np.random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = int(np.argmax(q_table[state]))

        # step() returns (obs, reward, terminated, truncated, info). The
        # episode is over when EITHER flag is set; looking only at
        # `terminated` would ignore the environment's time limit and keep
        # stepping an environment that should have been reset.
        next_state_raw, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        next_state = discretize_state(next_state_raw)

        old_value = q_table[state][action]
        # A genuinely terminal state has no future return, so do not bootstrap
        # from it. A truncated episode is only cut short, so it still does.
        next_max = 0.0 if terminated else np.max(q_table[next_state])
        q_table[state][action] = old_value + alpha * (reward + gamma * next_max - old_value)

        state = next_state
        total_reward += reward

    # Decay exploration once per episode, clamped so it never drops below the floor.
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    rewards.append(total_reward)

    # Periodic progress report: mean over (at most) the last 1000 episodes.
    if episode % 1000 == 0:
        avg_reward = np.mean(rewards[-1000:])
        print(f"Episode {episode}, Avg Reward: {avg_reward:.2f}, Epsilon: {epsilon:.3f}")

## Plot the total reward per episode

In [None]:
# Draw the learning curve on the current axes: one point per training episode.
ax = plt.gca()
ax.plot(rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Total Reward")
ax.set_title("Q-learning on MountainCar-v0")
ax.grid(True)
plt.show()