In [1]:
import sys
import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np
import random

In [2]:
# Build the environment and show what the agent can do and observe:
# the action space, the observation space, and one randomly sampled action.
env = gym.make("MountainCar-v0", render_mode="human")
for description in (
    env.action_space,
    env.observation_space,
    env.action_space.sample(),
):
    print(description)

Discrete(3)
Box([-1.2  -0.07], [0.6  0.07], (2,), float32)
2


In [None]:
# Smoke test: drive the environment with random actions for a fixed number of steps.
env = gym.make("MountainCar-v0", render_mode="human")
# reset() returns exactly two values: (observation, info).
observation, info = env.reset()

try:
    for i in range(100):
        action = env.action_space.sample()  # random policy; ignores observation and info
        # step() returns exactly five values.
        observation, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            # A finished episode must be reset before stepping again.
            observation, info = env.reset()
finally:
    # Release the render window even if a step raises.
    env.close()

In [None]:
# Play one complete episode with uniformly random actions and echo every
# transition as "<observation> -> <reward>".
env = gym.make("MountainCar-v0", render_mode="human")
observation, info = env.reset()
terminated = truncated = False

# Keep stepping until the episode ends for either reason.
while not (terminated or truncated):
    action = env.action_space.sample()  # random policy; ignores the observation
    step_result = env.step(action)
    observation, reward, terminated, truncated, info = step_result
    print(f"{observation} -> {reward}")

env.close()


# Show the lower and then the upper bound of the observation space.
for bound in (env.observation_space.low, env.observation_space.high):
    print(bound)


def discretize(x):
    """Map a continuous 2-element observation to a tuple of integer grid indices.

    The observation is shifted by the grid origin (-1.2, -0.07), divided by the
    cell size (0.1, 0.01) and truncated to integers, so the result is hashable
    and can be used as a Q-table key.
    """
    grid_origin = np.array([-1.2, -0.07])
    cell_size = np.array([0.1, 0.01])
    cell_index = (x - grid_origin) / cell_size
    return tuple(cell_index.astype(int))
# Play one random episode and print the grid cell that each new observation
# falls into, to check the discretization by eye.
env = gym.make("MountainCar-v0", render_mode="human")
observation, info = env.reset()
terminated, truncated = False, False

while True:
    action = env.action_space.sample()  # random policy; ignores the observation
    observation, reward, terminated, truncated, info = env.step(action)
    print(discretize(observation))
    if terminated or truncated:
        break

env.close()


# Q-table: maps (state, action) -> estimated value; missing entries count as 0.
Q = {}
# The three discrete action ids.
actions = (0, 1, 2)


def qvalues(state):
    """Return the Q-value of every action in `state`, in the order of `actions`.

    Pairs that are not yet in the table are reported as 0.
    """
    estimates = []
    for action in actions:
        estimates.append(Q.get((state, action), 0))
    return estimates
# Q-learning hyperparameters, read by the training loop.
# alpha: weight given to the new target when blending it with the old Q-value.
alpha = 0.3
# gamma: factor applied to the best next-state Q-value in the update target.
gamma = 0.9
# epsilon: probability of taking the Q-weighted (exploitation) branch; the
# uniformly random branch is taken otherwise. NOTE: this is the inverse of the
# usual convention, where epsilon is the exploration probability.
epsilon = 0.90

In [None]:
def probs(v, eps=1e-4):
    """Turn an array of scores into sampling weights that sum to 1.

    The scores are shifted so the smallest one equals `eps` (keeping every
    weight strictly positive) and then normalised by their total.
    """
    shifted = v - v.min() + eps
    return shifted / shifted.sum()

# Train a tabular Q-learning agent on MountainCar-v0.
#
# Rendering is off by default: "human" mode draws every step to a window,
# which makes a 1000-episode run impractically slow.
train_render_mode = None  # set to "human" to watch training
env = gym.make("MountainCar-v0", render_mode=train_render_mode)
# MountainCar gives a negative reward on every step, so episode returns are
# negative. The best-so-far average therefore starts at -inf; starting at 0
# would never be beaten and Qbest would never be recorded.
Qmax = float("-inf")
Qbest = {}
cum_rewards = []  # episode returns since the last progress report
rewards = []      # return of every episode, kept for plotting
try:
    for epoch in range(1000):
        observation, info = env.reset()
        cum_reward = 0
        terminated = False
        truncated = False
        # == do the simulation ==
        while not terminated and not truncated:
            s = discretize(observation)
            if random.random() < epsilon:
                # exploitation - choose the action according to Q-Table probabilities
                v = probs(np.array(qvalues(s)))
                a = random.choices(actions, weights=v)[0]
            else:
                # exploration - choose the action uniformly at random
                a = np.random.randint(env.action_space.n)

            observation, reward, terminated, truncated, info = env.step(a)
            cum_reward += reward
            ns = discretize(observation)
            # A terminal state has no future return, so do not bootstrap from
            # it. A truncated (time-limit) episode still bootstraps.
            future = 0 if terminated else max(qvalues(ns))
            Q[(s, a)] = (1 - alpha) * Q.get((s, a), 0) + alpha * (reward + gamma * future)

        cum_rewards.append(cum_reward)
        rewards.append(cum_reward)
        # == Periodically print results and calculate average reward ==
        if epoch % 50 == 0:
            avg_reward = np.average(cum_rewards)
            print(f"{epoch}: {avg_reward}, alpha={alpha}, epsilon={epsilon}")
            if avg_reward > Qmax:
                Qmax = avg_reward
                # Snapshot the table; a plain assignment would only alias the
                # live Q dict, which keeps changing as training continues.
                Qbest = dict(Q)
            cum_rewards = []
finally:
    # Always release the environment, even if training is interrupted.
    env.close()
plt.plot(rewards)
def running_average(x, window):
    """Return the moving average of `x` over `window` consecutive samples.

    Only positions where the window fits entirely inside `x` are kept, so the
    result has len(x) - window + 1 entries.
    """
    kernel = np.full(window, 1.0 / window)
    return np.convolve(x, kernel, mode='valid')

# Overlay the 100-episode moving average of the training returns.
smoothed_rewards = running_average(rewards, 100)
plt.plot(smoothed_rewards)

In [None]:
# Watch one episode driven by the Q-table: in every state an action is
# sampled with weights derived from that state's Q-values.
env = gym.make("MountainCar-v0", render_mode="human")
observation, info = env.reset()
terminated = truncated = False
while not (terminated or truncated):
    s = discretize(observation)
    action_weights = probs(np.array(qvalues(s)))
    (a,) = random.choices(actions, weights=action_weights)
    observation, reward, terminated, truncated, info = env.step(a)

env.close()



import os

from PIL import Image

# Record one episode driven by the Q-table and save it as an animated GIF.
gif_path = 'images/mountain-car.gif'
env = gym.make("MountainCar-v0", render_mode="rgb_array")
observation, info = env.reset()
i = 0      # number of environment steps taken
ims = []   # one PIL image per step, captured before the action is applied
terminated = False
truncated = False
try:
    while not terminated and not truncated:
        s = discretize(observation)
        # In "rgb_array" mode render() returns the current frame as an array.
        ims.append(Image.fromarray(env.render()))
        v = probs(np.array(qvalues(s)))
        a = random.choices(actions, weights=v)[0]
        observation, reward, terminated, truncated, info = env.step(a)
        i += 1
finally:
    # Release the environment even if the rollout raises.
    env.close()

# Saving fails with FileNotFoundError if the target directory is missing.
os.makedirs(os.path.dirname(gif_path), exist_ok=True)
# First frame plus every second later frame, to keep the file small.
ims[0].save(gif_path, save_all=True, append_images=ims[1::2], loop=0, duration=5)
print(i)