In [1]:
import os

import gym
import matplotlib.pyplot as plt
import numpy as np

# --- Environment -----------------------------------------------------------
env = gym.make("MountainCar-v0")

# --- Q-learning hyperparameters --------------------------------------------
LEARNING_RATE = 0.1   # weight given to the new estimate in each Q update
DISCOUNT = 0.95       # weight given to the best future Q-value
EPISODES = 2000       # total number of training episodes

# Every SHOW_EVERY-th episode is rendered, checkpointed and summarised.
SHOW_EVERY = 500

# --- Observation discretisation --------------------------------------------
# The continuous observation is bucketed into 20 bins per dimension so it can
# index a dense Q-table.
DISCRETE_OS_SIZE = [20] * len(env.observation_space.high)
observation_span = env.observation_space.high - env.observation_space.low
discrete_os_win_size = observation_span / DISCRETE_OS_SIZE

# --- Exploration schedule --------------------------------------------------
# epsilon is the probability of taking a random action; it is reduced by a
# fixed amount per episode between the two episode bounds below.
epsilon = 0.5
START_EPSILON_DECAYING = 1
END_EPISODE_DECAYING = EPISODES // 2

epsilon_decay_value = epsilon / (END_EPISODE_DECAYING - START_EPSILON_DECAYING)

# --- Q-table ---------------------------------------------------------------
# One axis per observation dimension plus a trailing axis over the actions,
# initialised with random values in [-2, 0).
q_table = np.random.uniform(
    low=-2,
    high=0,
    size=DISCRETE_OS_SIZE + [env.action_space.n],
)

# --- Training statistics ---------------------------------------------------
ep_rewards = []  # total reward of every finished episode, in order
# Aggregated stats sampled every SHOW_EVERY episodes, used for plotting.
aggr_ep_rewards = {stat: [] for stat in ('ep', 'avg', 'min', 'max')}



def get_discrete_state(state):
    """Map a continuous observation to its Q-table bucket indices.

    Args:
        state: Observation array as returned by the environment; it is offset
            by ``env.observation_space.low`` and divided element-wise by
            ``discrete_os_win_size``.

    Returns:
        A tuple of integer bucket indices, one per observation dimension,
        usable directly as an index into ``q_table``.
    """
    scaled = (state - env.observation_space.low) / discrete_os_win_size
    # The builtin ``int`` replaces the ``np.int`` alias, which was removed
    # from NumPy in 1.24 and raises AttributeError there.
    indices = scaled.astype(int)
    # An observation sitting exactly on the upper bound scales to the bucket
    # count itself, which is one past the last valid index; clamp it back in.
    last_bucket = np.asarray(DISCRETE_OS_SIZE) - 1
    return tuple(np.clip(indices, 0, last_bucket))


# Checkpoints are written under qtables/; create the directory up front so
# the first np.save call cannot fail when it does not exist yet.
os.makedirs("qtables", exist_ok=True)

for episode in range(EPISODES):
    episode_reward = 0

    # Only every SHOW_EVERY-th episode is announced and rendered.
    render = episode % SHOW_EVERY == 0
    if render:
        print(episode)

    done = False
    discrete_state = get_discrete_state(env.reset())

    while not done:
        # Epsilon-greedy selection: exploit the current Q-table most of the
        # time, otherwise pick a random action. The chosen action must not be
        # overwritten afterwards, or the exploration branch has no effect.
        if np.random.random() > epsilon:
            action = np.argmax(q_table[discrete_state])
        else:
            action = np.random.randint(0, env.action_space.n)

        new_state, reward, done, _ = env.step(action)
        episode_reward += reward
        new_discrete_state = get_discrete_state(new_state)

        if render:
            env.render()

        q_index = discrete_state + (action,)
        if not done:
            # Standard Q-learning update towards
            # reward + DISCOUNT * (best Q-value of the next state).
            max_future_q = np.max(q_table[new_discrete_state])
            current_q = q_table[q_index]
            new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward + DISCOUNT * max_future_q)
            q_table[q_index] = new_q
        elif new_state[0] >= env.goal_position:
            # The episode ended at the goal: give the state/action pair that
            # got there a Q-value of 0, the upper bound of the initial table.
            print(f"We made in on episode {episode}")
            q_table[q_index] = 0

        discrete_state = new_discrete_state

    if END_EPISODE_DECAYING >= episode >= START_EPSILON_DECAYING:
        # Clamp at zero: the inclusive range applies one more decrement than
        # the divisor used for epsilon_decay_value, which would otherwise
        # leave epsilon slightly negative.
        epsilon = max(0.0, epsilon - epsilon_decay_value)

    ep_rewards.append(episode_reward)

    if episode % SHOW_EVERY == 0:
        np.save(f"qtables/{episode}-qtable.npy", q_table)

        # Summarise the most recent SHOW_EVERY episodes (fewer at the start).
        recent_rewards = ep_rewards[-SHOW_EVERY:]
        average_reward = sum(recent_rewards) / len(recent_rewards)
        lowest_reward = min(recent_rewards)
        highest_reward = max(recent_rewards)

        aggr_ep_rewards['ep'].append(episode)
        aggr_ep_rewards['avg'].append(average_reward)
        aggr_ep_rewards['min'].append(lowest_reward)
        aggr_ep_rewards['max'].append(highest_reward)

        print(f"Episdoe: {episode} avg: {average_reward} min: {lowest_reward} max; {highest_reward}")


# Training is finished; shut the environment down before plotting.
env.close()

# Draw one curve per aggregated statistic against the episode number at
# which it was sampled.
for series in ('avg', 'min', 'max'):
    plt.plot(aggr_ep_rewards['ep'], aggr_ep_rewards[series], label=series)
plt.legend(loc=4)
plt.show()

0
Episdoe: 0 avg: -200.0 min: -200.0 max; -200.0
We made in on episode 290
We made in on episode 457
We made in on episode 464
We made in on episode 488
We made in on episode 490
We made in on episode 493
We made in on episode 494
500
Episdoe: 500 avg: -199.75 min: -200.0 max; -165.0
We made in on episode 555
We made in on episode 558
We made in on episode 560
We made in on episode 561
We made in on episode 562
We made in on episode 563
We made in on episode 564
We made in on episode 565
We made in on episode 569
We made in on episode 571
We made in on episode 582
We made in on episode 611
We made in on episode 612
We made in on episode 627
We made in on episode 642
We made in on episode 723
We made in on episode 725
We made in on episode 728
We made in on episode 738
We made in on episode 807
We made in on episode 808
We made in on episode 812
We made in on episode 828
We made in on episode 838
We made in on episode 849
We made in on episode 852
We made in on episode 856
We made in on

We made in on episode 1730
We made in on episode 1732
We made in on episode 1733
We made in on episode 1734
We made in on episode 1735
We made in on episode 1736
We made in on episode 1738
We made in on episode 1740
We made in on episode 1741
We made in on episode 1742
We made in on episode 1743
We made in on episode 1745
We made in on episode 1746
We made in on episode 1747
We made in on episode 1749
We made in on episode 1751
We made in on episode 1752
We made in on episode 1753
We made in on episode 1756
We made in on episode 1757
We made in on episode 1758
We made in on episode 1759
We made in on episode 1760
We made in on episode 1763
We made in on episode 1765
We made in on episode 1769
We made in on episode 1770
We made in on episode 1773
We made in on episode 1774
We made in on episode 1776
We made in on episode 1777
We made in on episode 1781
We made in on episode 1784
We made in on episode 1794
We made in on episode 1798
We made in on episode 1799
We made in on episode 1800
W

<Figure size 640x480 with 1 Axes>