# A Practitioner’s Guide to Reinforcement Learning
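This notebook trains a tabular Q-learning agent on a Gymnasium environment and then evaluates the learned greedy policy. It accompanies the article [A Practitioner’s Guide to Reinforcement Learning](https://medium.com/towards-data-science/a-practitioners-guide-to-reinforcement-learning-1f1e249f8fa5).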

In [None]:
from itertools import product
import random

from gymnasium.spaces.tuple import Tuple


def space_to_tuples(space):
    """Lazily enumerate every encoding of a finite space.

    For a ``Tuple`` space, each yielded item is a tuple holding one value
    per factor, where factor ``f`` contributes the values ``0 .. f.n - 1``;
    combinations come out in ``itertools.product`` order.  For any other
    space, the plain integers ``0 .. space.n - 1`` are yielded.
    """
    if not isinstance(space, Tuple):
        # Simple case: the space itself exposes its size as ``n``.
        yield from range(space.n)
        return

    # Composite case: take the cartesian product of all factor ranges.
    factor_ranges = [range(component.n) for component in space]
    yield from product(*factor_ranges)

def get_best_action(q_table, state):
    """Return the action with the highest Q-value for ``state``.

    ``q_table[state]`` maps each action to its current value estimate.
    When several actions share the maximum value, the one that comes first
    in that mapping's iteration order wins.  An empty mapping makes
    ``max`` raise ``ValueError``.
    """
    action_values = q_table[state]
    return max(action_values, key=action_values.get)

In [None]:
import gymnasium as gym
from PIL import Image

# Create the environment the agent is trained and evaluated on.
# NOTE(review): per the Gymnasium docs, is_slippery=False should make
# FrozenLake moves deterministic and render_mode="rgb_array" should make
# env.render() return image arrays -- confirm against the installed version.
env = gym.make("FrozenLake-v1", render_mode="rgb_array", is_slippery=False)
# Alternative environments; uncomment one (and comment out the line above)
# to train on it instead.
# env = gym.make("Taxi-v3", render_mode="rgb_array")
# env = gym.make('Blackjack-v1', natural=True, sab=False, render_mode="rgb_array")

In [None]:
# Hyperparameters of the Q-learning run.
alpha = 0.1  # learning rate: weight given to the new target in each update
gamma = 0.9  # discount factor applied to the next state's best value
n_episodes = 100_000  # number of training episodes
max_steps = 100  # upper bound on the steps played per training episode
epsilon = 0.2  # probability of picking a random (exploratory) action

# Q-table: one entry per state, each mapping every action to an initial
# value estimate of 0.
q_table = {
    state: dict.fromkeys(space_to_tuples(env.action_space), 0)
    for state in space_to_tuples(env.observation_space)
}

In [None]:
# Tabular Q-learning: play n_episodes games and refine q_table after every
# single step.
for _ in range(n_episodes):
    # new episode (game), so we need a reset
    state, _ = env.reset()

    # play the game for at most max_steps steps
    for step in range(max_steps):
        # epsilon-greedy action selection
        if random.random() < epsilon:
            # sometimes the action is random to encourage exploration,
            # otherwise we cannot find good policies ...
            action = env.action_space.sample()
        else:
            # ... and sometimes we use the best action according to our Q-table
            action = get_best_action(q_table, state)

        # we take that action and then get some data from the game
        next_state, reward, terminated, truncated, _ = env.step(action)

        # Build the learning target.  After a terminal step there is no
        # future to discount, so the target is the reward alone.  Looking up
        # q_table[next_state] here would be wrong whenever the observation
        # returned with the terminal step is also the key of an ordinary
        # state (e.g. an environment that hands back the unchanged
        # observation on its last step).
        if terminated:
            target = reward
        else:
            target = reward + gamma * max(q_table[next_state].values())

        # move the old estimate a fraction alpha towards the target
        q_table[state][action] = (1 - alpha) * q_table[state][action] + alpha * target

        # The episode is over if the game was won/lost (terminated) or cut
        # short by the environment, e.g. by a time limit (truncated); either
        # way the environment must be reset before stepping again.
        if terminated or truncated:
            break

        # the episode continues, so update the current state (we took an action!)
        state = next_state

In [None]:
def play_episode(env, q_table, max_steps=None):
    """Play one episode greedily and return the reward of its final step.

    The action with the highest Q-value is taken in every state; there is
    no exploration.  The episode stops as soon as the environment reports
    ``terminated`` or ``truncated``.  Honouring ``truncated`` matters: a
    greedy policy that never reaches a terminal state (for instance one
    that keeps choosing an action which leaves the state unchanged) would
    otherwise loop forever.

    Args:
        env: Environment exposing the Gymnasium ``reset()`` / ``step()`` API.
        q_table: Mapping ``state -> {action: value}``.
        max_steps: Optional cap on the number of steps played, as a safety
            net for environments that never set ``truncated``.  ``None``
            (the default) means no cap.  At least one step is always played.

    Returns:
        The reward returned by the last ``env.step`` call of the episode.
        Note that this is the final step's reward, not the sum of rewards
        collected over the episode.
    """
    state, _ = env.reset()

    steps_played = 0
    while True:
        action = get_best_action(q_table, state)
        state, reward, terminated, truncated, _ = env.step(action)
        steps_played += 1

        if terminated or truncated:
            break
        if max_steps is not None and steps_played >= max_steps:
            break

    return reward

# Evaluate the learned policy: collect the value returned by play_episode
# for each of 10,000 episodes.
rewards = [play_episode(env, q_table) for _ in range(10000)]

In [None]:
from collections import Counter

# Tally how often each distinct value in `rewards` occurred and print the counts.
print(Counter(rewards))