# **FrozenLake – generowanie epizodu**

In [1]:
import gym
import numpy as np
import random

# Create the 4x4 FrozenLake environment. is_slippery=False is passed to turn off
# slipping (presumably this makes transitions deterministic — confirm in the gym docs).
env = gym.make("FrozenLake-v0", map_name='4x4', is_slippery=False)

Funkcja generująca losową politykę stochastyczną (w każdym stanie każda akcja jest jednakowo prawdopodobna):

In [2]:
def create_random_sto_policy(env):
    """Build a uniform stochastic policy for a discrete environment.

    Args:
        env: Object exposing ``observation_space.n`` (number of states) and
            ``action_space.n`` (number of actions).

    Returns:
        dict: ``{state: {action: probability}}`` in which every action of every
        state gets the probability ``1 / env.action_space.n``. Each state owns
        its own inner dict, so entries can be modified independently.
    """
    return {
        state: {
            action: 1 / env.action_space.n
            for action in range(env.action_space.n)
        }
        for state in range(env.observation_space.n)
    }

Testujemy:

In [3]:
# Build the uniform policy for the environment; the bare name on the last line
# makes the notebook display the resulting dict as the cell output.
policy = create_random_sto_policy(env)
policy

{0: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 1: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 2: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 3: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 4: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 5: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 6: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 7: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 8: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 9: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 10: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 11: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 12: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 13: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 14: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25},
 15: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25}}

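Funkcja generująca epizod (listę kroków postaci `[stan, akcja, nagroda]`), w którym akcje są losowane zgodnie z podaną polityką: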

In [4]:
def generate_episode(env, policy):
    """Roll out one episode in ``env``, sampling every action from ``policy``.

    Args:
        env: Environment following the classic gym interface used in this
            notebook: ``reset()`` returns the initial state and
            ``step(action)`` returns ``(next_state, reward, done, info)``.
        policy: Mapping ``state -> {action: weight}``. The weights act as
            relative probabilities and do not have to sum to 1. Every visited
            state must map to at least one action with a positive weight.

    Returns:
        list: One ``[state, action, reward]`` list per time step, in the order
        the steps were taken. ``state`` is the state the action was taken in.

    Raises:
        KeyError: If a visited state is missing from ``policy``.
    """
    # Use the observation returned by the public API instead of reading the
    # environment's internal ``s`` attribute.
    state = env.reset()
    episode = []
    finished = False

    while not finished:
        action_weights = policy[state]

        # random.choices always yields one of the listed actions. A manual
        # cumulative-sum scan against random.uniform(0, total) can fall through
        # without picking anything when the draw equals the upper bound, which
        # leaves the action unset (or stale from the previous step).
        action = random.choices(
            list(action_weights.keys()),
            weights=list(action_weights.values()),
        )[0]

        # step() returns (observation, reward, done, info).
        next_state, reward, finished, _ = env.step(action)

        episode.append([state, action, reward])
        state = next_state

    return episode

Testujemy:

In [5]:
# Legend for the action codes that appear in the printed episodes.
print("LEFT = 0 DOWN = 1 RIGHT = 2 UP = 3")
# Sample and print 100 episodes generated with the policy built above.
for episode_idx in range(100):
    sampled_episode = generate_episode(env, policy)
    print("Epizod ", episode_idx, ": ", sampled_episode)

LEFT = 0 DOWN = 1 RIGHT = 2 UP = 3
Epizod  0 :  [[0, 3, 0.0], [0, 0, 0.0], [0, 1, 0.0], [4, 1, 0.0], [8, 1, 0.0]]
Epizod  1 :  [[0, 1, 0.0], [4, 2, 0.0]]
Epizod  2 :  [[0, 3, 0.0], [0, 3, 0.0], [0, 1, 0.0], [4, 2, 0.0]]
Epizod  3 :  [[0, 3, 0.0], [0, 0, 0.0], [0, 1, 0.0], [4, 3, 0.0], [0, 2, 0.0], [1, 1, 0.0]]
Epizod  4 :  [[0, 0, 0.0], [0, 0, 0.0], [0, 1, 0.0], [4, 0, 0.0], [4, 1, 0.0], [8, 3, 0.0], [4, 0, 0.0], [4, 0, 0.0], [4, 1, 0.0], [8, 1, 0.0]]
Epizod  5 :  [[0, 2, 0.0], [1, 0, 0.0], [0, 0, 0.0], [0, 1, 0.0], [4, 0, 0.0], [4, 1, 0.0], [8, 1, 0.0]]
Epizod  6 :  [[0, 0, 0.0], [0, 0, 0.0], [0, 0, 0.0], [0, 0, 0.0], [0, 0, 0.0], [0, 0, 0.0], [0, 2, 0.0], [1, 1, 0.0]]
Epizod  7 :  [[0, 2, 0.0], [1, 0, 0.0], [0, 1, 0.0], [4, 1, 0.0], [8, 0, 0.0], [8, 2, 0.0], [9, 1, 0.0], [13, 2, 0.0], [14, 0, 0.0], [13, 2, 0.0], [14, 3, 0.0], [10, 3, 0.0], [6, 3, 0.0], [2, 0, 0.0], [1, 0, 0.0], [0, 2, 0.0], [1, 0, 0.0], [0, 0, 0.0], [0, 1, 0.0], [4, 2, 0.0]]
Epizod  8 :  [[0, 3, 0.0], [0, 0, 0.0], [0