# Library 

In [132]:
import gym

# Implementation

In [29]:
# Build the FrozenLake environment with slipping disabled and an on-screen render window.
env = gym.make('FrozenLake-v1', render_mode='human', is_slippery=False)
# reset() hands back an (observation, info) pair; the whole pair is stored and displayed here.
cur_state = env.reset()
cur_state

(0, {'prob': 1})

## Number of States & Actions

In [32]:
# Show the observation space object itself (its repr includes the number of states).
print('num_states :', env.observation_space)

num_states : Discrete(16)


In [34]:
# `.n` reports the size of the observation space as a single number.
print('num_states :', env.observation_space.n)

num_states : 16


In [38]:
# Show the action space object itself (its repr includes the number of actions).
print('num_actions :', env.action_space)

num_actions : Discrete(4)


### Actions
1. (0) Left 
2. (1) Down
3. (2) Right
4. (3) Up

In [40]:
# `.n` reports the size of the action space as a single number.
print('num_actions :', env.action_space.n)

num_actions : 4


## Action probabilities

### Deterministic Environment 

In [42]:
# P[state][action] lists every possible outcome of taking `action` in `state`.
# Each entry is a tuple: (probability, next_state, reward, terminated).
# `unwrapped` reaches the underlying FrozenLake environment directly instead of relying on
# the wrappers added by gym.make() to forward the `P` attribute.
print('P : ', env.unwrapped.P[0][1])

P :  [(1.0, 4, 0.0, False)]


### Stochastic Environment 

In [134]:
# Release the previous environment (and its render window) before replacing it.
env.close()
# Same environment id, now with is_slippery=True, which adds noise to the outcome of each action.
env = gym.make('FrozenLake-v1', render_mode='human', is_slippery=True)

In [90]:
# P[state][action] lists every possible outcome of taking `action` in `state`.
# Each entry is a tuple: (probability, next_state, reward, terminated); with the slippery
# environment one action can lead to several next states, so the list has several entries.
# `unwrapped` reaches the underlying FrozenLake environment directly instead of relying on
# the wrappers added by gym.make() to forward the `P` attribute.
print('P : ', env.unwrapped.P[0][1])

P :  [(0.3333333333333333, 0, 0.0, False), (0.3333333333333333, 4, 0.0, False), (0.3333333333333333, 1, 0.0, False)]


### State variable Content 
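- `env.step(action)` returns (next_state, reward, terminated, truncated, info), where `info['prob']` is the transition probability from the old state to the new state
- done = terminated or truncated
- terminated -> the agent reached a terminal state (the goal or a hole)
- truncated -> the allowed time is up (the step limit was reached)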

In [88]:
# Restart the episode, then apply action 1 (Down) exactly once.
env.reset()
# In the slippery environment the agent actually moves Down with probability 1/3.
# `state` receives the complete value returned by step(), not only the next state.
state = env.step(1)
env.render()
print(state)

(0, 0.0, False, False, {'prob': 0.3333333333333333})


## Random action space

In [104]:
# Draw one action at random from the environment's action space and show it.
random_action = env.action_space.sample()
print(random_action)

1


## Random episode generation

In [140]:
# Run several episodes in which every action is sampled at random from the action space.
num_episodes = 10
num_timesteps = 20  # maximum allowed steps per episode
for e in range(num_episodes):
    # reset() returns an (observation, info) pair; unpack it so `state` is the state itself
    # rather than the whole tuple.
    state, info = env.reset()
    env.render()
    for t in range(num_timesteps):
        random_action = env.action_space.sample()
        new_state, reward, terminated, truncated, info = env.step(random_action)
        print('Time stamp {} :'.format(t+1) , 'Reward {} :'.format(reward),  'Episode {}'.format(e+1))
        env.render()
        state = new_state  # keep `state` in sync with the agent's current state
        # The episode is over once the environment reports termination or truncation.
        done = terminated or truncated
        if done:
            break

Time stamp 1 : Reward 0.0 : Episode 1
Time stamp 2 : Reward 0.0 : Episode 1
Time stamp 3 : Reward 0.0 : Episode 1
Time stamp 1 : Reward 0.0 : Episode 2
Time stamp 2 : Reward 0.0 : Episode 2
Time stamp 1 : Reward 0.0 : Episode 3
Time stamp 2 : Reward 0.0 : Episode 3
Time stamp 3 : Reward 0.0 : Episode 3
Time stamp 4 : Reward 0.0 : Episode 3
Time stamp 5 : Reward 0.0 : Episode 3
Time stamp 6 : Reward 0.0 : Episode 3
Time stamp 7 : Reward 0.0 : Episode 3
Time stamp 8 : Reward 0.0 : Episode 3
Time stamp 9 : Reward 0.0 : Episode 3
Time stamp 10 : Reward 0.0 : Episode 3
Time stamp 1 : Reward 0.0 : Episode 4
Time stamp 2 : Reward 0.0 : Episode 4
Time stamp 3 : Reward 0.0 : Episode 4
Time stamp 4 : Reward 0.0 : Episode 4
Time stamp 5 : Reward 0.0 : Episode 4
Time stamp 6 : Reward 0.0 : Episode 4
Time stamp 7 : Reward 0.0 : Episode 4
Time stamp 8 : Reward 0.0 : Episode 4
Time stamp 9 : Reward 0.0 : Episode 4
Time stamp 10 : Reward 0.0 : Episode 4
Time stamp 11 : Reward 0.0 : Episode 4
Time stam

In [142]:
# Done with the environment: close it to release its resources (including the render window).
env.close()