In [None]:
!pip install -r https://raw.githubusercontent.com/malkiAbdelhamid/Advanced-Deep-Learning-2023-2024-esisba/master/lab1_QLearning/requirements_lab1.txt

#### 👉Before you solve a Reinforcement Learning problem, you need to define:
- the environment
- the states
- the actions
- the rewards

#### 👉We are using the `FrozenLake-v1` environment from Gymnasium (the maintained successor of OpenAI's Gym): https://gymnasium.farama.org/environments/toy_text/frozen_lake/

#### 👉`FrozenLake-v1` is an easy environment because both the action space (4 actions) and the state space (16 states on the default 4x4 map) are small and finite.

#### 👉Environments with a finite number of actions and states are called tabular

### Import the Gymnasium Library

In [None]:
import gymnasium as gym
from gymnasium.envs.toy_text.frozen_lake import  generate_random_map


### Create a FrozenLake-v1 environment with gym.make()
- default map=4x4
- In order to display the environment's current state you need to add the parameter==> render_mode="rgb_array"

In [None]:

# Default 4x4 FrozenLake map with deterministic moves (no slipping).
# render_mode="rgb_array" makes env.render() return a frame that can be plotted.
env = gym.make(
    "FrozenLake-v1",
    render_mode="rgb_array",
    is_slippery=False,
)
# Alternative: a randomly generated 8x8 map.
#env=gym.make("FrozenLake-v1",desc=generate_random_map(size=8), map_name="8x8", is_slippery=False, render_mode="rgb_array")

### We reset the environment to its initial state with env.reset(), which returns a tuple (state, info)

In [None]:
# Gymnasium's reset() returns a (observation, info) tuple; unpack it so that
# `state` holds the initial state index rather than the whole tuple.
state, info = env.reset()

#### State space
-  The state is a value representing the agent’s current position as current_row * ncols + current_col (where both the row and col start at 0).
- For example, the goal position in the 8x8 map can be calculated as follows: 7 * 8 + 7 = 63. The number of possible observations is dependent on the size of the map. For example, the 8x8 map has 64 possible states.

In [None]:
# Size of the discrete state space, followed by one randomly sampled state.
n_states = env.observation_space.n
print("State Space {}".format(n_states))
print(env.observation_space.sample())

#### Action space :
##### The agent takes a 1-element vector for actions. The action space is (dir), where dir decides direction to move in which can be:

- 0: LEFT

- 1: DOWN

- 2: RIGHT

- 3: UP

In [None]:
# Size of the discrete action space, followed by one randomly sampled action.
n_actions = env.action_space.n
print("Action Space {}".format(n_actions))
print(env.action_space.sample())

#### env.render(): display the environment's current state

In [None]:
import matplotlib.pyplot as plt

# Grab the environment's current frame and draw it with matplotlib.
image = env.render()
plt.imshow(image)


#### env.step(n_action)--> next state, reward, terminated,truncated, info
#### Updates an environment with actions returning:
 - the next agent state,
 - the reward for taking that action,
 - if the environment has terminated or truncated due to the latest action
 - and information from the environment about the step

In [None]:
# Take one step with action 2 (RIGHT) and unpack the five values step() returns;
# the trailing info value is discarded.
step_outcome = env.step(2)
next_state, reward, isTerminated, isTruncated, _ = step_outcome
print(next_state, reward, isTerminated, isTruncated)
# Show the environment after the move.
plt.imshow(env.render())

#### First, reset the environment, then define the trajectory, i.e. the sequence of actions required to reach the goal
#### Finally, record the different steps through a video

In [None]:
import imageio
import numpy as np

# Action sequence to replay: right, right, down, down, down, right
# (0: LEFT, 1: DOWN, 2: RIGHT, 3: UP).
trajectory = [2, 2, 1, 1, 1, 2]

env.reset()
images = [env.render()]  # frame of the initial state

# Apply each action in order and capture the frame it produces.
for action in trajectory:
    env.step(action)
    images.append(env.render())

# Write the captured frames as a video at one frame per second.
imageio.mimsave('./render.mp4', [np.array(img) for img in images], fps=1)

In [None]:
from base64 import b64encode
from IPython.display import HTML

# Show video
# The path must match the file written by imageio.mimsave in the previous cell
# ('./render.mp4'); 'replay.mp4' is never created by this notebook.
compressed_path = './render.mp4'
# Read the bytes inside a context manager so the file handle is always closed.
with open(compressed_path, 'rb') as video_file:
    mp4 = video_file.read()
# Embed the video inline as a base64 data URL so no separate file has to be served.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

HTML("""<video width=400 controls>      <source src="%s" type="video/mp4"></video>""" % data_url)

## vectorized environment gym
#### We create a vectorized environment (a method for stacking multiple independent environments into a single environment) of 4 environments (`num_envs=4`); this way, we’ll have more diverse experiences during the training.
https://gymnasium.farama.org/api/vector/

In [None]:
# Vectorized environment made of 4 FrozenLake sub-environments.
# NOTE(review): gym.vector.make appears to be deprecated in newer Gymnasium
# releases in favour of gym.make_vec — confirm against the installed version.
envs = gym.vector.make('FrozenLake-v1', num_envs=4)
# A single (non-vectorized) FrozenLake environment with default settings.
env2 = gym.make('FrozenLake-v1')

In [None]:
# Reset the vectorized environment with a fixed seed; the returned value is
# displayed as this cell's output.
envs.reset(seed=42)


In [None]:
# Reset the single environment `env`; the returned value is displayed as this
# cell's output.
env.reset()