<h1 style="text-align:center;"><span style="color:blue;">Reinforcement Learning with OpenAI Gym</span></h1><br />

<center><img src ="area-51.jpg" width="500" /></center>

- **A** - Action
- **R** - Reward
- **E** - Environment
- **A** - Agent

<img src="RL_illustration.png" />

- [Cart Pole Control Environment in OpenAI Gym - Introduction to OpenAI Gym](https://aleksandarhaber.com/cart-pole-control-environment-in-openai-gym-gymnasium-introduction-to-openai-gym/)


In [15]:
# Import the libraries (install them first with %pip install, e.g. in VS Code, if needed)
# Remove the leading '#' from the lines below if you need to install a package
#%pip install gym
#%pip install numpy
# (time is part of the Python standard library - nothing to install)
#%pip install pygame
import gym
import numpy as np
import time
import pygame
from pygame import gfxdraw

## Create an Environment for our Cart Pole

[gymnasium.Env](https://gymnasium.farama.org/api/env/)

In [16]:
# Build the CartPole-v1 environment; render_mode='human' requests on-screen rendering.
env = gym.make(
    'CartPole-v1',
    render_mode='human',
)

# reset() hands back a pair; keep the first element as the starting state
# and discard the second.
state, _ = env.reset()

In [17]:
# If you want to close the game window, uncomment the line below
#env.close()

### The States - what is happening in each frame

<img src="cart-states.png" />

#### Four States

1. x Position of the cart
2. &#7819; Velocity of the cart
3. &#952; Pole angle
4. &#952;&#775; Angular Velocity (Theta dot)

In [18]:
# Render the environment in its current state.
# NOTE(review): this call alone does not apply any action; stepping is done
# with env.step() in the cells below.
env.render()

In [19]:
# Apply one action to the environment (0 = push the cart to the left).
# step() returns a 5-tuple: (observation, reward, terminated, truncated, info).
env.step(0)


(array([-0.01122449, -0.22503795,  0.04272921,  0.30858096], dtype=float32),
 1.0,
 False,
 False,
 {})

In [20]:
# Observation space: a Box holding the lower and upper bound of each of the
# four state variables (its .low and .high are shown in the next cells).
env.observation_space

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)

In [21]:
# Upper bound of each of the four state variables
env.observation_space.high

array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38],
      dtype=float32)

In [22]:
# Lower bound of each of the four state variables
env.observation_space.low

array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38],
      dtype=float32)

In [23]:
# Action space: the set of actions that can be passed to env.step()
env.action_space

Discrete(2)

In [24]:
# Full environment specification (id, entry point, reward threshold,
# step limit per episode, and the kwargs passed to gym.make)
env.spec

EnvSpec(id='CartPole-v1', entry_point='gym.envs.classic_control.cartpole:CartPoleEnv', reward_threshold=475.0, nondeterministic=False, max_episode_steps=500, order_enforce=True, autoreset=False, disable_env_checker=False, apply_api_compatibility=False, kwargs={'render_mode': 'human'}, namespace=None, name='CartPole', version=1)

In [25]:
# Maximum number of steps per episode, as recorded in the environment spec
env.spec.max_episode_steps

500

In [26]:
# Reward threshold per episode, as recorded in the environment spec
# NOTE(review): presumably the total reward at which the task counts as
# solved - confirm against the Gym documentation.
env.spec.reward_threshold

475.0

In [27]:
# Parameters for the random-action simulation below
episodeNumber = 100  # how many episodes to play
timeSteps = 100      # upper bound on steps taken within one episode

In [28]:
# Run the simulation: play `episodeNumber` episodes with uniformly random
# actions, taking at most `timeSteps` steps in each episode.
# The episode index and every step index are printed as the run progresses.
try:
    for episodeIndex in range(episodeNumber):
        # reset() returns an (observation, info) pair; unpack it so that
        # initial_state holds only the observation.
        initial_state, _ = env.reset()
        print(episodeIndex)
        # Observations collected during the current episode only.
        appendedObservations = []
        for timeIndex in range(timeSteps):
            print(timeIndex)
            random_action = env.action_space.sample()
            observation, reward, terminated, truncated, info = env.step(random_action)
            appendedObservations.append(observation)
            time.sleep(0.0001)
            # Stop on either end-of-episode signal: the environment must be
            # reset before it is stepped again once an episode has ended.
            if terminated or truncated:
                time.sleep(0.001)
                break
finally:
    # Release the environment even if the loop is interrupted or raises.
    env.close()

0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
1
0
1
2
3
4
5
6
7
8
9
10
11
12


13
14
15
2
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
3
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
4
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
5
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
6
0
1
2
3
4
5
6
7
8
9
10
11
12
13
7
0
1
2
3
4
5
6
7
8
9
10
11
8
0
1
2
3
4
5
6
7
8
9
10
9
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
10
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
11
0
1
2
3
4
5
6
7
8
9
10
11
12
12
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
13
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
14
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
15
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
16
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
0
1
2
3
4
5
6
7
8
9
10
11
12
1

In [29]:
# Close the environment. The simulation cell above also calls env.close()
# when it finishes, so this is a repeat call if that cell ran to completion.
env.close()