In [1]:
import gym
import numpy as np

from tqdm import trange

# Build the Pendulum-v0 environment; later cells inspect and step this `env`.
env = gym.make('Pendulum-v0')

The pendulum challenge is to keep a
frictionless pendulum standing upright.

# The Pendulum

## States

| Num | Observation |
|:-:|:-:|
| 0 | cos(theta) |
| 1 | sin(theta) |
| 2 | theta dot |


<style>
td {
  font-size: 100px
}
    
    
</style>


In [2]:
# Summarise the observation space: its shape and per-dimension bounds.
print(f"The shape of the state space: {env.observation_space.shape}")
print(f"Highest value: {env.observation_space.high}")
print(f"Lowest value: {env.observation_space.low}")

# Draw one random observation to show what a state looks like.
# No seed is set in this cell, so the printed values may differ between runs.
print(f"A sample state: {env.observation_space.sample()}")

The shape of the state space: (3,)
Highest value: [1. 1. 8.]
Lowest value: [-1. -1. -8.]
A sample state: [ 0.82655776 -0.11062134 -6.148579  ]


## Actions

| Num | Action |
|:-:|:-:|
| 0 | Joint effort |

In [3]:
# Summarise the action space: its shape and bounds.
print(f"The shape of the action space: {env.action_space.shape}")
print(f"Highest value: {env.action_space.high}")
print(f"Lowest value: {env.action_space.low}")

# Seed the action-space sampler before drawing from it
# (presumably so the sample below is repeatable — verify against the gym version in use).
env.action_space.seed(473)

print(f"A sample action: {env.action_space.sample()}")

The shape of the action space: (1,)
Highest value: [2.]
Lowest value: [-2.]
A sample action: [-0.8608973]


In [4]:
# Draw one more action; as the cell's last expression, its value is displayed.
env.action_space.sample()

array([1.8785995], dtype=float32)

## Reward

Function: $-(\theta^2 + 0.1\,\dot{\theta}^2 + 0.001\,\text{action}^2)$

# The Agent

In [5]:
import sys
from collections import deque

import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
class Agent:
    """Baseline policy that ignores the observation and acts at random."""

    def __init__(self):
        """Nothing to set up: the policy keeps no parameters or memory."""

    def act(self, state):
        """Return a random action for the given state.

        Args:
            state: Current observation; it is not used by this policy.

        Returns:
            A one-element list holding a draw from a normal distribution
            with mean 0 and standard deviation 0.8.
        """
        torque = np.random.normal(loc=0.0, scale=0.8)
        return [torque]


In [7]:
## Utils

def print_iteaction(iteraction, score, best_score):
    """Print a one-line, self-overwriting progress report for an episode.

    Args:
        iteraction: Index of the episode being reported.
        score: Total reward collected in that episode.
        best_score: Highest episode score seen so far.

    The line ends with a carriage return instead of a newline, so successive
    calls overwrite each other in place.
    """
    # Report the episode index that was passed in rather than a global `i`.
    # Both scores use a fixed field width so every line has the same length
    # and fully overwrites the previous one (no stale trailing characters).
    print(f"{iteraction:4d} - Best Score: {best_score:9.2f} - {score:9.2f}",
          end="\r",
          flush=True)
    

In [8]:
num_episodes = 1000
max_steps = 300  # upper bound on the number of steps taken in one episode

agent = Agent()

best_score = -np.inf

for i in range(num_episodes):
    score = 0
    state = env.reset()

    # Named throwaway variables keep IPython's special `_` (last output) intact.
    for _step in range(max_steps):
        action = agent.act(state)
        state, reward, done, _info = env.step(action)

        score += reward
        if done:
            break

    # Record and report every episode, whether it ended because the
    # environment signalled `done` or because the step cap was reached.
    if score > best_score:
        best_score = score
    print_iteaction(i, score, best_score)

 999 - Best Score: -595.89 - -1166.047