In [1]:
import gym
import pygame
from stable_baselines3 import PPO
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [9]:
env_name = 'CartPole-v0'
env = gym.make(env_name)

In [10]:
# Here we are initializing the agent to chose random actions and seeing if the environment is working properly
for episode in range(1,11):
    score = 0 
    # re-setting it as a new game
    state = env.reset()
    done = False
    while not done:
        env.render()
        # action space is the set of actions, which is two for our env, 
        # and returns a random number, either 0 or 1, that is, moving left or right
        action = env.action_space.sample()
        # execute the action
        n_state, reward, done, info = env.step(action)
        # this above statement exectues the corresponding action and returns 4 variables
        # if done is true then it breaks the loop
        # reward has the corresponding reward and we add it to the score
        score += reward
    print('Episode: ', episode)
    print('Score: ', score)
env.close()
# OBSERVED THAT IT DOES

Episode:  1
Score:  28.0
Episode:  2
Score:  25.0
Episode:  3
Score:  28.0
Episode:  4
Score:  22.0
Episode:  5
Score:  11.0
Episode:  6
Score:  14.0
Episode:  7
Score:  30.0
Episode:  8
Score:  55.0
Episode:  9
Score:  29.0
Episode:  10
Score:  11.0


In [15]:
env = gym.make(env_name)
# We have to vectorize our environment
# What does this mean?
env = DummyVecEnv([lambda: env])
# Policy is a set of instructions, how to take input and produce output
# CNN Policy, takes image as input
# LSTM Policy, observe the prevous n sttaes and gives action
# MLP Policy, takes in current state 
# Verbose = 1 means displays all the information in the console
model = DQN("MlpPolicy", env, verbose=1)

Using cpu device


In [16]:
model.learn(total_timesteps=100000, log_interval = 4)

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.995    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 4377     |
|    time_elapsed     | 0        |
|    total_timesteps  | 48       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.988    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 3650     |
|    time_elapsed     | 0        |
|    total_timesteps  | 131      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.981    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 3130     |
|    time_elapsed     | 0        |
|    total_timesteps  | 201      |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.78     |
| time/               |          |
|    episodes         | 112      |
|    fps              | 3562     |
|    time_elapsed     | 0        |
|    total_timesteps  | 2311     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.771    |
| time/               |          |
|    episodes         | 116      |
|    fps              | 3541     |
|    time_elapsed     | 0        |
|    total_timesteps  | 2407     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.761    |
| time/               |          |
|    episodes         | 120      |
|    fps              | 3552     |
|    time_elapsed     | 0        |
|    total_timesteps  | 2521     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.553    |
| time/               |          |
|    episodes         | 220      |
|    fps              | 3526     |
|    time_elapsed     | 1        |
|    total_timesteps  | 4710     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.536    |
| time/               |          |
|    episodes         | 224      |
|    fps              | 3548     |
|    time_elapsed     | 1        |
|    total_timesteps  | 4888     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.524    |
| time/               |          |
|    episodes         | 228      |
|    fps              | 3552     |
|    time_elapsed     | 1        |
|    total_timesteps  | 5014     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.311    |
| time/               |          |
|    episodes         | 328      |
|    fps              | 3518     |
|    time_elapsed     | 2        |
|    total_timesteps  | 7255     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.303    |
| time/               |          |
|    episodes         | 332      |
|    fps              | 3519     |
|    time_elapsed     | 2        |
|    total_timesteps  | 7335     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.296    |
| time/               |          |
|    episodes         | 336      |
|    fps              | 3522     |
|    time_elapsed     | 2        |
|    total_timesteps  | 7408     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0746   |
| time/               |          |
|    episodes         | 436      |
|    fps              | 3539     |
|    time_elapsed     | 2        |
|    total_timesteps  | 9741     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0689   |
| time/               |          |
|    episodes         | 440      |
|    fps              | 3539     |
|    time_elapsed     | 2        |
|    total_timesteps  | 9801     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.0624   |
| time/               |          |
|    episodes         | 444      |
|    fps              | 3498     |
|    time_elapsed     | 2        |
|    total_timesteps  | 9869     |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 544      |
|    fps              | 3323     |
|    time_elapsed     | 3        |
|    total_timesteps  | 12101    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 548      |
|    fps              | 3318     |
|    time_elapsed     | 3        |
|    total_timesteps  | 12189    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 552      |
|    fps              | 3279     |
|    time_elapsed     | 3        |
|    total_timesteps  | 12286    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 652      |
|    fps              | 3316     |
|    time_elapsed     | 4        |
|    total_timesteps  | 14467    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 656      |
|    fps              | 3322     |
|    time_elapsed     | 4        |
|    total_timesteps  | 14548    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 660      |
|    fps              | 3325     |
|    time_elapsed     | 4        |
|    total_timesteps  | 14613    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 760      |
|    fps              | 3260     |
|    time_elapsed     | 5        |
|    total_timesteps  | 17018    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 764      |
|    fps              | 3257     |
|    time_elapsed     | 5        |
|    total_timesteps  | 17086    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 768      |
|    fps              | 3255     |
|    time_elapsed     | 5        |
|    total_timesteps  | 17175    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 868      |
|    fps              | 3267     |
|    time_elapsed     | 5        |
|    total_timesteps  | 19335    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 872      |
|    fps              | 3268     |
|    time_elapsed     | 5        |
|    total_timesteps  | 19442    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 876      |
|    fps              | 3266     |
|    time_elapsed     | 5        |
|    total_timesteps  | 19519    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 976      |
|    fps              | 3296     |
|    time_elapsed     | 6        |
|    total_timesteps  | 21723    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 980      |
|    fps              | 3298     |
|    time_elapsed     | 6        |
|    total_timesteps  | 21798    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 984      |
|    fps              | 3297     |
|    time_elapsed     | 6        |
|    total_timesteps  | 21876    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1084     |
|    fps              | 3318     |
|    time_elapsed     | 7        |
|    total_timesteps  | 24316    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1088     |
|    fps              | 3320     |
|    time_elapsed     | 7        |
|    total_timesteps  | 24405    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1092     |
|    fps              | 3320     |
|    time_elapsed     | 7        |
|    total_timesteps  | 24479    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1192     |
|    fps              | 3329     |
|    time_elapsed     | 8        |
|    total_timesteps  | 26774    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1196     |
|    fps              | 3328     |
|    time_elapsed     | 8        |
|    total_timesteps  | 26845    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1200     |
|    fps              | 3328     |
|    time_elapsed     | 8        |
|    total_timesteps  | 26924    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1300     |
|    fps              | 3352     |
|    time_elapsed     | 8        |
|    total_timesteps  | 29103    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1304     |
|    fps              | 3352     |
|    time_elapsed     | 8        |
|    total_timesteps  | 29175    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1308     |
|    fps              | 3354     |
|    time_elapsed     | 8        |
|    total_timesteps  | 29274    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1408     |
|    fps              | 3372     |
|    time_elapsed     | 9        |
|    total_timesteps  | 31398    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1412     |
|    fps              | 3372     |
|    time_elapsed     | 9        |
|    total_timesteps  | 31471    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1416     |
|    fps              | 3374     |
|    time_elapsed     | 9        |
|    total_timesteps  | 31591    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1516     |
|    fps              | 3351     |
|    time_elapsed     | 10       |
|    total_timesteps  | 33685    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1520     |
|    fps              | 3344     |
|    time_elapsed     | 10       |
|    total_timesteps  | 33782    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1524     |
|    fps              | 3344     |
|    time_elapsed     | 10       |
|    total_timesteps  | 33865    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1624     |
|    fps              | 3349     |
|    time_elapsed     | 10       |
|    total_timesteps  | 36177    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1628     |
|    fps              | 3351     |
|    time_elapsed     | 10       |
|    total_timesteps  | 36251    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1632     |
|    fps              | 3350     |
|    time_elapsed     | 10       |
|    total_timesteps  | 36336    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1732     |
|    fps              | 3377     |
|    time_elapsed     | 11       |
|    total_timesteps  | 38577    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1736     |
|    fps              | 3379     |
|    time_elapsed     | 11       |
|    total_timesteps  | 38681    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1740     |
|    fps              | 3380     |
|    time_elapsed     | 11       |
|    total_timesteps  | 38763    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1840     |
|    fps              | 3393     |
|    time_elapsed     | 12       |
|    total_timesteps  | 40999    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1844     |
|    fps              | 3394     |
|    time_elapsed     | 12       |
|    total_timesteps  | 41085    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1848     |
|    fps              | 3393     |
|    time_elapsed     | 12       |
|    total_timesteps  | 41157    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1948     |
|    fps              | 3401     |
|    time_elapsed     | 12       |
|    total_timesteps  | 43164    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1952     |
|    fps              | 3401     |
|    time_elapsed     | 12       |
|    total_timesteps  | 43276    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1956     |
|    fps              | 3402     |
|    time_elapsed     | 12       |
|    total_timesteps  | 43349    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2056     |
|    fps              | 3424     |
|    time_elapsed     | 13       |
|    total_timesteps  | 45698    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2060     |
|    fps              | 3424     |
|    time_elapsed     | 13       |
|    total_timesteps  | 45769    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2064     |
|    fps              | 3424     |
|    time_elapsed     | 13       |
|    total_timesteps  | 45836    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2164     |
|    fps              | 3435     |
|    time_elapsed     | 13       |
|    total_timesteps  | 47979    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2168     |
|    fps              | 3436     |
|    time_elapsed     | 13       |
|    total_timesteps  | 48082    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2172     |
|    fps              | 3436     |
|    time_elapsed     | 14       |
|    total_timesteps  | 48146    |
----------------------------------
----------------------------------
| rollout/          

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2268     |
|    fps              | 3365     |
|    time_elapsed     | 14       |
|    total_timesteps  | 50118    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.504    |
|    n_updates        | 29       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2272     |
|    fps              | 3350     |
|    time_elapsed     | 14       |
|    total_timesteps  | 50156    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.525    |
|    n_updates        | 38       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2344     |
|    fps              | 3079     |
|    time_elapsed     | 16       |
|    total_timesteps  | 50846    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.117    |
|    n_updates        | 211      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2348     |
|    fps              | 3064     |
|    time_elapsed     | 16       |
|    total_timesteps  | 50885    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.11     |
|    n_updates        | 221      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2420     |
|    fps              | 2832     |
|    time_elapsed     | 18       |
|    total_timesteps  | 51590    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0208   |
|    n_updates        | 397      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2424     |
|    fps              | 2824     |
|    time_elapsed     | 18       |
|    total_timesteps  | 51628    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0227   |
|    n_updates        | 406      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2496     |
|    fps              | 2603     |
|    time_elapsed     | 20       |
|    total_timesteps  | 52923    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00321  |
|    n_updates        | 730      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2500     |
|    fps              | 2590     |
|    time_elapsed     | 20       |
|    total_timesteps  | 53006    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00254  |
|    n_updates        | 751      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2572     |
|    fps              | 2454     |
|    time_elapsed     | 21       |
|    total_timesteps  | 53951    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00162  |
|    n_updates        | 987      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2576     |
|    fps              | 2449     |
|    time_elapsed     | 22       |
|    total_timesteps  | 53990    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0013   |
|    n_updates        | 997      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2648     |
|    fps              | 2357     |
|    time_elapsed     | 23       |
|    total_timesteps  | 54685    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00139  |
|    n_updates        | 1171     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2652     |
|    fps              | 2352     |
|    time_elapsed     | 23       |
|    total_timesteps  | 54723    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000873 |
|    n_updates        | 1180     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2724     |
|    fps              | 2263     |
|    time_elapsed     | 24       |
|    total_timesteps  | 55433    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00252  |
|    n_updates        | 1358     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2728     |
|    fps              | 2259     |
|    time_elapsed     | 24       |
|    total_timesteps  | 55473    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00177  |
|    n_updates        | 1368     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2800     |
|    fps              | 2187     |
|    time_elapsed     | 25       |
|    total_timesteps  | 56159    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000294 |
|    n_updates        | 1539     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2804     |
|    fps              | 2182     |
|    time_elapsed     | 25       |
|    total_timesteps  | 56203    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00229  |
|    n_updates        | 1550     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2876     |
|    fps              | 2111     |
|    time_elapsed     | 26       |
|    total_timesteps  | 56892    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00164  |
|    n_updates        | 1722     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2880     |
|    fps              | 2108     |
|    time_elapsed     | 27       |
|    total_timesteps  | 56930    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000463 |
|    n_updates        | 1732     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2952     |
|    fps              | 2047     |
|    time_elapsed     | 28       |
|    total_timesteps  | 57613    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00184  |
|    n_updates        | 1903     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2956     |
|    fps              | 2043     |
|    time_elapsed     | 28       |
|    total_timesteps  | 57655    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00235  |
|    n_updates        | 1913     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3028     |
|    fps              | 1987     |
|    time_elapsed     | 29       |
|    total_timesteps  | 58385    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000351 |
|    n_updates        | 2096     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3032     |
|    fps              | 1984     |
|    time_elapsed     | 29       |
|    total_timesteps  | 58421    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0012   |
|    n_updates        | 2105     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3104     |
|    fps              | 1931     |
|    time_elapsed     | 30       |
|    total_timesteps  | 59103    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000217 |
|    n_updates        | 2275     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3108     |
|    fps              | 1928     |
|    time_elapsed     | 30       |
|    total_timesteps  | 59142    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000235 |
|    n_updates        | 2285     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3180     |
|    fps              | 1883     |
|    time_elapsed     | 31       |
|    total_timesteps  | 59834    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00024  |
|    n_updates        | 2458     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3184     |
|    fps              | 1880     |
|    time_elapsed     | 31       |
|    total_timesteps  | 59873    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000699 |
|    n_updates        | 2468     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3256     |
|    fps              | 1820     |
|    time_elapsed     | 33       |
|    total_timesteps  | 60834    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0202   |
|    n_updates        | 2708     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3260     |
|    fps              | 1817     |
|    time_elapsed     | 33       |
|    total_timesteps  | 60881    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00244  |
|    n_updates        | 2720     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3332     |
|    fps              | 1775     |
|    time_elapsed     | 34       |
|    total_timesteps  | 61577    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0346   |
|    n_updates        | 2894     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3336     |
|    fps              | 1771     |
|    time_elapsed     | 34       |
|    total_timesteps  | 61621    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0131   |
|    n_updates        | 2905     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3408     |
|    fps              | 1717     |
|    time_elapsed     | 36       |
|    total_timesteps  | 62339    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.022    |
|    n_updates        | 3084     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3412     |
|    fps              | 1713     |
|    time_elapsed     | 36       |
|    total_timesteps  | 62378    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00635  |
|    n_updates        | 3094     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3484     |
|    fps              | 1681     |
|    time_elapsed     | 37       |
|    total_timesteps  | 63054    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0401   |
|    n_updates        | 3263     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3488     |
|    fps              | 1679     |
|    time_elapsed     | 37       |
|    total_timesteps  | 63093    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0406   |
|    n_updates        | 3273     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3560     |
|    fps              | 1645     |
|    time_elapsed     | 38       |
|    total_timesteps  | 63797    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0474   |
|    n_updates        | 3449     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3564     |
|    fps              | 1644     |
|    time_elapsed     | 38       |
|    total_timesteps  | 63835    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0135   |
|    n_updates        | 3458     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3636     |
|    fps              | 1621     |
|    time_elapsed     | 39       |
|    total_timesteps  | 64527    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0199   |
|    n_updates        | 3631     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3640     |
|    fps              | 1620     |
|    time_elapsed     | 39       |
|    total_timesteps  | 64566    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0246   |
|    n_updates        | 3641     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3712     |
|    fps              | 1597     |
|    time_elapsed     | 40       |
|    total_timesteps  | 65274    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0116   |
|    n_updates        | 3818     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3716     |
|    fps              | 1596     |
|    time_elapsed     | 40       |
|    total_timesteps  | 65313    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0544   |
|    n_updates        | 3828     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3788     |
|    fps              | 1573     |
|    time_elapsed     | 41       |
|    total_timesteps  | 66037    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0256   |
|    n_updates        | 4009     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3792     |
|    fps              | 1572     |
|    time_elapsed     | 42       |
|    total_timesteps  | 66074    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0123   |
|    n_updates        | 4018     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3864     |
|    fps              | 1554     |
|    time_elapsed     | 42       |
|    total_timesteps  | 66763    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00941  |
|    n_updates        | 4190     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3868     |
|    fps              | 1553     |
|    time_elapsed     | 43       |
|    total_timesteps  | 66801    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0121   |
|    n_updates        | 4200     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3940     |
|    fps              | 1532     |
|    time_elapsed     | 44       |
|    total_timesteps  | 67506    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0371   |
|    n_updates        | 4376     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3944     |
|    fps              | 1531     |
|    time_elapsed     | 44       |
|    total_timesteps  | 67543    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0191   |
|    n_updates        | 4385     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4016     |
|    fps              | 1511     |
|    time_elapsed     | 45       |
|    total_timesteps  | 68263    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0561   |
|    n_updates        | 4565     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4020     |
|    fps              | 1509     |
|    time_elapsed     | 45       |
|    total_timesteps  | 68302    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0236   |
|    n_updates        | 4575     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4092     |
|    fps              | 1485     |
|    time_elapsed     | 46       |
|    total_timesteps  | 69173    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.041    |
|    n_updates        | 4793     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4096     |
|    fps              | 1484     |
|    time_elapsed     | 46       |
|    total_timesteps  | 69210    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0126   |
|    n_updates        | 4802     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4168     |
|    fps              | 1465     |
|    time_elapsed     | 47       |
|    total_timesteps  | 70121    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0867   |
|    n_updates        | 5030     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4172     |
|    fps              | 1464     |
|    time_elapsed     | 47       |
|    total_timesteps  | 70158    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0592   |
|    n_updates        | 5039     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4244     |
|    fps              | 1411     |
|    time_elapsed     | 51       |
|    total_timesteps  | 72562    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0118   |
|    n_updates        | 5640     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4248     |
|    fps              | 1410     |
|    time_elapsed     | 51       |
|    total_timesteps  | 72607    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.102    |
|    n_updates        | 5651     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4320     |
|    fps              | 1379     |
|    time_elapsed     | 53       |
|    total_timesteps  | 73727    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0368   |
|    n_updates        | 5931     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4324     |
|    fps              | 1379     |
|    time_elapsed     | 53       |
|    total_timesteps  | 73761    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0616   |
|    n_updates        | 5940     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4396     |
|    fps              | 1362     |
|    time_elapsed     | 54       |
|    total_timesteps  | 74711    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0499   |
|    n_updates        | 6177     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4400     |
|    fps              | 1361     |
|    time_elapsed     | 54       |
|    total_timesteps  | 74752    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0268   |
|    n_updates        | 6187     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4472     |
|    fps              | 1340     |
|    time_elapsed     | 56       |
|    total_timesteps  | 75912    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00602  |
|    n_updates        | 6477     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4476     |
|    fps              | 1339     |
|    time_elapsed     | 56       |
|    total_timesteps  | 75963    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0495   |
|    n_updates        | 6490     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4548     |
|    fps              | 1323     |
|    time_elapsed     | 58       |
|    total_timesteps  | 76963    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0646   |
|    n_updates        | 6740     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4552     |
|    fps              | 1322     |
|    time_elapsed     | 58       |
|    total_timesteps  | 77013    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0408   |
|    n_updates        | 6753     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4624     |
|    fps              | 1301     |
|    time_elapsed     | 60       |
|    total_timesteps  | 78134    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0708   |
|    n_updates        | 7033     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4628     |
|    fps              | 1299     |
|    time_elapsed     | 60       |
|    total_timesteps  | 78275    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.147    |
|    n_updates        | 7068     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4700     |
|    fps              | 1283     |
|    time_elapsed     | 61       |
|    total_timesteps  | 79194    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.03     |
|    n_updates        | 7298     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4704     |
|    fps              | 1282     |
|    time_elapsed     | 61       |
|    total_timesteps  | 79244    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0126   |
|    n_updates        | 7310     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4776     |
|    fps              | 1262     |
|    time_elapsed     | 63       |
|    total_timesteps  | 80461    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0139   |
|    n_updates        | 7615     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4780     |
|    fps              | 1259     |
|    time_elapsed     | 64       |
|    total_timesteps  | 80649    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0465   |
|    n_updates        | 7662     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4852     |
|    fps              | 1145     |
|    time_elapsed     | 80       |
|    total_timesteps  | 91772    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.176    |
|    n_updates        | 10442    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4856     |
|    fps              | 1140     |
|    time_elapsed     | 81       |
|    total_timesteps  | 92460    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0815   |
|    n_updates        | 10614    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

<stable_baselines3.dqn.dqn.DQN at 0x2dec61e8940>

In [17]:
model.save("dqn_cartpole")

In [18]:
evaluate_policy(model, env, n_eval_episodes = 10, render = True)

(186.7, 18.579827770999387)

In [19]:
env.close()

In [20]:
env_name = 'CartPole-v0'
env = gym.make(env_name)

In [21]:
# Here we are initializing the agent to chose random actions and seeing if the environment is working properly
model = DQN.load("dqn_cartpole")
for episode in range(1,11):
    score = 0 
    # re-setting it as a new game
    obs = env.reset()
    done = False
    while not done:
        env.render()
        # action space is the set of actions, which is two for our env, 
        # and returns a random number, either 0 or 1, that is, moving left or right
        action, _ = model.predict(obs)
        # execute the action
        obs, reward, done, info = env.step(action)
        # this above statement exectues the corresponding action and returns 4 variables
        # if done is true then it breaks the loop
        # reward has the corresponding reward and we add it to the score
        score += reward
    print('Episode: ', episode)
    print('Score: ', score)
env.close()
# OBSERVED THAT IT DOES

Episode:  1
Score:  174.0
Episode:  2
Score:  150.0
Episode:  3
Score:  200.0
Episode:  4
Score:  182.0
Episode:  5
Score:  196.0
Episode:  6
Score:  167.0
Episode:  7
Score:  200.0
Episode:  8
Score:  200.0
Episode:  9
Score:  200.0
Episode:  10
Score:  200.0
