<a href="https://colab.research.google.com/github/HemantTiwariGitHub/RLPlayground/blob/main/CartpoleTest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab setup: install the Python packages and system tools this notebook uses.
#remove " > /dev/null 2>&1" to see what is going on under the hood
# Python packages: gym and pyvirtualdisplay.
!pip install gym pyvirtualdisplay > /dev/null 2>&1
# System packages: xvfb, python-opengl and ffmpeg
# (presumably for headless rendering and video encoding -- confirm).
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1

In [None]:
# Refresh the apt package index, then install cmake and the remaining pip packages.
!apt-get update > /dev/null 2>&1
!apt-get install cmake > /dev/null 2>&1
# Unlike the other lines, this one is not redirected to /dev/null, so pip's output is shown.
!pip install --upgrade setuptools 2>&1
!pip install ez_setup > /dev/null 2>&1
# gym[atari] requests gym's Atari extra; the cells visible in this notebook only use CartPole.
!pip install gym[atari] > /dev/null 2>&1

# Imports and Helper functions


In [None]:
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40) #error only
import tensorflow as tf
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import glob
import io
import base64
from IPython.display import HTML

from IPython import display as ipythondisplay

In [None]:
from pyvirtualdisplay import Display
# Create and start a pyvirtualdisplay Display (visible=0, 1400x900),
# presumably so env.render() has a screen to draw to on a headless runtime -- confirm.
# NOTE: the global name `display` now refers to this Display object; IPython's
# display module is imported under the separate name `ipythondisplay`.
display = Display(visible=0, size=(1400, 900))
display.start()

In [None]:
"""
Utility functions to enable video recording of gym environment and displaying it
To enable video, just do "env = wrap_env(env)"
"""

def show_video():
  """Embed a recorded episode video inline in the notebook output.

  Looks for ``*.mp4`` files under the ``video/`` directory, takes the first
  match returned by ``glob``, base64-encodes its bytes and displays them as
  an autoplaying, looping HTML ``<video>`` element via ``ipythondisplay``.

  Prints "Could not find video" when no mp4 file is present.
  """
  mp4list = glob.glob('video/*.mp4')
  if len(mp4list) > 0:
    mp4 = mp4list[0]
    # Read-only binary mode is all that is needed here, and the context
    # manager guarantees the file handle is closed once the bytes are read.
    with io.open(mp4, 'rb') as video_file:
      video = video_file.read()
    encoded = base64.b64encode(video)
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
  else: 
    print("Could not find video")
    

def wrap_env(env):
  """Return ``env`` wrapped in a gym ``Monitor`` that writes to ``./video``.

  ``force=True`` is forwarded to ``Monitor`` (presumably so an existing
  ``./video`` directory can be reused -- confirm against the gym docs).
  """
  return Monitor(env, './video', force=True)

In [None]:
# Create the CartPole-v1 environment and pass it through wrap_env, which wraps it
# in a Monitor writing to ./video so that show_video() can find the recording.
env = wrap_env(gym.make("CartPole-v1"))

In [None]:
# Inspect the CartPole action and observation spaces
print(env.action_space)
print(env.observation_space)

In [None]:
# Play one episode with randomly sampled actions, logging every transition,
# then close the environment and show the recorded video.
state = env.reset()
print(state)
currentState = state
i = 0
TotalReward = 0
done = False
while not done:
    env.render()

    # Placeholder policy: sample an action at random (your agent goes here).
    action = env.action_space.sample()
    nextState, reward, done, info = env.step(action)
    TotalReward += reward

    # step index : state : action : reward : next state : done flag : running total
    print(i,": " , currentState , " :  " , action , " :  " , reward , " : " , nextState , " : " , done, " : " , TotalReward)

    # Advance to the next transition.
    currentState = nextState
    i += 1

env.close()
show_video()

**Starting Cartpole**

**Train the `CartpoleAgent22` DQN agent on CartPole-v0**

In [None]:
import numpy as np
import random
import math
import matplotlib.pyplot as plt
import os

In [None]:
# for plumbing code
import collections
from collections import deque
import pickle

# the environment
import gym

# the Agent
from Agent import CartpoleAgent22


In [None]:
# CartPole environment (v0 here; this rebinds `env`, replacing the
# video-wrapped CartPole-v1 environment created earlier in the notebook)
env = gym.make('CartPole-v0')

# get size of state and action from environment
state_size = env.observation_space.shape[0] # equal to 4 in case of cartpole 
action_size = env.action_space.n            # equal to 2 in case of cartpole

# agent needs to be initialised outside the loop since the DQN
# network will be initialised along with the agent
agent = CartpoleAgent22(action_size=action_size, state_size=state_size)


# to store rewards in each episode: rewards_per_episode collects the total
# reward of each episode, episodes collects the matching episode indices
rewards_per_episode, episodes = [], []

In [None]:
# make dir to store model weights; exist_ok=True makes this safe to re-run
# and avoids the check-then-create race of os.path.exists + os.mkdir
os.makedirs("saved_model_weights", exist_ok=True)

# number of training episodes to run
n_episodes = 1000


In [None]:
#### simulation starts ####
for episode in range(n_episodes):

    # per-episode bookkeeping, then a fresh environment state
    done, score = False, 0
    state = env.reset()
    i = 0

    while not done:
        # ask the agent for an action and apply it to the environment
        action = agent.get_action(state)
        next_state, reward, done, info = env.step(action)

        # store the <s, a, r, s', done> sample in the replay memory,
        # then run one training step
        agent.append_sample(state, action, reward, next_state, done)
        agent.train_model()

        # accumulate the episode score and move on to the next state
        score += reward
        state = next_state
        i += 1

    # record the total reward obtained in this episode
    rewards_per_episode.append(score)
    episodes.append(episode)

    # decay epsilon once per episode while it is still above epsilon_min
    if agent.epsilon > agent.epsilon_min:
        agent.epsilon *= agent.epsilon_decay

    # progress line, printed every episode
    print("episode {0}, reward {1}, memory_length {2}, epsilon {3}".format(
        episode, score, len(agent.memory), agent.epsilon))

    # save model weights on every 10th episode (0, 10, 20, ...)
    if episode % 10 == 0:
        agent.save_model_weights(name="model_weights.h5")

#### simulation complete ####

In [None]:
# save stuff as pickle
def save_pickle(obj, name):
    """Pickle ``obj`` into the file ``name + '.pkl'``.

    Args:
        obj: Any picklable object.
        name: Output path without the ``.pkl`` extension, which is appended here.

    The file is written in binary mode with ``pickle.HIGHEST_PROTOCOL``.
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

# make the directory for pickle output; exist_ok=True makes this safe to
# re-run and avoids the check-then-create race of os.path.exists + os.mkdir
os.makedirs("saved_pickle_files", exist_ok=True)

In [None]:
# Persist the per-episode reward history. save_pickle appends '.pkl', so the
# file written is saved_pickle_files/rewards_per_episode.pkl.
save_pickle(rewards_per_episode, "saved_pickle_files/rewards_per_episode")

In [None]:

# plot results: reload the pickled reward history and draw reward vs. episode
with open('saved_pickle_files/rewards_per_episode.pkl', 'rb') as f:
    rewards_per_episode = pickle.load(f)

episode_numbers = list(range(len(rewards_per_episode)))
plt.plot(episode_numbers, rewards_per_episode)
plt.xlabel("episode number")
plt.ylabel("reward per episode")

# write the figure to rewards.png (relative to the current working directory)
plt.savefig('rewards.png')

# mean over the last 100 entries (or over all of them if there are fewer)
last_100_mean = np.mean(rewards_per_episode[-100:])
print("Average reward of last 100 episodes is {0}".format(last_100_mean))
