In [19]:
import numpy as np
import gym
from gym import Env, spaces
import random

class MountainCarEnv(Env):
    def __init__(self):
        super(MountainCarEnv, self).__init__()

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=np.array([-1.2,-0.07]),
                                high=np.array([0.6, 0.07]))

        self.state = [-0.8, 0] #position and velocity
        self.force = 0.001
        self.gravity = 0.0025
        self.goal_position = 0.5
        self.reward = 0
    
    def step(self, action):
        currPos, currVelocity = self.state

        currVelocity += (action - 1) * self.force - self.gravity * np.cos(3 * currPos) # New Velocity
        nextVelocity = np.clip(currVelocity, -0.07, 0.07)

        # Making sure that they are within the limit
        currPos += currVelocity # New Position
        nextPosition = np.clip(currPos, -1.2, 0.6)
        if nextPosition <= -1.2 and nextVelocity < 0:
            nextVelocity = 0
        
        self.reward -= 1
        self.num_steps += 1
        message = ""

        done = False
        if nextPosition >= self.goal_position:
            done = True
        
        self.state = [nextPosition, nextVelocity]
        reward = -1
        return self.state,reward, done, {}
    
    def render(self, action, reward):
        print(f"Position: {self.state[0]}, Velocity: {self.state[1]}")
        print(f"Action: {action}, Reward: {reward}\n")
    
    def reset(self):
        self.state = [-0.8, 0.0]
        self.num_steps = 0
        return self.state

env = MountainCarEnv()

print("Initial Environment: ")
env.render(action=1, reward=0)

NUM_EPISODES = 10
for episode in range(NUM_EPISODES):
    steps = 0
    print(f"-------------------------EPISODE {episode + 1}----------------------")
    done = False
    state = env.reset()
    while not done:

        action = 2 if env.state[1] > 0 else 0
        nextState, reward, done, _ = env.step(action)
        steps += 1

        env.render(action=action, reward=-1)
        if(steps >= 200):
            print(f"Stopping the episode {episode + 1}")
            break
        
        
    if(done == True):
        print(f"SUCCESSFUL AT {steps} STEPS")
        break
            

Initial Environment: 
Position: -0.8, Velocity: 0
Action: 1, Reward: 0

-------------------------EPISODE 1----------------------
Position: -0.7991565157111469, Velocity: 0.0008434842888531144
Action: 0, Reward: -1

Position: -0.7954738261003227, Velocity: 0.00368268961082423
Action: 2, Reward: -1

Position: -0.7889707509194286, Velocity: 0.006503075180894079
Action: 2, Reward: -1

Position: -0.7796810641729315, Velocity: 0.009289686746497099
Action: 2, Reward: -1

Position: -0.7676541884872057, Velocity: 0.012026875685725781
Action: 2, Reward: -1

Position: -0.752956107064798, Velocity: 0.01469808142240768
Action: 2, Reward: -1

Position: -0.7356704030587966, Velocity: 0.017285704006001414
Action: 2, Reward: -1

Position: -0.7158993111933247, Velocity: 0.019771091865471872
Action: 2, Reward: -1

Position: -0.6937646452773137, Velocity: 0.02213466591601104
Action: 2, Reward: -1

Position: -0.669408450681355, Velocity: 0.02435619459595873
Action: 2, Reward: -1

Position: -0.6429932261698

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
