In [2]:
import gym

import math
%matplotlib inline
import matplotlib.pyplot as plt


In [3]:
# Create the Cart-Pole game environment: one CartPole-v1 instance obtained from
# Gym's registry. This same `env` object is reset, rendered and stepped by the
# evaluation loop further down, and closed once all games have been played.
env = gym.make('CartPole-v1')


In [4]:
# Parameters of the cart-pole model used by the local one-step simulator
# (nextStep). NOTE(review): these presumably mirror the values inside the Gym
# CartPole environment -- confirm against the installed Gym version.

# Masses
masscart = 1.0
masspole = 0.1
total_mass = masspole + masscart

# Pole geometry
length = 0.5  # actually half the pole's length
polemass_length = masspole * length

# Forcing and time integration
gravity = 9.8
force_mag = 10.0                  # magnitude of the push; the action only picks its sign
tau = 0.02                        # step size multiplied into every state derivative
kinematics_integrator = 'euler'   # any other value selects the semi-implicit Euler branch

# Pole angle limit used by safeAngleCheck: 12 degrees expressed in radians
theta_threshold_radians = 12 * 2 * math.pi / 360

In [5]:
def nextStep(state, action):
    """Predict the cart-pole state one integration step after `action`.

    Args:
        state: 4-item sequence unpacked as (x, x_dot, theta, theta_dot).
        action: 1 applies +force_mag to the cart; any other value applies
            -force_mag.

    Returns:
        A tuple (x, x_dot, theta, theta_dot) advanced by one step of size
        `tau`.

    The model parameters (masses, pole length, gravity, force magnitude, step
    size and integrator choice) are read from module-level constants.
    NOTE(review): the equations look like the ones in Gym's CartPole
    environment -- confirm they match the installed version.
    """
    pos, vel, angle, ang_vel = state

    if action == 1:
        push = force_mag
    else:
        push = -force_mag

    cos_a = math.cos(angle)
    sin_a = math.sin(angle)

    # Term shared by both accelerations: applied force plus the pole's
    # velocity-dependent contribution, divided by the total mass.
    shared = (push + polemass_length * ang_vel * ang_vel * sin_a) / total_mass
    denom = length * (4.0 / 3.0 - masspole * cos_a * cos_a / total_mass)
    ang_acc = (gravity * sin_a - cos_a * shared) / denom
    lin_acc = shared - polemass_length * ang_acc * cos_a / total_mass

    if kinematics_integrator == 'euler':
        # Explicit Euler: positions advance with the *old* velocities.
        return (
            pos + tau * vel,
            vel + tau * lin_acc,
            angle + tau * ang_vel,
            ang_vel + tau * ang_acc,
        )

    # Semi-implicit Euler: velocities are updated first, then the positions
    # advance with the *new* velocities.
    new_vel = vel + tau * lin_acc
    new_ang_vel = ang_vel + tau * ang_acc
    return (
        pos + tau * new_vel,
        new_vel,
        angle + tau * new_ang_vel,
        new_ang_vel,
    )


In [6]:
def safeSpeedCheck(state):
    """Check whether the cart can be stopped before it leaves the track.

    Starting from `state`, the cart is pushed against its current direction of
    travel (action 1 while x_dot <= 0, action 0 otherwise) using nextStep
    until the sign of x_dot flips. The position reached at that moment is then
    compared with the track limit.

    Args:
        state: 4-item sequence (x, x_dot, theta, theta_dot).

    Returns:
        True if |x| is below 2.4 when the velocity has reversed, else False.
        NOTE(review): 2.4 is presumably the environment's position threshold
        -- confirm.
    """
    probe = state
    heading_left = probe[1] <= 0          # direction is fixed by the initial velocity
    brake = 1 if heading_left else 0      # push opposite to the motion

    while True:
        if heading_left:
            still_coasting = probe[1] <= 0
        else:
            still_coasting = probe[1] >= 0
        if not still_coasting:
            break
        probe = nextStep(probe, brake)

    # bool() keeps the return value a plain True/False.
    return bool(abs(probe[0]) < 2.4)

In [7]:
def safeAngleCheck(state):
    """Check whether the pole can be brought back through vertical in time.

    Starting from `state`, the cart is pushed towards the side the pole is
    leaning to (action 0 while theta <= 0, action 1 otherwise) using nextStep
    until theta changes sign. If any simulated step reaches
    theta_threshold_radians on the leaning side first, the angle is judged
    unrecoverable.

    Args:
        state: 4-item sequence (x, x_dot, theta, theta_dot).

    Returns:
        False if the angle limit is reached during the simulated correction,
        True if theta crosses zero first. The angle of `state` itself is not
        tested against the limit; only simulated successor states are.
    """
    probe = state
    tilt_left = probe[2] <= 0             # side is fixed by the initial angle
    correction = 0 if tilt_left else 1    # chase the pole in its lean direction

    while (probe[2] <= 0) if tilt_left else (probe[2] >= 0):
        probe = nextStep(probe, correction)
        angle = probe[2]
        if tilt_left:
            past_limit = angle <= -theta_threshold_radians
        else:
            past_limit = angle >= theta_threshold_radians
        if past_limit:
            return False

    return True

In [10]:
num_episodes = 500    # number of games to play
target_score = 200    # steps a game must survive to count as a success


def _chooseAction(state):
    """Pick the action (0 = push left, 1 = push right) for `state`.

    Both candidate successor states are simulated with nextStep, then the
    rules below are applied in priority order; the first one that fires wins:

      1. going left leads to an unstoppable cart speed   -> go right
      2. going right leads to an unstoppable cart speed  -> go left
      3. going left leaves the pole uncorrectable        -> go right
      4. going right leaves the pole uncorrectable       -> go left
      5. otherwise take the successor with the smaller |theta|; on a tie, the
         one with the smaller |theta_dot| (right if that ties as well).

    Args:
        state: 4-item sequence (x, x_dot, theta, theta_dot).

    Returns:
        0 or 1.
    """
    go_left = nextStep(state, 0)
    go_right = nextStep(state, 1)

    # Safety vetoes: speed first, then angle.
    if not safeSpeedCheck(go_left):
        return 1
    if not safeSpeedCheck(go_right):
        return 0
    if not safeAngleCheck(go_left):
        return 1
    if not safeAngleCheck(go_right):
        return 0

    # Both moves are safe: prefer the smaller resulting pole angle.
    left_angle = abs(go_left[2])
    right_angle = abs(go_right[2])
    if left_angle < right_angle:
        return 0
    if left_angle > right_angle:
        return 1

    # Equal angles: prefer the smaller resulting pole angular velocity.
    return 0 if abs(go_left[3]) < abs(go_right[3]) else 1


# Play `num_episodes` games and count how many reach `target_score` steps.
# This relies on the classic Gym API: reset() returns the observation and
# step() returns (observation, reward, done, info).
count = 0
try:
    for episode in range(num_episodes):
        next_state = env.reset()
        env.render()    # called once per game, right after the reset
        score = 0
        done = False
        while not done:
            next_state, reward, done, _ = env.step(_chooseAction(next_state))
            score += 1
            # Cap each game at target_score steps, whatever the environment reports.
            if score >= target_score:
                done = True

        if score >= target_score:
            count += 1
finally:
    # Release the environment even if a step raises part-way through.
    env.close()

print("Percentage of reward over", target_score, "in", num_episodes, "games", count / num_episodes)

Percentage of reward over 200 in 500 games 0.766
