In [None]:
# CartPole using Hill Climbing
# Start with a random initialization of the policy parameters.

# The problem consists of balancing a pole connected with one joint on top of a moving cart.
# In CartPole's environment, there are four observations at any given state, 
# representing [Position of Cart, Velocity of Cart, Angle of Pole, Rotation Rate of Pole].
# Using these observations, the agent needs to decide on one of two possible actions: move the cart left or right.

In [None]:
import gym
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# An EPISODE is one full sequence of states, actions and rewards that ends
# in a terminal state (from the start of the game until the end of the game).
# A TIMESTEP is a single state, action and reward.

EPISODE = 1000     # number of episodes to run
TIMESTEP = 20000   # upper bound on the number of steps within one episode

 

In [None]:
def randomSearch(size=4):
    """Draw a random parameter vector with entries uniform in [-1, 1).

    Parameters
    ----------
    size : int, optional
        Number of values to draw. Defaults to 4, one weight per CartPole
        observation component.

    Returns
    -------
    numpy.ndarray
        Array of length `size`.
    """
    return np.random.uniform(-1, 1, size)

In [None]:
def runEpisode(env,parameters,timestep):
    """Play one episode with a linear policy and return the total reward.

    The action is 0 when the dot product of `parameters` and the current
    observation is negative, and 1 otherwise.

    Parameters
    ----------
    env : environment object exposing `reset()` and `step(action)`.
        Both the older Gym API (`reset() -> obs`,
        `step() -> (obs, reward, done, info)`) and the newer one
        (`reset() -> (obs, info)`,
        `step() -> (obs, reward, terminated, truncated, info)`) are accepted.
    parameters : array-like of weights, same length as an observation.
    timestep : int
        Maximum number of steps to take in this episode.

    Returns
    -------
    The accumulated reward. Accumulation stops as soon as the environment
    reports the episode is over or the total reaches 200.
    """
    reset_result = env.reset()
    # Newer Gym/Gymnasium versions return (observation, info) from reset().
    if (isinstance(reset_result, tuple) and len(reset_result) == 2
            and isinstance(reset_result[1], dict)):
        observation = reset_result[0]
    else:
        observation = reset_result

    tr = 0

    for _ in range(timestep):
        # env.render()

        action = 0 if np.matmul(parameters, observation) < 0 else 1

        step_result = env.step(action)
        if len(step_result) == 5:
            # Newer API splits the end-of-episode flag in two.
            observation, reward, terminated, truncated, _info = step_result
            done = terminated or truncated
        else:
            observation, reward, done, _info = step_result

        tr += reward

        if done or tr >= 200:
            break

    return tr
                

In [None]:
# Create an EPISODE loop (note: we reset the env on each episode).
# In each EPISODE, we run a TIMESTEP loop that takes an action and reads an observation until the episode terminates or we reach the TIMESTEP limit.
# An observation is [Position of Cart, Velocity of Cart, Angle of Pole, Rotation Rate of Pole].

In [None]:
def main():
    """Run hill climbing on CartPole and report/plot the rewards.

    Starting from a random parameter vector, each episode evaluates the
    current parameters plus a scaled random perturbation. The perturbed
    vector replaces the current one only when its episode reward beats the
    best reward seen so far. Prints a line block for every episode whose
    reward reaches 200, then a summary, and finally plots the per-episode
    rewards with `plotChart`.
    """
    env = gym.make('CartPole-v1')

    try:
        parameters = randomSearch()
        bestReward = 0
        # Start from the initial parameters so the summary below never hits
        # an unbound name if no episode improves on bestReward.
        bestParameters = parameters
        totalRewards = []
        noise = 1.0001  # scale applied to each random perturbation
        for i in range(EPISODE):
            newParameters = parameters + (randomSearch() * noise)
            reward = runEpisode(env, newParameters, TIMESTEP)
            totalRewards.append(reward)

            if reward > bestReward:
                bestReward = reward
                bestParameters = newParameters
                parameters = newParameters

            if reward >= 200:
                print(" 200 achieved in episode {}".format(i))
                print("Best reward {}".format(bestReward))
                print("Best Parameters {}".format(bestParameters))
                print("-----------------------------------------")

        # Average over the episodes actually run; avoids dividing by zero
        # when EPISODE is 0.
        averageReward = sum(totalRewards) / len(totalRewards) if totalRewards else 0.0

        print("")
        print("")
        print("Average reward after {0} consecutive trials: {1}".format(EPISODE, averageReward))
        print("Best reward {}".format(bestReward))
        print("Best Parameters {}".format(bestParameters))
    finally:
        # Always release the environment, even if an episode raised.
        env.close()
        # hack solution to environments not closing: also close the wrapped
        # environment, when the wrapper exposes one.
        innerEnv = getattr(env, "env", None)
        if innerEnv is not None:
            innerEnv.close()

    plotChart(totalRewards)

In [None]:
def plotChart(rew):
    """Show a line chart of reward per episode.

    rew: sequence of rewards, one per episode; the x-axis numbers the
    episodes starting from 1.
    """
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    episodeNumbers = np.arange(1, len(rew) + 1)
    axes.plot(episodeNumbers, rew)
    axes.set_ylabel("Rewards")
    axes.set_xlabel("Episode #")

    plt.show()

In [None]:
# Run the hill-climbing experiment when this cell/script is executed directly.
if __name__ == "__main__":
    main()