# Notebook with a PD controller that achieves the maximum return on CartPole-v1

In [2]:
import gym
import torch
import numpy as np

In [3]:
# Controller gains: proportional and derivative.
P, D = 0.1, 0.5
I = 0.0  # integral gain; the controller below never reads it (PD only)

# Set-point for the 4-element state vector: every component should be zero.
desired_state = np.zeros(4, dtype=int)
# Only index 2 contributes to the control signal (the pole angle, going by
# Gym's documented CartPole observation layout).
desired_mask = np.array([0, 0, 1, 0])

# Initial controller memory.
integral = 0  # accumulated by the controller, but not used in its output
prev_error = derivative = 0.0
 

In [4]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Accepts a scalar or a NumPy array. The value is computed from
    exp(-|x|), which always lies in [0, 1], so large-magnitude inputs of
    either sign never overflow (the naive form overflows in exp(-x) for
    very negative x and emits a RuntimeWarning).
    """
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
    # Both are the same function, written so the exponent is never positive.
    numerator = np.where(np.asarray(x) >= 0, 1.0, decay)
    return numerator / (1.0 + decay)


def PID_controller(state, integral, prev_error, derivative):
    """
    Compute a binary action from the state using only the P and D terms.

    The error is ``state - desired_state``. The control signal is the
    component of ``P * error + D * derivative`` selected by ``desired_mask``
    (via a dot product). The integral is accumulated and returned but does
    not enter the control signal.

    Args:
        state: current state vector, same length as ``desired_state``.
        integral: running sum of past errors (0 at the start of an episode).
        prev_error: error from the previous call (0.0 on the first call, so
            the first derivative equals the first error).
        derivative: ignored on input; it is recomputed here and kept in the
            signature only so callers can thread all state through one call.

    Returns:
        Tuple ``(action, integral, prev_error, derivative)`` where ``action``
        is an ``np.int32`` equal to 0 or 1 and the rest is the updated
        controller memory to pass to the next call.
    """
    error = state - desired_state
    # Build a new value rather than using `integral += error`: the in-place
    # form would modify a caller-owned NumPy array and fails outright when
    # that array has an integer dtype.
    integral = integral + error
    derivative = error - prev_error
    prev_error = error
    pid = np.dot(P * error + D * derivative, desired_mask)

    # Squash to (0, 1) and round: a positive control signal yields 1, a
    # negative one yields 0 (np.round sends the exact 0.5 tie to 0).
    action = sigmoid(pid)
    action = np.round(action).astype(np.int32)
    return action, integral, prev_error, derivative

In [5]:
# create a loop to run cartpole using the PID controller and print out the performance
# Run the PD controller on CartPole for 100 episodes and collect the
# undiscounted return of each episode in `returns`.
returns = []

env = gym.make('CartPole-v1')
try:
    for _episode in range(100):
        # reset() yields a tuple whose first element is the initial observation.
        state = env.reset()[0]
        return_ = 0
        # Fresh controller memory for every episode.
        integral = 0  # we are not going to use the integral term
        prev_error = 0.0
        derivative = 0.0
        for _step in range(1000):
            # env.render()
            action, integral, prev_error, derivative = PID_controller(state, integral, prev_error, derivative)
            # step() yields five values; the third and fourth are the two
            # end-of-episode flags (terminated / truncated in Gym >= 0.26).
            state, reward, terminated, truncated, _info = env.step(action)
            return_ += reward
            if terminated or truncated:
                break
        # Record the return even if the 1000-step cap was hit without an
        # end-of-episode flag, so no episode is silently dropped from the mean.
        returns.append(return_)
finally:
    # Release the environment even if an episode raised.
    env.close()



In [6]:
# Average return over all recorded episodes.
average_return = np.mean(returns)
print(average_return)

500.0
