# Lunar Lander

## Imports for this lesson

In [31]:
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
from time import sleep
from deap import base, creator, tools, algorithms
import random

## Description of the problem

The goal is to land the shuttle safely in a safe zone.

<img src="https://www.gymlibrary.dev/_images/lunar_lander.gif" width="400" align="center">



### Observation space

At each timestep the lander has access to its current state, consisting of
- the coordinates of the lander in $x$ and $y$;
- the coordinates of its linear velocity in $x$ and $y$;
- its angle;
- its angular velocity;
-  two booleans that represent whether each leg is in contact with the ground or not (touch sensors on each leg).

### PID controller

A Proportional-Integral-Derivative (PID) controller continuously calculates an error value $e(t)$ as the difference between a desired setpoint (SP) and a measured process variable (PV). The controller attempts to minimize the error over time by adjusting a control variable $a(t)$ of the form
$$a(t) := K_p e(t) + K_i \int_0^t e(\tau) d\tau + K_d \frac{\textrm{d}e(t)}{\textrm{d}t}.$$

In our setting, the PVs are the altitude and the angle of the lander. The SP is composed of:
- an altitude setpoint equal to $|x_{p}|$, where $x_p$ is the $x$-coordinate of the actual position of the lander. This coincides with the horizontal distance to the target.
- an angle setpoint equal to $\frac{\pi}{4}(x_p + v_x)$, where $v_x$ is the $x$-coordinate of the velocity of the lander.

In our case, the PID controller is actually a PD controller, since the I component is not needed<span style="color:red"> WHY? </span> 

In [42]:
def pid(state, params):
    """Compute the PD control action for the current lander state.

    The altitude setpoint is ``|x|`` and the angle setpoint is
    ``(pi/4) * (x + v_x)``. The derivative terms use the measured vertical
    and angular velocities directly instead of differentiating the error.

    Args:
        state (list): 8-element observation holding, in order, the x and y
            position, the x and y velocity, the angle, the angular
            velocity and the two leg touch-sensor flags.
        params (list): 4-element list of gains holding, in order, the
            proportional and derivative altitude gains followed by the
            proportional and derivative angle gains.

    Returns:
        (np.array) 2-element action ``[a_y, a_angle]`` clipped to
        ``[-1, 1]``; both entries are zero when either touch sensor is set.
    """
    # Controller gains
    gain_p_alt, gain_d_alt = params[0], params[1]
    gain_p_ang, gain_d_ang = params[2], params[3]

    # Unpack the observation
    pos_x, pos_y = state[0], state[1]
    vel_x, vel_y = state[2], state[3]
    tilt, tilt_rate = state[4], state[5]
    any_leg_down = state[6] or state[7]

    # Setpoints (target values) for altitude and angle
    alt_setpoint = np.abs(pos_x)
    tilt_setpoint = (np.pi/4)*(pos_x + vel_x)

    # PD law: proportional term on the error, derivative term on the velocity
    alt_cmd = gain_p_alt*(alt_setpoint - pos_y) + gain_d_alt*vel_y
    tilt_cmd = gain_p_ang*(tilt_setpoint - tilt) + gain_d_ang*tilt_rate

    # The environment expects an array whose entries lie in [-1, 1]
    action = np.clip(np.array([alt_cmd, tilt_cmd]), -1, +1)

    # A leg touching the ground means we have landed: kill the engines
    if any_leg_down:
        action[:] = 0
    return action

## Optimize PID parameters via Genetic Algorithms

In [41]:
def run(params, env, verbose=False):
    """Play one full episode with the PID controller and record it.

    Args:
        params (list): PID parameters forwarded to ``pid`` at every step.
        env (gym.Env): gym environment that is reset and then stepped.
        verbose (bool): when True, ``env.render()`` is called (followed by
            a short pause) after the reset and after every step.

    Returns:
        (float, list) the total reward of the episode and the list of all
        visited states, starting with the state returned by the reset.
    """
    def show():
        # Render the current frame and slow the loop down a little.
        if verbose:
            env.render()
            sleep(.005)

    # start a fresh episode
    state, _ = env.reset()
    show()
    trajectory = [state]
    episode_return = 0

    # step until the environment signals the end of the episode
    while True:
        action = pid(state, params)
        state, reward, terminated, truncated, _ = env.step(action)
        episode_return += reward
        show()
        trajectory.append(state)
        if terminated or truncated:
            break
    return episode_return, trajectory

In [38]:
def evalParams(individual, env, n_trials=5):
    """Score a set of PID parameters by their mean reward over several runs.

    Args:
        individual (list): PID parameters to evaluate.
        env (gym.Env): gym environment in which the episodes are run.
        n_trials (int): number of episodes to average over (default 5).

    Returns:
        (tuple) 1-element tuple holding the mean total reward.

    Raises:
        ValueError: if ``n_trials`` is smaller than 1.
    """
    # Averaging zero episodes would silently yield NaN as a fitness value.
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")
    scores = [run(individual, env)[0] for _ in range(n_trials)]
    # The trailing comma matters: the fitness is returned as a tuple.
    return (np.mean(scores),)

def init_ga():
    """Build the DEAP toolbox used to evolve the four PID parameters.

    Individuals are lists of 4 floats drawn uniformly from [-10, 10] and
    their single-objective fitness is maximised. The evaluation operator is
    bound to the module-level ``env``, which must exist before this
    function is called.

    Returns:
        (base.Toolbox) toolbox with the population, evaluate, mate, mutate
        and select operators registered.
    """
    # creator.create registers the class inside deap.creator; creating it
    # again (e.g. when the cell is re-run) overwrites the existing class and
    # emits a warning, so only create the classes that are still missing.
    if not hasattr(creator, "FitnessMax"):
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()
    # one gene = one PID gain, sampled uniformly in [-10, 10]
    toolbox.register("attr_float", random.uniform, a=-10., b=10.)
    toolbox.register("individual", tools.initRepeat, creator.Individual,
                     toolbox.attr_float, n=4)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", evalParams, env=env)
    toolbox.register("mate", tools.cxOnePoint)
    toolbox.register("mutate", tools.mutGaussian, mu=0., sigma=1., indpb=0.25)
    toolbox.register("select", tools.selBest)
    return toolbox


def optimize(NIND, NGEN, toolbox):
    """Optimize PID gains using a (mu + lambda) genetic algorithm.

    Args:
        NIND (int): population size, also used as the number of offspring
            produced at every generation.
        NGEN (int): number of generations.
        toolbox (base.Toolbox): toolbox providing the population, evaluate,
            mate, mutate and select operators.

    Returns:
        The best individual found during the whole run, with its fitness
        attached.
    """
    pop = toolbox.population(n=NIND)
    hof = tools.HallOfFame(1)

    # per-generation statistics over the fitness values of the population
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    algorithms.eaMuPlusLambda(pop, toolbox, mu=NIND, lambda_=NIND, cxpb=0.5, mutpb=0.2,
                              ngen=NGEN, stats=stats, halloffame=hof, verbose=True)

    # The hall of fame keeps the best individual ever evaluated, so the
    # result does not depend on the selection operator being elitist.
    return hof[0]

In [39]:
# Setup environment
env = gym.make("LunarLanderContinuous-v2")
try:
    state, info = env.reset(seed=42)
    # The GA operators draw from the `random` module: seed it as well so
    # that the whole optimisation is reproducible, not just the environment.
    random.seed(42)

    # RUN GA
    NIND = 20
    NGEN = 20
    toolbox = init_ga()
    params = optimize(NIND, NGEN, toolbox)
    print(f"The best parameters are {params}")
    print(f"The best fitness is {params.fitness.values[0]}")
    params = np.array(params)
finally:
    # release the environment even if the optimisation fails
    env.close()

gen	nevals	avg     	std    	min     	max   
0  	20    	-2702.98	6254.67	-21536.7	238.62
1  	15    	-496.936	298.965	-752.183	238.62
2  	13    	-296.374	277.033	-614.164	242.18
3  	14    	-40.0981	221.709	-386.959	271.859
4  	13    	170.735 	130.099	-115.589	271.859
5  	13    	247.753 	11.3996	237.225 	271.859
6  	18    	256.828 	10.0785	242.18  	276.676
7  	14    	266.966 	6.37797	254.968 	282.864
8  	11    	272.126 	5.2742 	266.489 	282.864
9  	16    	274.973 	4.40957	269.601 	282.864
10 	10    	277.652 	3.92035	271.859 	284.533
11 	16    	279.012 	2.874  	275.126 	284.533
12 	9     	280.79  	2.41907	277.459 	284.533
13 	18    	282.208 	2.00814	278.923 	285.881
14 	10    	283.521 	1.5138 	282.654 	289.008
15 	13    	283.724 	1.55792	282.864 	289.008
16 	14    	283.828 	1.5444 	282.864 	289.008
17 	13    	285.057 	3.72931	282.864 	299.214
18 	16    	286.279 	4.45793	282.864 	299.214
19 	13    	288.422 	5.24036	283.901 	299.214
20 	14    	288.956 	5.02507	284.533 	299.214
The best param

In [40]:
# Final runs with tuned PID
# Limit the episode length through the documented `max_episode_steps`
# argument instead of overwriting a private attribute of the wrapper.
env = gym.make("LunarLanderContinuous-v2", render_mode="human",
               max_episode_steps=300)
try:
    state, info = env.reset(seed=42)
    for _ in range(5):
        # only the reward and the states of the last episode are kept
        total, state_list = run(params, env, verbose=True)
finally:
    # close the rendering window even if a run fails
    env.close()

In [None]:
def graph(states):
    """Plot the first six state components against the step index.

    Args:
        states (list): recorded states, one per step; each one is flattened
            into a row and its first six entries are drawn as the curves
            labelled x, y, vx, vy, theta and vtheta.
    """
    # one row per step, one column per state component
    series = np.array(states).reshape(len(states), -1)
    labels = ('x', 'y', 'vx', 'vy', 'theta', 'vtheta')
    for column, label in enumerate(labels):
        plt.plot(series[:, column], label=label)
    plt.legend()
    plt.grid()
    plt.ylim(-1.1, 1.1)
    plt.title('PID Control')
    plt.ylabel('Value')
    plt.xlabel('Steps')
    plt.show()

# Plot the states of the most recently recorded episode (state_list is
# overwritten by every call to run in the final-runs loop).
graph(state_list)
    
