# Example of Running the Simulation

In [1]:
# Uncomment the following line to render plots inline; leave it commented to view the plots in a separate window:
# %matplotlib inline

import numpy as np
import random
import time

from DiscreteRLFlyEnv import DiscreteFlyEnv
from computations import RAD2DEG
from controller import QController

import matplotlib.pyplot as plt
import seaborn as sns
from utils import map_range

Load a Trained Q-Table

In [None]:
# Read the saved Q-table array from disk.
q_table = np.load('train_results/q_table_final.npy')

# Report the table's shape next to the product of its first three axis lengths.
total_entries = q_table.shape[0] * q_table.shape[1] * q_table.shape[2]
print(f'Q Table Shape: {q_table.shape} = {total_entries} entries')

Create an environment with the same configuration that the Q-Table was trained on

In [None]:
# The first two axes of the Q-table give the discretised state grid;
# the third axis gives the number of discrete actions.
state_space = q_table.shape[:2]
n_actions = q_table.shape[2]

# Build the environment so that its state/action discretisation matches the table.
env = DiscreteFlyEnv(
    config_path='physics_engine/config_short.json',
    state_space=state_space,
    action_space=n_actions,
)

# Quick sanity printout of the spaces exposed by the environment.
print("Observation Space", env.observation_space)
print("Sample observation", env.observation_space.sample())  # one randomly drawn observation
print("Action Space Shape", env.action_space.n)
print("Action Space Sample", env.action_space.sample())


Creating the Prerequisites

In [None]:
# Play with various controllers:
# here the controller is built from the Q-table loaded earlier in the notebook.
controller = QController(q_table)

# If you want to randomize the starting pitch and delta pitch, uncomment:
# random_start_pitch = random.uniform(-np.pi, np.pi)
# random_start_delta_pitch = random.uniform(-140, 140)
# env.curr_euler_angles[1] = random_start_pitch
# env.curr_angular_vel[1] = random_start_delta_pitch

# Show the initial state the simulation will start from.
# Index 1 of the Euler angles / angular velocity is the pitch component;
# the angle is multiplied by RAD2DEG for display.
print(f'Start pitch: {env.curr_euler_angles[1] * RAD2DEG}')
print(f'Start delta pitch: {env.curr_angular_vel[1]}')

Run Loop

In [None]:
begin = time.time()  # wall-clock start, used for the elapsed-time report below

# Seed the loop with an all-zeros action shaped like the action space.
action = np.zeros(env.action_space.shape)
i = 0
is_done = False

# Step the environment until it reports completion; after every step the
# controller picks the next action from the latest observation.
while True:
    if i % 20 == 0:
        print(f'Iteration {i}')  # lightweight progress indicator

    obs, reward, is_done, info = env.step(action)
    action = controller.respond(obs)[0]
    i += 1

    if is_done:
        break

elapsed = time.time() - begin
print(f'Total time elapsed before rendering: {elapsed}')

Render the flight. To see the Plotly trajectory (recommended!), open the "plotly_flight.html" file that was generated.

In [None]:
# Choose which views to render: the Plotly 3D view and both Euler-angle
# plots are switched on, every other view is switched off.
render_options = dict(
    x_axis=False,
    x_vs_z=False,
    render_3d=False,
    render_3d_plotly=True,
    render_euler_angles=True,
    render_delta_euler_angles=True,
)
env.render(**render_options)

# Increasing the Resolution of the Q-Table via Interpolation

In [21]:
def double_resolution(q_table):
    """Upsample a Q-table along its first two axes by averaging neighbours.

    Original entries are kept at the even indices of the result. Each new
    odd-indexed entry is the mean of its nearest original neighbours: two
    neighbours when only one of the first two indices is odd, four when both
    are odd. The last axis is carried over unchanged.

    Args:
        q_table: three-dimensional array; the first two axes are the ones
            being refined.

    Returns:
        A new float array of shape
        (2 * q_table.shape[0] - 1, 2 * q_table.shape[1] - 1, q_table.shape[2]).
    """
    n_first, n_second, n_last = q_table.shape[0], q_table.shape[1], q_table.shape[2]
    upsampled = np.zeros((n_first * 2 - 1, n_second * 2 - 1, n_last))

    # Index helpers: positions of original samples vs. positions in between.
    originals = slice(None, None, 2)
    between = slice(1, None, 2)

    # Original samples land on the even/even grid points.
    upsampled[originals, originals, :] = q_table

    # Midpoints along the first axis: mean of vertically adjacent samples.
    upsampled[between, originals, :] = (q_table[:-1, :, :] + q_table[1:, :, :]) / 2

    # Midpoints along the second axis: mean of horizontally adjacent samples.
    upsampled[originals, between, :] = (q_table[:, :-1, :] + q_table[:, 1:, :]) / 2

    # Cell centres: mean of the four surrounding original samples.
    corner_sum = q_table[:-1, :-1, :] + q_table[1:, :-1, :] + q_table[:-1, 1:, :] + q_table[1:, 1:, :]
    upsampled[between, between, :] = corner_sum / 4

    return upsampled

# Build the interpolated copy of the loaded Q-table:
# (2n - 1) entries along each of the first two axes, same last axis.
extended_q_table = double_resolution(q_table)

In [22]:
def q_table_heatmap(q_table, env: DiscreteFlyEnv):
    """Plot, for every state of a Q-table, the action with the highest Q-value.

    The table is reduced over its last axis with ``argmax`` and the resulting
    action indices are passed through ``map_range`` from ``[0, n_actions - 1]``
    to ``env.delta_phi_range`` (presumably a linear rescale — confirm in
    ``utils.map_range``). The result is drawn as a heatmap with the first
    table axis on the x-axis and the second on the y-axis.

    Args:
        q_table: array whose first axis spans ``env.pitch_range``, whose
            second axis spans ``env.delta_pitch_range`` and whose last axis
            indexes the actions.
        env: environment providing ``pitch_range``, ``delta_pitch_range``,
            ``delta_phi_range`` and ``action_space.n``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    n_pitch_buckets, n_delta_pitch_buckets = q_table.shape[0], q_table.shape[1]

    # Lower edge of every bucket, used as tick labels.
    # linspace(..., endpoint=False) always yields exactly one label per
    # bucket; arange with a non-integer step can yield one label too many
    # because of floating-point round-off.
    pitch_labels = np.round(
        np.linspace(env.pitch_range[0], env.pitch_range[1], n_pitch_buckets, endpoint=False) * RAD2DEG,
        2,
    )
    delta_pitch_labels = np.round(
        np.linspace(env.delta_pitch_range[0], env.delta_pitch_range[1], n_delta_pitch_buckets, endpoint=False),
        2,
    )

    # Best action index per state; transposed so rows are delta-pitch buckets
    # (y-axis) and columns are pitch buckets (x-axis).
    best_action = np.argmax(q_table, axis=-1).T
    best_delta_phi = map_range(
        best_action, 0, env.action_space.n - 1, env.delta_phi_range[0], env.delta_phi_range[1]
    )

    # Draw a single heatmap on an explicit axes object.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        best_delta_phi,
        xticklabels=pitch_labels,
        yticklabels=delta_pitch_labels,
        annot=False,
        cmap="coolwarm",
        ax=ax,
    )
    ax.set_title('Heatmap of Q-table Aggregated Across Actions')
    ax.set_xlabel('Angles')
    ax.set_ylabel('Delta pitch')  # previously the placeholder label 'Y-axis'
    plt.show()

q_table_heatmap(extended_q_table, env)

In [None]:
# Rebuild the environment so its discretisation matches the interpolated table:
# the first two axes give the state grid, the third the number of actions.
state_space = extended_q_table.shape[:2]
n_actions = extended_q_table.shape[2]
# NOTE(review): this loads 'config.json', whereas the environment for the
# original table was built from 'physics_engine/config_short.json' —
# confirm the difference is intentional.
env = DiscreteFlyEnv(config_path='config.json', state_space=state_space, action_space=n_actions)

# Play with various controllers:
# here the controller is built from the interpolated Q-table.
controller = QController(extended_q_table)

# If you want to randomize the starting pitch and delta pitch, uncomment:
# random_start_pitch = random.uniform(-np.pi, np.pi)
# random_start_delta_pitch = random.uniform(-140, 140)
# env.curr_euler_angles[1] = random_start_pitch
# env.curr_angular_vel[1] = random_start_delta_pitch

# Show the initial state the simulation will start from.
# Index 1 of the Euler angles / angular velocity is the pitch component;
# the angle is multiplied by RAD2DEG for display.
print(f'Start pitch: {env.curr_euler_angles[1] * RAD2DEG}')
print(f'Start delta pitch: {env.curr_angular_vel[1]}')




In [None]:
begin = time.time()  # wall-clock start, used for the elapsed-time report below

# Seed the loop with an all-zeros action shaped like the action space.
action = np.zeros(env.action_space.shape)
i = 0
is_done = False

# Step the environment until it reports completion; after every step the
# controller picks the next action from the latest observation.
while True:
    if i % 20 == 0:
        print(f'Iteration {i}')  # lightweight progress indicator

    obs, reward, is_done, info = env.step(action)
    action = controller.respond(obs)[0]
    i += 1

    if is_done:
        break

elapsed = time.time() - begin
print(f'Total time elapsed before rendering: {elapsed}')

In [None]:
# Choose which views to render: the Plotly 3D view and both Euler-angle
# plots are switched on, every other view is switched off.
render_options = dict(
    x_axis=False,
    x_vs_z=False,
    render_3d=False,
    render_3d_plotly=True,
    render_euler_angles=True,
    render_delta_euler_angles=True,
)
env.render(**render_options)