In [None]:
import airsim
import torch
import numpy as np

# Evaluate a trained policy in AirSim for a fixed number of episodes and report
# the mean episode reward.
#
# NOTE(review): get_state and get_reward are defined in cells *below* this one,
# so those cells must be run first on a fresh kernel. `episode_complete` and
# `global_path` are not defined in the visible part of this notebook -- confirm
# they are provided elsewhere before running.

NUM_EPISODES = 10
ACTION_DURATION_S = 1  # how long each velocity command is applied


def _state_to_tensor(state):
  """Flatten the kinematic entries of a get_state() dict into a float32 tensor.

  torch.from_numpy only accepts ndarrays, so the state dict cannot be passed
  to it directly.

  NOTE(review): assumes the policy consumes [px, py, pz, vx, vy, vz] and
  ignores the camera image -- TODO confirm against the training code.
  """
  pos, vel = state['position'], state['velocity']
  features = np.array(
      [pos.x_val, pos.y_val, pos.z_val, vel.x_val, vel.y_val, vel.z_val],
      dtype=np.float32,
  )
  return torch.from_numpy(features)


# Load trained PyTorch model.
# NOTE(review): torch.load unpickles arbitrary Python objects; only load
# checkpoints that come from a trusted source.
policy_net = torch.load("drone_policy_model.pth")
policy_net.eval()

# Setup AirSim connection
client = airsim.MultirotorClient()
client.confirmConnection()
client.enableApiControl(True)
client.armDisarm(True)

episode_rewards = []

for episode in range(NUM_EPISODES):

  client.reset()
  # AirSim requires API control and arming to be requested again after reset().
  client.enableApiControl(True)
  client.armDisarm(True)

  episode_reward = 0
  done = False
  time_step = 0

  while not done:

    # Get state from sensors
    state = get_state(client)
    # get_reward reads state['time_step'] for its time penalty.
    state['time_step'] = time_step

    # Query the policy; no gradients are needed during evaluation.
    with torch.no_grad():
      action = policy_net(_state_to_tensor(state))
    # ravel() accepts both (3,) and (1, 3) shaped outputs.
    action = action.detach().cpu().numpy().ravel()

    # Take action. Plain Python floats are passed because the RPC layer may not
    # serialize numpy scalar types; join() blocks until the command finishes.
    vx, vy, vz = (float(v) for v in action[:3])
    client.moveByVelocityAsync(vx, vy, vz, ACTION_DURATION_S).join()

    # Get reward (get_reward's signature is (state, action, client, global_path)).
    reward = get_reward(state, action, client, global_path)
    episode_reward += reward
    time_step += 1

    if episode_complete():
      done = True

  # Log episode reward
  episode_rewards.append(episode_reward)

# Release the vehicle
client.armDisarm(False)
client.enableApiControl(False)
print("Average Reward", np.mean(episode_rewards))

In [None]:
def get_state(client, camera_name="drone_camera", image_type=None):
  """Collect the current observation from the simulator.

  Args:
    client: connected AirSim client exposing simGetImage() and
      getMultirotorState().
    camera_name: name of the camera to capture from.
    image_type: image type passed to simGetImage(); when None,
      airsim.ImageType.Scene is used.

  Returns:
    dict with keys:
      'image'    -- whatever client.simGetImage() returned,
      'position' -- kinematics_estimated.position,
      'velocity' -- kinematics_estimated.linear_velocity.
  """
  if image_type is None:
    # simGetImage requires an image type; default to the scene view.
    image_type = airsim.ImageType.Scene

  # Camera image
  img = client.simGetImage(camera_name, image_type)

  # Query the vehicle state once so position and velocity describe the same
  # simulation instant (and to avoid a second round trip to the simulator).
  kinematics = client.getMultirotorState().kinematics_estimated

  return {
      'image': img,
      'position': kinematics.position,
      'velocity': kinematics.linear_velocity,
  }

In [None]:
# Reward shaping weights used by get_reward.
OBJECT_VISIBLE_THRESHOLD = 0.5
OBJECT_VISIBLE_BONUS = 10
COLLISION_PENALTY = 5
PATH_DEVIATION_WEIGHT = 0.1
TIME_PENALTY_WEIGHT = 0.1
DETECTION_CONFIDENCE_WEIGHT = 2


def get_reward(state, action, client=None, global_path=None):
  """Compute the shaped reward for one step.

  Args:
    state: dict describing the current observation. Optional keys
      'object_detector_output', 'time_step' and 'detection_confidence' are
      treated as 0 when absent (get_state does not populate them);
      'position' is required only when global_path is given.
    action: action taken this step (currently unused by the reward).
    client: optional AirSim client; when given, a collision reported by
      simGetCollisionInfo() is penalised.
    global_path: optional reference path; when given and non-empty, distance
      from the nearest path point is penalised. Uses find_nearest() and
      distance(), which must be defined elsewhere.

  Returns:
    Numeric reward for the step.
  """
  reward = 0

  # Object visible reward
  if state.get('object_detector_output', 0) > OBJECT_VISIBLE_THRESHOLD:
    reward += OBJECT_VISIBLE_BONUS

  # Collision avoidance penalty
  if client is not None and client.simGetCollisionInfo().has_collided:
    reward -= COLLISION_PENALTY

  # Deviation from global path penalty
  if global_path is not None and len(global_path) > 0:
    current_pos = state['position']
    nearest_pt = find_nearest(global_path, current_pos)
    reward -= distance(current_pos, nearest_pt) * PATH_DEVIATION_WEIGHT

  # Time penalty
  reward -= TIME_PENALTY_WEIGHT * state.get('time_step', 0)

  # Object detection confidence reward
  reward += state.get('detection_confidence', 0) * DETECTION_CONFIDENCE_WEIGHT

  return reward

In [None]:
# Action is a velocity command in x, y, z: action = [vx, vy, vz],
# with each component bounded to [-1, 1].
# NOTE(review): `spaces` is not imported in the visible cells -- presumably
# gym.spaces; confirm the import exists before running this cell.
action_space = spaces.Box(
    shape=(3,),
    low=-1,
    high=1,
)