Skip to content

Commit

Permalink
Performance audit (intentionally duplicates #6829) (#6847)
Browse files Browse the repository at this point in the history
* UAVCAN ESC output: removing ESC output channels from published message that are always zero. This allows the UAVCAN stack to always transfer only the minimum number of output values, avoiding redundant zeroes and the associated increase in bus load and CPU time

* Added a separate mixer file for CAN quadrotor

* Sampling profiler improvements

* PMSP: Output more endpoints

* Matrix update

* libc usage workaround

* Removed UAVCAN perfcounters

* Matrix submodule update
  • Loading branch information
pavel-kirienko authored and David Sidrane committed Mar 18, 2017
1 parent c20b85e commit 2b2c307
Show file tree
Hide file tree
Showing 9 changed files with 40 additions and 36 deletions.
18 changes: 14 additions & 4 deletions Debug/poor-mans-profiler.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
#!/bin/bash
#
# Author: Pavel Kirienko <pavel.kirienko@zubax.com>
#
# Poor man's sampling profiler for NuttX.
#
# Usage: Install flamegraph.pl in your PATH, configure your .gdbinit, run the script with proper arguments and go
# have a coffee. When you're back, you'll see the flamegraph. Note that frequent calls to GDB significantly
# interfere with normal operation of the target, which means that you can't profile real-time tasks with it.
# For best results, ensure that the PC is not overloaded, the USB host controller to which the debugger is
# connected is not congested. You should also allow the current user to set negative nice values.
#
# The FlameGraph script can be downloaded from https://github.com/brendangregg/FlameGraph. Thanks Mr. Gregg.
#
# Requirements: ARM GDB with Python support
# Requirements: ARM GDB with Python support. You can get one by downloading the sources from
# https://launchpad.net/gcc-arm-embedded and building them with correct flags.
# Note that Python support is not required if no per-task sampling is needed.
#

set -e
Expand All @@ -33,7 +41,7 @@ which flamegraph.pl > /dev/null || die "Install flamegraph.pl first"
nsamples=0
sleeptime=0.1 # Doctors recommend 7-8 hours a day
taskname=
elf=$root/Build/px4fmu-v2_default.build/firmware.elf
elf=
append=0
fgfontsize=10
fgwidth=1900
Expand Down Expand Up @@ -69,6 +77,8 @@ do
shift
done

[[ -z "$elf" ]] && die "Please specify the ELF file location, e.g.: build_px4fmu-v4_default/src/firmware/nuttx/firmware_nuttx"

#
# Temporary files
#
Expand Down Expand Up @@ -237,8 +247,8 @@ for s, f in sorted(stacks.items(), key=lambda (s, f): s):
print('Total stack frames:', num_stack_frames, file=sys.stderr)
print('Top consumers (distribution of the stack tops):', file=sys.stderr)
for name,num in sorted(stack_tops.items(), key=lambda (name, num): num, reverse=True)[:10]:
print('% 5.1f%% ' % (100 * num / num_stack_frames), name, file=sys.stderr)
for name,num in sorted(stack_tops.items(), key=lambda (name, num): num, reverse=True)[:300]:
print('% 7.3f%% ' % (100 * num / num_stack_frames), name, file=sys.stderr)
EOF

cat $stacksfile | python /tmp/pmpn-folder.py > $foldfile
Expand Down
1 change: 1 addition & 0 deletions ROMFS/px4fmu_common/init.d/4012_quad_x_can
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ then
param set MC_YAWRATE_D 0.0
fi

set MIXER quad_x_can
set OUTPUT_MODE uavcan_esc
1 change: 1 addition & 0 deletions ROMFS/px4fmu_common/mixers/quad_x_can.main.mix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
R: 4x 10000 10000 10000 0
2 changes: 1 addition & 1 deletion src/lib/matrix
19 changes: 19 additions & 0 deletions src/modules/uavcan/actuators/esc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,25 @@ void UavcanEscController::update_outputs(float *outputs, unsigned num_outputs)
}
}

/*
* Remove channels that are always zero.
* The objective of this optimization is to avoid broadcasting multi-frame transfers when a single frame
* transfer would be enough. This is a valid optimization as the UAVCAN specification implies that all
* non-specified ESC setpoints should be considered zero.
* The positive outcome is a (marginally) lower bus traffic and lower CPU load.
*
* From the standpoint of the PX4 architecture, however, this is a hack. It should be investigated why
* the mixer returns more outputs than are actually used.
*/
for (int index = int(msg.cmd.size()) - 1; index >= _max_number_of_nonzero_outputs; index--) {
if (msg.cmd[index] != 0) {
_max_number_of_nonzero_outputs = index + 1;
break;
}
}

msg.cmd.resize(_max_number_of_nonzero_outputs);

/*
* Publish the command message to the bus
* Note that for a quadrotor it takes one CAN frame
Expand Down
1 change: 1 addition & 0 deletions src/modules/uavcan/actuators/esc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class UavcanEscController
* ESC states
*/
uint32_t _armed_mask = 0;
uint8_t _max_number_of_nonzero_outputs = 0;

/*
* Perf counters
Expand Down
24 changes: 0 additions & 24 deletions src/modules/uavcan/uavcan_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,18 +110,6 @@ UavcanNode::UavcanNode(uavcan::ICanDriver &can_driver, uavcan::ISystemClock &sys
}
/* _server_command_sem use case is a signal */
px4_sem_setprotocol(&_server_command_sem, SEM_PRIO_NONE);

if (_perfcnt_node_spin_elapsed == nullptr) {
errx(1, "uavcan: couldn't allocate _perfcnt_node_spin_elapsed");
}

if (_perfcnt_esc_mixer_output_elapsed == nullptr) {
errx(1, "uavcan: couldn't allocate _perfcnt_esc_mixer_output_elapsed");
}

if (_perfcnt_esc_mixer_total_elapsed == nullptr) {
errx(1, "uavcan: couldn't allocate _perfcnt_esc_mixer_total_elapsed");
}
}

UavcanNode::~UavcanNode()
Expand Down Expand Up @@ -164,9 +152,6 @@ UavcanNode::~UavcanNode()

_instance = nullptr;

perf_free(_perfcnt_node_spin_elapsed);
perf_free(_perfcnt_esc_mixer_output_elapsed);
perf_free(_perfcnt_esc_mixer_total_elapsed);
pthread_mutex_destroy(&_node_mutex);
px4_sem_destroy(&_server_command_sem);

Expand Down Expand Up @@ -697,7 +682,6 @@ int UavcanNode::init(uavcan::NodeID node_id)

void UavcanNode::node_spin_once()
{
perf_begin(_perfcnt_node_spin_elapsed);
const int spin_res = _node.spinOnce();

if (spin_res < 0) {
Expand All @@ -708,8 +692,6 @@ void UavcanNode::node_spin_once()
if (_tx_injector != nullptr) {
_tx_injector->injectTxFramesInto(_node);
}

perf_end(_perfcnt_node_spin_elapsed);
}

/*
Expand Down Expand Up @@ -868,12 +850,8 @@ int UavcanNode::run()
// Mutex is unlocked while the thread is blocked on IO multiplexing
(void)pthread_mutex_unlock(&_node_mutex);

perf_end(_perfcnt_esc_mixer_total_elapsed); // end goes first, it's not a mistake

const int poll_ret = ::poll(_poll_fds, _poll_fds_num, PollTimeoutMs);

perf_begin(_perfcnt_esc_mixer_total_elapsed);

(void)pthread_mutex_lock(&_node_mutex);

node_spin_once(); // Non-blocking
Expand Down Expand Up @@ -965,9 +943,7 @@ int UavcanNode::run()

// Output to the bus
_outputs.timestamp = hrt_absolute_time();
perf_begin(_perfcnt_esc_mixer_output_elapsed);
_esc_controller.update_outputs(_outputs.output, _outputs.noutputs);
perf_end(_perfcnt_esc_mixer_output_elapsed);
}


Expand Down
4 changes: 0 additions & 4 deletions src/modules/uavcan/uavcan_main.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,10 +209,6 @@ class UavcanNode : public device::CDev
// index into _poll_fds for each _control_subs handle
uint8_t _poll_ids[NUM_ACTUATOR_CONTROL_GROUPS_UAVCAN];

perf_counter_t _perfcnt_node_spin_elapsed = perf_alloc(PC_ELAPSED, "uavcan_node_spin_elapsed");
perf_counter_t _perfcnt_esc_mixer_output_elapsed = perf_alloc(PC_ELAPSED, "uavcan_esc_mixer_output_elapsed");
perf_counter_t _perfcnt_esc_mixer_total_elapsed = perf_alloc(PC_ELAPSED, "uavcan_esc_mixer_total_elapsed");

void handle_time_sync(const uavcan::TimerEvent &);

typedef uavcan::MethodBinder<UavcanNode *, void (UavcanNode::*)(const uavcan::TimerEvent &)> TimerCallback;
Expand Down
6 changes: 3 additions & 3 deletions src/systemcmds/tests/test_matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,9 +317,9 @@ bool MatrixTest::filterTests()

bool MatrixTest::helperTests()
{
ut_test(fabs(wrap_pi(4.0) - (4.0 - 2 * M_PI)) < 1e-5);
ut_test(fabs(wrap_pi(-4.0) - (-4.0 + 2 * M_PI)) < 1e-5);
ut_test(fabs(wrap_pi(3.0) - (3.0)) < 1e-3);
ut_test(::fabs(wrap_pi(4.0) - (4.0 - 2 * M_PI)) < 1e-5);
ut_test(::fabs(wrap_pi(-4.0) - (-4.0 + 2 * M_PI)) < 1e-5);
ut_test(::fabs(wrap_pi(3.0) - (3.0)) < 1e-3);
wrap_pi(NAN);

Vector3f a(1, 2, 3);
Expand Down

0 comments on commit 2b2c307

Please sign in to comment.