diff --git a/Debug/poor-mans-profiler.sh b/Debug/poor-mans-profiler.sh index 4b03e5631004..ab06a1b66ab2 100755 --- a/Debug/poor-mans-profiler.sh +++ b/Debug/poor-mans-profiler.sh @@ -1,20 +1,12 @@ #!/bin/bash # -# Author: Pavel Kirienko -# # Poor man's sampling profiler for NuttX. # # Usage: Install flamegraph.pl in your PATH, configure your .gdbinit, run the script with proper arguments and go # have a coffee. When you're back, you'll see the flamegraph. Note that frequent calls to GDB significantly # interfere with normal operation of the target, which means that you can't profile real-time tasks with it. -# For best results, ensure that the PC is not overloaded, the USB host controller to which the debugger is -# connected is not congested. You should also allow the current user to set negative nice values. -# -# The FlameGraph script can be downloaded from https://github.com/brendangregg/FlameGraph. Thanks Mr. Gregg. # -# Requirements: ARM GDB with Python support. You can get one by downloading the sources from -# https://launchpad.net/gcc-arm-embedded and building them with correct flags. -# Note that Python support is not required if no per-task sampling is needed. 
+# Requirements: ARM GDB with Python support # set -e @@ -41,7 +33,7 @@ which flamegraph.pl > /dev/null || die "Install flamegraph.pl first" nsamples=0 sleeptime=0.1 # Doctors recommend 7-8 hours a day taskname= -elf= +elf=$root/Build/px4fmu-v2_default.build/firmware.elf append=0 fgfontsize=10 fgwidth=1900 @@ -77,8 +69,6 @@ do shift done -[[ -z "$elf" ]] && die "Please specify the ELF file location, e.g.: build_px4fmu-v4_default/src/firmware/nuttx/firmware_nuttx" - # # Temporary files # @@ -247,8 +237,8 @@ for s, f in sorted(stacks.items(), key=lambda (s, f): s): print('Total stack frames:', num_stack_frames, file=sys.stderr) print('Top consumers (distribution of the stack tops):', file=sys.stderr) -for name,num in sorted(stack_tops.items(), key=lambda (name, num): num, reverse=True)[:300]: - print('% 7.3f%% ' % (100 * num / num_stack_frames), name, file=sys.stderr) +for name,num in sorted(stack_tops.items(), key=lambda (name, num): num, reverse=True)[:10]: + print('% 5.1f%% ' % (100 * num / num_stack_frames), name, file=sys.stderr) EOF cat $stacksfile | python /tmp/pmpn-folder.py > $foldfile diff --git a/ROMFS/px4fmu_common/init.d/4012_quad_x_can b/ROMFS/px4fmu_common/init.d/4012_quad_x_can index 01f90b4704a8..f6246fef8e75 100644 --- a/ROMFS/px4fmu_common/init.d/4012_quad_x_can +++ b/ROMFS/px4fmu_common/init.d/4012_quad_x_can @@ -25,5 +25,4 @@ then param set MC_YAWRATE_D 0.0 fi -set MIXER quad_x_can set OUTPUT_MODE uavcan_esc diff --git a/ROMFS/px4fmu_common/mixers/quad_x_can.main.mix b/ROMFS/px4fmu_common/mixers/quad_x_can.main.mix deleted file mode 100644 index d6a29801481c..000000000000 --- a/ROMFS/px4fmu_common/mixers/quad_x_can.main.mix +++ /dev/null @@ -1 +0,0 @@ -R: 4x 10000 10000 10000 0 diff --git a/src/lib/matrix b/src/lib/matrix index 8ef252767691..cf924956d7d6 160000 --- a/src/lib/matrix +++ b/src/lib/matrix @@ -1 +1 @@ -Subproject commit 8ef25276769127595c8cf27c2ee026a2954173cc +Subproject commit cf924956d7d62ce18bfc4f8441e9177ddb69c0dc diff --git 
a/src/modules/uavcan/actuators/esc.cpp b/src/modules/uavcan/actuators/esc.cpp index 92d12ba32ca1..d24ad9a1faa3 100644 --- a/src/modules/uavcan/actuators/esc.cpp +++ b/src/modules/uavcan/actuators/esc.cpp @@ -136,25 +136,6 @@ void UavcanEscController::update_outputs(float *outputs, unsigned num_outputs) } } - /* - * Remove channels that are always zero. - * The objective of this optimization is to avoid broadcasting multi-frame transfers when a single frame - * transfer would be enough. This is a valid optimization as the UAVCAN specification implies that all - * non-specified ESC setpoints should be considered zero. - * The positive outcome is a (marginally) lower bus traffic and lower CPU load. - * - * From the standpoint of the PX4 architecture, however, this is a hack. It should be investigated why - * the mixer returns more outputs than are actually used. - */ - for (int index = int(msg.cmd.size()) - 1; index >= _max_number_of_nonzero_outputs; index--) { - if (msg.cmd[index] != 0) { - _max_number_of_nonzero_outputs = index + 1; - break; - } - } - - msg.cmd.resize(_max_number_of_nonzero_outputs); - /* * Publish the command message to the bus * Note that for a quadrotor it takes one CAN frame diff --git a/src/modules/uavcan/actuators/esc.hpp b/src/modules/uavcan/actuators/esc.hpp index ced372f7c684..40b151e30825 100644 --- a/src/modules/uavcan/actuators/esc.hpp +++ b/src/modules/uavcan/actuators/esc.hpp @@ -107,7 +107,6 @@ class UavcanEscController * ESC states */ uint32_t _armed_mask = 0; - uint8_t _max_number_of_nonzero_outputs = 0; /* * Perf counters diff --git a/src/modules/uavcan/uavcan_main.cpp b/src/modules/uavcan/uavcan_main.cpp index e6bbc0d07827..204d8c79a3c3 100644 --- a/src/modules/uavcan/uavcan_main.cpp +++ b/src/modules/uavcan/uavcan_main.cpp @@ -110,6 +110,18 @@ UavcanNode::UavcanNode(uavcan::ICanDriver &can_driver, uavcan::ISystemClock &sys } /* _server_command_sem use case is a signal */ px4_sem_setprotocol(&_server_command_sem, SEM_PRIO_NONE); + 
+ if (_perfcnt_node_spin_elapsed == nullptr) { + errx(1, "uavcan: couldn't allocate _perfcnt_node_spin_elapsed"); + } + + if (_perfcnt_esc_mixer_output_elapsed == nullptr) { + errx(1, "uavcan: couldn't allocate _perfcnt_esc_mixer_output_elapsed"); + } + + if (_perfcnt_esc_mixer_total_elapsed == nullptr) { + errx(1, "uavcan: couldn't allocate _perfcnt_esc_mixer_total_elapsed"); + } } UavcanNode::~UavcanNode() @@ -152,6 +164,9 @@ UavcanNode::~UavcanNode() _instance = nullptr; + perf_free(_perfcnt_node_spin_elapsed); + perf_free(_perfcnt_esc_mixer_output_elapsed); + perf_free(_perfcnt_esc_mixer_total_elapsed); pthread_mutex_destroy(&_node_mutex); px4_sem_destroy(&_server_command_sem); @@ -682,6 +697,7 @@ int UavcanNode::init(uavcan::NodeID node_id) void UavcanNode::node_spin_once() { + perf_begin(_perfcnt_node_spin_elapsed); const int spin_res = _node.spinOnce(); if (spin_res < 0) { @@ -692,6 +708,8 @@ void UavcanNode::node_spin_once() if (_tx_injector != nullptr) { _tx_injector->injectTxFramesInto(_node); } + + perf_end(_perfcnt_node_spin_elapsed); } /* @@ -850,8 +868,12 @@ int UavcanNode::run() // Mutex is unlocked while the thread is blocked on IO multiplexing (void)pthread_mutex_unlock(&_node_mutex); + perf_end(_perfcnt_esc_mixer_total_elapsed); // perf_end intentionally precedes perf_begin: time blocked in poll() is excluded from this counter + const int poll_ret = ::poll(_poll_fds, _poll_fds_num, PollTimeoutMs); + perf_begin(_perfcnt_esc_mixer_total_elapsed); + (void)pthread_mutex_lock(&_node_mutex); node_spin_once(); // Non-blocking @@ -943,7 +965,9 @@ int UavcanNode::run() // Output to the bus _outputs.timestamp = hrt_absolute_time(); + perf_begin(_perfcnt_esc_mixer_output_elapsed); _esc_controller.update_outputs(_outputs.output, _outputs.noutputs); + perf_end(_perfcnt_esc_mixer_output_elapsed); } diff --git a/src/modules/uavcan/uavcan_main.hpp b/src/modules/uavcan/uavcan_main.hpp index f84dff1630c9..b962333a0608 100644 --- a/src/modules/uavcan/uavcan_main.hpp +++ b/src/modules/uavcan/uavcan_main.hpp @@ -209,6 +209,10 @@ 
class UavcanNode : public device::CDev // index into _poll_fds for each _control_subs handle uint8_t _poll_ids[NUM_ACTUATOR_CONTROL_GROUPS_UAVCAN]; + perf_counter_t _perfcnt_node_spin_elapsed = perf_alloc(PC_ELAPSED, "uavcan_node_spin_elapsed"); + perf_counter_t _perfcnt_esc_mixer_output_elapsed = perf_alloc(PC_ELAPSED, "uavcan_esc_mixer_output_elapsed"); + perf_counter_t _perfcnt_esc_mixer_total_elapsed = perf_alloc(PC_ELAPSED, "uavcan_esc_mixer_total_elapsed"); + void handle_time_sync(const uavcan::TimerEvent &); typedef uavcan::MethodBinder TimerCallback; diff --git a/src/systemcmds/tests/test_matrix.cpp b/src/systemcmds/tests/test_matrix.cpp index 029f36ceb54f..d2e574810f71 100644 --- a/src/systemcmds/tests/test_matrix.cpp +++ b/src/systemcmds/tests/test_matrix.cpp @@ -317,9 +317,9 @@ bool MatrixTest::filterTests() bool MatrixTest::helperTests() { - ut_test(::fabs(wrap_pi(4.0) - (4.0 - 2 * M_PI)) < 1e-5); - ut_test(::fabs(wrap_pi(-4.0) - (-4.0 + 2 * M_PI)) < 1e-5); - ut_test(::fabs(wrap_pi(3.0) - (3.0)) < 1e-3); + ut_test(fabs(wrap_pi(4.0) - (4.0 - 2 * M_PI)) < 1e-5); + ut_test(fabs(wrap_pi(-4.0) - (-4.0 + 2 * M_PI)) < 1e-5); + ut_test(fabs(wrap_pi(3.0) - (3.0)) < 1e-3); wrap_pi(NAN); Vector3f a(1, 2, 3);