Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CAN: Fixed watchdog due to CAN stack overflow #15921

Merged
merged 13 commits into from
Dec 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion Tools/AP_Periph/AP_Periph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,28 @@ static void update_rainbow()
#endif


#if CONFIG_HAL_BOARD == HAL_BOARD_CHIBIOS && CH_DBG_ENABLE_STACK_CHECK == TRUE
void AP_Periph_FW::show_stack_free()
{
const uint32_t isr_stack_size = uint32_t((const uint8_t *)&__main_stack_end__ - (const uint8_t *)&__main_stack_base__);
can_printf("ISR %u/%u", stack_free(&__main_stack_base__), isr_stack_size);

for (thread_t *tp = chRegFirstThread(); tp; tp = chRegNextThread(tp)) {
uint32_t total_stack;
if (tp->wabase == (void*)&__main_thread_stack_base__) {
// main thread has its stack separated from the thread context
total_stack = uint32_t((const uint8_t *)&__main_thread_stack_end__ - (const uint8_t *)&__main_thread_stack_base__);
} else {
// all other threads have their thread context pointer
// above the stack top
total_stack = uint32_t(tp) - uint32_t(tp->wabase);
}
can_printf("%s STACK=%u/%u\n", tp->name, stack_free(tp->wabase), total_stack);
}
}
#endif



void AP_Periph_FW::update()
{
Expand All @@ -242,13 +264,28 @@ void AP_Periph_FW::update()
hal.uartA->printf("RNG %u %ucm\n", rangefinder.num_sensors(), rangefinder.distance_cm_orient(ROTATION_NONE));
#endif
hal.scheduler->delay(1);
show_stack_usage();
#endif
#ifdef HAL_PERIPH_NEOPIXEL_COUNT
hal.rcout->set_serial_led_num_LEDs(HAL_PERIPH_NEOPIXEL_CHAN, HAL_PERIPH_NEOPIXEL_COUNT, AP_HAL::RCOutput::MODE_NEOPIXEL);
#endif
}

static uint32_t last_error_ms;
const auto &ierr = AP::internalerror();
if (now - last_error_ms > 5000 && ierr.errors()) {
// display internal errors as DEBUG every 5s
last_error_ms = now;
can_printf("IERR 0x%x %u", ierr.errors(), ierr.last_error_line());
}

#if CONFIG_HAL_BOARD == HAL_BOARD_CHIBIOS && CH_DBG_ENABLE_STACK_CHECK == TRUE
static uint32_t last_debug_ms;
if (g.debug==1 && now - last_debug_ms > 5000) {
last_debug_ms = now;
show_stack_free();
}
#endif

#ifdef HAL_PERIPH_ENABLE_BATTERY
if (now - battery.last_read_ms >= 100) {
// update battery at 10Hz
Expand Down
3 changes: 3 additions & 0 deletions Tools/AP_Periph/AP_Periph.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ class AP_Periph_FW {
uint32_t last_gps_update_ms;
uint32_t last_baro_update_ms;
uint32_t last_airspeed_update_ms;

// show stack as DEBUG msgs
void show_stack_free();
};

extern AP_Periph_FW periph;
Expand Down
6 changes: 4 additions & 2 deletions Tools/AP_Periph/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ const AP_Param::Info AP_Periph_FW::var_info[] = {
// trigger bootloader flash
GSCALAR(flash_bootloader, "FLASH_BOOTLOADER", 0),
#endif


GSCALAR(debug, "DEBUG", 0),

#ifdef HAL_PERIPH_ENABLE_BUZZER
GSCALAR(buzz_volume, "BUZZER_VOLUME", 100),
#endif
Expand Down Expand Up @@ -104,7 +106,7 @@ const AP_Param::Info AP_Periph_FW::var_info[] = {
#ifdef HAL_PERIPH_ENABLE_HWESC
GSCALAR(esc_number, "ESC_NUMBER", 0),
#endif

AP_VAREND
};

Expand Down
5 changes: 4 additions & 1 deletion Tools/AP_Periph/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class Parameters {
k_param_baro_enable,
k_param_esc_number,
k_param_battery,
k_param_debug,
};

AP_Int16 format_version;
Expand Down Expand Up @@ -63,7 +64,9 @@ class Parameters {
#ifdef HAL_PERIPH_ENABLE_HWESC
AP_Int8 esc_number;
#endif


AP_Int8 debug;

Parameters() {}
};

Expand Down
18 changes: 18 additions & 0 deletions Tools/AP_Periph/can.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,23 @@ static void process1HzTasks(uint64_t timestamp_usec)

#if !defined(HAL_NO_FLASH_SUPPORT) && !defined(HAL_NO_ROMFS_SUPPORT)
if (periph.g.flash_bootloader.get()) {
const uint8_t flash_bl = periph.g.flash_bootloader.get();
periph.g.flash_bootloader.set_and_save_ifchanged(0);
if (flash_bl == 42) {
tridge marked this conversation as resolved.
Show resolved Hide resolved
// magic developer value to test watchdog support with main loop lockup
while (true) {
can_printf("entering lockup\n");
hal.scheduler->delay(100);
}
}
if (flash_bl == 43) {
// magic developer value to test watchdog support with hard fault
can_printf("entering fault\n");
void *foo = (void*)0xE000ED38;
tridge marked this conversation as resolved.
Show resolved Hide resolved
typedef void (*fptr)();
fptr gptr = (fptr) (void *) foo;
gptr();
}
hal.scheduler->delay(1000);
AP_HAL::Util::FlashBootloader res = hal.util->flash_bootloader();
switch (res) {
Expand Down Expand Up @@ -1044,6 +1060,8 @@ void AP_Periph_FW::can_start()
PreferredNodeID = g.can_node;
}

periph.g.flash_bootloader.set_and_save_ifchanged(0);

can_iface.init(1000000, AP_HAL::CANIface::NormalMode);

canardInit(&canard, (uint8_t *)canard_memory_pool, sizeof(canard_memory_pool),
Expand Down
8 changes: 7 additions & 1 deletion libraries/AP_CANManager/AP_CANManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class AP_CANManager
return _singleton;
}

enum LogLevel {
enum LogLevel : uint8_t {
LOG_NONE,
LOG_ERROR,
LOG_WARNING,
Expand Down Expand Up @@ -76,6 +76,12 @@ class AP_CANManager
return nullptr;
}

// returns current log level
LogLevel get_log_level(void) const
{
return LogLevel(_loglevel.get());
}

// Method to log status and debug information for review while debugging
void log_text(AP_CANManager::LogLevel loglevel, const char *tag, const char *fmt, ...);

Expand Down
5 changes: 5 additions & 0 deletions libraries/AP_HAL_ChibiOS/CANIface.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ class ChibiOS::CANIface : public AP_HAL::CANIface

ChibiOS::bxcan::CanType* can_;

// state for ISR RX handler. We put this in the class to avoid
// having to expand the stack size for all threads
AP_HAL::CANFrame isr_rx_frame;
tridge marked this conversation as resolved.
Show resolved Hide resolved
CanRxItem isr_rx_item;

CanRxItem rx_buffer[HAL_CAN_RX_QUEUE_SIZE];
ByteBuffer rx_bytebuffer_;
ObjectBuffer<CanRxItem> rx_queue_;
Expand Down
4 changes: 2 additions & 2 deletions libraries/AP_HAL_ChibiOS/CanIface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ void CANIface::handleRxInterrupt(uint8_t fifo_index, uint64_t timestamp_us)
/*
* Read the frame contents
*/
AP_HAL::CANFrame frame;
AP_HAL::CANFrame &frame = isr_rx_frame;
const bxcan::RxMailboxType& rf = can_->RxMailbox[fifo_index];

if ((rf.RIR & bxcan::RIR_IDE) == 0) {
Expand Down Expand Up @@ -549,7 +549,7 @@ void CANIface::handleRxInterrupt(uint8_t fifo_index, uint64_t timestamp_us)
/*
* Store with timeout into the FIFO buffer and signal update event
*/
CanRxItem rx_item;
CanRxItem &rx_item = isr_rx_item;
rx_item.frame = frame;
rx_item.timestamp_us = timestamp_us;
rx_item.flags = 0;
Expand Down
2 changes: 0 additions & 2 deletions libraries/AP_HAL_ChibiOS/HAL_ChibiOS_Class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,9 @@ static void main_loop()
stm32_watchdog_init();
}

#ifndef HAL_NO_LOGGING
if (hal.util->was_watchdog_reset()) {
INTERNAL_ERROR(AP_InternalError::error_t::watchdog_reset);
}
#endif // HAL_NO_LOGGING
#endif // IOMCU_FW

schedulerInstance.watchdog_pat();
Expand Down
49 changes: 48 additions & 1 deletion libraries/AP_HAL_ChibiOS/Scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ void Scheduler::_monitor_thread(void *arg)
if (loop_delay >= 200) {
// the main loop has been stuck for at least
// 200ms. Starting logging the main loop state
#ifndef HAL_NO_LOGGING
const AP_HAL::Util::PersistentData &pd = hal.util->persistent_data;
if (AP_Logger::get_singleton()) {
AP::logger().Write("MON", "TimeUS,LDelay,Task,IErr,IErrCnt,IErrLn,MavMsg,MavCmd,SemLine,SPICnt,I2CCnt", "QIbIHHHHHII",
Expand All @@ -390,6 +391,7 @@ void Scheduler::_monitor_thread(void *arg)
pd.spi_count,
pd.i2c_count);
}
#endif
}
if (loop_delay >= 500) {
// at 500ms we declare an internal error
Expand Down Expand Up @@ -471,23 +473,35 @@ void Scheduler::_io_thread(void* arg)
}
#ifndef HAL_NO_LOGGING
uint32_t last_sd_start_ms = AP_HAL::millis();
#endif
#if CH_DBG_ENABLE_STACK_CHECK == TRUE
uint32_t last_stack_check_ms = 0;
#endif
while (true) {
sched->delay_microseconds(1000);

// run registered IO processes
sched->_run_io();

#if !defined(HAL_NO_LOGGING) || CH_DBG_ENABLE_STACK_CHECK == TRUE
uint32_t now = AP_HAL::millis();
#endif

#ifndef HAL_NO_LOGGING
if (!hal.util->get_soft_armed()) {
// if sdcard hasn't mounted then retry it every 3s in the IO
// thread when disarmed
uint32_t now = AP_HAL::millis();
if (now - last_sd_start_ms > 3000) {
last_sd_start_ms = now;
AP::FS().retry_mount();
}
}
#endif
#if CH_DBG_ENABLE_STACK_CHECK == TRUE
if (now - last_stack_check_ms > 1000) {
last_stack_check_ms = now;
sched->check_stack_free();
}
#endif
}
}
Expand Down Expand Up @@ -691,4 +705,37 @@ void Scheduler::watchdog_pat(void)
last_watchdog_pat_ms = AP_HAL::millis();
}

#if CH_DBG_ENABLE_STACK_CHECK == TRUE
/*
check we have enough stack free on all threads and the ISR stack
*/
void Scheduler::check_stack_free(void)
{
// we raise an internal error stack_overflow when the available
// stack on any thread or the ISR stack drops below this
// threshold. This means we get an overflow error when we haven't
// yet completely run out of stack. This gives us a good
// pre-warning when we are getting too close
#if defined(STM32F1)
const uint32_t min_stack = 32;
#else
const uint32_t min_stack = 64;
#endif

if (stack_free(&__main_stack_base__) < min_stack) {
// use "line number" of 0xFFFF for ISR stack low
AP::internalerror().error(AP_InternalError::error_t::stack_overflow, 0xFFFF);
}

for (thread_t *tp = chRegFirstThread(); tp; tp = chRegNextThread(tp)) {
if (stack_free(tp->wabase) < min_stack) {
// use task priority for line number. This allows us to
// identify the task fairly reliably
AP::internalerror().error(AP_InternalError::error_t::stack_overflow, tp->prio);
}
}
}
#endif // CH_DBG_ENABLE_STACK_CHECK == TRUE


#endif // CH_CFG_USE_DYNAMIC
3 changes: 3 additions & 0 deletions libraries/AP_HAL_ChibiOS/Scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,5 +176,8 @@ class ChibiOS::Scheduler : public AP_HAL::Scheduler {
#if defined STM32H7
void check_low_memory_is_zero();
#endif

// check for free stack space
void check_stack_free(void);
};
#endif
41 changes: 23 additions & 18 deletions libraries/AP_HAL_ChibiOS/Util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,36 +300,41 @@ bool Util::was_watchdog_reset() const
*/
size_t Util::thread_info(char *buf, size_t bufsize)
{
thread_t *tp;
size_t total = 0;

// a header to allow for machine parsers to determine format
int n = snprintf(buf, bufsize, "ThreadsV1\n");
const uint32_t isr_stack_size = uint32_t((const uint8_t *)&__main_stack_end__ - (const uint8_t *)&__main_stack_base__);
int n = snprintf(buf, bufsize, "ThreadsV2\nISR PRI=255 sp=%p STACK=%u/%u\n",
&__main_stack_base__, stack_free(&__main_stack_base__), isr_stack_size);
if (n <= 0) {
return 0;
}
buf += n;
bufsize -= n;
total += n;

tp = chRegFirstThread();

do {
uint32_t stklimit = (uint32_t)tp->wabase;
uint8_t *p = (uint8_t *)tp->wabase;
while (*p == CH_DBG_STACK_FILL_VALUE) {
p++;
for (thread_t *tp = chRegFirstThread(); tp; tp = chRegNextThread(tp)) {
uint32_t total_stack;
if (tp->wabase == (void*)&__main_thread_stack_base__) {
// main thread has its stack separated from the thread context
total_stack = uint32_t((const uint8_t *)&__main_thread_stack_end__ - (const uint8_t *)&__main_thread_stack_base__);
} else {
// all other threads have their thread context pointer
// above the stack top
total_stack = uint32_t(tp) - uint32_t(tp->wabase);
}
uint32_t stack_left = ((uint32_t)p) - stklimit;
n = snprintf(buf, bufsize, "%-13.13s PRI=%3u STACK_LEFT=%u\n", tp->name, unsigned(tp->prio), unsigned(stack_left));
if (n <= 0) {
break;
if (bufsize > 0) {
n = snprintf(buf, bufsize, "%-13.13s PRI=%3u sp=%p STACK=%u/%u\n",
tp->name, unsigned(tp->prio), tp->wabase,
stack_free(tp->wabase), total_stack);
if (n > bufsize) {
n = bufsize;
}
buf += n;
bufsize -= n;
total += n;
}
buf += n;
bufsize -= n;
total += n;
tp = chRegNextThread(tp);
} while (tp != NULL);
}

return total;
}
Expand Down
2 changes: 1 addition & 1 deletion libraries/AP_HAL_ChibiOS/hwdef/ZubaxGNSS/hwdef.dat
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ define NO_DATAFLASH TRUE
define DMA_RESERVE_SIZE 0

define PERIPH_FW TRUE
MAIN_STACK 0x100
MAIN_STACK 0x200
PROCESS_STACK 0x600
define HAL_DISABLE_LOOP_DELAY

Expand Down
1 change: 1 addition & 0 deletions libraries/AP_HAL_ChibiOS/hwdef/common/chconf.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#define CH_DBG_ENABLE_ASSERTS TRUE
#define CH_DBG_ENABLE_CHECKS TRUE
#define CH_DBG_SYSTEM_STATE_CHECK TRUE
#undef CH_DBG_ENABLE_STACK_CHECK
#define CH_DBG_ENABLE_STACK_CHECK TRUE
#endif

Expand Down
Loading