diff --git a/doc/userguide/capture-hardware/dpdk.rst b/doc/userguide/capture-hardware/dpdk.rst
index 91ae1c876ca..3231372061c 100644
--- a/doc/userguide/capture-hardware/dpdk.rst
+++ b/doc/userguide/capture-hardware/dpdk.rst
@@ -95,3 +95,41 @@ management and worker CPU set.
      - worker-cpu-set:
          cpu: [ 2,4,6,8 ]
  ...
+
+Interrupt (power-saving) mode
+-----------------------------
+
+DPDK is traditionally recognized for its polling mode operation.
+In this mode, CPU cores are continuously querying for packets from
+the Network Interface Card (NIC). While this approach offers benefits like
+reduced latency and improved performance, it might not be the most efficient
+in scenarios with sporadic or low traffic.
+The constant polling can lead to unnecessary CPU consumption.
+To address this, DPDK offers an interrupt mode.
+
+The obvious advantage that interrupt mode brings is power efficiency.
+In our testing so far, we did not observe decreased performance; in fact,
+Suricata's performance improved slightly.
+Users of the IPS runmode should be aware that interrupts can
+introduce non-deterministic latency. However, the latency should never be
+higher than with other capture methods (e.g. AF_PACKET, AF_XDP).
+
+Interrupt mode in DPDK can be configured on a per-interface basis.
+This allows for a hybrid setup where some workers operate in polling mode,
+while others utilize the interrupt mode.
+The configuration for the interrupt mode can be found and modified in the
+DPDK section of the suricata.yaml file.
+
+Below is a sample configuration that demonstrates how to enable the interrupt mode for a specific interface:
+
+::
+
+  ...
+  dpdk:
+    eal-params:
+      proc-type: primary
+
+    interfaces:
+      - interface: 0000:3b:00.0
+        interrupt-mode: yes
+        threads: 4
diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c
index feba401b413..8daae71eeaa 100644
--- a/src/runmode-dpdk.c
+++ b/src/runmode-dpdk.c
@@ -111,6 +111,7 @@ static void *ParseDpdkConfigAndConfigureDevice(const char *iface);
 static void DPDKDerefConfig(void *conf);
 
 #define DPDK_CONFIG_DEFAULT_THREADS                     "auto"
+#define DPDK_CONFIG_DEFAULT_INTERRUPT_MODE              0
 #define DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE                65535
 #define DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE          "auto"
 #define DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS              1024
@@ -126,6 +127,7 @@ static void DPDKDerefConfig(void *conf);
 
 DPDKIfaceConfigAttributes dpdk_yaml = {
     .threads = "threads",
+    .irq_mode = "interrupt-mode",
     .promisc = "promisc",
     .multicast = "multicast",
     .checksum_checks = "checksum-checks",
@@ -434,6 +436,21 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str)
     SCReturnInt(0);
 }
 
+/**
+ * \brief Record the interface's interrupt-mode setting in its configuration flags.
+ * \param iconf interface configuration to update
+ * \param entry_bool non-zero to request interrupt (power-saving) mode
+ * \retval true always; this setter has no failure path
+ */
+static bool ConfigSetInterruptMode(DPDKIfaceConfig *iconf, int entry_bool)
+{
+    SCEnter();
+    if (entry_bool)
+        iconf->flags |= DPDK_IRQ_MODE;
+
+    SCReturnBool(true);
+}
+
 static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues)
 {
     SCEnter();
@@ -695,6 +712,13 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface)
     if (retval < 0)
         SCReturnInt(retval);
 
+    retval = ConfGetChildValueBoolWithDefault(
+                     if_root, if_default, dpdk_yaml.irq_mode, &entry_bool) != 1
+                     ? ConfigSetInterruptMode(iconf, DPDK_CONFIG_DEFAULT_INTERRUPT_MODE)
+                     : ConfigSetInterruptMode(iconf, entry_bool);
+    if (retval != true)
+        SCReturnInt(-EINVAL);
+
     // currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported
     retval = ConfigSetRxQueues(iconf, (uint16_t)iconf->threads);
     if (retval < 0)
@@ -1106,6 +1130,11 @@ static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,
         },
     };
 
+    if (iconf->flags & DPDK_IRQ_MODE) {
+        SCLogConfig("Switching to interrupt (power-saving) mode");
+        port_conf->intr_conf.rxq = 1;
+    }
+
     // configure RX offloads
     if (dev_info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_RSS_HASH) {
         if (iconf->nb_rx_queues > 1) {
diff --git a/src/runmode-dpdk.h b/src/runmode-dpdk.h
index a00327ba9e2..152c1d68789 100644
--- a/src/runmode-dpdk.h
+++ b/src/runmode-dpdk.h
@@ -25,6 +25,7 @@
 
 typedef struct DPDKIfaceConfigAttributes_ {
     const char *threads;
+    const char *irq_mode;
     const char *promisc;
     const char *multicast;
     const char *checksum_checks;
diff --git a/src/source-dpdk.c b/src/source-dpdk.c
index 54503e21227..b83a72acd25 100644
--- a/src/source-dpdk.c
+++ b/src/source-dpdk.c
@@ -91,6 +91,62 @@ TmEcode NoDPDKSupportExit(ThreadVars *tv, const void *initdata, void **data)
 #include "util-dpdk-bonding.h"
 #include <numa.h>
 
+#define MIN_ZERO_POLL_COUNT          10U
+#define MIN_ZERO_POLL_COUNT_TO_SLEEP 10U
+#define MINIMUM_SLEEP_TIME_US        1U
+#define STANDARD_SLEEP_TIME_US       100U
+/* rte_epoll_wait() takes its timeout in milliseconds */
+#define MAX_EPOLL_TIMEOUT_MS         5U
+
+/**
+ * \brief Register the RX interrupt of a queue using the RTE_EPOLL_PER_THREAD epoll descriptor.
+ * \param portid DPDK port id
+ * \param queueid RX queue id on that port
+ * \retval 0 on success, otherwise the error returned by rte_eth_dev_rx_intr_ctl_q()
+ */
+static int32_t InterruptsRXEnable(uint16_t portid, uint16_t queueid)
+{
+    // cast first: uint16_t promotes to int, so shifting ids >= 0x8000 would overflow
+    uint32_t event_data = (uint32_t)portid << UINT16_WIDTH | queueid;
+    int32_t ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid, RTE_EPOLL_PER_THREAD,
+            RTE_INTR_EVENT_ADD, (void *)((uintptr_t)event_data));
+    if (ret) {
+        SCLogError("%s-Q%d: rte_eth_dev_rx_intr_ctl_q failed: %s", DPDKGetPortNameByPortID(portid),
+                queueid, rte_strerror(-ret));
+        return ret;
+    }
+
+    return 0;
+}
+
+/**
+ * \brief Choose how long to back off after a run of empty RX polls.
+ * \param no_pkt_polls_count number of consecutive polls that returned no packets
+ * \retval MINIMUM_SLEEP_TIME_US below MIN_ZERO_POLL_COUNT_TO_SLEEP polls,
+ *         STANDARD_SLEEP_TIME_US otherwise
+ */
+static inline uint32_t InterruptsSleepHeuristic(uint32_t no_pkt_polls_count)
+{
+    if (no_pkt_polls_count < MIN_ZERO_POLL_COUNT_TO_SLEEP)
+        return MINIMUM_SLEEP_TIME_US;
+
+    return STANDARD_SLEEP_TIME_US;
+}
+
+/**
+ * \brief Enable or disable the RX interrupt of a single queue.
+ * \param port_id DPDK port id
+ * \param queue_id RX queue id on that port
+ * \param on true to enable the interrupt, false to disable it
+ */
+static void InterruptsTurnOnOff(uint16_t port_id, uint16_t queue_id, bool on)
+{
+    if (on)
+        rte_eth_dev_rx_intr_enable(port_id, queue_id);
+    else
+        rte_eth_dev_rx_intr_disable(port_id, queue_id);
+}
+
 #define BURST_SIZE 32
 
 static struct timeval machine_start_time = { 0, 0 };
@@ -104,6 +160,7 @@ typedef struct DPDKThreadVars_ {
     TmSlot *slot;
     LiveDevice *livedev;
     ChecksumValidationMode checksum_mode;
+    bool intr_en;
     /* references to packet and drop counters */
     uint16_t capture_dpdk_packets;
     uint16_t capture_dpdk_rx_errs;
@@ -375,8 +432,21 @@ static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot)
     TmThreadsSetFlag(tv, THV_RUNNING);
     PacketPoolWait();
 
+    if (ptv->intr_en) {
+        if (InterruptsRXEnable(ptv->port_id, ptv->queue_id) == 0) {
+            SCLogDebug("Enabling interrupt for port %d queue %d", ptv->port_id, ptv->queue_id);
+            ptv->intr_en = true;
+        } else {
+            SCLogConfig(
+                    "Failed to enable interrupt (power-saving) mode, falling back to polling mode");
+            ptv->intr_en = false;
+        }
+    }
     rte_eth_stats_reset(ptv->port_id);
     rte_eth_xstats_reset(ptv->port_id);
+
+    uint32_t pwd_zero_rx_packet_polls_count = 0;
+
     while (1) {
         if (unlikely(suricata_ctl_flags != 0)) {
             SCLogDebug("Stopping Suricata!");
@@ -398,7 +468,27 @@ static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot)
                 TmThreadsCaptureHandleTimeout(tv, NULL);
                 last_timeout_msec = msecs;
             }
-            continue;
+
+            if (!ptv->intr_en)
+                continue;
+
+            pwd_zero_rx_packet_polls_count++;
+            if (pwd_zero_rx_packet_polls_count <= MIN_ZERO_POLL_COUNT)
+                continue;
+
+            uint32_t pwd_idle_hint = InterruptsSleepHeuristic(pwd_zero_rx_packet_polls_count);
+
+            if (pwd_idle_hint < STANDARD_SLEEP_TIME_US) {
+                rte_delay_us(pwd_idle_hint);
+            } else {
+                InterruptsTurnOnOff(ptv->port_id, ptv->queue_id, true);
+                struct rte_epoll_event event;
+                rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, MAX_EPOLL_TIMEOUT_MS);
+                InterruptsTurnOnOff(ptv->port_id, ptv->queue_id, false);
+                continue;
+            }
+        } else if (pwd_zero_rx_packet_polls_count) {
+            pwd_zero_rx_packet_polls_count = 0;
         }
 
         ptv->pkts += (uint64_t)nb_rx;
@@ -522,6 +612,7 @@ static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void
 
     ptv->checksum_mode = dpdk_config->checksum_mode;
     ptv->threads = dpdk_config->threads;
+    ptv->intr_en = (dpdk_config->flags & DPDK_IRQ_MODE) != 0;
     ptv->port_id = dpdk_config->port_id;
     ptv->out_port_id = dpdk_config->out_port_id;
     ptv->port_socket_id = dpdk_config->socket_id;
diff --git a/src/source-dpdk.h b/src/source-dpdk.h
index 3fdb63cb35d..b962d866d4b 100644
--- a/src/source-dpdk.h
+++ b/src/source-dpdk.h
@@ -38,6 +38,7 @@ typedef enum { DPDK_COPY_MODE_NONE, DPDK_COPY_MODE_TAP, DPDK_COPY_MODE_IPS } Dpd
 // General flags
 #define DPDK_PROMISC   (1 << 0) /**< Promiscuous mode */
 #define DPDK_MULTICAST (1 << 1) /**< Enable multicast packets */
+#define DPDK_IRQ_MODE  (1 << 2) /**< Interrupt mode */
 // Offloads
 #define DPDK_RX_CHECKSUM_OFFLOAD (1 << 4) /**< Enable chsum offload */
 
diff --git a/suricata.yaml.in b/suricata.yaml.in
index 630399126db..5f97e828081 100644
--- a/suricata.yaml.in
+++ b/suricata.yaml.in
@@ -753,6 +753,7 @@ dpdk:
       #  - auto takes all cores
       # in IPS mode it is required to specify the number of cores and the numbers on both interfaces must match
       threads: auto
+      interrupt-mode: no # yes to switch to interrupt mode
       promisc: true # promiscuous mode - capture all packets
       multicast: true # enables also detection on multicast packets
       checksum-checks: true # if Suricata should validate checksums