Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dpdk: add interrupt (power-saving) mode v2 #9595

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
38 changes: 38 additions & 0 deletions doc/userguide/capture-hardware/dpdk.rst
Expand Up @@ -95,3 +95,41 @@ management and worker CPU set.
- worker-cpu-set:
cpu: [ 2,4,6,8 ]
...

Interrupt (power-saving) mode
-----------------------------

DPDK is traditionally recognized for its polling mode operation.
In this mode, CPU cores are continuously querying for packets from
the Network Interface Card (NIC). While this approach offers benefits like
reduced latency and improved performance, it might not be the most efficient
in scenarios with sporadic or low traffic.
The constant polling can lead to unnecessary CPU consumption.
To address this, DPDK offers an interrupt mode.
lukashino marked this conversation as resolved.
Show resolved Hide resolved

The obvious advantage that interrupt mode brings is power efficiency.
In our testing so far, we have not observed decreased performance; in fact,
Suricata's performance has improved slightly.
lukashino marked this conversation as resolved.
Show resolved Hide resolved
Users of the IPS runmode should be aware that interrupts can
introduce non-deterministic latency. However, the latency should never be
higher than in other (e.g. AF_PACKET/AF_XDP/...) capture methods.

Interrupt mode in DPDK can be configured on a per-interface basis.
This allows for a hybrid setup where some workers operate in polling mode,
while others utilize the interrupt mode.
The configuration for the interrupt mode can be found and modified in the
DPDK section of the suricata.yaml file.

Below is a sample configuration that demonstrates how to enable the interrupt mode for a specific interface:

::

...
dpdk:
eal-params:
proc-type: primary

interfaces:
- interface: 0000:3b:00.0
interrupt-mode: yes
lukashino marked this conversation as resolved.
Show resolved Hide resolved
threads: 4
23 changes: 23 additions & 0 deletions src/runmode-dpdk.c
Expand Up @@ -111,6 +111,7 @@ static void *ParseDpdkConfigAndConfigureDevice(const char *iface);
static void DPDKDerefConfig(void *conf);

#define DPDK_CONFIG_DEFAULT_THREADS "auto"
#define DPDK_CONFIG_DEFAULT_INTERRUPT_MODE 0
#define DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE 65535
#define DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE "auto"
#define DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS 1024
Expand All @@ -126,6 +127,7 @@ static void DPDKDerefConfig(void *conf);

DPDKIfaceConfigAttributes dpdk_yaml = {
.threads = "threads",
.irq_mode = "interrupt-mode",
.promisc = "promisc",
.multicast = "multicast",
.checksum_checks = "checksum-checks",
Expand Down Expand Up @@ -434,6 +436,15 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str)
SCReturnInt(0);
}

/**
 * Records the interrupt-mode setting in the interface configuration.
 *
 * When entry_bool is non-zero the DPDK_IRQ_MODE bit is set in iconf->flags;
 * otherwise the flags are left untouched. The function always reports success.
 */
static bool ConfigSetInterruptMode(DPDKIfaceConfig *iconf, int entry_bool)
{
    SCEnter();

    if (entry_bool != 0) {
        iconf->flags |= DPDK_IRQ_MODE;
    }

    SCReturnBool(true);
}

static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues)
{
SCEnter();
Expand Down Expand Up @@ -695,6 +706,13 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface)
if (retval < 0)
SCReturnInt(retval);

retval = ConfGetChildValueBoolWithDefault(
if_root, if_default, dpdk_yaml.irq_mode, &entry_bool) != 1
? ConfigSetInterruptMode(iconf, DPDK_CONFIG_DEFAULT_INTERRUPT_MODE)
: ConfigSetInterruptMode(iconf, entry_bool);
if (retval != true)
SCReturnInt(-EINVAL);

// currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported
retval = ConfigSetRxQueues(iconf, (uint16_t)iconf->threads);
if (retval < 0)
Expand Down Expand Up @@ -1106,6 +1124,11 @@ static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,
},
};

if (iconf->flags & DPDK_IRQ_MODE) {
SCLogConfig("Switching to interrupt (power-saving) mode");
port_conf->intr_conf.rxq = 1;
}

// configure RX offloads
if (dev_info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_RSS_HASH) {
if (iconf->nb_rx_queues > 1) {
Expand Down
1 change: 1 addition & 0 deletions src/runmode-dpdk.h
Expand Up @@ -25,6 +25,7 @@

typedef struct DPDKIfaceConfigAttributes_ {
const char *threads;
const char *irq_mode;
const char *promisc;
const char *multicast;
const char *checksum_checks;
Expand Down
72 changes: 71 additions & 1 deletion src/source-dpdk.c
Expand Up @@ -91,6 +91,42 @@ TmEcode NoDPDKSupportExit(ThreadVars *tv, const void *initdata, void **data)
#include "util-dpdk-bonding.h"
#include <numa.h>

/* Tuning knobs for the interrupt (power-saving) receive path. */

/* Number of consecutive empty polls during which the receive loop keeps
 * busy-polling before it starts consulting the sleep heuristic. */
#define MIN_ZERO_POLL_COUNT 10U
/* Empty-poll count below which InterruptsSleepHeuristic() returns the
 * minimum sleep time instead of the standard one. */
#define MIN_ZERO_POLL_COUNT_TO_SLEEP 10U
/* Short sleep hint, in microseconds (handed to rte_delay_us() by the receive loop). */
#define MINIMUM_SLEEP_TIME_US 1U
/* Standard sleep hint, in microseconds; when the heuristic returns this value
 * the receive loop waits for an RX interrupt instead of delaying. */
#define STANDARD_SLEEP_TIME_US 100U
/* Timeout argument passed to rte_epoll_wait() while waiting for an RX interrupt.
 * NOTE(review): the DPDK API documents this timeout in milliseconds, so the
 * "_S" suffix looks misleading (5 would mean 5 ms, not 5 s) - confirm intent. */
#define MAX_EPOLL_TIMEOUT_S 5U

/**
 * Registers the RX interrupt of the given port/queue with the calling
 * thread's epoll instance (RTE_EPOLL_PER_THREAD).
 *
 * The port id and queue id are packed into one 32-bit value (port id in the
 * upper 16 bits, queue id in the lower 16 bits) that is attached to the event
 * as user data.
 *
 * \param portid  DPDK port id
 * \param queueid RX queue id on that port
 * \return 0 on success, otherwise the non-zero value returned by
 *         rte_eth_dev_rx_intr_ctl_q() (the failure is logged)
 */
static int32_t InterruptsRXEnable(uint16_t portid, uint16_t queueid)
{
    /* Widen before shifting: a uint16_t operand is promoted to signed int, and
     * shifting a port id >= 0x8000 left by 16 bits would overflow it, which is
     * undefined behavior. */
    uint32_t event_data = ((uint32_t)portid << UINT16_WIDTH) | (uint32_t)queueid;
    int32_t ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid, RTE_EPOLL_PER_THREAD,
            RTE_INTR_EVENT_ADD, (void *)((uintptr_t)event_data));
    if (ret != 0) {
        SCLogError("%s-Q%d: rte_eth_dev_rx_intr_ctl_q failed: %s", DPDKGetPortNameByPortID(portid),
                queueid, rte_strerror(-ret));
        return ret;
    }

    return 0;
}

/**
 * Picks a sleep hint from the number of consecutive polls that returned no packets.
 *
 * \param no_pkt_polls_count count of consecutive empty polls
 * \return MINIMUM_SLEEP_TIME_US while the count is below
 *         MIN_ZERO_POLL_COUNT_TO_SLEEP, STANDARD_SLEEP_TIME_US otherwise
 */
static inline uint32_t InterruptsSleepHeuristic(uint32_t no_pkt_polls_count)
{
    return (no_pkt_polls_count >= MIN_ZERO_POLL_COUNT_TO_SLEEP) ? STANDARD_SLEEP_TIME_US
                                                                : MINIMUM_SLEEP_TIME_US;
}

/**
 * Enables or disables the RX interrupt of one queue.
 *
 * \param port_id  DPDK port id
 * \param queue_id RX queue id on that port
 * \param on       true calls rte_eth_dev_rx_intr_enable(), false calls
 *                 rte_eth_dev_rx_intr_disable()
 *
 * The return value of the DPDK call is not inspected.
 */
static void InterruptsTurnOnOff(uint16_t port_id, uint16_t queue_id, bool on)
{
    if (!on) {
        rte_eth_dev_rx_intr_disable(port_id, queue_id);
        return;
    }

    rte_eth_dev_rx_intr_enable(port_id, queue_id);
}

#define BURST_SIZE 32
static struct timeval machine_start_time = { 0, 0 };

Expand All @@ -104,6 +140,7 @@ typedef struct DPDKThreadVars_ {
TmSlot *slot;
LiveDevice *livedev;
ChecksumValidationMode checksum_mode;
bool intr_en;
/* references to packet and drop counters */
uint16_t capture_dpdk_packets;
uint16_t capture_dpdk_rx_errs;
Expand Down Expand Up @@ -375,8 +412,21 @@ static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot)
TmThreadsSetFlag(tv, THV_RUNNING);
PacketPoolWait();

if (ptv->intr_en) {
if (InterruptsRXEnable(ptv->port_id, ptv->queue_id) == 0) {
SCLogDebug("Enabling interrupt for port %d queue %d", ptv->port_id, ptv->queue_id);
ptv->intr_en = true;
} else {
SCLogConfig(
"Failed to enable interrupt (power-saving) mode, falling back to polling mode");
ptv->intr_en = false;
}
}
rte_eth_stats_reset(ptv->port_id);
rte_eth_xstats_reset(ptv->port_id);

uint32_t pwd_zero_rx_packet_polls_count = 0;

while (1) {
if (unlikely(suricata_ctl_flags != 0)) {
SCLogDebug("Stopping Suricata!");
Expand All @@ -398,7 +448,26 @@ static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot)
TmThreadsCaptureHandleTimeout(tv, NULL);
last_timeout_msec = msecs;
}
continue;

if (ptv->intr_en) {
pwd_zero_rx_packet_polls_count++;
if (pwd_zero_rx_packet_polls_count <= MIN_ZERO_POLL_COUNT)
continue;

uint32_t pwd_idle_hint = InterruptsSleepHeuristic(pwd_zero_rx_packet_polls_count);

if (pwd_idle_hint < STANDARD_SLEEP_TIME_US) {
rte_delay_us(pwd_idle_hint);
} else {
InterruptsTurnOnOff(ptv->port_id, ptv->queue_id, 1);
struct rte_epoll_event event;
rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, MAX_EPOLL_TIMEOUT_S);
InterruptsTurnOnOff(ptv->port_id, ptv->queue_id, 0);
continue;
}
}
} else if (pwd_zero_rx_packet_polls_count) {
pwd_zero_rx_packet_polls_count = 0;
}

ptv->pkts += (uint64_t)nb_rx;
Expand Down Expand Up @@ -522,6 +591,7 @@ static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void
ptv->checksum_mode = dpdk_config->checksum_mode;

ptv->threads = dpdk_config->threads;
ptv->intr_en = (dpdk_config->flags & DPDK_IRQ_MODE) != 0;
ptv->port_id = dpdk_config->port_id;
ptv->out_port_id = dpdk_config->out_port_id;
ptv->port_socket_id = dpdk_config->socket_id;
Expand Down
1 change: 1 addition & 0 deletions src/source-dpdk.h
Expand Up @@ -38,6 +38,7 @@ typedef enum { DPDK_COPY_MODE_NONE, DPDK_COPY_MODE_TAP, DPDK_COPY_MODE_IPS } Dpd
// General flags
#define DPDK_PROMISC (1 << 0) /**< Promiscuous mode */
#define DPDK_MULTICAST (1 << 1) /**< Enable multicast packets */
#define DPDK_IRQ_MODE (1 << 2) /**< Interrupt mode */
// Offloads
#define DPDK_RX_CHECKSUM_OFFLOAD (1 << 4) /**< Enable chsum offload */

Expand Down
1 change: 1 addition & 0 deletions suricata.yaml.in
Expand Up @@ -753,6 +753,7 @@ dpdk:
# - auto takes all cores
# in IPS mode it is required to specify the number of cores and the numbers on both interfaces must match
threads: auto
interrupt-mode: no # yes to switch to interrupt mode
lukashino marked this conversation as resolved.
Show resolved Hide resolved
promisc: true # promiscuous mode - capture all packets
multicast: true # enables also detection on multicast packets
checksum-checks: true # if Suricata should validate checksums
Expand Down