Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

backports-7.0.x: dpdk: add interrupt (power-saving) mode v2 #10558

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
38 changes: 38 additions & 0 deletions doc/userguide/capture-hardware/dpdk.rst
Expand Up @@ -146,3 +146,41 @@ management and worker CPU set.
- worker-cpu-set:
cpu: [ 2,4,6,8 ]
...

Interrupt (power-saving) mode
-----------------------------

DPDK is traditionally known for its polling mode operation.
In this mode, CPU cores are continuously querying for packets from
the Network Interface Card (NIC). While this approach offers benefits like
reduced latency and improved performance, it might not be the most efficient
in scenarios with sporadic or low traffic.
The constant polling can lead to unnecessary CPU consumption.
To address this, DPDK offers an `interrupt` mode.

The obvious advantage that interrupt mode brings is power efficiency.
So far in our tests, we haven't observed a decrease in performance. Suricata's
performance has actually seen a slight improvement.
Users of the IPS runmode should be aware that interrupts can
introduce non-deterministic latency. However, the latency should never be
higher than in other (e.g. AF_PACKET/AF_XDP/...) capture methods.

Interrupt mode in DPDK can be configured on a per-interface basis.
This allows for a hybrid setup where some workers operate in polling mode,
while others utilize the interrupt mode.
The configuration for the interrupt mode can be found and modified in the
DPDK section of the suricata.yaml file.

Below is a sample configuration that demonstrates how to enable the interrupt mode for a specific interface:

::

...
dpdk:
eal-params:
proc-type: primary

interfaces:
- interface: 0000:3b:00.0
interrupt-mode: true
threads: 4
27 changes: 27 additions & 0 deletions src/runmode-dpdk.c
Expand Up @@ -111,6 +111,7 @@ static void *ParseDpdkConfigAndConfigureDevice(const char *iface);
static void DPDKDerefConfig(void *conf);

#define DPDK_CONFIG_DEFAULT_THREADS "auto"
#define DPDK_CONFIG_DEFAULT_INTERRUPT_MODE false
#define DPDK_CONFIG_DEFAULT_MEMPOOL_SIZE 65535
#define DPDK_CONFIG_DEFAULT_MEMPOOL_CACHE_SIZE "auto"
#define DPDK_CONFIG_DEFAULT_RX_DESCRIPTORS 1024
Expand All @@ -126,6 +127,7 @@ static void DPDKDerefConfig(void *conf);

DPDKIfaceConfigAttributes dpdk_yaml = {
.threads = "threads",
.irq_mode = "interrupt-mode",
.promisc = "promisc",
.multicast = "multicast",
.checksum_checks = "checksum-checks",
Expand Down Expand Up @@ -434,6 +436,15 @@ static int ConfigSetThreads(DPDKIfaceConfig *iconf, const char *entry_str)
SCReturnInt(0);
}

/**
 * \brief Store the interrupt-mode choice in the interface configuration.
 *
 * \param iconf interface configuration whose flags are updated
 * \param enable true to set DPDK_IRQ_MODE in iconf->flags; false leaves the
 *               flags value unchanged (the bit is never cleared here)
 * \return always true
 */
static bool ConfigSetInterruptMode(DPDKIfaceConfig *iconf, bool enable)
{
    SCEnter();
    // OR-ing with 0 is a no-op, so a disabled setting leaves flags as they were
    iconf->flags |= enable ? DPDK_IRQ_MODE : 0;

    SCReturnBool(true);
}

static int ConfigSetRxQueues(DPDKIfaceConfig *iconf, uint16_t nb_queues)
{
SCEnter();
Expand Down Expand Up @@ -695,6 +706,17 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface)
if (retval < 0)
SCReturnInt(retval);

bool irq_enable;
retval = ConfGetChildValueBoolWithDefault(if_root, if_default, dpdk_yaml.irq_mode, &entry_bool);
if (retval != 1) {
irq_enable = DPDK_CONFIG_DEFAULT_INTERRUPT_MODE;
} else {
irq_enable = entry_bool ? true : false;
}
retval = ConfigSetInterruptMode(iconf, irq_enable);
if (retval != true)
SCReturnInt(-EINVAL);

// currently only mapping "1 thread == 1 RX (and 1 TX queue in IPS mode)" is supported
retval = ConfigSetRxQueues(iconf, (uint16_t)iconf->threads);
if (retval < 0)
Expand Down Expand Up @@ -1106,6 +1128,11 @@ static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,
},
};

SCLogConfig("%s: interrupt mode is %s", iconf->iface,
iconf->flags & DPDK_IRQ_MODE ? "enabled" : "disabled");
if (iconf->flags & DPDK_IRQ_MODE)
port_conf->intr_conf.rxq = 1;

// configure RX offloads
if (dev_info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_RSS_HASH) {
if (iconf->nb_rx_queues > 1) {
Expand Down
1 change: 1 addition & 0 deletions src/runmode-dpdk.h
Expand Up @@ -25,6 +25,7 @@

typedef struct DPDKIfaceConfigAttributes_ {
const char *threads;
const char *irq_mode;
const char *promisc;
const char *multicast;
const char *checksum_checks;
Expand Down
73 changes: 72 additions & 1 deletion src/source-dpdk.c
Expand Up @@ -93,6 +93,13 @@ TmEcode NoDPDKSupportExit(ThreadVars *tv, const void *initdata, void **data)

#define BURST_SIZE 32
static struct timeval machine_start_time = { 0, 0 };
// interrupt mode constants
// Number of consecutive empty RX polls the receive loop tolerates (it keeps
// busy-polling while the counter is <= this value) before it starts to back off.
#define MIN_ZERO_POLL_COUNT 10U
// Threshold used by InterruptsSleepHeuristic(): below it the short delay is
// suggested, at or above it the standard one.
#define MIN_ZERO_POLL_COUNT_TO_SLEEP 10U
// Short back-off hint in microseconds; the receive loop passes it to rte_delay_us().
#define MINIMUM_SLEEP_TIME_US 1U
// Standard back-off hint in microseconds; when the heuristic returns this value
// the receive loop waits for an RX interrupt instead of delaying.
#define STANDARD_SLEEP_TIME_US 100U
// Timeout in milliseconds handed to rte_epoll_wait() while waiting for an RX interrupt.
#define MAX_EPOLL_TIMEOUT_MS 500U
// One spinlock per port id; held by InterruptsTurnOnOff() around the RX interrupt
// enable/disable calls and initialised in ReceiveDPDKThreadInit().
static rte_spinlock_t intr_lock[RTE_MAX_ETHPORTS];

/**
* \brief Structure to hold thread specific variables.
Expand All @@ -104,6 +111,7 @@ typedef struct DPDKThreadVars_ {
TmSlot *slot;
LiveDevice *livedev;
ChecksumValidationMode checksum_mode;
bool intr_enabled;
/* references to packet and drop counters */
uint16_t capture_dpdk_packets;
uint16_t capture_dpdk_rx_errs;
Expand Down Expand Up @@ -142,6 +150,40 @@ static uint64_t CyclesToSeconds(uint64_t cycles);
static void DPDKFreeMbufArray(struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t offset);
static uint64_t DPDKGetSeconds(void);

/**
 * \brief Register the RX queue interrupt of a port with the per-thread epoll instance.
 *
 * \param port_id DPDK port id
 * \param queue_id RX queue id on that port
 * \return true on success, false when rte_eth_dev_rx_intr_ctl_q() reports an
 *         error (the error is logged)
 */
static bool InterruptsRXEnable(uint16_t port_id, uint16_t queue_id)
{
    // Pack the port id into the upper 16 bits and the queue id into the lower 16 bits.
    // The cast is required: without it port_id is promoted to (signed) int and
    // shifting a value >= 0x8000 left by 16 bits is undefined behaviour.
    uint32_t event_data = ((uint32_t)port_id << UINT16_WIDTH) | queue_id;
    // event_data is handed over as the opaque user data attached to the epoll event
    int32_t ret = rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
            RTE_INTR_EVENT_ADD, (void *)((uintptr_t)event_data));

    if (ret != 0) {
        // ret is negated before being passed to rte_strerror()
        SCLogError("%s-Q%d: failed to enable interrupt mode: %s", DPDKGetPortNameByPortID(port_id),
                queue_id, rte_strerror(-ret));
        return false;
    }
    return true;
}

/**
 * \brief Choose a back-off hint from the number of consecutive empty polls.
 *
 * \param no_pkt_polls_count number of consecutive polls that returned no packets
 * \return MINIMUM_SLEEP_TIME_US while the count is below
 *         MIN_ZERO_POLL_COUNT_TO_SLEEP, STANDARD_SLEEP_TIME_US otherwise
 */
static inline uint32_t InterruptsSleepHeuristic(uint32_t no_pkt_polls_count)
{
    const bool idle_long_enough = no_pkt_polls_count >= MIN_ZERO_POLL_COUNT_TO_SLEEP;

    return idle_long_enough ? STANDARD_SLEEP_TIME_US : MINIMUM_SLEEP_TIME_US;
}

static inline void InterruptsTurnOnOff(uint16_t port_id, uint16_t queue_id, bool on)
{
rte_spinlock_lock(&(intr_lock[port_id]));

if (on)
rte_eth_dev_rx_intr_enable(port_id, queue_id);
else
rte_eth_dev_rx_intr_disable(port_id, queue_id);

rte_spinlock_unlock(&(intr_lock[port_id]));
}

static void DPDKFreeMbufArray(struct rte_mbuf **mbuf_array, uint16_t mbuf_cnt, uint16_t offset)
{
for (int i = offset; i < mbuf_cnt; i++) {
Expand Down Expand Up @@ -377,6 +419,11 @@ static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot)

rte_eth_stats_reset(ptv->port_id);
rte_eth_xstats_reset(ptv->port_id);

uint32_t pwd_zero_rx_packet_polls_count = 0;
if (ptv->intr_enabled && !InterruptsRXEnable(ptv->port_id, ptv->queue_id))
SCReturnInt(TM_ECODE_FAILED);

while (1) {
if (unlikely(suricata_ctl_flags != 0)) {
SCLogDebug("Stopping Suricata!");
Expand All @@ -398,7 +445,27 @@ static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot)
TmThreadsCaptureHandleTimeout(tv, NULL);
last_timeout_msec = msecs;
}
continue;

if (!ptv->intr_enabled)
continue;

pwd_zero_rx_packet_polls_count++;
if (pwd_zero_rx_packet_polls_count <= MIN_ZERO_POLL_COUNT)
continue;

uint32_t pwd_idle_hint = InterruptsSleepHeuristic(pwd_zero_rx_packet_polls_count);

if (pwd_idle_hint < STANDARD_SLEEP_TIME_US) {
rte_delay_us(pwd_idle_hint);
} else {
InterruptsTurnOnOff(ptv->port_id, ptv->queue_id, true);
struct rte_epoll_event event;
rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, MAX_EPOLL_TIMEOUT_MS);
InterruptsTurnOnOff(ptv->port_id, ptv->queue_id, false);
continue;
}
} else if (ptv->intr_enabled && pwd_zero_rx_packet_polls_count) {
pwd_zero_rx_packet_polls_count = 0;
}

ptv->pkts += (uint64_t)nb_rx;
Expand Down Expand Up @@ -522,6 +589,7 @@ static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void
ptv->checksum_mode = dpdk_config->checksum_mode;

ptv->threads = dpdk_config->threads;
ptv->intr_enabled = (dpdk_config->flags & DPDK_IRQ_MODE) ? true : false;
ptv->port_id = dpdk_config->port_id;
ptv->out_port_id = dpdk_config->out_port_id;
ptv->port_socket_id = dpdk_config->socket_id;
Expand Down Expand Up @@ -569,6 +637,9 @@ static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void
"%s: unable to determine NIC's NUMA node, degraded performance can be expected",
dpdk_config->iface);
}
if (ptv->intr_enabled) {
rte_spinlock_init(&intr_lock[ptv->port_id]);
}
}

*data = (void *)ptv;
Expand Down
1 change: 1 addition & 0 deletions src/source-dpdk.h
Expand Up @@ -38,6 +38,7 @@ typedef enum { DPDK_COPY_MODE_NONE, DPDK_COPY_MODE_TAP, DPDK_COPY_MODE_IPS } Dpd
// General flags
#define DPDK_PROMISC (1 << 0) /**< Promiscuous mode */
#define DPDK_MULTICAST (1 << 1) /**< Enable multicast packets */
#define DPDK_IRQ_MODE (1 << 2) /**< Interrupt mode */
// Offloads
#define DPDK_RX_CHECKSUM_OFFLOAD (1 << 4) /**< Enable chsum offload */

Expand Down
1 change: 1 addition & 0 deletions suricata.yaml.in
Expand Up @@ -753,6 +753,7 @@ dpdk:
# - auto takes all cores
# in IPS mode it is required to specify the number of cores and the numbers on both interfaces must match
threads: auto
# interrupt-mode: false # true to switch to interrupt mode
promisc: true # promiscuous mode - capture all packets
multicast: true # enables also detection on multicast packets
checksum-checks: true # if Suricata should validate checksums
Expand Down