Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improving DPDK capture interface and docs v7 #8859

Closed
wants to merge 9 commits into from
220 changes: 140 additions & 80 deletions doc/userguide/configuration/suricata-yaml.rst

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions doc/userguide/install.rst
Expand Up @@ -57,6 +57,9 @@ Common configure options

Enables GeoIP support for detection.

.. option:: --enable-dpdk

Enables `DPDK <https://www.dpdk.org/>`_ packet capture method.

Dependencies
^^^^^^^^^^^^
Expand Down
44 changes: 38 additions & 6 deletions src/runmode-dpdk.c
Expand Up @@ -930,7 +930,7 @@ static int DeviceValidateMTU(const DPDKIfaceConfig *iconf, const struct rte_eth_
SCReturnInt(-ERANGE);
}

#if RTE_VER_YEAR < 21 || RTE_VER_YEAR == 21 && RTE_VER_MONTH < 11
#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
// check if jumbo frames are set and are available
if (iconf->mtu > RTE_ETHER_MAX_LEN &&
!(dev_info->rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)) {
Expand All @@ -944,7 +944,7 @@ static int DeviceValidateMTU(const DPDKIfaceConfig *iconf, const struct rte_eth_

static void DeviceSetMTU(struct rte_eth_conf *port_conf, uint16_t mtu)
{
#if RTE_VER_YEAR > 21 || RTE_VER_YEAR == 21 && RTE_VER_MONTH == 11
#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
port_conf->rxmode.mtu = mtu;
#else
port_conf->rxmode.max_rx_pkt_len = mtu;
Expand All @@ -954,6 +954,24 @@ static void DeviceSetMTU(struct rte_eth_conf *port_conf, uint16_t mtu)
#endif
}

/**
 * \brief Query the socket ID of a port and hand it back to the caller.
 *
 * \param port_id - queried port
 * \param socket_id - [out] receives the raw value returned by rte_eth_dev_socket_id()
 * \return on DPDK >= 22.11 the negated rte_errno (0 when no error was flagged),
 *         on older DPDK the return value of rte_eth_dev_socket_id() itself;
 *         callers treat a negative value as failure
 */
static int32_t DeviceSetSocketID(uint16_t port_id, int32_t *socket_id)
{
    // cleared before the call so that a stale error code is never reported below
    rte_errno = 0;
    const int port_socket = rte_eth_dev_socket_id(port_id);
    *socket_id = port_socket;

#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0) // DPDK API changed since 22.11
    return -rte_errno;
#else
    return port_socket;
#endif
}

static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,
const struct rte_eth_dev_info *dev_info, struct rte_eth_conf *port_conf)
{
Expand Down Expand Up @@ -1002,7 +1020,8 @@ static void DeviceInitPortConf(const DPDKIfaceConfig *iconf,

if (iconf->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
SCLogConfig("%s: checksum validation disabled", iconf->iface);
} else if (dev_info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_CHECKSUM) {
} else if ((dev_info->rx_offload_capa & RTE_ETH_RX_OFFLOAD_CHECKSUM) ==
RTE_ETH_RX_OFFLOAD_CHECKSUM) { // multibit comparison to make sure all bits are set
if (iconf->checksum_mode == CHECKSUM_VALIDATION_ENABLE &&
iconf->flags & DPDK_RX_CHECKSUM_OFFLOAD) {
SCLogConfig("%s: IP, TCP and UDP checksum validation offloaded", iconf->iface);
Expand Down Expand Up @@ -1150,7 +1169,14 @@ static int DeviceConfigureIPS(DPDKIfaceConfig *iconf)
SCReturnInt(retval);
}

if (rte_eth_dev_socket_id(iconf->port_id) != rte_eth_dev_socket_id(iconf->out_port_id)) {
// Query the socket of the *output* port. iconf->socket_id is filled from
// iconf->port_id in DeviceConfigure(), so asking for iconf->port_id here would
// compare the input port with itself and the NUMA warning could never trigger.
int32_t out_port_socket_id;
retval = DeviceSetSocketID(iconf->out_port_id, &out_port_socket_id);
if (retval < 0) {
    SCLogError("%s: invalid socket id (err=%d)", iconf->out_iface, retval);
    SCReturnInt(retval);
}

// a mismatch is only reported, it does not fail the configuration
if (iconf->socket_id != out_port_socket_id) {
    SCLogWarning("%s: out iface %s is not on the same NUMA node", iconf->iface,
            iconf->out_iface);
}
Expand Down Expand Up @@ -1190,12 +1216,11 @@ static int DeviceConfigure(DPDKIfaceConfig *iconf)
SCReturnInt(retval);
}

retval = rte_eth_dev_socket_id(iconf->port_id);
retval = DeviceSetSocketID(iconf->port_id, &iconf->socket_id);
if (retval < 0) {
SCLogError("%s: invalid socket id (err=%d)", iconf->iface, retval);
SCReturnInt(retval);
}
iconf->socket_id = retval;

retval = rte_eth_dev_info_get(iconf->port_id, &dev_info);
if (retval != 0) {
Expand Down Expand Up @@ -1337,6 +1362,13 @@ static void *ParseDpdkConfigAndConfigureDevice(const char *iface)
// This counter is increased by worker threads that individually pick queue IDs.
SC_ATOMIC_RESET(iconf->queue_id);
SC_ATOMIC_RESET(iconf->inconsitent_numa_cnt);

// initialize LiveDev DPDK values
LiveDevice *ldev_instance = LiveGetDevice(iface);
if (ldev_instance == NULL) {
FatalError("Device %s is not registered as a live device", iface);
}
ldev_instance->dpdk_vars.pkt_mp = iconf->pkt_mempool;
return iconf;
}

Expand Down
43 changes: 30 additions & 13 deletions src/source-dpdk.c
Expand Up @@ -121,6 +121,7 @@ typedef struct DPDKThreadVars_ {
uint64_t dropped;
uint16_t port_id;
uint16_t queue_id;
int32_t port_socket_id;
struct rte_mempool *pkt_mempool;
struct rte_mbuf *received_mbufs[BURST_SIZE];
} DPDKThreadVars;
Expand Down Expand Up @@ -267,17 +268,17 @@ void TmModuleDecodeDPDKRegister(void)

static inline void DPDKDumpCounters(DPDKThreadVars *ptv)
{
struct rte_eth_stats eth_stats;
int retval = rte_eth_stats_get(ptv->port_id, &eth_stats);
if (unlikely(retval != 0)) {
SCLogError("Failed to get stats for port id %d: %s", ptv->port_id, rte_strerror(-retval));
return;
}

/* Some NICs (e.g. Intel) do not support queue statistics and the drops can be fetched only on
* the port level. Therefore setting it to the first worker to have at least continuous update
* on the dropped packets. */
if (ptv->queue_id == 0) {
struct rte_eth_stats eth_stats;
int retval = rte_eth_stats_get(ptv->port_id, &eth_stats);
if (unlikely(retval != 0)) {
SCLogError("%s: failed to get stats: %s", ptv->livedev->dev, rte_strerror(-retval));
return;
}

StatsSetUI64(ptv->tv, ptv->capture_dpdk_packets,
ptv->pkts + eth_stats.imissed + eth_stats.ierrors + eth_stats.rx_nombuf);
SC_ATOMIC_SET(ptv->livedev->pkts,
Expand Down Expand Up @@ -344,6 +345,7 @@ static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot)
uint16_t nb_rx;
time_t last_dump = 0;
time_t current_time;
bool segmented_mbufs_warned = 0;

DPDKThreadVars *ptv = (DPDKThreadVars *)data;
TmSlot *s = (TmSlot *)slot;
Expand Down Expand Up @@ -408,6 +410,23 @@ static TmEcode ReceiveDPDKLoop(ThreadVars *tv, void *data, void *slot)
}
}

if (!rte_pktmbuf_is_contiguous(p->dpdk_v.mbuf) && !segmented_mbufs_warned) {
char warn_s[] = "Segmented mbufs detected! Redmine Ticket #6012 "
"Check your configuration or report the issue";
enum rte_proc_type_t eal_t = rte_eal_process_type();
if (eal_t == RTE_PROC_SECONDARY) {
SCLogWarning("%s. To avoid segmented mbufs, "
"try to increase mbuf size in your primary application",
warn_s);
} else if (eal_t == RTE_PROC_PRIMARY) {
SCLogWarning("%s. To avoid segmented mbufs, "
"try to increase MTU in your suricata.yaml",
warn_s);
}

segmented_mbufs_warned = 1;
}

PacketSetData(p, rte_pktmbuf_mtod(p->dpdk_v.mbuf, uint8_t *),
rte_pktmbuf_pkt_len(p->dpdk_v.mbuf));
if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
Expand Down Expand Up @@ -473,15 +492,16 @@ static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void
ptv->threads = dpdk_config->threads;
ptv->port_id = dpdk_config->port_id;
ptv->out_port_id = dpdk_config->out_port_id;
ptv->port_socket_id = dpdk_config->socket_id;
// pass the pointer to the mempool and then forget about it. Mempool is freed in thread deinit.
ptv->pkt_mempool = dpdk_config->pkt_mempool;
dpdk_config->pkt_mempool = NULL;

thread_numa = GetNumaNode();
if (thread_numa >= 0 && thread_numa != rte_eth_dev_socket_id(ptv->port_id)) {
if (thread_numa >= 0 && thread_numa != ptv->port_socket_id) {
SC_ATOMIC_ADD(dpdk_config->inconsitent_numa_cnt, 1);
SCLogPerf("%s: NIC is on NUMA %d, thread on NUMA %d", dpdk_config->iface,
rte_eth_dev_socket_id(ptv->port_id), thread_numa);
ptv->port_socket_id, thread_numa);
}

uint16_t queue_id = SC_ATOMIC_ADD(dpdk_config->queue_id, 1);
Expand Down Expand Up @@ -644,10 +664,7 @@ static TmEcode ReceiveDPDKThreadDeinit(ThreadVars *tv, void *data)
rte_eth_dev_stop(ptv->out_port_id);
}

if (ptv->queue_id == 0 && ptv->pkt_mempool != NULL) {
rte_mempool_free(ptv->pkt_mempool);
ptv->pkt_mempool = NULL;
}
ptv->pkt_mempool = NULL; // MP is released when device is closed

SCFree(ptv);
SCReturnInt(TM_ECODE_OK);
Expand Down
2 changes: 1 addition & 1 deletion src/source-dpdk.h
Expand Up @@ -46,7 +46,7 @@ typedef struct DPDKIfaceConfig_ {
#ifdef HAVE_DPDK
char iface[RTE_ETH_NAME_MAX_LEN];
uint16_t port_id;
uint16_t socket_id;
int32_t socket_id;
/* number of threads - zero means all available */
int threads;
/* IPS mode */
Expand Down
14 changes: 14 additions & 0 deletions src/util-device.h
Expand Up @@ -18,6 +18,10 @@
#ifndef __UTIL_DEVICE_H__
#define __UTIL_DEVICE_H__

#ifdef HAVE_DPDK
#include <rte_mempool.h>
#endif /* HAVE_DPDK */

#include "queue.h"

#define OFFLOAD_FLAG_SG (1<<0)
Expand All @@ -35,6 +39,12 @@ int LiveGetOffload(void);

#define MAX_DEVNAME 10

#ifdef HAVE_DPDK
/** DPDK resources attached to a live device */
typedef struct DPDKDeviceResources_ {
    /** packet mempool of the device; released with rte_mempool_free() in DPDKCloseDevice() */
    struct rte_mempool *pkt_mp;
} DPDKDeviceResources;
#endif /* HAVE_DPDK */

/** storage for live device names */
typedef struct LiveDevice_ {
char *dev; /**< the device (e.g. "eth0") */
Expand All @@ -51,6 +61,10 @@ typedef struct LiveDevice_ {

uint32_t tenant_id; /**< tenant id in multi-tenancy */
uint32_t offload_orig; /**< original offload settings to restore @exit */
#ifdef HAVE_DPDK
// DPDK resources that need to be cleaned up after workers are stopped and devices closed
DPDKDeviceResources dpdk_vars;
#endif
} LiveDevice;

typedef struct LiveDeviceName_ {
Expand Down
28 changes: 14 additions & 14 deletions src/util-dpdk-i40e.c
Expand Up @@ -38,7 +38,7 @@

#define I40E_RSS_HKEY_LEN 52

#if RTE_VER_YEAR <= 19
#if RTE_VERSION < RTE_VERSION_NUM(20, 0, 0, 0)
static int i40eDeviceEnableSymHash(
int port_id, const char *port_name, uint32_t ftype, enum rte_eth_hash_function function)
{
Expand Down Expand Up @@ -349,7 +349,7 @@ static int i40eDeviceSetRSSWithFlows(int port_id, const char *port_name, int nb_
return 0;
}

#endif /* RTE_VER_YEAR < 19 */
#endif /* RTE_VERSION < RTE_VERSION_NUM(20,0,0,0) */

int i40eDeviceSetRSS(int port_id, int nb_rx_queues)
{
Expand All @@ -364,25 +364,25 @@ int i40eDeviceSetRSS(int port_id, int nb_rx_queues)
return retval;
}

#if RTE_VER_YEAR <= 19
i40eDeviceSetRSSWithFilter(port_id, port_name);
#else
#if RTE_VERSION >= RTE_VERSION_NUM(20, 0, 0, 0)
i40eDeviceSetRSSWithFlows(port_id, port_name, nb_rx_queues);
#else
i40eDeviceSetRSSWithFilter(port_id, port_name);
#endif
return 0;
}

/**
 * \brief Select the RSS hash types used for i40e ports.
 *
 * The set of flags is picked at compile time from the DPDK version; the
 * former runtime check on RTE_VER_YEAR is gone because its result was always
 * overwritten by the preprocessor-selected assignment.
 *
 * \param rss_hf - [out] receives the bitmask of RTE_ETH_RSS_* flags
 */
void i40eDeviceSetRSSHashFunction(uint64_t *rss_hf)
{
#if RTE_VERSION >= RTE_VERSION_NUM(20, 0, 0, 0)
    *rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_FRAG_IPV6 |
              RTE_ETH_RSS_NONFRAG_IPV6_OTHER;
#else
    *rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV4_UDP |
              RTE_ETH_RSS_NONFRAG_IPV4_SCTP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER |
              RTE_ETH_RSS_FRAG_IPV6 | RTE_ETH_RSS_NONFRAG_IPV6_TCP | RTE_ETH_RSS_NONFRAG_IPV6_UDP |
              RTE_ETH_RSS_NONFRAG_IPV6_SCTP | RTE_ETH_RSS_NONFRAG_IPV6_OTHER | RTE_ETH_RSS_SCTP;
#endif
}

#endif /* HAVE_DPDK */
Expand Down
13 changes: 7 additions & 6 deletions src/util-dpdk-ice.c
Expand Up @@ -37,12 +37,13 @@

/**
 * \brief Select the RSS hash types used for ice ports.
 *
 * The set of flags is picked at compile time from the DPDK version; the
 * former runtime check on RTE_VER_YEAR is gone because its result was always
 * overwritten by the preprocessor-selected assignment.
 *
 * \param rss_hf - [out] receives the bitmask of RTE_ETH_RSS_* flags
 */
void iceDeviceSetRSSHashFunction(uint64_t *rss_hf)
{
#if RTE_VERSION < RTE_VERSION_NUM(20, 0, 0, 0)
    *rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_FRAG_IPV6 |
              RTE_ETH_RSS_NONFRAG_IPV6_OTHER;
#else
    *rss_hf = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_OTHER |
              RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 | RTE_ETH_RSS_NONFRAG_IPV6_OTHER;
#endif
}

#endif /* HAVE_DPDK */
Expand Down
3 changes: 3 additions & 0 deletions src/util-dpdk.c
Expand Up @@ -51,6 +51,9 @@ void DPDKCloseDevice(LiveDevice *ldev)

SCLogInfo("%s: closing device", ldev->dev);
rte_eth_dev_close(port_id);

SCLogInfo("%s: releasing packet mempool", ldev->dev);
rte_mempool_free(ldev->dpdk_vars.pkt_mp);
}
#endif
}
6 changes: 3 additions & 3 deletions src/util-dpdk.h
Expand Up @@ -34,13 +34,13 @@
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_flow.h>
#include <rte_version.h>

#if RTE_VER_YEAR < 22
#if RTE_VERSION < RTE_VERSION_NUM(22, 0, 0, 0)
#define RTE_ETH_MQ_RX_RSS ETH_MQ_RX_RSS

#endif

#if RTE_VER_YEAR < 21 || RTE_VER_YEAR == 21 && RTE_VER_MONTH < 11
#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
#define RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE DEV_TX_OFFLOAD_MBUF_FAST_FREE

#define RTE_ETH_RX_OFFLOAD_CHECKSUM DEV_RX_OFFLOAD_CHECKSUM
Expand Down