Skip to content

Commit

Permalink
dpdk: Implement Rx offloads module
Browse files Browse the repository at this point in the history
Now, when the user specifies "--offloads" on the DPDK command line,
the following RX offloads will accelerate the RX path:

1. RSS on the inner 5-tuple for MPLSoGRE packets.
2. Outer destination IP ("my IP") lookup.
3. MPLS nexthop lookup.
4. VXLAN nexthop lookup.
5. Inner flow lookup.

Partial-Bug: #1781402
Change-Id: Idaa8aa72ebc5a3c47c031881898ea61a6c8080c1
Signed-off-by: matan <matan@mellanox.com>
  • Loading branch information
matan committed Nov 25, 2018
1 parent b09e738 commit 687637e
Show file tree
Hide file tree
Showing 6 changed files with 526 additions and 39 deletions.
12 changes: 12 additions & 0 deletions dpdk/dpdk_vrouter.c
Expand Up @@ -30,13 +30,15 @@
#include "vr_bridge.h"
#include "vr_mem.h"
#include "nl_util.h"
#include "vr_offloads.h"

#include <rte_version.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_kni.h>
#include <rte_timer.h>


/* vRouter/DPDK command-line options. */
enum vr_opt_index {
#define NO_DAEMON_OPT "no-daemon"
Expand Down Expand Up @@ -91,6 +93,8 @@ enum vr_opt_index {
NETLINK_PORT_OPT_INDEX,
#define SOCKET_MEM_OPT "socket-mem"
SOCKET_MEM_OPT_INDEX,
#define OFFLOADS_OPT "offloads"
OFFLOADS_OPT_INDEX,
#define LCORES_OPT "lcores"
LCORES_OPT_INDEX,
#define MEMORY_ALLOC_CHECKS_OPT "vr_memory_alloc_checks"
Expand All @@ -104,6 +108,7 @@ extern unsigned int vr_bridge_oentries;
extern unsigned int vr_mpls_labels;
extern unsigned int vr_nexthops;
extern unsigned int vr_vrfs;
extern unsigned int datapath_offloads;

static int no_daemon_set;
static int no_gro_set = 0;
Expand Down Expand Up @@ -970,6 +975,8 @@ static struct option long_options[] = {
NULL, 0},
[SOCKET_MEM_OPT_INDEX] = {SOCKET_MEM_OPT, required_argument,
NULL, 0},
[OFFLOADS_OPT_INDEX] = {OFFLOADS_OPT, no_argument,
NULL, 0},
[MEMORY_ALLOC_CHECKS_OPT_INDEX] = {MEMORY_ALLOC_CHECKS_OPT, no_argument,
NULL, 0},
[MAX_OPT_INDEX] = {NULL, 0,
Expand All @@ -992,6 +999,7 @@ Usage(void)
" (ex: --"VDEV_OPT" eth_bond0,mode=4,slave=0000:04:00.0)\n"
" --"SOCKET_MEM_OPT" MB,... Memory to allocate on sockets.\n"
" (ex: --"SOCKET_MEM_OPT" 256,256)\n"
" --"OFFLOADS_OPT" Use smart nic HW offloads.\n"
"\n"
" --"VLAN_TCI_OPT" TCI VLAN tag control information to use\n"
" It may be a value between 0 and 4095\n"
Expand Down Expand Up @@ -1175,6 +1183,10 @@ parse_long_opts(int opt_flow_index, char *optarg)
dpdk_log_file[sizeof(dpdk_log_file) - 1] = '\0';
break;

case OFFLOADS_OPT_INDEX:
printf("Use datapath offloads\n");
datapath_offloads = 1;
break;

case HELP_OPT_INDEX:
default:
Expand Down
31 changes: 18 additions & 13 deletions dpdk/vr_dpdk_ethdev.c
Expand Up @@ -30,6 +30,7 @@
#include <rte_udp.h>

extern int vr_rxd_sz, vr_txd_sz;
extern unsigned int datapath_offloads;

struct rte_eth_conf ethdev_conf = {
#if (RTE_VERSION >= RTE_VERSION_NUM(17, 2, 0, 0))
Expand Down Expand Up @@ -1107,7 +1108,7 @@ vr_dpdk_ethdev_rx_emulate(struct vr_interface *vif,
{
uint64_t mask_to_distribute = 0, mask_to_distribute_ret = 0,
mask_to_drop = 0;
unsigned i, nb_pkts_ret = 0;
unsigned i, offload_en, nb_pkts_ret = 0;
int ret;

/* prefetch the mbufs */
Expand All @@ -1134,20 +1135,24 @@ vr_dpdk_ethdev_rx_emulate(struct vr_interface *vif,
if (unlikely(vr_dpdk.nb_fwd_lcores == 1))
return 0;

offload_en = vif_is_fabric(vif) && datapath_offloads;
/* parse packet headers and emulate RSS hash */
for (i = 0; i < *nb_pkts; i++) {
ret = dpdk_mbuf_parse_and_hash_packets(pkts[i]);

/**
* ret:
* -1 -> packet is invalid and needs to be dropped
* 1 -> packet to be distributed (bit in mask_to_distribute set)
* 0 -> packet to be routed (bit in mask_to_distribute not set)
*/
if (ret == 1) {
mask_to_distribute |= 1ULL << i;
} else if (unlikely(ret == -1)) {
mask_to_drop |= 1ULL << i;
/* datapath offloads calculated the RSS for flow tagged packets */
if (!(offload_en && (pkts[i]->ol_flags & PKT_RX_FDIR_ID))) {
ret = dpdk_mbuf_parse_and_hash_packets(pkts[i]);

/**
* ret:
* -1 -> packet is invalid and needs to be dropped
* 1 -> packet to be distributed (bit in mask_to_distribute set)
* 0 -> packet to be routed (bit in mask_to_distribute not set)
*/
if (ret == 1) {
mask_to_distribute |= 1ULL << i;
} else if (unlikely(ret == -1)) {
mask_to_drop |= 1ULL << i;
}
}
}

Expand Down
4 changes: 4 additions & 0 deletions dpdk/vr_dpdk_host.c
Expand Up @@ -19,6 +19,7 @@
#include "vr_hash.h"
#include "vr_proto.h"
#include "vr_sandesh.h"
#include "vr_dpdk_offloads.h"

#include <linux/if_ether.h>
#include <netinet/ip.h>
Expand Down Expand Up @@ -1471,6 +1472,9 @@ struct host_os dpdk_host = {
.hos_is_frag_limit_exceeded = dpdk_is_frag_limit_exceeded,
.hos_register_nic = dpdk_register_nic, /* not used with DPDK */
.hos_nl_broadcast_supported = false,
.hos_offload_flow_create = dpdk_offload_flow_create,
.hos_offload_flow_destroy = dpdk_offload_flow_destroy,
.hos_offload_prepare = dpdk_offload_prepare,
};

struct host_os *
Expand Down
71 changes: 45 additions & 26 deletions dpdk/vr_dpdk_lcore.c
Expand Up @@ -21,6 +21,7 @@
#include "vr_dpdk_virtio.h"
#include "vr_uvhost.h"
#include "vr_dpdk_gro.h"
#include "vr_dpdk_offloads.h"

#include <signal.h>

Expand All @@ -31,6 +32,8 @@
#include <rte_timer.h>
#include <rte_kni.h>

extern unsigned int datapath_offloads;

/* Returns the least used lcore or VR_MAX_CPUS */
unsigned
vr_dpdk_lcore_least_used_get(void)
Expand Down Expand Up @@ -790,11 +793,18 @@ vr_dpdk_lcore_vroute(struct vr_dpdk_lcore *lcore, struct vr_interface *vif,
struct vr_packet *pkt;
struct vr_dpdk_queue *monitoring_tx_queue;
struct rte_mbuf *p_copy;
struct vr_offload_flow *oflows[VR_DPDK_RX_BURST_SZ];
struct vr_offload_flow **oflow = &oflows[0];
unsigned short vlan_id = VLAN_ID_INVALID;
bool fabric = vif_is_fabric(vif);
bool offloads = fabric && datapath_offloads;

RTE_LOG_DP(DEBUG, VROUTER, "%s: RX %" PRIu32 " packet(s) from interface %s\n",
__func__, nb_pkts, vif->vif_name);

if (offloads)
dpdk_offload_flow_burst_prefetch(pkts, oflows, nb_pkts);

if (unlikely(vif->vif_flags & VIF_FLAG_MONITORED)) {
monitoring_tx_queue =
&lcore->lcore_tx_queues[vr_dpdk.monitorings[vif->vif_idx]][0];
Expand All @@ -818,35 +828,44 @@ vr_dpdk_lcore_vroute(struct vr_dpdk_lcore *lcore, struct vr_interface *vif,

for (i = 0; i < nb_pkts; i++) {
mbuf = pkts[i];
rte_prefetch0(rte_pktmbuf_mtod(mbuf, char *));

/*
* If vRouter works in VLAN, we check if the packet received on the
* physical interface belongs to our VLAN. If it does, the tag should
* be stripped. If not (untagged or another tag), it should be
* forwarded to the kernel.
*/
if (unlikely(vr_dpdk.vlan_tag != VLAN_ID_INVALID &&
vif_is_fabric(vif))) {
if ((mbuf->vlan_tci & 0xFFF) != vr_dpdk.vlan_tag) {
if (vr_dpdk.vlan_ring == NULL || rte_vlan_insert(&mbuf)) {
vr_dpdk_pfree(mbuf, vif, VP_DROP_VLAN_FWD_ENQ);
if (fabric) {
if (offloads) {
rte_prefetch0(pkts[i+1]);
rte_prefetch0((char*)pkts[i + 1] + RTE_CACHE_LINE_SIZE);
rte_prefetch0((char*)pkts[i + 1] + (RTE_CACHE_LINE_SIZE << 1));
if (*oflow)
vr_dpdk_offloads_flow_prefetch(*oflow);
oflow++;
} else
rte_prefetch0(rte_pktmbuf_mtod(mbuf, char *));
/*
* If vRouter works in VLAN, we check if the packet received on the
* physical interface belongs to our VLAN. If it does, the tag should
* be stripped. If not (untagged or another tag), it should be
* forwarded to the kernel.
*/
if (unlikely(vr_dpdk.vlan_tag != VLAN_ID_INVALID)) {
if ((mbuf->vlan_tci & 0xFFF) != vr_dpdk.vlan_tag) {
if (vr_dpdk.vlan_ring == NULL || rte_vlan_insert(&mbuf)) {
vr_dpdk_pfree(mbuf, vif, VP_DROP_VLAN_FWD_ENQ);
continue;
}
/* Packets will be dequeued in dpdk_lcore_fwd_io() */
if (rte_ring_mp_enqueue(vr_dpdk.vlan_ring, mbuf) != 0)
vr_dpdk_pfree(mbuf, vif, VP_DROP_VLAN_FWD_ENQ);
/* Nothing to route, take the next packet. */
continue;
} else {
/* Clear the VLAN flag for the case when the received packet
* belongs to vRouter's VLAN. This resembles the kernel vRouter
* behaviour, in which case a separate vlanX interface (that
* the vRouter is binded to) strips the tag and vRouter gets
* clean ethernet frames from fabric interface. If we did not
* do this, the VLAN tag would be passed to dp-core processing
* and vhost connectivity would be corrupted. */
mbuf->ol_flags &= ~PKT_RX_VLAN;
}
/* Packets will be dequeued in dpdk_lcore_fwd_io() */
if (rte_ring_mp_enqueue(vr_dpdk.vlan_ring, mbuf) != 0)
vr_dpdk_pfree(mbuf, vif, VP_DROP_VLAN_FWD_ENQ);
/* Nothing to route, take the next packet. */
continue;
} else {
/* Clear the VLAN flag for the case when the received packet
* belongs to vRouter's VLAN. This resembles the kernel vRouter
* behaviour, in which case a separate vlanX interface (that
* the vRouter is binded to) strips the tag and vRouter gets
* clean ethernet frames from fabric interface. If we did not
* do this, the VLAN tag would be passed to dp-core processing
* and vhost connectivity would be corrupted. */
mbuf->ol_flags &= ~PKT_RX_VLAN;
}
}

Expand Down

0 comments on commit 687637e

Please sign in to comment.