Skip to content

Commit

Permalink
Merge pull request #12556 from Adirl/support_roceV2
Browse files Browse the repository at this point in the history
msg/async/rdma: Support for RoCE v2 and SL

Reviewed-by: Haomai Wang <haomai@xsky.com>
  • Loading branch information
yuyuyu101 committed Dec 22, 2016
2 parents 94bb715 + 8faa809 commit 436d5e6
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 18 deletions.
3 changes: 3 additions & 0 deletions src/common/config_opts.h
Expand Up @@ -223,6 +223,9 @@ OPTION(ms_async_rdma_send_buffers, OPT_U32, 10240)
OPTION(ms_async_rdma_receive_buffers, OPT_U32, 10240)
OPTION(ms_async_rdma_port_num, OPT_U32, 1)
OPTION(ms_async_rdma_polling_us, OPT_U32, 1000)
OPTION(ms_async_rdma_local_gid, OPT_STR, "") // local GID to look up in the port's GID table; format: "fe80:0000:0000:0000:7efe:90ff:fe72:6efe" (8 colon-separated groups of 4 hex digits), no zero folding
OPTION(ms_async_rdma_roce_ver, OPT_INT, 2) // GID type the matching GID table entry must have: 2=RoCEv2, 1=RoCEv1.5, 0=RoCEv1
OPTION(ms_async_rdma_sl, OPT_INT, 3) // service level written to the QP address handle (ah_attr.sl); in RoCE, this means PCP

OPTION(ms_dpdk_port_id, OPT_INT, 0)
OPTION(ms_dpdk_coremask, OPT_STR, "1")
Expand Down
53 changes: 53 additions & 0 deletions src/msg/async/rdma/Infiniband.cc
Expand Up @@ -46,6 +46,59 @@ Device::Device(CephContext *cct, ibv_device* d): device(d), device_attr(new ibv_
}
}

// Build a Port for physical port `ipn` of the verbs context `ictxt`.
//
// Queries the port attributes, records the LID, then walks the port's GID
// table looking for the entry whose raw bytes equal the configured
// ms_async_rdma_local_gid and whose type equals ms_async_rdma_roce_ver.
// On successful return `gid` holds the matching entry and `gid_idx` its
// table index. Every failure (query error, malformed configured GID, no
// matching table entry) is logged and ends in ceph_abort().
Port::Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn): ctxt(ictxt), port_num(ipn), port_attr(new ibv_port_attr) {
  union ibv_gid cgid;
  struct ibv_exp_gid_attr gid_attr;

  // ibv_query_port() signals failure with a non-zero return (an errno
  // value), so test for "not zero" rather than for -1 only.
  int r = ibv_query_port(ctxt, port_num, port_attr);
  if (r != 0) {
    lderr(cct) << __func__ << " query port failed " << cpp_strerror(errno) << dendl;
    ceph_abort();
  }

  lid = port_attr->lid;

  // search for requested GID in GIDs table
  ldout(cct, 1) << __func__ << " looking for local GID " << (cct->_conf->ms_async_rdma_local_gid)
                << " of type " << (cct->_conf->ms_async_rdma_roce_ver) << dendl;

  // Parse the configured GID into its 16 raw bytes. All 16 conversions must
  // succeed; otherwise cgid would be (partly) uninitialised and the
  // comparison below would run against garbage.
  int parsed = sscanf(cct->_conf->ms_async_rdma_local_gid.c_str(),
                      "%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx"
                      ":%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx",
                      &cgid.raw[ 0], &cgid.raw[ 1],
                      &cgid.raw[ 2], &cgid.raw[ 3],
                      &cgid.raw[ 4], &cgid.raw[ 5],
                      &cgid.raw[ 6], &cgid.raw[ 7],
                      &cgid.raw[ 8], &cgid.raw[ 9],
                      &cgid.raw[10], &cgid.raw[11],
                      &cgid.raw[12], &cgid.raw[13],
                      &cgid.raw[14], &cgid.raw[15]);
  if (parsed != 16) {
    lderr(cct) << __func__ << " invalid ms_async_rdma_local_gid '"
               << (cct->_conf->ms_async_rdma_local_gid)
               << "', expected 8 colon-separated groups of 4 hex digits" << dendl;
    ceph_abort();
  }

  // Ask the extended GID query to fill in the GID type.
  gid_attr.comp_mask = IBV_EXP_QUERY_GID_ATTR_TYPE;

  for (gid_idx = 0; gid_idx < port_attr->gid_tbl_len; gid_idx++) {
    r = ibv_query_gid(ctxt, port_num, gid_idx, &gid);
    if (r) {
      // port_num is a uint8_t: cast so it is logged as a number, not a char
      lderr(cct) << __func__ << " query gid of port " << static_cast<int>(port_num)
                 << " index " << gid_idx << " failed " << cpp_strerror(errno) << dendl;
      ceph_abort();
    }
    r = ibv_exp_query_gid_attr(ctxt, port_num, gid_idx, &gid_attr);
    if (r) {
      lderr(cct) << __func__ << " query gid attributes of port " << static_cast<int>(port_num)
                 << " index " << gid_idx << " failed " << cpp_strerror(errno) << dendl;
      ceph_abort();
    }
    // A match needs both the requested GID type and identical raw bytes.
    if ( (gid_attr.type == cct->_conf->ms_async_rdma_roce_ver) &&
         (memcmp(&gid, &cgid, 16) == 0) ) {
      ldout(cct, 1) << __func__ << " found at index " << gid_idx << dendl;
      break;
    }
  }

  // The loop ran to completion without a break: nothing matched.
  if (gid_idx == port_attr->gid_tbl_len) {
    lderr(cct) << __func__ << " Requested local GID was not found in GID table" << dendl;
    ceph_abort();
  }
}

void Device::binding_port(CephContext *cct, uint8_t port_num) {
port_cnt = device_attr->phys_port_cnt;
ports = new Port*[port_cnt];
Expand Down
20 changes: 4 additions & 16 deletions src/msg/async/rdma/Infiniband.h
Expand Up @@ -49,30 +49,17 @@ class Port {
struct ibv_context* ctxt;
uint8_t port_num;
struct ibv_port_attr* port_attr;
int gid_tbl_len;
uint16_t lid;
int gid_idx;
union ibv_gid gid;

public:
// Inline constructor: queries the attributes of port `ipn` on `ictxt`,
// records the LID and reads GID table entry 0 into `gid`. Any query failure
// is logged and ends in ceph_abort().
explicit Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn): ctxt(ictxt), port_num(ipn), port_attr(new ibv_port_attr) {
  // ibv_query_port() signals failure with a non-zero return (an errno
  // value), so test for "not zero" rather than for -1 only.
  int r = ibv_query_port(ctxt, port_num, port_attr);
  if (r != 0) {
    lderr(cct) << __func__ << " query port failed " << cpp_strerror(errno) << dendl;
    ceph_abort();
  }

  lid = port_attr->lid;
  // Index 0 is hard-coded here: only the first GID table entry is read.
  r = ibv_query_gid(ctxt, port_num, 0, &gid);
  if (r) {
    lderr(cct) << __func__ << " query gid failed " << cpp_strerror(errno) << dendl;
    ceph_abort();
  }
}

explicit Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn);
// LID copied from the port attributes when the Port was constructed.
uint16_t get_lid() { return lid; }
// Returns a copy of the GID stored for this port.
ibv_gid get_gid() { return gid; }
// Physical port number this object was constructed with.
uint8_t get_port_num() { return port_num; }
// Pointer to the port attributes filled by ibv_query_port(); not a copy.
ibv_port_attr* get_port_attr() { return port_attr; }
// Index into the port's GID table of the entry selected by the constructor.
int get_gid_idx() { return gid_idx; }
};


Expand All @@ -92,6 +79,7 @@ class Device {
// Returns the stored device name pointer.
const char* get_name() { return name;}
// The three accessors below forward to active_port without a null check.
// NOTE(review): presumably active_port is assigned by binding_port() — confirm
// that it is always called before these are used.
uint16_t get_lid() { return active_port->get_lid(); }
ibv_gid get_gid() { return active_port->get_gid(); }
int get_gid_idx() { return active_port->get_gid_idx(); }
void binding_port(CephContext *c, uint8_t port_num);
struct ibv_context *ctxt;
ibv_device_attr *device_attr;
Expand Down
7 changes: 5 additions & 2 deletions src/msg/async/rdma/RDMAConnectedSocketImpl.cc
Expand Up @@ -37,13 +37,16 @@ int RDMAConnectedSocketImpl::activate()
qpa.ah_attr.is_global = 1;
qpa.ah_attr.grh.hop_limit = 6;
qpa.ah_attr.grh.dgid = peer_msg.gid;
qpa.ah_attr.grh.sgid_index = 0;

qpa.ah_attr.grh.sgid_index = infiniband->get_device()->get_gid_idx();

qpa.ah_attr.dlid = peer_msg.lid;
qpa.ah_attr.sl = 0;
qpa.ah_attr.sl = cct->_conf->ms_async_rdma_sl;
qpa.ah_attr.src_path_bits = 0;
qpa.ah_attr.port_num = (uint8_t)(infiniband->get_ib_physical_port());

ldout(cct, 20) << __func__ << " Choosing gid_index " << (int)qpa.ah_attr.grh.sgid_index << ", sl " << (int)qpa.ah_attr.sl << dendl;

r = ibv_modify_qp(qp->get_qp(), &qpa, IBV_QP_STATE |
IBV_QP_AV |
IBV_QP_PATH_MTU |
Expand Down

0 comments on commit 436d5e6

Please sign in to comment.