Skip to content
Permalink
Browse files
rdma_rxe: Added functional bind and invalidate MW ops
Replaced the bind MW and invalidate MW stubs with functional versions.
Added rule checking for these operations based on the InfiniBand
architecture document.
Added an extra flags field in the rxe WQE to indicate whether the
bind operation came from the ibv_bind_mw verbs API or the
ibv_post_send API, in order to enforce the rules on type 1 and type 2 MWs.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
  • Loading branch information
Bob Pearson authored and intel-lab-lkp committed Aug 19, 2020
1 parent 6d63b4f commit 01db430643deac98d0b280d7d6a0939c5c0d7856
Show file tree
Hide file tree
Showing 4 changed files with 314 additions and 18 deletions.
@@ -601,7 +601,8 @@ void rxe_mr_cleanup(struct rxe_pool_entry *arg)
struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
int i;

ib_umem_release(mr->umem);
if (mr->umem)
ib_umem_release(mr->umem);

if (mr->map) {
for (i = 0; i < mr->num_map; i++)
@@ -25,7 +25,7 @@ static void rxe_set_mw_rkey(struct rxe_mw *mw)
(rxe_add_key(mw, &rkey) == 0)))
return;
} while (tries++ < 10);
pr_err("unable to get random rkey for mw\n");
pr_err_once("unable to get random rkey for mw\n");
}

/* this is temporary code to test ibv_alloc_mw and ibv_dealloc_mw */
@@ -38,7 +38,7 @@ struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
struct rxe_alloc_mw_resp __user *uresp = NULL;

if (udata) {
if (udata->outlen < sizeof(*uresp))
if (unlikely(udata->outlen < sizeof(*uresp)))
return ERR_PTR(-EINVAL);
uresp = udata->outbuf;
}
@@ -70,43 +70,307 @@ struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
RXE_MEM_STATE_VALID;

if (uresp) {
if (copy_to_user(&uresp->index, &mw->pelem.index,
sizeof(uresp->index))) {
if (unlikely(copy_to_user(&uresp->index, &mw->pelem.index,
sizeof(uresp->index)))) {
rxe_drop_ref(mw);
rxe_drop_ref(pd);
return ERR_PTR(-EFAULT);
}
}

return &mw->ibmw;
}

/* cleanup mw in case someone is still holding a ref */
static void do_dealloc_mw(struct rxe_mw *mw)
{
if (mw->mr) {
rxe_drop_ref(mw->mr);
atomic_dec(&mw->mr->num_mw);
mw->mr = NULL;
}

mw->qp = NULL;
mw->access = 0;
mw->addr = 0;
mw->length = 0;
mw->state = RXE_MEM_STATE_INVALID;
}

/*
 * Deallocate a memory window.
 *
 * Under mw->lock the window is unbound and invalidated, then the
 * reference held on behalf of the caller is dropped.  Always returns 0.
 */
int rxe_dealloc_mw(struct ib_mw *ibmw)
{
	unsigned long irq_flags;
	struct rxe_mw *mw = to_rmw(ibmw);

	spin_lock_irqsave(&mw->lock, irq_flags);
	/* do_dealloc_mw() leaves the MW in RXE_MEM_STATE_INVALID */
	do_dealloc_mw(mw);
	spin_unlock_irqrestore(&mw->lock, irq_flags);

	rxe_drop_ref(mw);

	return 0;
}

/* stub for bind mw */
/* Check the rules for bind MW oepration. */
static int check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
struct rxe_mw *mw, struct rxe_mr *mr)
{
/* check to see if bind operation came through
* ibv_bind_mw verbs API.
*/
switch (mw->ibmw.type) {
case IB_MW_TYPE_1:
/* o10-37.2.34 */
if (unlikely(!(wqe->wr.wr.umw.flags & RXE_BIND_MW))) {
pr_err_once("attempt to bind type 1 MW with send WR\n");
return -EINVAL;
}
break;
case IB_MW_TYPE_2:
/* o10-37.2.35 */
if (unlikely(wqe->wr.wr.umw.flags & RXE_BIND_MW)) {
pr_err_once("attempt to bind type 2 MW with verbs API\n");
return -EINVAL;
}

/* C10-72 */
if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) {
pr_err_once("attempt to bind type 2 MW with qp with different PD\n");
return -EINVAL;
}

/* o10-37.2.40 */
if (unlikely(wqe->wr.wr.umw.length == 0)) {
pr_err_once("attempt to invalidate type 2 MW by binding with zero length\n");
return -EINVAL;
}

if (unlikely(!mr)) {
pr_err_once("attempt to bind MW to a NULL mr\n");
return -EINVAL;
}
break;
default:
return -EINVAL;
}

if (unlikely((mw->ibmw.type == IB_MW_TYPE_1) &&
(mw->state != RXE_MEM_STATE_VALID))) {
pr_err_once("attempt to bind a type 1 MW not in the valid state\n");
return -EINVAL;
}

/* o10-36.2.2 */
if (unlikely((mw->access & IB_ZERO_BASED) &&
(mw->ibmw.type == IB_MW_TYPE_1))) {
pr_err_once("attempt to bind a zero based type 1 MW\n");
return -EINVAL;
}

if (unlikely((wqe->wr.wr.umw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) {
pr_err_once("attempt to bind MW with same key\n");
return -EINVAL;
}

/* remaining checks only apply to a nonzero MR */
if (!mr)
return 0;

if (unlikely(mr->access & IB_ZERO_BASED)) {
pr_err_once("attempt to bind MW to zero based MR\n");
return -EINVAL;
}

/* o10-37.2.30 */
if (unlikely((mw->ibmw.type == IB_MW_TYPE_2) &&
(mw->state != RXE_MEM_STATE_FREE))) {
pr_err_once("attempt to bind a type 2 MW not in the free state\n");
return -EINVAL;
}

/* C10-73 */
if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) {
pr_err_once("attempt to bind an MW to an MR without bind access\n");
return -EINVAL;
}

/* C10-74 */
if (unlikely((mw->access & (IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_ATOMIC)) &&
!(mr->access & IB_ACCESS_LOCAL_WRITE))) {
pr_err_once("attempt to bind an writeable MW to an MR without local write access\n");
return -EINVAL;
}

/* C10-75 */
if (mw->access & IB_ZERO_BASED) {
if (unlikely(wqe->wr.wr.umw.length > mr->length)) {
pr_err_once("attempt to bind a ZB MW outside of the MR\n");
return -EINVAL;
}
} else {
if (unlikely((wqe->wr.wr.umw.addr < mr->iova) ||
((wqe->wr.wr.umw.addr + wqe->wr.wr.umw.length) >
(mr->iova + mr->length)))) {
pr_err_once("attempt to bind a VA MW outside of the MR\n");
return -EINVAL;
}
}

return 0;
}

/*
 * Apply a bind request that has already passed check_bind_mw().
 *
 * @qp:  QP the bind work request was posted on
 * @wqe: send WQE describing the requested binding
 * @mw:  memory window being bound
 * @mr:  memory region to bind to, or NULL for a zero length bind
 *
 * Installs the new rkey, records access/addr/length, moves the MW to the
 * valid state and swaps the MR association.  rxe_bind_mw() calls this
 * with mw->lock held.
 *
 * Returns 0 on success, -EINVAL if the new rkey collides with an existing
 * MW, or the error from rxe_add_key() if the new key cannot be installed.
 */
static int do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		      struct rxe_mw *mw, struct rxe_mr *mr)
{
	int ret;
	u32 old_rkey;
	u32 new_rkey;
	struct rxe_mw *duplicate_mw;
	struct rxe_mr *old_mr;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	/* key part of new rkey is provided by user for type 2
	 * and ibv_bind_mw() for type 1 MWs
	 * there is a very rare chance that the new rkey will
	 * collide with an existing MW. Return an error if this
	 * occurs
	 */
	old_rkey = mw->ibmw.rkey;
	new_rkey = (old_rkey & 0xffffff00) |
		   (wqe->wr.wr.umw.rkey & 0x000000ff);
	duplicate_mw = rxe_pool_get_key(&rxe->mw_pool, &new_rkey);
	if (duplicate_mw) {
		pr_err_once("new MW key is a duplicate, try another\n");
		rxe_drop_ref(duplicate_mw);
		return -EINVAL;
	}

	rxe_drop_key(mw);
	ret = rxe_add_key(mw, &new_rkey);
	if (unlikely(ret)) {
		/*
		 * Do not report success with no key installed.  Try to put
		 * the previous key back so the MW stays reachable, and leave
		 * the rest of the MW untouched.
		 */
		if (rxe_add_key(mw, &old_rkey))
			pr_err_once("unable to restore previous rkey for mw\n");
		return ret;
	}

	mw->access = wqe->wr.wr.umw.access;
	mw->state = RXE_MEM_STATE_VALID;
	mw->addr = wqe->wr.wr.umw.addr;
	mw->length = wqe->wr.wr.umw.length;

	old_mr = mw->mr;
	if (old_mr) {
		/*
		 * Update the old MR's bound-window count before dropping
		 * our reference; the MR must not be touched after the drop.
		 */
		mw->mr = NULL;
		atomic_dec(&old_mr->num_mw);
		rxe_drop_ref(old_mr);
	}

	/* a zero length bind leaves the window without an MR */
	if (mw->length && mr) {
		mw->mr = mr;
		atomic_inc(&mr->num_mw);
		rxe_add_ref(mr);
	}

	if (mw->ibmw.type == IB_MW_TYPE_2)
		mw->qp = qp;

	return 0;
}

/*
 * Execute a bind MW work request.
 *
 * @qp:  QP the work request was posted on
 * @wqe: send WQE carrying the bind parameters
 *
 * For user QPs the MW and MR are looked up by pool index; for kernel QPs
 * they are taken from the pointers in the WQE.  References are held on
 * both for the duration of the call, and the rule check plus the actual
 * bind run under mw->lock.
 *
 * Returns 0 on success or a negative errno.
 */
int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	int ret;
	struct rxe_mw *mw;
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	unsigned long flags;

	if (qp->is_user) {
		mw = rxe_pool_get_index(&rxe->mw_pool,
					wqe->wr.wr.umw.mw_index);
		if (!mw) {
			pr_err_once("mw with index = %d not found\n",
				    wqe->wr.wr.umw.mw_index);
			ret = -EINVAL;
			goto err1;
		}
		mr = rxe_pool_get_index(&rxe->mr_pool,
					wqe->wr.wr.umw.mr_index);
		/* a missing MR is only acceptable for a zero length bind */
		if (!mr && wqe->wr.wr.umw.length) {
			pr_err_once("mr with index = %d not found\n",
				    wqe->wr.wr.umw.mr_index);
			ret = -EINVAL;
			goto err2;
		}
	} else {
		mw = to_rmw(wqe->wr.wr.kmw.mw);
		rxe_add_ref(mw);
		if (wqe->wr.wr.kmw.mr) {
			mr = to_rmr(wqe->wr.wr.kmw.mr);
			rxe_add_ref(mr);
		} else {
			mr = NULL;
		}
	}

	spin_lock_irqsave(&mw->lock, flags);

	ret = check_bind_mw(qp, wqe, mw, mr);
	if (ret)
		goto err3;

	ret = do_bind_mw(qp, wqe, mw, mr);
err3:
	spin_unlock_irqrestore(&mw->lock, flags);

	/* drop the references taken above; the success path shares this unwind */
	if (mr)
		rxe_drop_ref(mr);
err2:
	rxe_drop_ref(mw);
err1:
	return ret;
}

/*
 * Check the rules for invalidating an MW.
 *
 * Only an MW that is currently in the valid state and is not type 1 may
 * be invalidated.  Returns 0 if allowed, -EINVAL otherwise.  @qp is not
 * consulted.
 */
static int check_invalidate_mw(struct rxe_qp *qp, struct rxe_mw *mw)
{
	int err = 0;

	if (unlikely(mw->state != RXE_MEM_STATE_VALID)) {
		pr_err_once("attempt to invalidate a MW that is not valid\n");
		err = -EINVAL;
	} else if (unlikely(mw->ibmw.type == IB_MW_TYPE_1)) {
		/* o10-37.2.26 */
		pr_err_once("attempt to invalidate a type 1 MW\n");
		err = -EINVAL;
	}

	return err;
}

static void do_invalidate_mw(struct rxe_mw *mw)
{
mw->qp = NULL;

rxe_drop_ref(mw->mr);
atomic_dec(&mw->mr->num_mw);
mw->mr = NULL;

mw->access = 0;
mw->addr = 0;
mw->length = 0;
mw->state = RXE_MEM_STATE_FREE;
}

/*
 * Invalidate a memory window.
 *
 * @qp: QP the invalidate request arrived on
 * @mw: memory window to invalidate
 *
 * The rule check and the invalidation both run under mw->lock.
 * Returns 0 on success or -EINVAL if the MW may not be invalidated.
 */
int rxe_invalidate_mw(struct rxe_qp *qp, struct rxe_mw *mw)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&mw->lock, flags);

	ret = check_invalidate_mw(qp, mw);
	if (ret)
		goto err;

	do_invalidate_mw(mw);
err:
	spin_unlock_irqrestore(&mw->lock, flags);

	return ret;
}

void rxe_mw_cleanup(struct rxe_pool_entry *arg)
@@ -329,17 +329,24 @@ struct rxe_mr {
u32 max_buf;
u32 num_map;

atomic_t num_mw;

struct rxe_map **map;
};

/* flag bits carried in the flags field of a bind MW work request */
enum rxe_send_flags {
	/* bit 0: the bind call came through the verbs API */
	RXE_BIND_MW = 0x1,
};

/*
 * Use the high order bit to separate MW and MR rkeys.
 * The constant is unsigned: shifting a signed 1 into bit 31 is undefined
 * behaviour and would sign-extend when widened to 64 bits.
 */
#define IS_MW (1U << 31)

struct rxe_mw {
struct rxe_pool_entry pelem;
struct ib_mw ibmw;
struct rxe_qp *qp; /* type 2B only */
struct rxe_mem *mr;
struct rxe_mr *mr;
spinlock_t lock;
enum rxe_mem_state state;
u32 access;

0 comments on commit 01db430

Please sign in to comment.