Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
LU-17062 lnet: Update lnet_peer_*_decref_locked usage
Move the decrefs so that they occur after the last use of the
referenced object, preventing use-after-free.

Cherry-picked-from-change: https://review.whamcloud.com/52184
Cherry-picked-from: 76a9ae640f3ab27590f0bf34b8c422e2c770dd79

HPE-bug-id: LUS-11799
Signed-off-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Change-Id: I2382ece560039383f644b6aee73a9481d6bb5673
Signed-off-by: Gian-Carlo DeFazio <defazio1@llnl.gov>
  • Loading branch information
Shaun Tancheff authored and ofaaland committed Dec 28, 2023
1 parent f115d76 commit 2e27193
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 24 deletions.
33 changes: 22 additions & 11 deletions lnet/lnet/lib-move.c
Expand Up @@ -2152,7 +2152,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
struct lnet_peer_ni **gw_lpni,
struct lnet_peer **gw_peer)
{
int rc;
int rc = 0;
struct lnet_peer *gw;
struct lnet_peer *lp;
struct lnet_peer_net *lpn;
Expand All @@ -2163,6 +2163,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
struct lnet_peer_ni *lpni = NULL;
struct lnet_peer_ni *gwni = NULL;
bool route_found = false;
bool gwni_decref = false;
struct lnet_nid *src_nid =
!LNET_NID_IS_ANY(&sd->sd_src_nid) || !sd->sd_best_ni
? &sd->sd_src_nid
Expand All @@ -2180,8 +2181,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
if (!LNET_NID_IS_ANY(&sd->sd_rtr_nid)) {
gwni = lnet_peer_ni_find_locked(&sd->sd_rtr_nid);
if (gwni) {
gwni_decref = true;
gw = gwni->lpni_peer_net->lpn_peer;
lnet_peer_ni_decref_locked(gwni);
if (gw->lp_rtr_refcount)
route_found = true;
} else {
Expand All @@ -2205,7 +2206,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
"any local NI" :
libcfs_nidstr(src_nid),
libcfs_nidstr(&sd->sd_dst_nid));
return -EHOSTUNREACH;
rc = -EHOSTUNREACH;
goto out;
}
} else {
/* we've already looked up the initial lpni using
Expand Down Expand Up @@ -2251,7 +2253,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
if (!best_lpn) {
CERROR("peer %s has no available nets\n",
libcfs_nidstr(&sd->sd_dst_nid));
return -EHOSTUNREACH;
rc = -EHOSTUNREACH;
goto out;
}

sd->sd_best_lpni = lnet_find_best_lpni(sd->sd_best_ni,
Expand All @@ -2261,7 +2264,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
if (!sd->sd_best_lpni) {
CERROR("peer %s is unreachable\n",
libcfs_nidstr(&sd->sd_dst_nid));
return -EHOSTUNREACH;
rc = -EHOSTUNREACH;
goto out;
}

/* We're attempting to round robin over the remote peer
Expand Down Expand Up @@ -2293,14 +2297,16 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
CERROR("no route to %s from %s\n",
libcfs_nidstr(dst_nid),
libcfs_nidstr(src_nid));
return -EHOSTUNREACH;
rc = -EHOSTUNREACH;
goto out;
}

if (!gwni) {
CERROR("Internal Error. Route expected to %s from %s\n",
libcfs_nidstr(dst_nid),
libcfs_nidstr(src_nid));
return -EFAULT;
rc = -EFAULT;
goto out;
}

gw = best_route->lr_gateway;
Expand All @@ -2323,7 +2329,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
if (alive_router_check_interval <= 0) {
rc = lnet_initiate_peer_discovery(gwni, sd->sd_msg, sd->sd_cpt);
if (rc)
return rc;
goto out;
}

if (!sd->sd_best_ni) {
Expand All @@ -2335,7 +2341,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
CERROR("Internal Error. Expected local ni on %s but non found: %s\n",
libcfs_net2str(lpn->lpn_net_id),
libcfs_nidstr(&sd->sd_src_nid));
return -EFAULT;
rc = -EFAULT;
goto out;
}
}

Expand All @@ -2353,7 +2360,11 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
best_lpn->lpn_seq++;
}

return 0;
out:
if (gwni_decref && gwni)
lnet_peer_ni_decref_locked(gwni);

return rc;
}

/*
Expand Down Expand Up @@ -2995,7 +3006,6 @@ lnet_select_pathway(struct lnet_nid *src_nid,
lnet_net_unlock(cpt);
return rc;
}
lnet_peer_ni_decref_locked(lpni);

peer = lpni->lpni_peer_net->lpn_peer;

Expand Down Expand Up @@ -3095,6 +3105,7 @@ lnet_select_pathway(struct lnet_nid *src_nid,
* updated as a result of calling lnet_handle_send_case_locked().
*/
cpt = send_data.sd_cpt;
lnet_peer_ni_decref_locked(lpni);

if (rc == REPEAT_SEND)
goto again;
Expand Down
27 changes: 15 additions & 12 deletions lnet/lnet/peer.c
Expand Up @@ -561,9 +561,9 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid4, unsigned int flags)
rc = -ENOENT;
goto out;
}
lnet_peer_ni_decref_locked(lpni);
if (lp != lpni->lpni_peer_net->lpn_peer) {
rc = -ECHILD;
lnet_peer_ni_decref_locked(lpni);
goto out;
}

Expand All @@ -573,6 +573,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid4, unsigned int flags)
*/
if (nid_same(&nid, &lp->lp_primary_nid) && lp->lp_nnis != 1 && !force) {
rc = -EBUSY;
lnet_peer_ni_decref_locked(lpni);
goto out;
}

Expand All @@ -586,6 +587,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid4, unsigned int flags)
lp->lp_primary_nid = lpni2->lpni_nid;
}
rc = lnet_peer_ni_del_locked(lpni, force);
lnet_peer_ni_decref_locked(lpni);

lnet_net_unlock(LNET_LOCK_EX);

Expand Down Expand Up @@ -2034,8 +2036,8 @@ __must_hold(&the_lnet.ln_api_mutex)
lpni = lnet_find_peer_ni_locked(prim_nid);
if (!lpni)
return -ENOENT;
lnet_peer_ni_decref_locked(lpni);
lp = lpni->lpni_peer_net->lpn_peer;
lnet_peer_ni_decref_locked(lpni);

/* Peer must have been configured. */
if ((flags & LNET_PEER_CONFIGURED) &&
Expand Down Expand Up @@ -2139,8 +2141,8 @@ lnet_del_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, int force)
lpni = lnet_find_peer_ni_locked(prim_nid);
if (!lpni)
return -ENOENT;
lnet_peer_ni_decref_locked(lpni);
lp = lpni->lpni_peer_net->lpn_peer;
lnet_peer_ni_decref_locked(lpni);

if (prim_nid != lnet_nid_to_nid4(&lp->lp_primary_nid)) {
CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
Expand Down Expand Up @@ -2720,11 +2722,13 @@ int
lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block)
{
DEFINE_WAIT(wait);
struct lnet_peer *lp;
struct lnet_peer *lp = NULL;
int rc = 0;
int count = 0;

again:
if (lp)
lnet_peer_decref_locked(lp);
lnet_net_unlock(cpt);
lnet_net_lock(LNET_LOCK_EX);
lp = lpni->lpni_peer_net->lpn_peer;
Expand Down Expand Up @@ -2781,7 +2785,6 @@ lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block)

lnet_net_unlock(LNET_LOCK_EX);
lnet_net_lock(cpt);
lnet_peer_decref_locked(lp);
/*
* The peer may have changed, so re-check and rediscover if that turns
* out to have been the case. The reference count on lp ensured that
Expand Down Expand Up @@ -2809,6 +2812,7 @@ lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block)
(lp ? libcfs_nidstr(&lp->lp_primary_nid) : "(none)"),
libcfs_nidstr(&lpni->lpni_nid), rc,
(!block) ? "pending discovery" : "discovery complete");
lnet_peer_decref_locked(lp);

return rc;
}
Expand Down Expand Up @@ -3112,11 +3116,6 @@ static void lnet_discovery_event_handler(struct lnet_event *event)
LBUG();
}
lnet_net_lock(LNET_LOCK_EX);
if (event->unlinked) {
pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start);
lnet_ping_buffer_decref(pbuf);
lnet_peer_decref_locked(lp);
}

/* put peer back at end of request queue, if discovery not already
* done */
Expand All @@ -3125,6 +3124,11 @@ static void lnet_discovery_event_handler(struct lnet_event *event)
list_move_tail(&lp->lp_dc_list, &the_lnet.ln_dc_request);
wake_up(&the_lnet.ln_dc_waitq);
}
if (event->unlinked) {
pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start);
lnet_ping_buffer_decref(pbuf);
lnet_peer_decref_locked(lp);
}
lnet_net_unlock(LNET_LOCK_EX);
}

Expand Down Expand Up @@ -3290,8 +3294,6 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
goto out;
}

lnet_peer_ni_decref_locked(lpni);

lpn = lpni->lpni_peer_net;
if (lpn->lpn_peer_nets.prev != &lp->lp_peer_nets)
list_move(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
Expand All @@ -3300,6 +3302,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
list_move(&lpni->lpni_peer_nis,
&lpni->lpni_peer_net->lpn_peer_nis);

lnet_peer_ni_decref_locked(lpni);
/*
* Errors other than -ENOMEM are due to peers having been
* configured with DLC. Ignore these because DLC overrides
Expand Down
4 changes: 3 additions & 1 deletion lnet/lnet/router.c
Expand Up @@ -874,12 +874,13 @@ lnet_del_route(__u32 net, struct lnet_nid *gw)
LASSERT(lp);
gw_nid = lp->lp_primary_nid;
gw = &gw_nid;
lnet_peer_ni_decref_locked(lpni);
}

if (net != LNET_NET_ANY) {
rnet = lnet_find_rnet_locked(net);
if (!rnet) {
if (lpni)
lnet_peer_ni_decref_locked(lpni);
lnet_net_unlock(LNET_LOCK_EX);
return -ENOENT;
}
Expand Down Expand Up @@ -912,6 +913,7 @@ lnet_del_route(__u32 net, struct lnet_nid *gw)
if (lpni) {
if (list_empty(&lp->lp_routes))
lp->lp_disc_net_id = 0;
lnet_peer_ni_decref_locked(lpni);
}

lnet_net_unlock(LNET_LOCK_EX);
Expand Down

0 comments on commit 2e27193

Please sign in to comment.