
Commit a63d915

isilence authored and gregkh committed
io_uring: stop SQPOLL submit on creator's death
[ Upstream commit d9d0521 ]

When the creator of an SQPOLL io_uring dies (i.e. sqo_task), we don't want its internals like ->files and ->mm to be poked by the SQPOLL task; that has never been nice and recently got racy. It can happen when the owner undergoes destruction while the SQPOLL task tries to submit new requests in parallel, and so calls io_sq_thread_acquire*().

This patch halts SQPOLL submissions when sqo_task dies by introducing an sqo_dead flag. Once set, the SQPOLL task must not do any submission, which is synchronised by uring_lock as well as the new flag.

The tricky part is to make sure that disabling always happens: either the ring is discovered by the creator's do_exit() -> cancel path, or the final close() happens before the creator is done. The latter is guaranteed by the fact that for SQPOLL the creator task, and only it, holds exactly one file note, so either that note pins the ring up to do_exit(), or it is removed by the creator on the final put in flush (see the comments in io_uring_flush() around file->f_count == 2).

One more place that can trigger io_sq_thread_acquire_*() is __io_req_task_submit(). Shoot off requests on sqo_dead there, even though we don't strictly need to: cancellation of sqo_task should wait for the request before going any further.

note 1: io_disable_sqo_submit() does io_ring_set_wakeup_flag() so the caller would enter the ring to get an error, but it still doesn't guarantee that the flag won't be cleared.

note 2: if the final __userspace__ close happens not from the creator task, the file note will pin the ring until that task dies.

Cc: stable@vger.kernel.org # 5.5+
Fixes: b1b6b5a ("kernel/io_uring: cancel io_uring before task works")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent da67631 commit a63d915
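The userspace-visible effect of the patch is that io_uring_enter() on an SQPOLL ring starts failing with -EOWNERDEAD once the creator task is gone. A minimal sketch of how a submitter might detect this (not part of the patch; the ring setup is elided, and submit_more() and ring_fd are illustrative names):

	#include <errno.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/io_uring.h>

	/*
	 * Illustrative only: ring_fd is assumed to be an already set up
	 * IORING_SETUP_SQPOLL ring. Once the creator task dies, the
	 * kernel sets sqo_dead and refuses further submissions.
	 */
	static int submit_more(int ring_fd, unsigned int to_submit)
	{
		long ret = syscall(__NR_io_uring_enter, ring_fd, to_submit, 0,
				   IORING_ENTER_SQ_WAIT, NULL, 0);
		if (ret < 0 && errno == EOWNERDEAD) {
			/* SQPOLL creator died; the ring is dead for submission */
			fprintf(stderr, "io_uring: SQPOLL creator died\n");
			return -1;
		}
		return (int)ret;
	}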

1 file changed (+50, -8)

fs/io_uring.c

Lines changed: 50 additions & 8 deletions
@@ -260,6 +260,7 @@ struct io_ring_ctx {
 		unsigned int		drain_next: 1;
 		unsigned int		eventfd_async: 1;
 		unsigned int		restricted: 1;
+		unsigned int		sqo_dead: 1;
 
 		/*
 		 * Ring buffer of indices into array of io_uring_sqe, which is
@@ -2083,11 +2084,9 @@ static void io_req_task_cancel(struct callback_head *cb)
 static void __io_req_task_submit(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	bool fail;
 
-	fail = __io_sq_thread_acquire_mm(ctx);
 	mutex_lock(&ctx->uring_lock);
-	if (!fail)
+	if (!ctx->sqo_dead && !__io_sq_thread_acquire_mm(ctx))
 		__io_queue_sqe(req, NULL);
 	else
 		__io_req_task_cancel(req, -EFAULT);
@@ -6796,7 +6795,7 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 		to_submit = 8;
 
 	mutex_lock(&ctx->uring_lock);
-	if (likely(!percpu_ref_is_dying(&ctx->refs)))
+	if (likely(!percpu_ref_is_dying(&ctx->refs) && !ctx->sqo_dead))
 		ret = io_submit_sqes(ctx, to_submit);
 	mutex_unlock(&ctx->uring_lock);
 
@@ -8487,6 +8486,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	mutex_lock(&ctx->uring_lock);
 	percpu_ref_kill(&ctx->refs);
 	/* if force is set, the ring is going away. always drop after that */
+
+	if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead))
+		ctx->sqo_dead = 1;
+
 	ctx->cq_overflow_flushed = 1;
 	if (ctx->rings)
 		__io_cqring_overflow_flush(ctx, true, NULL, NULL);
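A side note on the idiom above: WARN_ON_ONCE() evaluates to its condition, so it can both report a broken invariant and gate the repair. Restating the added lines with comments (comments are mine, not in the patch):

	/* By the time the ring is killed, io_disable_sqo_submit() should
	 * already have run. If an SQPOLL ring reaches here with sqo_dead
	 * still clear, warn once and repair the invariant on the spot. */
	if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead))
		ctx->sqo_dead = 1;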
@@ -8745,6 +8748,18 @@ static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 	return ret;
 }
 
+static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
+{
+	WARN_ON_ONCE(ctx->sqo_task != current);
+
+	mutex_lock(&ctx->uring_lock);
+	ctx->sqo_dead = 1;
+	mutex_unlock(&ctx->uring_lock);
+
+	/* make sure callers enter the ring to get error */
+	io_ring_set_wakeup_flag(ctx);
+}
+
 /*
  * We need to iteratively cancel requests, in case a request has dependent
  * hard links. These persist even for failure of cancelations, hence keep
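Why a plain bit plus uring_lock is enough: the SQPOLL thread only submits while holding uring_lock, and io_disable_sqo_submit() takes and releases the same mutex after setting the flag, so any submission critical section that starts afterwards must observe sqo_dead. A standalone userspace analogue of that handshake, with pthreads standing in for uring_lock (names are illustrative, not kernel APIs):

	#include <pthread.h>
	#include <stdbool.h>

	static pthread_mutex_t uring_lock = PTHREAD_MUTEX_INITIALIZER;
	static bool sqo_dead;

	/* poller side: mirrors __io_sq_thread() checking the flag under the lock */
	static void poll_and_submit(void)
	{
		pthread_mutex_lock(&uring_lock);
		if (!sqo_dead)
			;	/* submit_sqes() would go here */
		pthread_mutex_unlock(&uring_lock);
	}

	/* creator side: mirrors io_disable_sqo_submit(); once the unlock
	 * returns, no later poller critical section can miss the flag */
	static void disable_submit(void)
	{
		pthread_mutex_lock(&uring_lock);
		sqo_dead = true;
		pthread_mutex_unlock(&uring_lock);
	}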
@@ -8756,6 +8771,8 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 	struct task_struct *task = current;
 
 	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
+		/* for SQPOLL only sqo_task has task notes */
+		io_disable_sqo_submit(ctx);
 		task = ctx->sq_data->thread;
 		atomic_inc(&task->io_uring->in_idle);
 		io_sq_thread_park(ctx->sq_data);
@@ -8927,6 +8944,7 @@ void __io_uring_task_cancel(void)
 static int io_uring_flush(struct file *file, void *data)
 {
 	struct io_uring_task *tctx = current->io_uring;
+	struct io_ring_ctx *ctx = file->private_data;
 
 	if (!tctx)
 		return 0;
@@ -8942,7 +8960,16 @@ static int io_uring_flush(struct file *file, void *data)
 	if (atomic_long_read(&file->f_count) != 2)
 		return 0;
 
-	io_uring_del_task_file(file);
+	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		/* there is only one file note, which is owned by sqo_task */
+		WARN_ON_ONCE((ctx->sqo_task == current) ==
+			     !xa_load(&tctx->xa, (unsigned long)file));
+
+		io_disable_sqo_submit(ctx);
+	}
+
+	if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current)
+		io_uring_del_task_file(file);
 	return 0;
 }
 
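The two-branch logic above encodes the ownership rule from the commit message: for SQPOLL, exactly one file note exists and it belongs to sqo_task. A summary of the cases as a comment table (my annotation, not part of the patch):

	/*
	 * caller == sqo_task | has file note | outcome on flush at f_count == 2
	 * -------------------+---------------+----------------------------------
	 * yes                | yes           | disable submit, drop the note
	 * no                 | no            | disable submit; the creator's note
	 *                    |               | pins the ring until sqo_task dies
	 * any other combo    |               | WARN_ON_ONCE: invariant broken
	 */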

@@ -9016,8 +9043,9 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
 
 #endif /* !CONFIG_MMU */
 
-static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
+static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
 {
+	int ret = 0;
 	DEFINE_WAIT(wait);
 
 	do {
@@ -9026,13 +9054,20 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
 
 		prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
 
+		if (unlikely(ctx->sqo_dead)) {
+			ret = -EOWNERDEAD;
+			goto out;
+		}
+
 		if (!io_sqring_full(ctx))
 			break;
 
 		schedule();
 	} while (!signal_pending(current));
 
 	finish_wait(&ctx->sqo_sq_wait, &wait);
+out:
+	return ret;
 }
 
 SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
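The sqo_dead check sits after prepare_to_wait() deliberately: once the task is queued on the waitqueue, a concurrent io_disable_sqo_submit() plus wakeup cannot be lost. This is the standard kernel wait-loop shape, sketched generically below (wq and condition are placeholders, not names from the patch):

	DEFINE_WAIT(wait);

	do {
		/* queue on the waitqueue first... */
		prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);

		/* ...then re-check the condition: a wakeup racing with this
		 * check just makes schedule() return immediately */
		if (condition)
			break;

		schedule();
	} while (!signal_pending(current));

	finish_wait(&wq, &wait);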
@@ -9076,10 +9111,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		io_cqring_overflow_flush(ctx, false, NULL, NULL);
 
+		ret = -EOWNERDEAD;
+		if (unlikely(ctx->sqo_dead))
+			goto out;
 		if (flags & IORING_ENTER_SQ_WAKEUP)
 			wake_up(&ctx->sq_data->wait);
-		if (flags & IORING_ENTER_SQ_WAIT)
-			io_sqpoll_wait_sq(ctx);
+		if (flags & IORING_ENTER_SQ_WAIT) {
+			ret = io_sqpoll_wait_sq(ctx);
+			if (ret)
+				goto out;
+		}
 		submitted = to_submit;
 	} else if (to_submit) {
 		ret = io_uring_add_task_file(ctx, f.file);
@@ -9498,6 +9539,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
 	return ret;
 err:
+	io_disable_sqo_submit(ctx);
 	io_ring_ctx_wait_and_kill(ctx);
 	return ret;
 }
