drm/i915: Use vma resources for async unbinding
Implement async (non-blocking) unbinding by not syncing the vma before
calling unbind on the vma_resource.
Add the resulting unbind fence to the object's dma_resv, from where it is
picked up by the TTM migration code.
Ideally these unbind fences should be coalesced with the migration blit
fence to avoid stalling the blit while it waits for the unbind; the two
can certainly proceed in parallel. However, we don't yet have a
reasonable data structure for coalescing fences and attaching the
resulting fence to a timeline, so we defer that for now.
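
For orientation, here is a condensed sketch of the resulting call flow,
taken from i915_vma_unbind_async() in the i915_vma.c hunk below (the
pin/allocation checks and the diversions to a sync unbind are trimmed
here; the latter are shown after the next paragraph):

int i915_vma_unbind_async(struct i915_vma *vma)
{
        struct drm_i915_gem_object *obj = vma->obj;
        struct dma_fence *fence;
        int err;

        /* The dma-resv lock is needed to add the unbind fence below. */
        assert_object_held(obj);

        /* Reserve a shared fence slot before taking the vm mutex. */
        err = dma_resv_reserve_shared(obj->base.resv, 1);
        if (err)
                return i915_vma_unbind(vma);

        err = mutex_lock_interruptible(&vma->vm->mutex);
        if (err)
                return err;

        /* Evict without waiting; returns the unbind fence, NULL or an error. */
        fence = __i915_vma_unbind_async(vma);
        mutex_unlock(&vma->vm->mutex);
        if (IS_ERR_OR_NULL(fence))
                return PTR_ERR_OR_ZERO(fence);

        /* TTM migration later picks this fence up from the object's dma_resv. */
        dma_resv_add_shared_fence(obj->base.resv, fence);
        dma_fence_put(fence);

        return 0;
}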

Note that with async unbinding, even though the unbind waits for the
preceding bind to complete, the vma itself might have been destroyed in
the meantime, clearing the vma pages. Therefore we can only allow async
unbinding if we have a refcounted sg-list, and we keep a refcount on it
so that the vma resource pages stay intact until binding occurs. If this
condition is not met, a request for an async unbind is diverted to a
sync unbind.
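
The diversion itself is a pair of guards near the top of
i915_vma_unbind_async(), again condensed from the hunk below:

        /*
         * Without a refcounted sg-list, the backing pages could be freed
         * while the unbind fence (and the bind it waits on) is still
         * pending, so fall back to a synchronous unbind.
         */
        if (!obj->mm.rsgt)
                return i915_vma_unbind(vma);

        /* A GGTT unbind might require a runtime-pm reference; not handled yet. */
        if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
                return i915_vma_unbind(vma);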

v2:
- Use a separate kmem_cache for vma resources for now to isolate their
  memory allocation and aid debugging.
- Move the check for vm closed to the actual unbinding thread. Regardless
  of whether the vm is closed, we need the unbind fence to properly wait
  for capture.
- Clear vma_res::vm on unbind and update its documentation.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Thomas Hellström authored and intel-lab-lkp committed Dec 17, 2021
1 parent c107a1d commit 45f1249
Showing 11 changed files with 519 additions and 63 deletions.
11 changes: 10 additions & 1 deletion drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo)
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
int ret;

ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
/*
* Note: The async unbinding here will actually transform the
* blocking wait for unbind into a wait before finally submitting
* evict / migration blit and thus stall the migration timeline
* which may not be good for overall throughput. We should make
* sure we await the unbind fences *after* the migration blit
* instead of *before* as we currently do.
*/
ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE |
I915_GEM_OBJECT_UNBIND_ASYNC);
if (ret)
return ret;

2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -145,7 +145,7 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
continue;

if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
__i915_vma_evict(vma);
__i915_vma_evict(vma, false);
drm_mm_remove_node(&vma->node);
}
}
4 changes: 4 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -161,6 +161,9 @@ static void __i915_vm_release(struct work_struct *work)
struct i915_address_space *vm =
container_of(work, struct i915_address_space, release_work);

/* Synchronize async unbinds. */
i915_vma_resource_bind_dep_sync_all(vm);

vm->cleanup(vm);
i915_address_space_fini(vm);

@@ -189,6 +192,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
if (!kref_read(&vm->resv_ref))
kref_init(&vm->resv_ref);

vm->pending_unbind = RB_ROOT_CACHED;
INIT_WORK(&vm->release_work, __i915_vm_release);
atomic_set(&vm->open, 1);

3 changes: 3 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -267,6 +267,9 @@ struct i915_address_space {
/* Flags used when creating page-table objects for this vm */
unsigned long lmem_pt_obj_flags;

/* Interval tree for pending unbind vma resources */
struct rb_root_cached pending_unbind;

struct drm_i915_gem_object *
(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
struct drm_i915_gem_object *
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/i915_drv.h
@@ -1666,6 +1666,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
#define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1)
#define I915_GEM_OBJECT_UNBIND_TEST BIT(2)
#define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3)
#define I915_GEM_OBJECT_UNBIND_ASYNC BIT(4)

void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);

3 changes: 3 additions & 0 deletions drivers/gpu/drm/i915/i915_gem.c
@@ -165,6 +165,9 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
} else {
ret = -EBUSY;
}
} else if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
assert_object_held(vma->obj);
ret = i915_vma_unbind_async(vma);
} else {
ret = i915_vma_unbind(vma);
}
3 changes: 3 additions & 0 deletions drivers/gpu/drm/i915/i915_module.c
@@ -17,6 +17,7 @@
#include "i915_scheduler.h"
#include "i915_selftest.h"
#include "i915_vma.h"
#include "i915_vma_resource.h"

static int i915_check_nomodeset(void)
{
@@ -64,6 +65,8 @@ static const struct {
.exit = i915_scheduler_module_exit },
{ .init = i915_vma_module_init,
.exit = i915_vma_module_exit },
{ .init = i915_vma_resource_module_init,
.exit = i915_vma_resource_module_exit },
{ .init = i915_mock_selftests },
{ .init = i915_pmu_init,
.exit = i915_pmu_exit },
181 changes: 152 additions & 29 deletions drivers/gpu/drm/i915/i915_vma.c
@@ -286,20 +286,22 @@ struct i915_vma_work {
struct dma_fence_work base;
struct i915_address_space *vm;
struct i915_vm_pt_stash stash;
struct i915_vma *vma;
struct i915_vma_resource *vma_res;
struct drm_i915_gem_object *pinned;
struct i915_sw_dma_fence_cb cb;
struct i915_refct_sgt *rsgt;
enum i915_cache_level cache_level;
unsigned int flags;
};

static void __vma_bind(struct dma_fence_work *work)
{
struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
struct i915_vma *vma = vw->vma;
struct i915_vma_resource *vma_res = vw->vma_res;

vma_res->ops->bind_vma(vma_res->vm, &vw->stash,
vma_res, vw->cache_level, vw->flags);

vma->ops->bind_vma(vw->vm, &vw->stash,
vma->resource, vw->cache_level, vw->flags);
}

static void __vma_release(struct dma_fence_work *work)
@@ -313,6 +315,10 @@ static void __vma_release(struct dma_fence_work *work)

i915_vm_free_pt_stash(vw->vm, &vw->stash);
i915_vm_put(vw->vm);
if (vw->vma_res)
i915_vma_resource_put(vw->vma_res);
if (vw->rsgt)
i915_refct_sgt_put(vw->rsgt);
}

static const struct dma_fence_work_ops bind_ops = {
@@ -386,13 +392,11 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res,
{
struct drm_i915_gem_object *obj = vma->obj;

i915_vma_resource_init(vma_res, vma->pages, &vma->page_sizes,
i915_vma_resource_init(vma_res, vma->vm, vma->pages, &vma->page_sizes,
i915_gem_object_is_readonly(obj),
i915_gem_object_is_lmem(obj),
vma->private,
vma->node.start,
vma->node.size,
vma->size);
vma->ops, vma->private, vma->node.start,
vma->node.size, vma->size);
}

/**
@@ -416,6 +420,7 @@ int i915_vma_bind(struct i915_vma *vma,
{
u32 bind_flags;
u32 vma_flags;
int ret;

lockdep_assert_held(&vma->vm->mutex);
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -424,12 +429,12 @@
if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
vma->node.size,
vma->vm->total))) {
kfree(vma_res);
i915_vma_resource_free(vma_res);
return -ENODEV;
}

if (GEM_DEBUG_WARN_ON(!flags)) {
kfree(vma_res);
i915_vma_resource_free(vma_res);
return -EINVAL;
}

@@ -441,12 +446,30 @@

bind_flags &= ~vma_flags;
if (bind_flags == 0) {
kfree(vma_res);
i915_vma_resource_free(vma_res);
return 0;
}

GEM_BUG_ON(!vma->pages);

/* Wait for or await async unbinds touching our range */
if (work && bind_flags & vma->vm->bind_async_flags)
ret = i915_vma_resource_bind_dep_await(vma->vm,
&work->base.chain,
vma->node.start,
vma->node.size,
true,
GFP_NOWAIT |
__GFP_RETRY_MAYFAIL |
__GFP_NOWARN);
else
ret = i915_vma_resource_bind_dep_sync(vma->vm, vma->node.start,
vma->node.size, true);
if (ret) {
i915_vma_resource_free(vma_res);
return ret;
}

if (vma->resource || !vma_res) {
/* Rebinding with an additional I915_VMA_*_BIND */
GEM_WARN_ON(!vma_flags);
@@ -459,9 +482,11 @@
if (work && bind_flags & vma->vm->bind_async_flags) {
struct dma_fence *prev;

work->vma = vma;
work->vma_res = i915_vma_resource_get(vma->resource);
work->cache_level = cache_level;
work->flags = bind_flags;
if (vma->obj->mm.rsgt)
work->rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt);

/*
* Note we only want to chain up to the migration fence on
@@ -489,8 +514,12 @@ int i915_vma_bind(struct i915_vma *vma,
int ret;

ret = i915_gem_object_wait_moving_fence(vma->obj, true);
if (ret)
if (ret) {
i915_vma_resource_free(vma->resource);
vma->resource = NULL;

return ret;
}
}
vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level,
bind_flags);
@@ -1361,7 +1390,7 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
return 0;
}

void __i915_vma_evict(struct i915_vma *vma)
struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
{
struct dma_fence *unbind_fence;

@@ -1395,27 +1424,35 @@ void __i915_vma_evict(struct i915_vma *vma)
GEM_BUG_ON(vma->fence);
GEM_BUG_ON(i915_vma_has_userfault(vma));

if (likely(atomic_read(&vma->vm->open))) {
trace_i915_vma_unbind(vma);
vma->ops->unbind_vma(vma->vm, vma->resource);
}
atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
&vma->flags);
/* Object backend must be async capable. */
GEM_WARN_ON(async && !vma->obj->mm.rsgt);

trace_i915_vma_unbind(vma);
unbind_fence = i915_vma_resource_unbind(vma->resource);
i915_vma_resource_put(vma->resource);
vma->resource = NULL;

atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
&vma->flags);

/* Object backend must be async capable. */
GEM_WARN_ON(async && !vma->obj->mm.rsgt);

i915_vma_detach(vma);
vma_unbind_pages(vma);

if (!async && unbind_fence) {
dma_fence_wait(unbind_fence, false);
dma_fence_put(unbind_fence);
unbind_fence = NULL;
}

/*
* This uninterruptible wait under the vm mutex is currently
* only ever blocking while the vma is being captured from.
* With async unbinding, this wait here will be removed.
* Binding itself may not have completed until the unbind fence signals,
* so don't drop the pages until that happens, unless the resource is
* async_capable.
*/
dma_fence_wait(unbind_fence, false);
dma_fence_put(unbind_fence);

vma_unbind_pages(vma);
return unbind_fence;
}

int __i915_vma_unbind(struct i915_vma *vma)
@@ -1442,12 +1479,51 @@ int __i915_vma_unbind(struct i915_vma *vma)
return ret;

GEM_BUG_ON(i915_vma_is_active(vma));
__i915_vma_evict(vma);
__i915_vma_evict(vma, false);

drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
return 0;
}

static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma)
{
struct dma_fence *fence;

lockdep_assert_held(&vma->vm->mutex);

if (!drm_mm_node_allocated(&vma->node))
return NULL;

if (i915_vma_is_pinned(vma)) {
vma_print_allocator(vma, "is pinned");
return ERR_PTR(-EAGAIN);
}

/*
* We probably need to replace this with awaiting the fences of the
* object's dma_resv when the vma active goes away. When doing that
* we need to be careful to not add the vma_resource unbind fence
* immediately to the object's dma_resv, because then unbinding
* the next vma from the object, in case there are many, will
* actually await the unbinding of the previous vmas, which is
* undesirable.
*/
if (i915_sw_fence_await_active(&vma->resource->chain, &vma->active,
I915_ACTIVE_AWAIT_EXCL |
I915_ACTIVE_AWAIT_ACTIVE) < 0) {
int ret = i915_vma_sync(vma);

if (ret)
return ERR_PTR(ret);
}

fence = __i915_vma_evict(vma, true);

drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */

return fence;
}

int i915_vma_unbind(struct i915_vma *vma)
{
struct i915_address_space *vm = vma->vm;
@@ -1484,6 +1560,53 @@ int i915_vma_unbind(struct i915_vma *vma)
return err;
}

int i915_vma_unbind_async(struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
struct i915_address_space *vm = vma->vm;
struct dma_fence *fence;
int err;

/*
* We need the dma-resv lock since we add the
* unbind fence to the dma-resv object.
*/
assert_object_held(obj);

if (!drm_mm_node_allocated(&vma->node))
return 0;

if (i915_vma_is_pinned(vma)) {
vma_print_allocator(vma, "is pinned");
return -EAGAIN;
}

if (!obj->mm.rsgt)
return i915_vma_unbind(vma);

if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
/* Might require pm. Not handled yet.*/
return i915_vma_unbind(vma);

err = dma_resv_reserve_shared(obj->base.resv, 1);
if (err)
return i915_vma_unbind(vma);

err = mutex_lock_interruptible(&vma->vm->mutex);
if (err)
return err;

fence = __i915_vma_unbind_async(vma);
mutex_unlock(&vm->mutex);
if (IS_ERR_OR_NULL(fence))
return PTR_ERR_OR_ZERO(fence);

dma_resv_add_shared_fence(obj->base.resv, fence);
dma_fence_put(fence);

return 0;
}

struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
{
i915_gem_object_make_unshrinkable(vma->obj);
