Skip to content
Permalink
Browse files
drm/i915: Use vma resources for async unbinding
Implement async (non-blocking) unbinding by not syncing the vma before
calling unbind on the vma_resource.
Add the resulting unbind fence to the object's dma_resv from where it is
picked up by the ttm migration code.
Ideally these unbind fences should be coalesced with the migration blit
fence to avoid stalling the migration blit waiting for unbind, as they
can certainly go on in parallel, but since we don't yet have a
reasonable data structure to use to coalesce fences and attach the
resulting fence to a timeline, we defer that for now.

Note that with async unbinding, even though the unbind waits for the
preceding bind to complete before actually unbinding, the vma itself might
have been destroyed in the meantime, clearing the vma pages.
only allow async unbinding if we have a refcounted sg-list and keep a
refcount on that for the vma resource pages to stay intact until
binding occurs. If this condition is not met, a request for an async
unbind is diverted to a sync unbind.

v2:
- Use a separate kmem_cache for vma resources for now to isolate their
  memory allocation and aid debugging.
- Move the check for vm closed to the actual unbinding thread. Regardless
  of whether the vm is closed, we need the unbind fence to properly wait
  for capture.
- Clear vma_res::vm on unbind and update its documentation.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
  • Loading branch information
Thomas Hellström authored and intel-lab-lkp committed Dec 17, 2021
1 parent c107a1d commit 45f1249183a30dea38defee195b33c7f6753d9f9
Show file tree
Hide file tree
Showing 11 changed files with 519 additions and 63 deletions.
@@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo)
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
int ret;

ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
/*
* Note: The async unbinding here will actually transform the
* blocking wait for unbind into a wait before finally submitting
* evict / migration blit and thus stall the migration timeline
* which may not be good for overall throughput. We should make
* sure we await the unbind fences *after* the migration blit
* instead of *before* as we currently do.
*/
ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE |
I915_GEM_OBJECT_UNBIND_ASYNC);
if (ret)
return ret;

@@ -145,7 +145,7 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
continue;

if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
__i915_vma_evict(vma);
__i915_vma_evict(vma, false);
drm_mm_remove_node(&vma->node);
}
}
@@ -161,6 +161,9 @@ static void __i915_vm_release(struct work_struct *work)
struct i915_address_space *vm =
container_of(work, struct i915_address_space, release_work);

/* Synchronize async unbinds. */
i915_vma_resource_bind_dep_sync_all(vm);

vm->cleanup(vm);
i915_address_space_fini(vm);

@@ -189,6 +192,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
if (!kref_read(&vm->resv_ref))
kref_init(&vm->resv_ref);

vm->pending_unbind = RB_ROOT_CACHED;
INIT_WORK(&vm->release_work, __i915_vm_release);
atomic_set(&vm->open, 1);

@@ -267,6 +267,9 @@ struct i915_address_space {
/* Flags used when creating page-table objects for this vm */
unsigned long lmem_pt_obj_flags;

/* Interval tree for pending unbind vma resources */
struct rb_root_cached pending_unbind;

struct drm_i915_gem_object *
(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
struct drm_i915_gem_object *
@@ -1666,6 +1666,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
#define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1)
#define I915_GEM_OBJECT_UNBIND_TEST BIT(2)
#define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3)
#define I915_GEM_OBJECT_UNBIND_ASYNC BIT(4)

void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);

@@ -165,6 +165,9 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
} else {
ret = -EBUSY;
}
} else if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
assert_object_held(vma->obj);
ret = i915_vma_unbind_async(vma);
} else {
ret = i915_vma_unbind(vma);
}
@@ -17,6 +17,7 @@
#include "i915_scheduler.h"
#include "i915_selftest.h"
#include "i915_vma.h"
#include "i915_vma_resource.h"

static int i915_check_nomodeset(void)
{
@@ -64,6 +65,8 @@ static const struct {
.exit = i915_scheduler_module_exit },
{ .init = i915_vma_module_init,
.exit = i915_vma_module_exit },
{ .init = i915_vma_resource_module_init,
.exit = i915_vma_resource_module_exit },
{ .init = i915_mock_selftests },
{ .init = i915_pmu_init,
.exit = i915_pmu_exit },
@@ -286,20 +286,22 @@ struct i915_vma_work {
struct dma_fence_work base;
struct i915_address_space *vm;
struct i915_vm_pt_stash stash;
struct i915_vma *vma;
struct i915_vma_resource *vma_res;
struct drm_i915_gem_object *pinned;
struct i915_sw_dma_fence_cb cb;
struct i915_refct_sgt *rsgt;
enum i915_cache_level cache_level;
unsigned int flags;
};

static void __vma_bind(struct dma_fence_work *work)
{
struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
struct i915_vma *vma = vw->vma;
struct i915_vma_resource *vma_res = vw->vma_res;

vma_res->ops->bind_vma(vma_res->vm, &vw->stash,
vma_res, vw->cache_level, vw->flags);

vma->ops->bind_vma(vw->vm, &vw->stash,
vma->resource, vw->cache_level, vw->flags);
}

static void __vma_release(struct dma_fence_work *work)
@@ -313,6 +315,10 @@ static void __vma_release(struct dma_fence_work *work)

i915_vm_free_pt_stash(vw->vm, &vw->stash);
i915_vm_put(vw->vm);
if (vw->vma_res)
i915_vma_resource_put(vw->vma_res);
if (vw->rsgt)
i915_refct_sgt_put(vw->rsgt);
}

static const struct dma_fence_work_ops bind_ops = {
@@ -386,13 +392,11 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res,
{
struct drm_i915_gem_object *obj = vma->obj;

i915_vma_resource_init(vma_res, vma->pages, &vma->page_sizes,
i915_vma_resource_init(vma_res, vma->vm, vma->pages, &vma->page_sizes,
i915_gem_object_is_readonly(obj),
i915_gem_object_is_lmem(obj),
vma->private,
vma->node.start,
vma->node.size,
vma->size);
vma->ops, vma->private, vma->node.start,
vma->node.size, vma->size);
}

/**
@@ -416,6 +420,7 @@ int i915_vma_bind(struct i915_vma *vma,
{
u32 bind_flags;
u32 vma_flags;
int ret;

lockdep_assert_held(&vma->vm->mutex);
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -424,12 +429,12 @@ int i915_vma_bind(struct i915_vma *vma,
if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
vma->node.size,
vma->vm->total))) {
kfree(vma_res);
i915_vma_resource_free(vma_res);
return -ENODEV;
}

if (GEM_DEBUG_WARN_ON(!flags)) {
kfree(vma_res);
i915_vma_resource_free(vma_res);
return -EINVAL;
}

@@ -441,12 +446,30 @@ int i915_vma_bind(struct i915_vma *vma,

bind_flags &= ~vma_flags;
if (bind_flags == 0) {
kfree(vma_res);
i915_vma_resource_free(vma_res);
return 0;
}

GEM_BUG_ON(!vma->pages);

/* Wait for or await async unbinds touching our range */
if (work && bind_flags & vma->vm->bind_async_flags)
ret = i915_vma_resource_bind_dep_await(vma->vm,
&work->base.chain,
vma->node.start,
vma->node.size,
true,
GFP_NOWAIT |
__GFP_RETRY_MAYFAIL |
__GFP_NOWARN);
else
ret = i915_vma_resource_bind_dep_sync(vma->vm, vma->node.start,
vma->node.size, true);
if (ret) {
i915_vma_resource_free(vma_res);
return ret;
}

if (vma->resource || !vma_res) {
/* Rebinding with an additional I915_VMA_*_BIND */
GEM_WARN_ON(!vma_flags);
@@ -459,9 +482,11 @@ int i915_vma_bind(struct i915_vma *vma,
if (work && bind_flags & vma->vm->bind_async_flags) {
struct dma_fence *prev;

work->vma = vma;
work->vma_res = i915_vma_resource_get(vma->resource);
work->cache_level = cache_level;
work->flags = bind_flags;
if (vma->obj->mm.rsgt)
work->rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt);

/*
* Note we only want to chain up to the migration fence on
@@ -489,8 +514,12 @@ int i915_vma_bind(struct i915_vma *vma,
int ret;

ret = i915_gem_object_wait_moving_fence(vma->obj, true);
if (ret)
if (ret) {
i915_vma_resource_free(vma->resource);
vma->resource = NULL;

return ret;
}
}
vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level,
bind_flags);
@@ -1361,7 +1390,7 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
return 0;
}

void __i915_vma_evict(struct i915_vma *vma)
struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
{
struct dma_fence *unbind_fence;

@@ -1395,27 +1424,35 @@ void __i915_vma_evict(struct i915_vma *vma)
GEM_BUG_ON(vma->fence);
GEM_BUG_ON(i915_vma_has_userfault(vma));

if (likely(atomic_read(&vma->vm->open))) {
trace_i915_vma_unbind(vma);
vma->ops->unbind_vma(vma->vm, vma->resource);
}
atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
&vma->flags);
/* Object backend must be async capable. */
GEM_WARN_ON(async && !vma->obj->mm.rsgt);

trace_i915_vma_unbind(vma);
unbind_fence = i915_vma_resource_unbind(vma->resource);
i915_vma_resource_put(vma->resource);
vma->resource = NULL;

atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
&vma->flags);

/* Object backend must be async capable. */
GEM_WARN_ON(async && !vma->obj->mm.rsgt);

i915_vma_detach(vma);
vma_unbind_pages(vma);

if (!async && unbind_fence) {
dma_fence_wait(unbind_fence, false);
dma_fence_put(unbind_fence);
unbind_fence = NULL;
}

/*
* This uninterruptible wait under the vm mutex is currently
* only ever blocking while the vma is being captured from.
* With async unbinding, this wait here will be removed.
* Binding itself may not have completed until the unbind fence signals,
* so don't drop the pages until that happens, unless the resource is
* async_capable.
*/
dma_fence_wait(unbind_fence, false);
dma_fence_put(unbind_fence);

vma_unbind_pages(vma);
return unbind_fence;
}

int __i915_vma_unbind(struct i915_vma *vma)
@@ -1442,12 +1479,51 @@ int __i915_vma_unbind(struct i915_vma *vma)
return ret;

GEM_BUG_ON(i915_vma_is_active(vma));
__i915_vma_evict(vma);
__i915_vma_evict(vma, false);

drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
return 0;
}

/*
 * __i915_vma_unbind_async - evict a vma without blocking on the unbind fence
 * @vma: The vma to unbind. vma->vm->mutex must be held.
 *
 * Return: NULL if the vma was not bound, an ERR_PTR on failure, or the
 * unbind fence on success (a reference is transferred to the caller, who
 * is expected to drop it with dma_fence_put()).
 */
static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma)
{
	struct dma_fence *fence;

	lockdep_assert_held(&vma->vm->mutex);

	/* Nothing to do if the vma isn't bound into the address space. */
	if (!drm_mm_node_allocated(&vma->node))
		return NULL;

	if (i915_vma_is_pinned(vma)) {
		vma_print_allocator(vma, "is pinned");
		return ERR_PTR(-EAGAIN);
	}

	/*
	 * We probably need to replace this with awaiting the fences of the
	 * object's dma_resv when the vma active goes away. When doing that
	 * we need to be careful to not add the vma_resource unbind fence
	 * immediately to the object's dma_resv, because then unbinding
	 * the next vma from the object, in case there are many, will
	 * actually await the unbinding of the previous vmas, which is
	 * undesirable.
	 */
	if (i915_sw_fence_await_active(&vma->resource->chain, &vma->active,
				       I915_ACTIVE_AWAIT_EXCL |
				       I915_ACTIVE_AWAIT_ACTIVE) < 0) {
		/*
		 * Awaiting failed (e.g. -ENOMEM); fall back to a blocking
		 * sync so the subsequent evict is still safe.
		 */
		int ret = i915_vma_sync(vma);

		if (ret)
			return ERR_PTR(ret);
	}

	fence = __i915_vma_evict(vma, true);

	drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */

	return fence;
}

int i915_vma_unbind(struct i915_vma *vma)
{
struct i915_address_space *vm = vma->vm;
@@ -1484,6 +1560,53 @@ int i915_vma_unbind(struct i915_vma *vma)
return err;
}

/**
 * i915_vma_unbind_async - unbind a vma, deferring the actual unbind to the
 * resulting unbind fence
 * @vma: The vma to unbind. The object lock (dma_resv) must be held.
 *
 * The unbind fence is added as a shared fence to the object's dma_resv, so
 * that for example the TTM migration code can wait for it. If an async
 * unbind is not possible (no refcounted sg-list, a GGTT binding that might
 * require a runtime-pm reference, or no free dma_resv fence slot), we fall
 * back to a synchronous i915_vma_unbind().
 *
 * Return: 0 on success, negative error code on failure.
 */
int i915_vma_unbind_async(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct i915_address_space *vm = vma->vm;
	struct dma_fence *fence;
	int err;

	/*
	 * We need the dma-resv lock since we add the
	 * unbind fence to the dma-resv object.
	 */
	assert_object_held(obj);

	if (!drm_mm_node_allocated(&vma->node))
		return 0;

	if (i915_vma_is_pinned(vma)) {
		vma_print_allocator(vma, "is pinned");
		return -EAGAIN;
	}

	/*
	 * Async unbinding requires the backing pages to be kept alive by a
	 * refcounted sg-list until the unbind fence signals.
	 */
	if (!obj->mm.rsgt)
		return i915_vma_unbind(vma);

	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
		/* Might require pm. Not handled yet. */
		return i915_vma_unbind(vma);

	/* Best effort: on reservation failure take the blocking path. */
	err = dma_resv_reserve_shared(obj->base.resv, 1);
	if (err)
		return i915_vma_unbind(vma);

	err = mutex_lock_interruptible(&vm->mutex);
	if (err)
		return err;

	fence = __i915_vma_unbind_async(vma);
	mutex_unlock(&vm->mutex);
	if (IS_ERR_OR_NULL(fence))
		return PTR_ERR_OR_ZERO(fence);

	/* Transfer the fence reference consumed here via dma_fence_put(). */
	dma_resv_add_shared_fence(obj->base.resv, fence);
	dma_fence_put(fence);

	return 0;
}

struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
{
i915_gem_object_make_unshrinkable(vma->obj);

0 comments on commit 45f1249

Please sign in to comment.