Skip to content

Commit

Permalink
drm/i915: Avoid using the i915_fence_array when collecting dependencies
Browse files Browse the repository at this point in the history
Since the gt migration code was using only a single fence for
dependencies, these were collected in a dma_fence_array. However, it
turns out that it's illegal to use some dma_fences in a dma_fence_array,
in particular other dma_fence_arrays and dma_fence_chains, and this
causes trouble for us moving forward.

Have the gt migration code instead take a const struct i915_deps for
dependencies. This means we can skip the dma_fence_array creation
and instead pass the struct i915_deps directly to circumvent the
problem.

Fixes: 5652df8 ("drm/i915/ttm: Update i915_gem_obj_copy_ttm() to be asynchronous")
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
  • Loading branch information
Thomas Hellström authored and intel-lab-lkp committed Dec 15, 2021
1 parent 636384b commit 0f61eb0
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 103 deletions.
115 changes: 28 additions & 87 deletions drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
Expand Up @@ -3,8 +3,6 @@
* Copyright © 2021 Intel Corporation
*/

#include <linux/dma-fence-array.h>

#include <drm/ttm/ttm_bo_driver.h>

#include "i915_drv.h"
Expand Down Expand Up @@ -65,32 +63,13 @@ void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
* A struct i915_deps needs to be initialized using i915_deps_init().
* If i915_deps_add_dependency() or i915_deps_add_resv() return an
* error code they will internally call i915_deps_fini(), which frees
* all internal references and allocations. After a call to
* i915_deps_to_fence(), or i915_deps_sync(), the struct should similarly
* be viewed as uninitialized.
* all internal references and allocations.
*
* We might want to break this out into a separate file as a utility.
*/

#define I915_DEPS_MIN_ALLOC_CHUNK 8U

/**
 * struct i915_deps - Collect dependencies into a single dma-fence
 * @single: Storage for pointer if the collection is a single fence.
 * @fences: Allocated array of fence pointers if more than a single fence;
 * otherwise points to the address of @single.
 * @num_deps: Current number of dependency fences.
 * @fences_size: Size of the @fences array in number of pointers.
 * @gfp: Allocation mode.
 */
struct i915_deps {
	struct dma_fence *single;
	struct dma_fence **fences;
	unsigned int num_deps;
	unsigned int fences_size;
	gfp_t gfp;
};

static void i915_deps_reset_fences(struct i915_deps *deps)
{
if (deps->fences != &deps->single)
Expand Down Expand Up @@ -163,7 +142,7 @@ static int i915_deps_grow(struct i915_deps *deps, struct dma_fence *fence,
return ret;
}

static int i915_deps_sync(struct i915_deps *deps,
static int i915_deps_sync(const struct i915_deps *deps,
const struct ttm_operation_ctx *ctx)
{
struct dma_fence **fences = deps->fences;
Expand All @@ -183,7 +162,6 @@ static int i915_deps_sync(struct i915_deps *deps,
break;
}

i915_deps_fini(deps);
return ret;
}

Expand Down Expand Up @@ -221,34 +199,6 @@ static int i915_deps_add_dependency(struct i915_deps *deps,
return i915_deps_grow(deps, fence, ctx);
}

/*
 * Collapse the collected dependencies into a single dma-fence.
 *
 * Returns NULL if there are no dependencies, the single fence itself if
 * there is exactly one (ownership of the reference transfers to the
 * caller), or a dma_fence_array wrapping all collected fences otherwise.
 * On allocation failure, falls back to synchronously waiting on the
 * dependencies and returns an ERR_PTR-encoded error code (or NULL on
 * successful sync). In all cases the struct i915_deps should be viewed
 * as uninitialized afterwards.
 */
static struct dma_fence *i915_deps_to_fence(struct i915_deps *deps,
					    const struct ttm_operation_ctx *ctx)
{
	struct dma_fence_array *array;

	if (deps->num_deps == 0)
		return NULL;

	if (deps->num_deps == 1) {
		/* Hand the single fence reference straight to the caller. */
		deps->num_deps = 0;
		return deps->fences[0];
	}

	/*
	 * TODO: Alter the allocation mode here to not try too hard to
	 * make things async.
	 */
	array = dma_fence_array_create(deps->num_deps, deps->fences, 0, 0,
				       false);
	if (!array)
		return ERR_PTR(i915_deps_sync(deps, ctx));

	/*
	 * The fence array took ownership of the fences and the array
	 * memory; drop our pointer so the reset doesn't free them.
	 */
	deps->fences = NULL;
	i915_deps_reset_fences(deps);

	return &array->base;
}

static int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv,
bool all, const bool no_excl,
const struct ttm_operation_ctx *ctx)
Expand Down Expand Up @@ -387,7 +337,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
struct ttm_resource *dst_mem,
struct ttm_tt *dst_ttm,
struct sg_table *dst_st,
struct dma_fence *dep)
const struct i915_deps *deps)
{
struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
bdev);
Expand All @@ -411,7 +361,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
return ERR_PTR(-EINVAL);

intel_engine_pm_get(i915->gt.migrate.context->engine);
ret = intel_context_migrate_clear(i915->gt.migrate.context, dep,
ret = intel_context_migrate_clear(i915->gt.migrate.context, deps,
dst_st->sgl, dst_level,
i915_ttm_gtt_binds_lmem(dst_mem),
0, &rq);
Expand All @@ -425,7 +375,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
intel_engine_pm_get(i915->gt.migrate.context->engine);
ret = intel_context_migrate_copy(i915->gt.migrate.context,
dep, src_rsgt->table.sgl,
deps, src_rsgt->table.sgl,
src_level,
i915_ttm_gtt_binds_lmem(bo->resource),
dst_st->sgl, dst_level,
Expand Down Expand Up @@ -610,18 +560,19 @@ i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
}

static struct dma_fence *
__i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
__i915_ttm_move(struct ttm_buffer_object *bo,
const struct ttm_operation_ctx *ctx, bool clear,
struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
struct i915_refct_sgt *dst_rsgt, bool allow_accel,
struct dma_fence *move_dep)
const struct i915_deps *move_deps)
{
struct i915_ttm_memcpy_work *copy_work = NULL;
struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
struct dma_fence *fence = ERR_PTR(-EINVAL);

if (allow_accel) {
fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
&dst_rsgt->table, move_dep);
&dst_rsgt->table, move_deps);

/*
* We only need to intercept the error when moving to lmem.
Expand Down Expand Up @@ -655,8 +606,8 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,

if (!IS_ERR(fence))
goto out;
} else if (move_dep) {
int err = dma_fence_wait(move_dep, true);
} else if (move_deps) {
int err = i915_deps_sync(move_deps, ctx);

if (err)
return ERR_PTR(err);
Expand All @@ -680,29 +631,20 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
return fence;
}

static struct dma_fence *prev_fence(struct ttm_buffer_object *bo,
struct ttm_operation_ctx *ctx)
int prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
struct i915_deps *deps)
{
struct i915_deps deps;
int ret;

/*
* Instead of trying hard with GFP_KERNEL to allocate memory,
* the dependency collection will just sync if it doesn't
* succeed.
*/
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
ret = i915_deps_add_dependency(&deps, bo->moving, ctx);
ret = i915_deps_add_dependency(deps, bo->moving, ctx);
if (!ret)
/*
* TODO: Only await excl fence here, and shared fences before
* signaling the migration fence.
*/
ret = i915_deps_add_resv(&deps, bo->base.resv, true, false, ctx);
if (ret)
return ERR_PTR(ret);
ret = i915_deps_add_resv(deps, bo->base.resv, true, false, ctx);

return i915_deps_to_fence(&deps, ctx);
return ret;
}

/**
Expand Down Expand Up @@ -756,16 +698,18 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,

clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
struct dma_fence *dep = prev_fence(bo, ctx);
struct i915_deps deps;

if (IS_ERR(dep)) {
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
ret = prev_deps(bo, ctx, &deps);
if (ret) {
i915_refct_sgt_put(dst_rsgt);
return PTR_ERR(dep);
return ret;
}

migration_fence = __i915_ttm_move(bo, clear, dst_mem, bo->ttm,
dst_rsgt, true, dep);
dma_fence_put(dep);
migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm,
dst_rsgt, true, &deps);
i915_deps_fini(&deps);
}

/* We can possibly get an -ERESTARTSYS here */
Expand Down Expand Up @@ -826,7 +770,7 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
.interruptible = intr,
};
struct i915_refct_sgt *dst_rsgt;
struct dma_fence *copy_fence, *dep_fence;
struct dma_fence *copy_fence;
struct i915_deps deps;
int ret, shared_err;

Expand All @@ -847,15 +791,12 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
if (ret)
return ret;

dep_fence = i915_deps_to_fence(&deps, &ctx);
if (IS_ERR(dep_fence))
return PTR_ERR(dep_fence);

dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
copy_fence = __i915_ttm_move(src_bo, false, dst_bo->resource,
copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource,
dst_bo->ttm, dst_rsgt, allow_accel,
dep_fence);
&deps);

i915_deps_fini(&deps);
i915_refct_sgt_put(dst_rsgt);
if (IS_ERR_OR_NULL(copy_fence))
return PTR_ERR_OR_ZERO(copy_fence);
Expand Down
17 changes: 17 additions & 0 deletions drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
Expand Up @@ -18,6 +18,23 @@ struct ttm_tt;
struct drm_i915_gem_object;
struct i915_refct_sgt;

/**
 * struct i915_deps - Collect dependencies into a single dma-fence
 * @single: Storage for pointer if the collection is a single fence.
 * @fences: Allocated array of fence pointers if more than a single fence;
 * otherwise points to the address of @single.
 * @num_deps: Current number of dependency fences.
 * @fences_size: Size of the @fences array in number of pointers.
 * @gfp: Allocation mode.
 *
 * Initialize with i915_deps_init(); release all held fence references
 * and allocations with i915_deps_fini().
 */
struct i915_deps {
	struct dma_fence *single;
	struct dma_fence **fences;
	unsigned int num_deps;
	unsigned int fences_size;
	gfp_t gfp;
};

int i915_ttm_move_notify(struct ttm_buffer_object *bo);

I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
Expand Down
24 changes: 12 additions & 12 deletions drivers/gpu/drm/i915/gt/intel_migrate.c
Expand Up @@ -404,7 +404,7 @@ static int emit_copy(struct i915_request *rq, int size)

int
intel_context_migrate_copy(struct intel_context *ce,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
Expand All @@ -431,8 +431,8 @@ intel_context_migrate_copy(struct intel_context *ce,
goto out_ce;
}

if (await) {
err = i915_request_await_dma_fence(rq, await);
if (deps) {
err = i915_request_await_deps(rq, deps);
if (err)
goto out_rq;

Expand All @@ -442,7 +442,7 @@ intel_context_migrate_copy(struct intel_context *ce,
goto out_rq;
}

await = NULL;
deps = NULL;
}

/* The PTE updates + copy must not be interrupted. */
Expand Down Expand Up @@ -525,7 +525,7 @@ static int emit_clear(struct i915_request *rq, int size, u32 value)

int
intel_context_migrate_clear(struct intel_context *ce,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,
Expand All @@ -550,8 +550,8 @@ intel_context_migrate_clear(struct intel_context *ce,
goto out_ce;
}

if (await) {
err = i915_request_await_dma_fence(rq, await);
if (deps) {
err = i915_request_await_deps(rq, deps);
if (err)
goto out_rq;

Expand All @@ -561,7 +561,7 @@ intel_context_migrate_clear(struct intel_context *ce,
goto out_rq;
}

await = NULL;
deps = NULL;
}

/* The PTE updates + clear must not be interrupted. */
Expand Down Expand Up @@ -599,7 +599,7 @@ intel_context_migrate_clear(struct intel_context *ce,

int intel_migrate_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
Expand All @@ -624,7 +624,7 @@ int intel_migrate_copy(struct intel_migrate *m,
if (err)
goto out;

err = intel_context_migrate_copy(ce, await,
err = intel_context_migrate_copy(ce, deps,
src, src_cache_level, src_is_lmem,
dst, dst_cache_level, dst_is_lmem,
out);
Expand All @@ -638,7 +638,7 @@ int intel_migrate_copy(struct intel_migrate *m,
int
intel_migrate_clear(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
struct dma_fence *await,
const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,
Expand All @@ -661,7 +661,7 @@ intel_migrate_clear(struct intel_migrate *m,
if (err)
goto out;

err = intel_context_migrate_clear(ce, await, sg, cache_level,
err = intel_context_migrate_clear(ce, deps, sg, cache_level,
is_lmem, value, out);

intel_context_unpin(ce);
Expand Down

0 comments on commit 0f61eb0

Please sign in to comment.