Skip to content

Commit

Permalink
drm/i915/selftests: Basic stress test for rapid context switching
Browse files Browse the repository at this point in the history
We need to exercise the HW and submission paths for switching contexts
rapidly to check that features such as execlists' wa_tail are adequate.
Plus it's an interesting baseline latency metric.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
  • Loading branch information
ickle authored and intel-lab-lkp committed Aug 31, 2018
1 parent 47f6dd4 commit 8ec0505
Showing 1 changed file with 185 additions and 0 deletions.
185 changes: 185 additions & 0 deletions drivers/gpu/drm/i915/selftests/i915_gem_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
*
*/

#include <linux/prime_numbers.h>

#include "../i915_selftest.h"
#include "i915_random.h"
#include "igt_flush_test.h"
Expand All @@ -32,6 +34,188 @@

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

/*
 * Book-keeping for one live selftest run.
 *
 * Filled in by begin_live_test(), which idles the GPU and snapshots the
 * global reset count; checked by end_live_test(), which reports any GPU
 * reset or missed interrupt that occurred while the test ran.
 */
struct live_test {
	struct drm_i915_private *i915;
	const char *func;	/* calling test function, for error messages */
	const char *name;	/* subtest/engine name, for error messages */

	unsigned int reset_count;	/* i915_reset_count() at test start */
};

/*
 * Begin a live selftest checkpoint: idle the GPU, clear the missed-irq
 * tracking and record the current reset count, so that end_live_test()
 * can attribute any reset or missed interrupt to the test itself.
 *
 * Returns 0 on success, or a negative error code if the GPU could not
 * be idled beforehand.
 */
static int begin_live_test(struct live_test *t,
			   struct drm_i915_private *i915,
			   const char *func,
			   const char *name)
{
	int err;

	t->i915 = i915;
	t->func = func;
	t->name = name;

	/* Start from an idle GPU so all activity belongs to the test. */
	err = i915_gem_wait_for_idle(i915,
				     I915_WAIT_LOCKED,
				     MAX_SCHEDULE_TIMEOUT);
	if (err) {
		/* Fix: message was missing its terminating newline. */
		pr_err("%s(%s): failed to idle before, with err=%d!\n",
		       func, name, err);
		return err;
	}

	i915->gpu_error.missed_irq_rings = 0;
	t->reset_count = i915_reset_count(&i915->gpu_error);

	return 0;
}

/*
 * Close out a live selftest checkpoint started by begin_live_test().
 *
 * Retires outstanding requests, then verifies that the engines settle
 * to idle, that no GPU reset fired during the test, and that no
 * interrupts were missed. Returns 0 when all checks pass, -EIO on any
 * failure (after logging which check tripped).
 */
static int end_live_test(struct live_test *t)
{
	struct drm_i915_private *i915 = t->i915;
	unsigned int resets;

	i915_retire_requests(i915);

	/* The engines must quiesce shortly after the final request retires. */
	if (wait_for(intel_engines_are_idle(i915), 10)) {
		pr_err("%s(%s): GPU not idle\n", t->func, t->name);
		return -EIO;
	}

	/* Any change in the global reset count happened on our watch. */
	resets = i915_reset_count(&i915->gpu_error) - t->reset_count;
	if (resets) {
		pr_err("%s(%s): GPU was reset %d times!\n",
		       t->func, t->name, resets);
		return -EIO;
	}

	/* Missed-irq tracking was cleared at begin, so any bit is ours. */
	if (i915->gpu_error.missed_irq_rings) {
		pr_err("%s(%s): Missed interrupts on engines %lx\n",
		       t->func, t->name, i915->gpu_error.missed_irq_rings);
		return -EIO;
	}

	return 0;
}

static int live_nop_switch(void *arg)
{
const unsigned int nctx = 1024;
struct drm_i915_private *i915 = arg;
struct intel_engine_cs *engine;
struct i915_gem_context **ctx;
enum intel_engine_id id;
struct drm_file *file;
struct live_test t;
unsigned long n;
int err = -ENODEV;

/*
* Create as many contexts as we can feasibly get away with
* and check we can switch between them rapidly.
*
* Serves as very simple stress test for submission and HW switching
* between contexts.
*/

if (!DRIVER_CAPS(i915)->has_logical_contexts)
return 0;

file = mock_file(i915);
if (IS_ERR(file))
return PTR_ERR(file);

mutex_lock(&i915->drm.struct_mutex);

ctx = kcalloc(sizeof(*ctx), nctx, GFP_KERNEL);
if (!ctx) {
err = -ENOMEM;
goto out_unlock;
}

for (n = 0; n < nctx; n++) {
ctx[n] = i915_gem_create_context(i915, file->driver_priv);
if (IS_ERR(ctx[n])) {
err = PTR_ERR(ctx[n]);
goto out_unlock;
}
}

for_each_engine(engine, i915, id) {
struct i915_request *request;
unsigned long end_time, prime;
ktime_t times[2] = {};

times[0] = ktime_get_raw();
for (n = 0; n < nctx; n++) {
request = i915_request_alloc(engine, ctx[n]);
i915_request_add(request);
}
i915_request_wait(request,
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
times[1] = ktime_get_raw();

pr_info("Populated %d contexts on %s in %lluns\n",
nctx, engine->name, ktime_to_ns(times[1] - times[0]));

err = begin_live_test(&t, i915, __func__, engine->name);
if (err)
goto out_unlock;

end_time = jiffies + i915_selftest.timeout_jiffies;
for_each_prime_number_from(prime, 2, 8192) {
times[1] = ktime_get_raw();

for (n = 0; n < prime; n++) {
request = i915_request_alloc(engine,
ctx[n % nctx]);
if (IS_ERR(request)) {
err = PTR_ERR(request);
goto out_unlock;
}

/*
* This space is left intentionally blank.
*
* We do not actually want to perform any
* action with this request, we just want
* to measure the latency in allocation
* and submission of our breadcrumbs -
* ensuring that the bare request is sufficient
* for the system to work (i.e. proper HEAD
* tracking of the rings, interrupt handling,
* etc). It also gives us the lowest bounds
* for latency.
*/

i915_request_add(request);
}
i915_request_wait(request,
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);

times[1] = ktime_sub(ktime_get_raw(), times[1]);
if (prime == 2)
times[0] = times[1];

if (__igt_timeout(end_time, NULL))
break;
}

err = end_live_test(&t);
if (err)
goto out_unlock;

pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
engine->name,
ktime_to_ns(times[0]),
prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
}

out_unlock:
mutex_unlock(&i915->drm.struct_mutex);
mock_file_free(i915, file);
return err;
}

static struct i915_vma *
gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
{
Expand Down Expand Up @@ -713,6 +897,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_switch_to_kernel_context),
SUBTEST(live_nop_switch),
SUBTEST(igt_ctx_exec),
SUBTEST(igt_ctx_readonly),
};
Expand Down

0 comments on commit 8ec0505

Please sign in to comment.