sched/topology: Introduce fallback LLC
On some systems, LLC sizes may be too small. Some of these systems may
also support multiple cache access latency levels, i.e., between the
previous LLC and the waker LLC there could be other LLCs with a lower
cache access latency to the waker LLC. If the waker LLC is busy, the
scheduler could choose to schedule a task on such an LLC.

Here is one approach to identify a static fallback LLC for each LLC on
systems that support multiple cache access latency levels. In this
approach, the fallback LLCs are decided at boot/CPU bring-up time, with
a one-to-one mapping between an LLC and its fallback LLC. The fallback
LLC is only used if the wakeup is a sync wakeup and the current LLC is
busier than the fallback LLC. The scheduler also will not choose the
fallback LLC if the previous LLC has the same cache access latency as
the fallback LLC.
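As an aside, not part of the patch itself: the "busier than" test in the
fair.c hunk below scales busy-CPU counts by LLC size and cross-multiplies
to avoid integer division. A minimal standalone sketch of that check,
with a hypothetical helper name:

/*
 * Illustrative only: is the fallback LLC proportionally less busy than
 * the waker's LLC?  Cross-multiplying avoids division, e.g. 2 busy of
 * 8 CPUs (fallback) vs 6 busy of 16 CPUs (waker): 2 * 16 < 6 * 8, so
 * the fallback is preferred.
 */
static inline bool fallback_less_busy(int fnr_busy, int fllc_size,
				      int tnr_busy, int tllc_size)
{
	return fnr_busy * tllc_size < tnr_busy * fllc_size;
}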

The fallback LLC is expected to be part of the parent domain of the
LLC domain. Archs can choose to use a fallback LLC by setting the
SD_FALLBACK_LLC flag.

Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Parth Shah <parth@linux.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
srikard authored and intel-lab-lkp committed Apr 22, 2021
1 parent 4f735d1 commit dcb7525
Showing 4 changed files with 90 additions and 6 deletions.
7 changes: 7 additions & 0 deletions include/linux/sched/sd_flags.h
@@ -129,6 +129,13 @@ SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
*/
SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)

/*
* Consider waking task on near-by idle LLC.
*
* NEEDS_GROUPS: Load balancing flag.
*/
SD_FLAG(SD_FALLBACK_LLC, SDF_NEEDS_GROUPS)

/*
* Prefer to place tasks in a sibling domain
*
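For context, a sketch (assumed, not part of this commit) of how an
architecture could opt in by setting the new flag on the domain level
above the LLC via the usual sched_domain_topology_level flags callback.
The helper name, table name, and the choice of the DIE level are
illustrative assumptions:

/* Hypothetical opt-in: return SD_FALLBACK_LLC for the LLC's parent level. */
static int fallback_llc_flags(void)
{
	return SD_FALLBACK_LLC;
}

static struct sched_domain_topology_level example_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, fallback_llc_flags, SD_INIT_NAME(DIE) },
	{ NULL, },
};

An architecture would register such a table with set_sched_topology();
per the commit message, setting the flag is the only opt-in step.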
1 change: 1 addition & 0 deletions include/linux/sched/topology.h
@@ -74,6 +74,7 @@ struct sched_domain_shared {
atomic_t ref;
atomic_t nr_busy_cpus;
int idle_core;
int fallback_llc_id;
};

struct sched_domain {
43 changes: 39 additions & 4 deletions kernel/sched/fair.c
@@ -5873,7 +5873,8 @@ static int wake_affine_idler_llc(struct task_struct *p, int this_cpu, int prev_c
{
int pnr_busy, pllc_size, tnr_busy, tllc_size;
struct sched_domain_shared *tsds, *psds;
int diff;
bool try_fallback = false;
int diff, fcpu = -1;

tsds = rcu_dereference(per_cpu(sd_llc_shared, this_cpu));
psds = rcu_dereference(per_cpu(sd_llc_shared, prev_cpu));
@@ -5890,6 +5891,43 @@ static int wake_affine_idler_llc(struct task_struct *p, int this_cpu, int prev_c
}
}

tnr_busy = atomic_read(&tsds->nr_busy_cpus);
tllc_size = per_cpu(sd_llc_size, this_cpu);

if (sync) {
struct sched_domain *sd = rcu_dereference(per_cpu(sd_llc, this_cpu));

/*
* task is a target of *sync* wakeup. However there are no
* idle cores in the waking CPU. Ignore fallback LLC if the
* previous CPU is part of the LLC's parent domain.
*/
try_fallback = !cpumask_test_cpu(prev_cpu, sched_domain_span(sd->parent));
fcpu = tsds->fallback_llc_id;
}

if (try_fallback && fcpu != -1 && cpumask_test_cpu(fcpu, p->cpus_ptr)) {
struct sched_domain_shared *fsds;
int fnr_busy, fllc_size;

fsds = rcu_dereference(per_cpu(sd_llc_shared, fcpu));
if (fsds && fsds != psds) {
if (fsds->idle_core != -1) {
if (cpumask_test_cpu(fsds->idle_core, p->cpus_ptr))
return fsds->idle_core;
return fcpu;
}

fnr_busy = atomic_read(&fsds->nr_busy_cpus);
fllc_size = per_cpu(sd_llc_size, fcpu);
if (fnr_busy * tllc_size < tnr_busy * fllc_size) {
tnr_busy = fnr_busy;
tllc_size = fllc_size;
this_cpu = fcpu;
}
}
}

if (available_idle_cpu(prev_cpu) || sched_idle_cpu(prev_cpu))
return prev_cpu;
if (psds->idle_core != -1) {
@@ -5908,10 +5946,7 @@ static int wake_affine_idler_llc(struct task_struct *p, int this_cpu, int prev_c
}
}

tnr_busy = atomic_read(&tsds->nr_busy_cpus);
pnr_busy = atomic_read(&psds->nr_busy_cpus);

tllc_size = per_cpu(sd_llc_size, this_cpu);
pllc_size = per_cpu(sd_llc_size, prev_cpu);

if (pnr_busy == pllc_size && tnr_busy == tllc_size) {
45 changes: 43 additions & 2 deletions kernel/sched/topology.c
@@ -603,15 +603,27 @@ static void free_sched_groups(struct sched_group *sg, int free_sgc)

static void destroy_sched_domain(struct sched_domain *sd)
{
struct sched_domain_shared *sds = sd->shared;

/*
* A normal sched domain may have multiple group references, an
* overlapping domain, having private groups, only one. Iterate,
* dropping group/capacity references, freeing where none remain.
*/
free_sched_groups(sd->groups, 1);

if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
kfree(sd->shared);
if (sds && atomic_dec_and_test(&sds->ref)) {
struct sched_domain_shared *next_sds;

if (sds->fallback_llc_id != -1) {
next_sds = rcu_dereference(per_cpu(sd_llc_shared, sds->fallback_llc_id));
if (next_sds && next_sds->fallback_llc_id != -1)
next_sds->fallback_llc_id = -1;

sds->fallback_llc_id = -1;
}
kfree(sds);
}
kfree(sd);
}

@@ -663,9 +675,36 @@ static void update_top_cache_domain(int cpu)

sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
if (sd) {
struct sched_domain *sd_parent = sd->parent;

id = cpumask_first(sched_domain_span(sd));
size = cpumask_weight(sched_domain_span(sd));
sds = sd->shared;

if (sds->fallback_llc_id == -1 && sd_parent &&
sd_parent->flags & SD_FALLBACK_LLC) {
const struct cpumask *parent_span = sched_domain_span(sd->parent);
struct cpumask *span = sched_domains_tmpmask;
int fcpu;

/*
* If LLC's parent domain has SD_FALLBACK_LLC flag
* set and this LLC's fallback_llc_id is not yet
* set, then walk through the LLC parent's domain to
* find a fallback_llc.
*/
cpumask_andnot(span, parent_span, sched_domain_span(sd));
for_each_cpu_wrap(fcpu, span, cpu) {
struct sched_domain_shared *next_sds;

next_sds = rcu_dereference(per_cpu(sd_llc_shared, fcpu));
if (next_sds && next_sds->fallback_llc_id == -1) {
sds->fallback_llc_id = fcpu;
next_sds->fallback_llc_id = cpu;
break;
}
}
}
}

rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
@@ -1370,6 +1409,7 @@ int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
#define TOPOLOGY_SD_FLAGS \
(SD_SHARE_CPUCAPACITY | \
SD_SHARE_PKG_RESOURCES | \
SD_FALLBACK_LLC | \
SD_NUMA | \
SD_ASYM_PACKING)

@@ -1475,6 +1515,7 @@ sd_init(struct sched_domain_topology_level *tl,
atomic_inc(&sd->shared->ref);
atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
sd->shared->idle_core = -1;
sd->shared->fallback_llc_id = -1;
}

sd->private = sdd;
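As a usage note, the boot-time walk above pairs LLCs one-to-one, matching
the commit message. A toy user-space model of that pairing, assuming four
LLCs under a single SD_FALLBACK_LLC parent (array indices stand in for the
CPU ids stored in fallback_llc_id; names here are illustrative):

#include <stdio.h>

int main(void)
{
	int fallback[4] = { -1, -1, -1, -1 };	/* fallback_llc_id per LLC */

	for (int llc = 0; llc < 4; llc++) {
		if (fallback[llc] != -1)	/* already paired */
			continue;
		for (int other = 0; other < 4; other++) {
			if (other == llc || fallback[other] != -1)
				continue;
			fallback[llc] = other;	/* sds->fallback_llc_id */
			fallback[other] = llc;	/* next_sds->fallback_llc_id */
			break;
		}
	}

	for (int llc = 0; llc < 4; llc++)
		printf("LLC%d -> fallback LLC%d\n", llc, fallback[llc]);
	return 0;
}

This prints LLC0 paired with LLC1 and LLC2 paired with LLC3; once every
LLC has a partner, the remaining iterations are no-ops. In the patch,
destroy_sched_domain() undoes a pairing by resetting both ends to -1.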
