Skip to content

Commit

Permalink
powerpc/smp: Add support detecting thread-groups sharing L2 cache
Browse files Browse the repository at this point in the history
On POWER systems, groups of threads within a core sharing the L2-cache
can be indicated by the "ibm,thread-groups" property array with the
identifier "2".

This patch adds support for detecting this and, when present,
populating the cpu_l2_cache_mask of every CPU with the core-siblings
which share L2 with the CPU, as specified by the
"ibm,thread-groups" property array.

On a platform with the following "ibm,thread-groups" configuration:
		 00000001 00000002 00000004 00000000
		 00000002 00000004 00000006 00000001
		 00000003 00000005 00000007 00000002
		 00000002 00000004 00000000 00000002
		 00000004 00000006 00000001 00000003
		 00000005 00000007

Without this patch, the sched-domain hierarchy for CPUs 0,1 would be
	CPU0 attaching sched-domain(s):
	domain-0: span=0,2,4,6 level=SMT
	domain-1: span=0-7 level=CACHE
	domain-2: span=0-15,24-39,48-55 level=MC
	domain-3: span=0-55 level=DIE

	CPU1 attaching sched-domain(s):
	domain-0: span=1,3,5,7 level=SMT
	domain-1: span=0-7 level=CACHE
	domain-2: span=0-15,24-39,48-55 level=MC
	domain-3: span=0-55 level=DIE

The CACHE domain at 0-7 is incorrect since the ibm,thread-groups
sub-array
[00000002 00000002 00000004
 00000000 00000002 00000004 00000006
 00000001 00000003 00000005 00000007]
indicates that L2 (Property "2") is shared only between the threads of a single
group. There are "2" groups of threads, each containing "4"
threads. The groups are {0,2,4,6} and {1,3,5,7}.

With this patch, the sched-domain hierarchy for CPUs 0,1 would be
     	CPU0 attaching sched-domain(s):
	domain-0: span=0,2,4,6 level=SMT
	domain-1: span=0-15,24-39,48-55 level=MC
	domain-2: span=0-55 level=DIE

	CPU1 attaching sched-domain(s):
	domain-0: span=1,3,5,7 level=SMT
	domain-1: span=0-15,24-39,48-55 level=MC
	domain-2: span=0-55 level=DIE

The CACHE domain with span=0,2,4,6 for CPU 0 (span=1,3,5,7 for CPU 1
resp.) gets degenerated into the SMT domain. Furthermore, the
last-level-cache domain gets correctly set to the SMT sched-domain.

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
  • Loading branch information
Gautham R. Shenoy authored and intel-lab-lkp committed Dec 9, 2020
1 parent 5f0ad22 commit 61bc65c
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 4 deletions.
1 change: 1 addition & 0 deletions arch/powerpc/include/asm/smp.h
Expand Up @@ -134,6 +134,7 @@ static inline struct cpumask *cpu_smallcore_mask(int cpu)
extern int cpu_to_core_id(int cpu);

extern bool has_big_cores;
extern bool thread_group_shares_l2;

#define cpu_smt_mask cpu_smt_mask
#ifdef CONFIG_SCHED_SMT
Expand Down
56 changes: 52 additions & 4 deletions arch/powerpc/kernel/smp.c
Expand Up @@ -76,6 +76,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
struct task_struct *secondary_current;
bool has_big_cores;
bool coregroup_enabled;
bool thread_group_shares_l2;

DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
Expand All @@ -99,6 +100,7 @@ enum {

#define MAX_THREAD_LIST_SIZE 8
#define THREAD_GROUP_SHARE_L1 1
#define THREAD_GROUP_SHARE_L2 2
struct thread_groups {
unsigned int property;
unsigned int nr_groups;
Expand All @@ -107,7 +109,7 @@ struct thread_groups {
};

/* Maximum number of properties that groups of threads within a core can share */
#define MAX_THREAD_GROUP_PROPERTIES 1
#define MAX_THREAD_GROUP_PROPERTIES 2

struct thread_groups_list {
unsigned int nr_properties;
Expand All @@ -121,6 +123,13 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata;
*/
DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);

/*
* On some big-core systems, thread_group_l2_cache_map for each CPU
* corresponds to the set of its siblings within the core that share the
* L2-cache.
*/
DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);

/* SMP operations for this machine */
struct smp_ops_t *smp_ops;

Expand Down Expand Up @@ -718,7 +727,9 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
*
* ibm,thread-groups[i + 0] tells us the property based on which the
* threads are being grouped together. If this value is 1, it implies
* that the threads in the same group share L1, translation cache.
* that the threads in the same group share L1, translation cache. If
* the value is 2, it implies that the threads in the same group share
* the same L2 cache.
*
* ibm,thread-groups[i+1] tells us how many such thread groups exist for the
* property ibm,thread-groups[i]
Expand Down Expand Up @@ -874,7 +885,8 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
struct thread_groups *tg = NULL;
cpumask_var_t *mask;

if (cache_property != THREAD_GROUP_SHARE_L1)
if (cache_property != THREAD_GROUP_SHARE_L1 &&
cache_property != THREAD_GROUP_SHARE_L2)
return -EINVAL;

tg = get_thread_groups(cpu, cache_property, &err);
Expand All @@ -888,7 +900,11 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
return -ENODATA;
}

mask = &per_cpu(thread_group_l1_cache_map, cpu);
if (cache_property == THREAD_GROUP_SHARE_L1)
mask = &per_cpu(thread_group_l1_cache_map, cpu);
else if (cache_property == THREAD_GROUP_SHARE_L2)
mask = &per_cpu(thread_group_l2_cache_map, cpu);

zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));

for (i = first_thread; i < first_thread + threads_per_core; i++) {
Expand Down Expand Up @@ -990,6 +1006,16 @@ static int init_big_cores(void)
}

has_big_cores = true;

for_each_possible_cpu(cpu) {
int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);

if (err)
return err;
}

thread_group_shares_l2 = true;
pr_debug("L2 cache only shared by the threads in the small core\n");
return 0;
}

Expand Down Expand Up @@ -1304,6 +1330,28 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
if (has_big_cores)
submask_fn = cpu_smallcore_mask;

/*
* If the threads in a thread-group share L2 cache, then
* the L2-mask can be obtained from thread_group_l2_cache_map.
*/
if (thread_group_shares_l2) {
cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));

for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
if (cpu_online(i))
set_cpus_related(i, cpu, cpu_l2_cache_mask);
}

/* Verify that L1-cache siblings are a subset of L2 cache-siblings */
if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) &&
!cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) {
pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
cpu);
}

return true;
}

l2_cache = cpu_to_l2cache(cpu);
if (!l2_cache || !*mask) {
/* Assume only core siblings share cache with this CPU */
Expand Down

0 comments on commit 61bc65c

Please sign in to comment.