Skip to content
Permalink
Browse files
sched/core: Force idle accounting per cgroup
Account for "force idle" time per cgroup, i.e. the time during which the
tasks of the cgroup forced their SMT siblings into idle.

Force idle time per cgroup is displayed via
  /sys/fs/cgroup/cpuacct/$cg/cpuacct.forceidle.
Force idle time per cgroup per cpu is displayed via
  /sys/fs/cgroup/cpuacct/$cg/cpuacct.forceidle_percpu.
The unit is ns.
It also requires that schedstats is enabled.

We can get the total system forced idle time by looking at the root cgroup,
and we can see how long each cgroup forced its SMT siblings into idle. If the
force idle time of a cgroup is high, that can be rectified by making some
changes (e.g. affinity, CPU budget, etc.) to the cgroup.

Signed-off-by: Cruz Zhao <CruzZhao@linux.alibaba.com>
  • Loading branch information
Cruz Zhao authored and intel-lab-lkp committed Jan 11, 2022
1 parent 41abb28 commit 363ef949b487441383f2719bdff3dc9156779565
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 0 deletions.
@@ -774,10 +774,17 @@ void cgroup_rstat_flush_release(void);
#ifdef CONFIG_CGROUP_CPUACCT
/* cpuacct controller is enabled: the accounting hooks are real functions. */
void cpuacct_charge(struct task_struct *tsk, u64 cputime);
void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
#ifdef CONFIG_SCHED_CORE
/*
 * Add @cputime of force-idle time to the per-CPU counter of @cpu in the
 * cpuacct group of @tsk and in each of that group's ancestors.
 */
void cpuacct_account_forceidle(int cpu, struct task_struct *tsk, u64 cputime);
#endif
#else
/* cpuacct controller is disabled: the hooks compile away to empty stubs. */
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
static inline void cpuacct_account_field(struct task_struct *tsk, int index,
					 u64 val) {}
#ifdef CONFIG_SCHED_CORE
static inline void cpuacct_account_forceidle(int cpu, struct task_struct *tsk,
					     u64 cputime) {}
#endif
#endif

void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec);
@@ -284,6 +284,7 @@ void __sched_core_account_forceidle(struct rq *rq)
continue;

__schedstat_add(p->stats.core_forceidle_sum, delta);
cpuacct_account_forceidle(i, p, delta);
}
}

@@ -27,6 +27,9 @@ struct cpuacct {
/* cpuusage holds pointer to a u64-type object on every CPU */
u64 __percpu *cpuusage;
struct kernel_cpustat __percpu *cpustat;
#ifdef CONFIG_SCHED_CORE
u64 __percpu *forceidle;
#endif
};

static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
@@ -46,9 +49,15 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca)
}

/* Statically allocated per-CPU counters backing root_cpuacct. */
static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
#ifdef CONFIG_SCHED_CORE
static DEFINE_PER_CPU(u64, root_cpuacct_forceidle);
#endif

/*
 * root_cpuacct is defined statically: its counters point at the per-CPU
 * variables above and at kernel_cpustat instead of being allocated.
 */
static struct cpuacct root_cpuacct = {
	.cpuusage	= &root_cpuacct_cpuusage,
	.cpustat	= &kernel_cpustat,
#ifdef CONFIG_SCHED_CORE
	.forceidle	= &root_cpuacct_forceidle,
#endif
};

/* Create a new CPU accounting group */
@@ -72,8 +81,18 @@ cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
if (!ca->cpustat)
goto out_free_cpuusage;

#ifdef CONFIG_SCHED_CORE
ca->forceidle = alloc_percpu(u64);
if (!ca->forceidle)
goto out_free_cpustat;
#endif

return &ca->css;

#ifdef CONFIG_SCHED_CORE
out_free_cpustat:
free_percpu(ca->cpustat);
#endif
out_free_cpuusage:
free_percpu(ca->cpuusage);
out_free_ca:
@@ -290,6 +309,37 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v)
return 0;
}

#ifdef CONFIG_SCHED_CORE
/*
 * Return the force-idle time currently stored in @ca's per-CPU counter
 * for @cpu. The counter is read directly; this helper takes no lock.
 */
static u64 __forceidle_read(struct cpuacct *ca, int cpu)
{
	u64 *slot = per_cpu_ptr(ca->forceidle, cpu);

	return *slot;
}
static int cpuacct_percpu_forceidle_seq_show(struct seq_file *m, void *V)
{
struct cpuacct *ca = css_ca(seq_css(m));
u64 percpu;
int i;

for_each_possible_cpu(i) {
percpu = __forceidle_read(ca, i);
seq_printf(m, "%llu ", (unsigned long long) percpu);
}
seq_printf(m, "\n");
return 0;
}
static u64 cpuacct_forceidle_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct cpuacct *ca = css_ca(css);
u64 totalforceidle = 0;
int i;

for_each_possible_cpu(i)
totalforceidle += __forceidle_read(ca, i);
return totalforceidle;
}
#endif

static struct cftype files[] = {
{
.name = "usage",
@@ -324,6 +374,16 @@ static struct cftype files[] = {
.name = "stat",
.seq_show = cpuacct_stats_show,
},
#ifdef CONFIG_SCHED_CORE
{
.name = "forceidle",
.read_u64 = cpuacct_forceidle_read,
},
{
.name = "forceidle_percpu",
.seq_show = cpuacct_percpu_forceidle_seq_show,
},
#endif
{ } /* terminate */
};

@@ -359,6 +419,25 @@ void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
rcu_read_unlock();
}

#ifdef CONFIG_SCHED_CORE
/*
 * Charge @cputime of force-idle time to @tsk's cpuacct group and to every
 * ancestor of that group, using the per-CPU counter that belongs to @cpu.
 *
 * The walk up the hierarchy is done inside an RCU read-side section.
 * The caller is expected to hold rq->core->__lock, which is what protects
 * the per-CPU ca->forceidle counters against concurrent updates.
 */
void cpuacct_account_forceidle(int cpu, struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;

	rcu_read_lock();

	ca = task_ca(tsk);
	while (ca) {
		*per_cpu_ptr(ca->forceidle, cpu) += cputime;
		ca = parent_ca(ca);
	}

	rcu_read_unlock();
}
#endif

struct cgroup_subsys cpuacct_cgrp_subsys = {
.css_alloc = cpuacct_css_alloc,
.css_free = cpuacct_css_free,

0 comments on commit 363ef94

Please sign in to comment.