Skip to content

Commit

Permalink
sched: make struct sched_statistics independent of fair sched class
Browse files Browse the repository at this point in the history
If we want to use the schedstats facility in all sched classes, we should
either move struct sched_statistics out of struct sched_entity or duplicate
it in the other sched-entity structs as well. Obviously the latter is bad
because it requires more space. So we should move it into a common struct
which can be used by all sched classes.

The struct sched_statistics holds the scheduler statistics of a task_struct
or a task_group. So we can move it into struct task_struct and
struct task_group to achieve the goal.

Below is a detailed explanation of the changes to the structs.

- Before this patch

struct task_struct {            |-> struct sched_entity {
    ...                         |       ...
    struct sched_entity *se; ---|       struct sched_statistics statistics;
    struct sched_rt_entity *rt;         ...
    ...                                 ...
};                                  };

struct task_group {             |--> se[0]->statistics : schedstats of CPU0
    ...                         |
 #ifdef CONFIG_FAIR_GROUP_SCHED |
    struct sched_entity **se; --|--> se[1]->statistics : schedstats of CPU1
                                |
 #endif                         |
                                |--> se[N]->statistics : schedstats of CPUn

 #ifdef CONFIG_RT_GROUP_SCHED
    struct sched_rt_entity  **rt_se; (N/A)
 #endif
    ...
};

The '**se' in task_group is allocated by the fair sched class, which makes
it hard to reuse from the other sched classes.

- After this patch

struct task_struct {
    ...
    struct sched_statistics statistics;
    ...
    struct sched_entity *se;
    struct sched_rt_entity *rt;
    ...
};

struct task_group {                    |---> stats[0] : of CPU0
    ...                                |
    struct sched_statistics **stats; --|---> stats[1] : of CPU1
    ...                                |
                                       |---> stats[n] : of CPUn
 #ifdef CONFIG_FAIR_GROUP_SCHED
    struct sched_entity **se;
 #endif
 #ifdef CONFIG_RT_GROUP_SCHED
    struct sched_rt_entity  **rt_se;
 #endif
    ...
};

After the patch it is clear that both se and rt_se can easily get to the
sched_statistics from a task_struct or a task_group.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
  • Loading branch information
laoar authored and intel-lab-lkp committed Dec 1, 2020
1 parent 26dd631 commit a2e8aa5
Show file tree
Hide file tree
Showing 9 changed files with 206 additions and 105 deletions.
3 changes: 1 addition & 2 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,6 @@ struct sched_entity {

u64 nr_migrations;

struct sched_statistics statistics;

#ifdef CONFIG_FAIR_GROUP_SCHED
int depth;
struct sched_entity *parent;
Expand Down Expand Up @@ -689,6 +687,7 @@ struct task_struct {
unsigned int rt_priority;

const struct sched_class *sched_class;
struct sched_statistics stats;
struct sched_entity se;
struct sched_rt_entity rt;
#ifdef CONFIG_CGROUP_SCHED
Expand Down
24 changes: 14 additions & 10 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2427,11 +2427,11 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
#ifdef CONFIG_SMP
if (cpu == rq->cpu) {
__schedstat_inc(rq->ttwu_local);
__schedstat_inc(p->se.statistics.nr_wakeups_local);
__schedstat_inc(p->stats.nr_wakeups_local);
} else {
struct sched_domain *sd;

__schedstat_inc(p->se.statistics.nr_wakeups_remote);
__schedstat_inc(p->stats.nr_wakeups_remote);
rcu_read_lock();
for_each_domain(rq->cpu, sd) {
if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
Expand All @@ -2443,14 +2443,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
}

if (wake_flags & WF_MIGRATED)
__schedstat_inc(p->se.statistics.nr_wakeups_migrate);
__schedstat_inc(p->stats.nr_wakeups_migrate);
#endif /* CONFIG_SMP */

__schedstat_inc(rq->ttwu_count);
__schedstat_inc(p->se.statistics.nr_wakeups);
__schedstat_inc(p->stats.nr_wakeups);

if (wake_flags & WF_SYNC)
__schedstat_inc(p->se.statistics.nr_wakeups_sync);
__schedstat_inc(p->stats.nr_wakeups_sync);
}

/*
Expand Down Expand Up @@ -3080,7 +3080,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)

#ifdef CONFIG_SCHEDSTATS
/* Even if schedstat is disabled, there should not be garbage */
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
memset(&p->stats, 0, sizeof(p->stats));
#endif

RB_CLEAR_NODE(&p->dl.rb_node);
Expand Down Expand Up @@ -7353,9 +7353,9 @@ void normalize_rt_tasks(void)
continue;

p->se.exec_start = 0;
schedstat_set(p->se.statistics.wait_start, 0);
schedstat_set(p->se.statistics.sleep_start, 0);
schedstat_set(p->se.statistics.block_start, 0);
schedstat_set(p->stats.wait_start, 0);
schedstat_set(p->stats.sleep_start, 0);
schedstat_set(p->stats.block_start, 0);

if (!dl_task(p) && !rt_task(p)) {
/*
Expand Down Expand Up @@ -7445,6 +7445,7 @@ static void sched_free_group(struct task_group *tg)
{
free_fair_sched_group(tg);
free_rt_sched_group(tg);
free_tg_schedstats(tg);
autogroup_free(tg);
kmem_cache_free(task_group_cache, tg);
}
Expand All @@ -7464,6 +7465,9 @@ struct task_group *sched_create_group(struct task_group *parent)
if (!alloc_rt_sched_group(tg, parent))
goto err;

if (!alloc_tg_schedstats(tg))
goto err;

alloc_uclamp_sched_group(tg, parent);

return tg;
Expand Down Expand Up @@ -8150,7 +8154,7 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
int i;

for_each_possible_cpu(i)
ws += schedstat_val(tg->se[i]->statistics.wait_sum);
ws += schedstat_val(tg->stats[i]->wait_sum);

seq_printf(sf, "wait_sum %llu\n", ws);
}
Expand Down
4 changes: 2 additions & 2 deletions kernel/sched/deadline.c
Original file line number Diff line number Diff line change
Expand Up @@ -1264,8 +1264,8 @@ static void update_curr_dl(struct rq *rq)
return;
}

schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
schedstat_set(curr->stats.exec_max,
max(curr->stats.exec_max, delta_exec));

curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
Expand Down
81 changes: 40 additions & 41 deletions kernel/sched/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -444,16 +444,16 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
PN(se->sum_exec_runtime);

if (schedstat_enabled()) {
PN_SCHEDSTAT(se->statistics.wait_start);
PN_SCHEDSTAT(se->statistics.sleep_start);
PN_SCHEDSTAT(se->statistics.block_start);
PN_SCHEDSTAT(se->statistics.sleep_max);
PN_SCHEDSTAT(se->statistics.block_max);
PN_SCHEDSTAT(se->statistics.exec_max);
PN_SCHEDSTAT(se->statistics.slice_max);
PN_SCHEDSTAT(se->statistics.wait_max);
PN_SCHEDSTAT(se->statistics.wait_sum);
P_SCHEDSTAT(se->statistics.wait_count);
PN_SCHEDSTAT(tg->stats[cpu]->wait_start);
PN_SCHEDSTAT(tg->stats[cpu]->sleep_start);
PN_SCHEDSTAT(tg->stats[cpu]->block_start);
PN_SCHEDSTAT(tg->stats[cpu]->sleep_max);
PN_SCHEDSTAT(tg->stats[cpu]->block_max);
PN_SCHEDSTAT(tg->stats[cpu]->exec_max);
PN_SCHEDSTAT(tg->stats[cpu]->slice_max);
PN_SCHEDSTAT(tg->stats[cpu]->wait_max);
PN_SCHEDSTAT(tg->stats[cpu]->wait_sum);
P_SCHEDSTAT(tg->stats[cpu]->wait_count);
}

P(se->load.weight);
Expand Down Expand Up @@ -499,9 +499,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
p->prio);

SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
SPLIT_NS(p->se.sum_exec_runtime),
SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)));

#ifdef CONFIG_NUMA_BALANCING
SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
Expand Down Expand Up @@ -938,34 +938,33 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,

if (schedstat_enabled()) {
u64 avg_atom, avg_per_cpu;

PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
PN_SCHEDSTAT(se.statistics.wait_start);
PN_SCHEDSTAT(se.statistics.sleep_start);
PN_SCHEDSTAT(se.statistics.block_start);
PN_SCHEDSTAT(se.statistics.sleep_max);
PN_SCHEDSTAT(se.statistics.block_max);
PN_SCHEDSTAT(se.statistics.exec_max);
PN_SCHEDSTAT(se.statistics.slice_max);
PN_SCHEDSTAT(se.statistics.wait_max);
PN_SCHEDSTAT(se.statistics.wait_sum);
P_SCHEDSTAT(se.statistics.wait_count);
PN_SCHEDSTAT(se.statistics.iowait_sum);
P_SCHEDSTAT(se.statistics.iowait_count);
P_SCHEDSTAT(se.statistics.nr_migrations_cold);
P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
P_SCHEDSTAT(se.statistics.nr_forced_migrations);
P_SCHEDSTAT(se.statistics.nr_wakeups);
P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
P_SCHEDSTAT(se.statistics.nr_wakeups_local);
P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
PN_SCHEDSTAT(stats.sum_sleep_runtime);
PN_SCHEDSTAT(stats.wait_start);
PN_SCHEDSTAT(stats.sleep_start);
PN_SCHEDSTAT(stats.block_start);
PN_SCHEDSTAT(stats.sleep_max);
PN_SCHEDSTAT(stats.block_max);
PN_SCHEDSTAT(stats.exec_max);
PN_SCHEDSTAT(stats.slice_max);
PN_SCHEDSTAT(stats.wait_max);
PN_SCHEDSTAT(stats.wait_sum);
P_SCHEDSTAT(stats.wait_count);
PN_SCHEDSTAT(stats.iowait_sum);
P_SCHEDSTAT(stats.iowait_count);
P_SCHEDSTAT(stats.nr_migrations_cold);
P_SCHEDSTAT(stats.nr_failed_migrations_affine);
P_SCHEDSTAT(stats.nr_failed_migrations_running);
P_SCHEDSTAT(stats.nr_failed_migrations_hot);
P_SCHEDSTAT(stats.nr_forced_migrations);
P_SCHEDSTAT(stats.nr_wakeups);
P_SCHEDSTAT(stats.nr_wakeups_sync);
P_SCHEDSTAT(stats.nr_wakeups_migrate);
P_SCHEDSTAT(stats.nr_wakeups_local);
P_SCHEDSTAT(stats.nr_wakeups_remote);
P_SCHEDSTAT(stats.nr_wakeups_affine);
P_SCHEDSTAT(stats.nr_wakeups_affine_attempts);
P_SCHEDSTAT(stats.nr_wakeups_passive);
P_SCHEDSTAT(stats.nr_wakeups_idle);

avg_atom = p->se.sum_exec_runtime;
if (nr_switches)
Expand Down Expand Up @@ -1031,6 +1030,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
memset(&p->stats, 0, sizeof(p->stats));
#endif
}
Loading

0 comments on commit a2e8aa5

Please sign in to comment.