Skip to content
Permalink
Browse files
sched/deadline: Fix sched_dl_global_validate()
When the user changes sched_rt_{runtime,period}_us, the following check runs:

  sched_rt_handler()
    -->	sched_dl_global_validate()
	{
		new_bw = global_rt_runtime()/global_rt_period();

		for_each_possible_cpu(cpu) {
			dl_b = dl_bw_of(cpu);
			if (new_bw < dl_b->total_bw)
				ret = -EBUSY;
		}
	}

Under CONFIG_SMP, dl_bw is per root domain, not per CPU:
dl_b->total_bw is the allocated bandwidth of the whole root domain.
We should therefore compare dl_b->total_bw against cpus*new_bw, where
'cpus' is the number of CPUs in the root domain.

Also, the comment below (in kernel/sched/sched.h) describes an
implementation that only existed in SCHED_DEADLINE v2 [1]. The deadline
scheduler kept evolving until it was merged (v9), but the comment was
never updated, so it is now meaningless and misleading. Correct it.

* With respect to SMP, the bandwidth is given on a per-CPU basis,
* meaning that:
*  - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU;
*  - dl_total_bw array contains, in the i-eth element, the currently
*    allocated bandwidth on the i-eth CPU.

[1] https://lkml.org/lkml/2010/2/28/119

Signed-off-by: Peng Liu <iwtbavbm@gmail.com>
  • Loading branch information
Peng Liu authored and intel-lab-lkp committed Sep 17, 2020
1 parent 848785d commit 34a03e474224b29097a1b94a766d6a8eb3f7c4e6
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 25 deletions.
@@ -2511,33 +2511,45 @@ const struct sched_class dl_sched_class
.update_curr = update_curr_dl,
};

#ifdef CONFIG_SMP
/*
 * Scratch copy of cpu_possible_mask used by sched_dl_global_validate() and
 * sched_dl_do_global(). While iterating, the CPUs in the span of a visited
 * root domain may be cleared from this mask so that the same root domain
 * is not processed again.
 *
 * NOTE(review): this is a single file-scope mask shared by both users;
 * presumably the callers are serialized by outer locking -- confirm.
 */
static struct cpumask dl_local_possible_mask;
#endif /* CONFIG_SMP */

int sched_dl_global_validate(void)
{
u64 runtime = global_rt_runtime();
u64 period = global_rt_period();
u64 new_bw = to_ratio(period, runtime);
struct dl_bw *dl_b;
int cpu, ret = 0;
int cpu, cpus, ret = 0;
unsigned long flags;

/*
* Here we want to check the bandwidth not being set to some
* value smaller than the currently allocated bandwidth in
* any of the root_domains.
*
* FIXME: Cycling on all the CPUs is overdoing, but simpler than
* cycling on root_domains... Discussion on different/better
* solutions is welcome!
*/
#ifdef CONFIG_SMP
cpumask_t *possible_mask = &dl_local_possible_mask;

cpumask_copy(possible_mask, cpu_possible_mask);
for_each_cpu(cpu, possible_mask) {
#else
for_each_possible_cpu(cpu) {
#endif /* CONFIG_SMP */
rcu_read_lock_sched();
dl_b = dl_bw_of(cpu);

cpus = dl_bw_cpus(cpu);
#ifdef CONFIG_SMP
/* Do the "andnot" operation iff it's necessary. */
if (cpus > 1)
cpumask_andnot(possible_mask, possible_mask,
cpu_rq(cpu)->rd->span);
#endif /* CONFIG_SMP */
raw_spin_lock_irqsave(&dl_b->lock, flags);
if (new_bw < dl_b->total_bw)
if (new_bw * cpus < dl_b->total_bw)
ret = -EBUSY;
raw_spin_unlock_irqrestore(&dl_b->lock, flags);

rcu_read_unlock_sched();

if (ret)
@@ -2566,24 +2578,31 @@ void sched_dl_do_global(void)
struct dl_bw *dl_b;
int cpu;
unsigned long flags;
#ifdef CONFIG_SMP
cpumask_t *possible_mask = NULL;
#endif /* CONFIG_SMP */

def_dl_bandwidth.dl_period = global_rt_period();
def_dl_bandwidth.dl_runtime = global_rt_runtime();

if (global_rt_runtime() != RUNTIME_INF)
new_bw = to_ratio(global_rt_period(), global_rt_runtime());

/*
* FIXME: As above...
*/
for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
possible_mask = &dl_local_possible_mask;
cpumask_copy(possible_mask, cpu_possible_mask);
#endif /* CONFIG_SMP */
for_each_cpu(cpu, possible_mask) {
rcu_read_lock_sched();
dl_b = dl_bw_of(cpu);

raw_spin_lock_irqsave(&dl_b->lock, flags);
dl_b->bw = new_bw;
raw_spin_unlock_irqrestore(&dl_b->lock, flags);

#ifdef CONFIG_SMP
cpumask_andnot(possible_mask, possible_mask,
cpu_rq(cpu)->rd->span);
#endif /* CONFIG_SMP */
rcu_read_unlock_sched();
init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
}
@@ -258,9 +258,9 @@ struct rt_bandwidth {
void __dl_clear_params(struct task_struct *p);

/*
* To keep the bandwidth of -deadline tasks and groups under control
* To keep the bandwidth of -deadline tasks under control
* we need some place where:
* - store the maximum -deadline bandwidth of the system (the group);
* - store the maximum -deadline bandwidth of each root domain;
* - cache the fraction of that bandwidth that is currently allocated.
*
* This is all done in the data structure below. It is similar to the
@@ -269,17 +269,10 @@ void __dl_clear_params(struct task_struct *p);
* do not decrease any runtime while the group "executes", neither we
* need a timer to replenish it.
*
* With respect to SMP, the bandwidth is given on a per-CPU basis,
* With respect to SMP, the bandwidth is given on a per root domain basis,
* meaning that:
* - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU;
* - dl_total_bw array contains, in the i-eth element, the currently
* allocated bandwidth on the i-eth CPU.
* Moreover, groups consume bandwidth on each CPU, while tasks only
* consume bandwidth on the CPU they're running on.
* Finally, dl_total_bw_cpu is used to cache the index of dl_total_bw
* that will be shown the next time the proc or cgroup controls will
* be red. It on its turn can be changed by writing on its own
* control.
* - bw (< 100%) is the bandwidth of the system on each CPU;
* - total_bw is the currently allocated bandwidth on each root domain.
*/
struct dl_bandwidth {
raw_spinlock_t dl_runtime_lock;

0 comments on commit 34a03e4

Please sign in to comment.