treewide: Tentatively roll back experimental QoS/cpuidle patches
At best, idle drain has been wildly inconsistent since the
inclusion of these patches; at worst, it has tanked severely. While
this patchset may contain some benign improvements that are not
directly responsible for these results, my faith in it has been
shaken to the point that I would just as soon reset these patches
out of existence rather than risk a regression for a net-neutral
gain.

This reverts the following commits:
1ddf5cb ("ARM64: dts: remove pm qos active latency override")
cfde209 ("drivers: use raw bitwise operations for pm_qos cpumasks")
84084a6 ("cpuidle: Optimize pm_qos notifier callback and IPI semantics")
032624e ("arm64: Allow IPI_WAKEUP to be used outside of the ACPI parking protocol")
702d288 ("qos: Don't disable interrupts while holding pm_qos_lock")
316b8c3 ("qos: Replace expensive cpumask usage with raw bitwise operations")
d4e9fa8 ("cpuidle: lpm-levels: Allow exit latencies equal to target latencies")
e351bc2 ("msm: kgsl: Relax CPU latency requirements to save power")
b6866ca ("scsi: ufs: Only apply pm_qos to the CPU servicing UFS interrupts")
75a0962 ("scsi: ufs: Remove 10 ms CPU idle latency unvote timeout")

Signed-off-by: Adam W. Willis <return.of.octobot@gmail.com>
0ctobot committed Jun 27, 2020
1 parent 5164fcf commit 71a9eca
Showing 13 changed files with 102 additions and 84 deletions.
2 changes: 2 additions & 0 deletions arch/arm64/boot/dts/qcom/sm8150-gpu.dtsi
@@ -100,6 +100,8 @@

tzone-names = "gpuss-0-usr", "gpuss-1-usr";

qcom,pm-qos-active-latency = <44>;

clocks = <&clock_gpucc GPU_CC_CXO_CLK>,
<&clock_gcc GCC_DDRSS_GPU_AXI_CLK>,
<&clock_gcc GCC_GPU_MEMNOC_GFX_CLK>,
7 changes: 7 additions & 0 deletions arch/arm64/include/asm/smp.h
@@ -95,7 +95,14 @@ extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);

#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
#else
static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
{
BUILD_BUG();
}
#endif

extern int __cpu_disable(void);

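For context, the restored stub relies on a standard kernel idiom: with CONFIG_ARM64_ACPI_PARKING_PROTOCOL disabled, any call site that is not optimized away trips BUILD_BUG() and fails the build, rather than silently sending wake-up IPIs that nothing will handle. A minimal sketch of the same pattern, using a hypothetical CONFIG_FOO:

#include <linux/build_bug.h>

#ifdef CONFIG_FOO
extern void foo_kick(int cpu);
#else
static inline void foo_kick(int cpu)
{
	/* Any surviving caller with CONFIG_FOO=n fails to compile here. */
	BUILD_BUG();
}
#endif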
7 changes: 7 additions & 0 deletions arch/arm64/kernel/smp.c
@@ -794,10 +794,12 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call_common(cpumask_of(cpu), IPI_CALL_FUNC);
}

#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
{
smp_cross_call_common(mask, IPI_WAKEUP);
}
#endif

#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
@@ -915,8 +917,13 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
#endif

#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
case IPI_WAKEUP:
WARN_ONCE(!acpi_parking_protocol_valid(cpu),
"CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
cpu);
break;
#endif

default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
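The restored WARN_ONCE() flags a wake-up IPI arriving on a CPU for which the ACPI parking protocol is not in effect. As a reminder of the API (a generic sketch, not from this commit): WARN_ONCE() evaluates its condition every time, prints the message and a backtrace only on the first trigger, and returns the condition, so it can double as a guard:

/* Hypothetical early-out; handle_IPI() above simply breaks. */
if (WARN_ONCE(!acpi_parking_protocol_valid(cpu),
	      "CPU%u: wake-up IPI outside the parking protocol\n", cpu))
	return;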
2 changes: 1 addition & 1 deletion drivers/char/adsprpc.c
@@ -3470,7 +3470,7 @@ static int fastrpc_internal_control(struct fastrpc_file *fl,
if (err)
goto bail;
fl->pm_qos_req.type = PM_QOS_REQ_AFFINE_CORES;
atomic_set(&fl->pm_qos_req.cpus_affine, *cpumask_bits(cpu_lp_mask));
cpumask_copy(&fl->pm_qos_req.cpus_affine, cpu_lp_mask);
if (!fl->qos_request) {
pm_qos_add_request(&fl->pm_qos_req,
PM_QOS_CPU_DMA_LATENCY, latency);
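The reverted series stored the affinity as raw bits in an atomic_t via *cpumask_bits(), which only works while NR_CPUS fits in one word (note the #error guard removed from cpuidle.c below); the restored code returns to the generic struct cpumask API, which scales to any NR_CPUS. The two representations side by side (an illustration, not part of the diff):

atomic_t raw_affinity;         /* reverted style: one word of bits */
struct cpumask full_affinity;  /* restored style: works for any NR_CPUS */

atomic_set(&raw_affinity, *cpumask_bits(cpu_lp_mask));
cpumask_copy(&full_affinity, cpu_lp_mask);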
43 changes: 15 additions & 28 deletions drivers/cpuidle/cpuidle.c
@@ -37,27 +37,6 @@ static int enabled_devices;
static int off __read_mostly;
static int initialized __read_mostly;

#ifdef CONFIG_SMP
static atomic_t idled = ATOMIC_INIT(0);

#if NR_CPUS > 32
#error idled CPU mask not big enough for NR_CPUS
#endif

static void cpuidle_set_idle_cpu(unsigned int cpu)
{
atomic_or(BIT(cpu), &idled);
}

static void cpuidle_clear_idle_cpu(unsigned int cpu)
{
atomic_andnot(BIT(cpu), &idled);
}
#else
static inline void cpuidle_set_idle_cpu(unsigned int cpu) { }
static inline void cpuidle_clear_idle_cpu(unsigned int cpu) { }
#endif

int cpuidle_disabled(void)
{
return off;
@@ -240,9 +219,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
time_start = ns_to_ktime(local_clock());

stop_critical_timings();
cpuidle_set_idle_cpu(dev->cpu);
entered_state = target_state->enter(dev, drv, index);
cpuidle_clear_idle_cpu(dev->cpu);
start_critical_timings();

sched_clock_idle_wakeup_event();
@@ -666,12 +643,22 @@ EXPORT_SYMBOL_GPL(cpuidle_register);

static void wake_up_idle_cpus(void *v)
{
unsigned long cpus = atomic_read(&idled) & *cpumask_bits(to_cpumask(v));
int cpu;
struct cpumask cpus;

/* Use READ_ONCE to get the isolated mask outside cpu_add_remove_lock */
cpus &= ~READ_ONCE(*cpumask_bits(cpu_isolated_mask));
if (cpus)
arch_send_wakeup_ipi_mask(to_cpumask(&cpus));
preempt_disable();
if (v) {
cpumask_andnot(&cpus, v, cpu_isolated_mask);
cpumask_and(&cpus, &cpus, cpu_online_mask);
} else
cpumask_andnot(&cpus, cpu_online_mask, cpu_isolated_mask);

for_each_cpu(cpu, &cpus) {
if (cpu == smp_processor_id())
continue;
wake_up_if_idle(cpu);
}
preempt_enable();
}

/*
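The restored wake_up_idle_cpus() takes an optional mask through its void *v argument: NULL means re-evaluate every online, non-isolated CPU, while a non-NULL pointer restricts the walk to that cpumask, and wake_up_if_idle() then IPIs only CPUs actually sitting in idle. Its callers live elsewhere in this file, outside the hunk, but presumably invoke it along these lines (`affected` is a hypothetical struct cpumask pointer):

wake_up_idle_cpus(NULL);              /* latency target changed globally */
wake_up_idle_cpus((void *)affected);  /* only CPUs named in the request */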
4 changes: 2 additions & 2 deletions drivers/cpuidle/lpm-levels.c
@@ -674,7 +674,7 @@ static int cpu_power_select(struct cpuidle_device *dev,
min_residency = pwr_params->min_residency;
max_residency = pwr_params->max_residency;

if (latency_us <= lvl_latency_us)
if (latency_us < lvl_latency_us)
break;

if (next_event_us) {
@@ -1018,7 +1018,7 @@ static int cluster_select(struct lpm_cluster *cluster, bool from_idle,
&level->num_cpu_votes))
continue;

if (from_idle && latency_us <= pwr_params->exit_latency)
if (from_idle && latency_us < pwr_params->exit_latency)
break;

if (sleep_us < (pwr_params->exit_latency +
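Both hunks move a boundary condition: the operator decides whether an idle level whose exit latency exactly equals the PM QoS request remains eligible. With the strict comparison restored here, the loop breaks only when the request is tighter than the level's latency, so an exact match still admits the level:

/* Example: latency_us == lvl_latency_us == 100
 *   latency_us <  lvl_latency_us -> false: no break, level still eligible
 *   latency_us <= lvl_latency_us -> true:  break, this and deeper levels skipped
 */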
7 changes: 6 additions & 1 deletion drivers/gpu/drm/msm/sde/sde_encoder.c
@@ -308,6 +308,7 @@ static void _sde_encoder_pm_qos_add_request(struct drm_encoder *drm_enc,
struct pm_qos_request *req;
u32 cpu_mask;
u32 cpu_dma_latency;
int cpu;

if (!sde_kms->catalog || !sde_kms->catalog->perf.cpu_mask)
return;
@@ -317,7 +318,11 @@

req = &sde_enc->pm_qos_cpu_req;
req->type = PM_QOS_REQ_AFFINE_CORES;
atomic_set(&req->cpus_affine, cpu_mask);
cpumask_clear(&req->cpus_affine);
for_each_possible_cpu(cpu) {
if ((1 << cpu) & cpu_mask)
cpumask_set_cpu(cpu, &req->cpus_affine);
}
pm_qos_add_request(req, PM_QOS_CPU_DMA_LATENCY, cpu_dma_latency);

SDE_EVT32_VERBOSE(DRMID(drm_enc), cpu_mask, cpu_dma_latency);
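The restored block open-codes a u32-to-cpumask conversion (the rotator hunk below repeats the same loop). Had it been factored out, which this tree does not do, a helper might look like this sketch, with BIT() standing in for the 1 << cpu shift:

static void u32_to_cpumask(u32 bits, struct cpumask *dst)
{
	int cpu;

	cpumask_clear(dst);
	for_each_possible_cpu(cpu)
		if (bits & BIT(cpu))
			cpumask_set_cpu(cpu, dst);
}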
4 changes: 2 additions & 2 deletions drivers/gpu/msm/adreno.c
@@ -1004,12 +1004,12 @@ static int adreno_of_get_power(struct adreno_device *adreno_dev,
/* get pm-qos-active-latency, set it to default if not found */
if (of_property_read_u32(node, "qcom,pm-qos-active-latency",
&device->pwrctrl.pm_qos_active_latency))
device->pwrctrl.pm_qos_active_latency = 1000;
device->pwrctrl.pm_qos_active_latency = 501;

/* get pm-qos-wakeup-latency, set it to default if not found */
if (of_property_read_u32(node, "qcom,pm-qos-wakeup-latency",
&device->pwrctrl.pm_qos_wakeup_latency))
device->pwrctrl.pm_qos_wakeup_latency = 100;
device->pwrctrl.pm_qos_wakeup_latency = 101;

if (of_property_read_u32(node, "qcom,idle-timeout", &timeout))
timeout = 80;
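of_property_read_u32() returns 0 on success and a negative errno otherwise, so each fallback above (501 µs active, 101 µs wake-up) applies only when the device tree does not provide the property; the qcom,pm-qos-active-latency = <44> re-added to sm8150-gpu.dtsi earlier in this diff is what overrides the active-latency default on this SoC. The idiom in isolation (property name hypothetical):

u32 val;

if (of_property_read_u32(node, "qcom,example-latency", &val))
	val = 501; /* property absent or malformed: use the default */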
7 changes: 6 additions & 1 deletion drivers/media/platform/msm/sde/rotator/sde_rotator_dev.c
@@ -1309,6 +1309,7 @@ void sde_rotator_pm_qos_add(struct sde_rot_data_type *rot_mdata)
{
struct pm_qos_request *req;
u32 cpu_mask;
int cpu;

if (!rot_mdata) {
SDEROT_DBG("invalid rot device or context\n");
@@ -1322,7 +1323,11 @@

req = &rot_mdata->pm_qos_rot_cpu_req;
req->type = PM_QOS_REQ_AFFINE_CORES;
atomic_set(&req->cpus_affine, cpu_mask);
cpumask_clear(&req->cpus_affine);
for_each_possible_cpu(cpu) {
if ((1 << cpu) & cpu_mask)
cpumask_set_cpu(cpu, &req->cpus_affine);
}
pm_qos_add_request(req, PM_QOS_CPU_DMA_LATENCY,
PM_QOS_DEFAULT_VALUE);

10 changes: 7 additions & 3 deletions drivers/scsi/ufs/ufs-qcom.c
@@ -35,6 +35,8 @@
#define MAX_PROP_SIZE 32
#define VDDP_REF_CLK_MIN_UV 1200000
#define VDDP_REF_CLK_MAX_UV 1200000
/* TODO: further tuning for this parameter may be required */
#define UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US (10000) /* microseconds */

#define UFS_QCOM_DEFAULT_DBG_PRINT_EN \
(UFS_QCOM_DBG_PRINT_REGS_EN | UFS_QCOM_DBG_PRINT_TEST_BUS_EN)
@@ -1782,7 +1784,8 @@ static void ufs_qcom_pm_qos_unvote_work(struct work_struct *work)
group->state = PM_QOS_UNVOTED;
spin_unlock_irqrestore(host->hba->host->host_lock, flags);

pm_qos_update_request(&group->req, PM_QOS_DEFAULT_VALUE);
pm_qos_update_request_timeout(&group->req,
group->latency_us, UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US);
}

static ssize_t ufs_qcom_pm_qos_enable_show(struct device *dev,
@@ -1950,8 +1953,9 @@ static int ufs_qcom_pm_qos_init(struct ufs_qcom_host *host)
if (ret)
goto free_groups;

host->pm_qos.groups[i].req.type = PM_QOS_REQ_AFFINE_IRQ;
host->pm_qos.groups[i].req.irq = host->hba->irq;
host->pm_qos.groups[i].req.type = PM_QOS_REQ_AFFINE_CORES;
host->pm_qos.groups[i].req.cpus_affine =
host->pm_qos.groups[i].mask;
host->pm_qos.groups[i].state = PM_QOS_UNVOTED;
host->pm_qos.groups[i].active_reqs = 0;
host->pm_qos.groups[i].host = host;
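Two behaviors return here: each group's request is again affined to its CPU mask (PM_QOS_REQ_AFFINE_CORES) instead of to the CPU servicing the UFS interrupt, and the unvote path again uses pm_qos_update_request_timeout(), which holds the vote and lets the PM QoS core drop it to PM_QOS_DEFAULT_VALUE on its own once the timeout lapses. A sketch of the timed-unvote pattern (variable names assumed):

pm_qos_update_request_timeout(&req, latency_us, 10 * USEC_PER_MSEC);
/* ~10 ms later, absent a new vote, the core cancels the request
 * itself; no explicit update to PM_QOS_DEFAULT_VALUE is needed. */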
2 changes: 1 addition & 1 deletion include/linux/pm_qos.h
@@ -73,7 +73,7 @@ enum pm_qos_req_type {

struct pm_qos_request {
enum pm_qos_req_type type;
atomic_t cpus_affine;
struct cpumask cpus_affine;
#ifdef CONFIG_SMP
uint32_t irq;
/* Internal structure members */
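With cpus_affine restored to a struct cpumask, a driver-side request takes this shape (a minimal usage sketch under the restored layout; values invented):

static struct pm_qos_request req;

req.type = PM_QOS_REQ_AFFINE_CORES;
cpumask_copy(&req.cpus_affine, cpumask_of(0));   /* CPU0 only */
pm_qos_add_request(&req, PM_QOS_CPU_DMA_LATENCY, 100);
/* ... latency-sensitive work ... */
pm_qos_remove_request(&req);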
