treewide: Tentatively roll back experimental QoS/cpuidle patches
At best, idle drain has been wildly inconsistent since the
inclusion of these patches; at worst, it has tanked severely. While
this patchset may contain some benign improvements that are not
directly responsible for these results, my faith in it has been
shaken to the point that I would just as soon reset these patches
out of existence rather than risk a regression for a net-neutral
gain.

This reverts the following commits:
1ddf5cb ("ARM64: dts: remove pm qos active latency override")
cfde209 ("drivers: use raw bitwise operations for pm_qos cpumasks")
84084a6 ("cpuidle: Optimize pm_qos notifier callback and IPI semantics")
032624e ("arm64: Allow IPI_WAKEUP to be used outside of the ACPI parking protocol")
702d288 ("qos: Don't disable interrupts while holding pm_qos_lock")
316b8c3 ("qos: Replace expensive cpumask usage with raw bitwise operations")
d4e9fa8 ("cpuidle: lpm-levels: Allow exit latencies equal to target latencies")
e351bc2 ("msm: kgsl: Relax CPU latency requirements to save power")
b6866ca ("scsi: ufs: Only apply pm_qos to the CPU servicing UFS interrupts")
75a0962 ("scsi: ufs: Remove 10 ms CPU idle latency unvote timeout")

Signed-off-by: Adam W. Willis <return.of.octobot@gmail.com>
0ctobot committed Jun 27, 2020
1 parent 5164fcf commit 71a9eca
Showing 13 changed files with 102 additions and 84 deletions.
2 changes: 2 additions & 0 deletions arch/arm64/boot/dts/qcom/sm8150-gpu.dtsi
@@ -100,6 +100,8 @@

tzone-names = "gpuss-0-usr", "gpuss-1-usr";

qcom,pm-qos-active-latency = <44>;

clocks = <&clock_gpucc GPU_CC_CXO_CLK>,
<&clock_gcc GCC_DDRSS_GPU_AXI_CLK>,
<&clock_gcc GCC_GPU_MEMNOC_GFX_CLK>,
7 changes: 7 additions & 0 deletions arch/arm64/include/asm/smp.h
@@ -95,7 +95,14 @@ extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);

#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
#else
static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
{
BUILD_BUG();
}
#endif

extern int __cpu_disable(void);

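For context, the restored stub relies on a standard kernel idiom: with CONFIG_ARM64_ACPI_PARKING_PROTOCOL disabled, any call site that is not optimized away trips BUILD_BUG() and fails the build, rather than silently sending wake-up IPIs that nothing will handle. A minimal sketch of the same pattern, using a hypothetical CONFIG_FOO:

#include <linux/build_bug.h>

#ifdef CONFIG_FOO
extern void foo_kick(int cpu);
#else
static inline void foo_kick(int cpu)
{
	/* Any surviving caller with CONFIG_FOO=n fails to compile here. */
	BUILD_BUG();
}
#endif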
7 changes: 7 additions & 0 deletions arch/arm64/kernel/smp.c
@@ -794,10 +794,12 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call_common(cpumask_of(cpu), IPI_CALL_FUNC);
}

#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
{
smp_cross_call_common(mask, IPI_WAKEUP);
}
#endif

#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
@@ -915,8 +917,13 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
#endif

#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
case IPI_WAKEUP:
WARN_ONCE(!acpi_parking_protocol_valid(cpu),
"CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
cpu);
break;
#endif

default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
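The restored WARN_ONCE() flags a wake-up IPI arriving on a CPU for which the ACPI parking protocol is not in effect. As a reminder of the API (a generic sketch, not from this commit): WARN_ONCE() evaluates its condition every time, prints the message and a backtrace only on the first trigger, and returns the condition, so it can double as a guard:

/* Hypothetical early-out; handle_IPI() above simply breaks. */
if (WARN_ONCE(!acpi_parking_protocol_valid(cpu),
	      "CPU%u: wake-up IPI outside the parking protocol\n", cpu))
	return;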
2 changes: 1 addition & 1 deletion drivers/char/adsprpc.c
@@ -3470,7 +3470,7 @@ static int fastrpc_internal_control(struct fastrpc_file *fl,
if (err)
goto bail;
fl->pm_qos_req.type = PM_QOS_REQ_AFFINE_CORES;
atomic_set(&fl->pm_qos_req.cpus_affine, *cpumask_bits(cpu_lp_mask));
cpumask_copy(&fl->pm_qos_req.cpus_affine, cpu_lp_mask);
if (!fl->qos_request) {
pm_qos_add_request(&fl->pm_qos_req,
PM_QOS_CPU_DMA_LATENCY, latency);
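The reverted series stored the affinity as raw bits in an atomic_t via *cpumask_bits(), which only works while NR_CPUS fits in one word (note the #error guard removed from cpuidle.c below); the restored code returns to the generic struct cpumask API, which scales to any NR_CPUS. The two representations side by side (an illustration, not part of the diff):

atomic_t raw_affinity;         /* reverted style: one word of bits */
struct cpumask full_affinity;  /* restored style: works for any NR_CPUS */

atomic_set(&raw_affinity, *cpumask_bits(cpu_lp_mask));
cpumask_copy(&full_affinity, cpu_lp_mask);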
43 changes: 15 additions & 28 deletions drivers/cpuidle/cpuidle.c
@@ -37,27 +37,6 @@ static int enabled_devices;
static int off __read_mostly;
static int initialized __read_mostly;

#ifdef CONFIG_SMP
static atomic_t idled = ATOMIC_INIT(0);

#if NR_CPUS > 32
#error idled CPU mask not big enough for NR_CPUS
#endif

static void cpuidle_set_idle_cpu(unsigned int cpu)
{
atomic_or(BIT(cpu), &idled);
}

static void cpuidle_clear_idle_cpu(unsigned int cpu)
{
atomic_andnot(BIT(cpu), &idled);
}
#else
static inline void cpuidle_set_idle_cpu(unsigned int cpu) { }
static inline void cpuidle_clear_idle_cpu(unsigned int cpu) { }
#endif

int cpuidle_disabled(void)
{
return off;
@@ -240,9 +219,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
time_start = ns_to_ktime(local_clock());

stop_critical_timings();
cpuidle_set_idle_cpu(dev->cpu);
entered_state = target_state->enter(dev, drv, index);
cpuidle_clear_idle_cpu(dev->cpu);
start_critical_timings();

sched_clock_idle_wakeup_event();
@@ -666,12 +643,22 @@ EXPORT_SYMBOL_GPL(cpuidle_register);

static void wake_up_idle_cpus(void *v)
{
unsigned long cpus = atomic_read(&idled) & *cpumask_bits(to_cpumask(v));
int cpu;
struct cpumask cpus;

/* Use READ_ONCE to get the isolated mask outside cpu_add_remove_lock */
cpus &= ~READ_ONCE(*cpumask_bits(cpu_isolated_mask));
if (cpus)
arch_send_wakeup_ipi_mask(to_cpumask(&cpus));
preempt_disable();
if (v) {
cpumask_andnot(&cpus, v, cpu_isolated_mask);
cpumask_and(&cpus, &cpus, cpu_online_mask);
} else
cpumask_andnot(&cpus, cpu_online_mask, cpu_isolated_mask);

for_each_cpu(cpu, &cpus) {
if (cpu == smp_processor_id())
continue;
wake_up_if_idle(cpu);
}
preempt_enable();
}

/*
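The restored wake_up_idle_cpus() takes an optional mask through its void *v argument: NULL means re-evaluate every online, non-isolated CPU, while a non-NULL pointer restricts the walk to that cpumask, and wake_up_if_idle() then IPIs only CPUs actually sitting in idle. Its callers live elsewhere in this file, outside the hunk, but presumably invoke it along these lines (`affected` is a hypothetical struct cpumask pointer):

wake_up_idle_cpus(NULL);              /* latency target changed globally */
wake_up_idle_cpus((void *)affected);  /* only CPUs named in the request */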
4 changes: 2 additions & 2 deletions drivers/cpuidle/lpm-levels.c
@@ -674,7 +674,7 @@ static int cpu_power_select(struct cpuidle_device *dev,
min_residency = pwr_params->min_residency;
max_residency = pwr_params->max_residency;

if (latency_us <= lvl_latency_us)
if (latency_us < lvl_latency_us)
break;

if (next_event_us) {
@@ -1018,7 +1018,7 @@ static int cluster_select(struct lpm_cluster *cluster, bool from_idle,
&level->num_cpu_votes))
continue;

if (from_idle && latency_us <= pwr_params->exit_latency)
if (from_idle && latency_us < pwr_params->exit_latency)
break;

if (sleep_us < (pwr_params->exit_latency +
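Both hunks move a boundary condition: the operator decides whether an idle level whose exit latency exactly equals the PM QoS request remains eligible. With the strict comparison restored here, the loop breaks only when the request is tighter than the level's latency, so an exact match still admits the level:

/* Example: latency_us == lvl_latency_us == 100
 *   latency_us <  lvl_latency_us -> false: no break, level still eligible
 *   latency_us <= lvl_latency_us -> true:  break, this and deeper levels skipped
 */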
7 changes: 6 additions & 1 deletion drivers/gpu/drm/msm/sde/sde_encoder.c
@@ -308,6 +308,7 @@ static void _sde_encoder_pm_qos_add_request(struct drm_encoder *drm_enc,
struct pm_qos_request *req;
u32 cpu_mask;
u32 cpu_dma_latency;
int cpu;

if (!sde_kms->catalog || !sde_kms->catalog->perf.cpu_mask)
return;
@@ -317,7 +318,11 @@

req = &sde_enc->pm_qos_cpu_req;
req->type = PM_QOS_REQ_AFFINE_CORES;
atomic_set(&req->cpus_affine, cpu_mask);
cpumask_clear(&req->cpus_affine);
for_each_possible_cpu(cpu) {
if ((1 << cpu) & cpu_mask)
cpumask_set_cpu(cpu, &req->cpus_affine);
}
pm_qos_add_request(req, PM_QOS_CPU_DMA_LATENCY, cpu_dma_latency);

SDE_EVT32_VERBOSE(DRMID(drm_enc), cpu_mask, cpu_dma_latency);
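The restored block open-codes a u32-to-cpumask conversion (the rotator hunk below repeats the same loop). Had it been factored out, which this tree does not do, a helper might look like this sketch, with BIT() standing in for the 1 << cpu shift:

static void u32_to_cpumask(u32 bits, struct cpumask *dst)
{
	int cpu;

	cpumask_clear(dst);
	for_each_possible_cpu(cpu)
		if (bits & BIT(cpu))
			cpumask_set_cpu(cpu, dst);
}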
4 changes: 2 additions & 2 deletions drivers/gpu/msm/adreno.c
@@ -1004,12 +1004,12 @@ static int adreno_of_get_power(struct adreno_device *adreno_dev,
/* get pm-qos-active-latency, set it to default if not found */
if (of_property_read_u32(node, "qcom,pm-qos-active-latency",
&device->pwrctrl.pm_qos_active_latency))
device->pwrctrl.pm_qos_active_latency = 1000;
device->pwrctrl.pm_qos_active_latency = 501;

/* get pm-qos-wakeup-latency, set it to default if not found */
if (of_property_read_u32(node, "qcom,pm-qos-wakeup-latency",
&device->pwrctrl.pm_qos_wakeup_latency))
device->pwrctrl.pm_qos_wakeup_latency = 100;
device->pwrctrl.pm_qos_wakeup_latency = 101;

if (of_property_read_u32(node, "qcom,idle-timeout", &timeout))
timeout = 80;
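of_property_read_u32() returns 0 on success and a negative errno otherwise, so each fallback above (501 µs active, 101 µs wake-up) applies only when the device tree does not provide the property; the qcom,pm-qos-active-latency = <44> re-added to sm8150-gpu.dtsi earlier in this diff is what overrides the active-latency default on this SoC. The idiom in isolation (property name hypothetical):

u32 val;

if (of_property_read_u32(node, "qcom,example-latency", &val))
	val = 501; /* property absent or malformed: use the default */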
7 changes: 6 additions & 1 deletion drivers/media/platform/msm/sde/rotator/sde_rotator_dev.c
@@ -1309,6 +1309,7 @@ void sde_rotator_pm_qos_add(struct sde_rot_data_type *rot_mdata)
{
struct pm_qos_request *req;
u32 cpu_mask;
int cpu;

if (!rot_mdata) {
SDEROT_DBG("invalid rot device or context\n");
@@ -1322,7 +1323,11 @@

req = &rot_mdata->pm_qos_rot_cpu_req;
req->type = PM_QOS_REQ_AFFINE_CORES;
atomic_set(&req->cpus_affine, cpu_mask);
cpumask_clear(&req->cpus_affine);
for_each_possible_cpu(cpu) {
if ((1 << cpu) & cpu_mask)
cpumask_set_cpu(cpu, &req->cpus_affine);
}
pm_qos_add_request(req, PM_QOS_CPU_DMA_LATENCY,
PM_QOS_DEFAULT_VALUE);

10 changes: 7 additions & 3 deletions drivers/scsi/ufs/ufs-qcom.c
@@ -35,6 +35,8 @@
#define MAX_PROP_SIZE 32
#define VDDP_REF_CLK_MIN_UV 1200000
#define VDDP_REF_CLK_MAX_UV 1200000
/* TODO: further tuning for this parameter may be required */
#define UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US (10000) /* microseconds */

#define UFS_QCOM_DEFAULT_DBG_PRINT_EN \
(UFS_QCOM_DBG_PRINT_REGS_EN | UFS_QCOM_DBG_PRINT_TEST_BUS_EN)
@@ -1782,7 +1784,8 @@ static void ufs_qcom_pm_qos_unvote_work(struct work_struct *work)
group->state = PM_QOS_UNVOTED;
spin_unlock_irqrestore(host->hba->host->host_lock, flags);

pm_qos_update_request(&group->req, PM_QOS_DEFAULT_VALUE);
pm_qos_update_request_timeout(&group->req,
group->latency_us, UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US);
}

static ssize_t ufs_qcom_pm_qos_enable_show(struct device *dev,
@@ -1950,8 +1953,9 @@ static int ufs_qcom_pm_qos_init(struct ufs_qcom_host *host)
if (ret)
goto free_groups;

host->pm_qos.groups[i].req.type = PM_QOS_REQ_AFFINE_IRQ;
host->pm_qos.groups[i].req.irq = host->hba->irq;
host->pm_qos.groups[i].req.type = PM_QOS_REQ_AFFINE_CORES;
host->pm_qos.groups[i].req.cpus_affine =
host->pm_qos.groups[i].mask;
host->pm_qos.groups[i].state = PM_QOS_UNVOTED;
host->pm_qos.groups[i].active_reqs = 0;
host->pm_qos.groups[i].host = host;
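Two behaviors return here: each group's request is again affined to its CPU mask (PM_QOS_REQ_AFFINE_CORES) instead of to the CPU servicing the UFS interrupt, and the unvote path again uses pm_qos_update_request_timeout(), which holds the vote and lets the PM QoS core drop it to PM_QOS_DEFAULT_VALUE on its own once the timeout lapses. A sketch of the timed-unvote pattern (variable names assumed):

pm_qos_update_request_timeout(&req, latency_us, 10 * USEC_PER_MSEC);
/* ~10 ms later, absent a new vote, the core cancels the request
 * itself; no explicit update to PM_QOS_DEFAULT_VALUE is needed. */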
2 changes: 1 addition & 1 deletion include/linux/pm_qos.h
@@ -73,7 +73,7 @@ enum pm_qos_req_type {

struct pm_qos_request {
enum pm_qos_req_type type;
atomic_t cpus_affine;
struct cpumask cpus_affine;
#ifdef CONFIG_SMP
uint32_t irq;
/* Internal structure members */
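With cpus_affine restored to a struct cpumask, a driver-side request takes this shape (a minimal usage sketch under the restored layout; values invented):

static struct pm_qos_request req;

req.type = PM_QOS_REQ_AFFINE_CORES;
cpumask_copy(&req.cpus_affine, cpumask_of(0));   /* CPU0 only */
pm_qos_add_request(&req, PM_QOS_CPU_DMA_LATENCY, 100);
/* ... latency-sensitive work ... */
pm_qos_remove_request(&req);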
