From 8f63aa45b97b41803cf137ce56b59abc90482d3d Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Wed, 15 Oct 2025 04:49:10 +0000
Subject: [PATCH] make act_group_aware default true

Signed-off-by: Qubitium
---
Review notes (ignored by `git am`; not part of the commit):

* Layout: line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Hunk line counts match every
  `@@` header, but the position of blank context lines and the indent depth
  in the two processor hunks are assumptions. Regenerate with
  `git format-patch` from the commit before applying.
* Validation gap: `_resolve_activation_ordering` only acts when `desc_act`
  was explicitly passed as truthy. On the processor paths, a per-module
  dynamic override of `act_group_aware=True` on top of a config whose
  `desc_act` is already True leaves both flags True; only `__post_init__`
  has the follow-up "validate hybrid act order" check. Confirm whether the
  processors should reject that combination too.
* Import: `gptq.py` now annotates `act_group_aware: Optional[bool]`. The
  hunk does not show the import block; confirm `Optional` is imported there.
* Nit: the `isinstance(..., bool)` branches in `__post_init__` yield the same
  value as the `bool(...)` fallback and could be folded into it.

 gptqmodel/looper/gptq_processor.py       |  9 ++++-
 gptqmodel/looper/qqq_processor.py        |  9 ++++-
 gptqmodel/quantization/config.py         | 50 +++++++++++++++++++++++-
 gptqmodel/quantization/gptq.py           |  6 ++-
 tests/test_quantize_config_activation.py | 28 +++++++++++++
 5 files changed, 96 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_quantize_config_activation.py

diff --git a/gptqmodel/looper/gptq_processor.py b/gptqmodel/looper/gptq_processor.py
index a069618d9..d63a3d62c 100644
--- a/gptqmodel/looper/gptq_processor.py
+++ b/gptqmodel/looper/gptq_processor.py
@@ -75,12 +75,19 @@ def preprocess(self, module: NamedModule, fail_safe: bool):
             qcfg_clone.mse = self.qcfg.dynamic_get(module.full_name, "mse", qcfg_clone.mse)
 
             qcfg_clone.group_size = self.qcfg.dynamic_get(module.full_name, "group_size", qcfg_clone.group_size)
-            qcfg_clone.desc_act = self.qcfg.dynamic_get(module.full_name, "desc_act", qcfg_clone.desc_act)
+            desc_act_override = self.qcfg.dynamic_get(module.full_name, "desc_act", None)
+            if desc_act_override is not None:
+                qcfg_clone.desc_act = desc_act_override
+            act_group_aware_override = self.qcfg.dynamic_get(module.full_name, "act_group_aware", None)
+            if act_group_aware_override is not None:
+                qcfg_clone.act_group_aware = act_group_aware_override
             qcfg_clone.damp_percent = self.qcfg.dynamic_get(module.full_name, "damp_percent", qcfg_clone.damp_percent)
             qcfg_clone.static_groups = self.qcfg.dynamic_get(module.full_name, "static_groups", qcfg_clone.static_groups)
             qcfg_clone.v2 = self.qcfg.dynamic_get(module.full_name, "v2", qcfg_clone.v2)
             qcfg_clone.v2_alpha = self.qcfg.dynamic_get(module.full_name, "v2_alpha", qcfg_clone.v2_alpha)
 
+            qcfg_clone._resolve_activation_ordering(desc_act_override, act_group_aware_override)
+
         # store last used qcfg_dynamic
         self.qcfg_dynamic = qcfg_clone
 
diff --git a/gptqmodel/looper/qqq_processor.py b/gptqmodel/looper/qqq_processor.py
index 53d958248..ecbcd2489 100644
--- a/gptqmodel/looper/qqq_processor.py
+++ b/gptqmodel/looper/qqq_processor.py
@@ -69,10 +69,17 @@ def preprocess(self, module: NamedModule):
             qcfg_clone.mse = self.qcfg.dynamic_get(module.full_name, "mse", qcfg_clone.mse)
 
             qcfg_clone.group_size = self.qcfg.dynamic_get(module.full_name, "group_size", qcfg_clone.group_size)
-            qcfg_clone.desc_act = self.qcfg.dynamic_get(module.full_name, "desc_act", qcfg_clone.desc_act)
+            desc_act_override = self.qcfg.dynamic_get(module.full_name, "desc_act", None)
+            if desc_act_override is not None:
+                qcfg_clone.desc_act = desc_act_override
+            act_group_aware_override = self.qcfg.dynamic_get(module.full_name, "act_group_aware", None)
+            if act_group_aware_override is not None:
+                qcfg_clone.act_group_aware = act_group_aware_override
             qcfg_clone.damp_percent = self.qcfg.dynamic_get(module.full_name, "damp_percent", qcfg_clone.damp_percent)
             qcfg_clone.static_groups = self.qcfg.dynamic_get(module.full_name, "static_groups", qcfg_clone.static_groups)
 
+            qcfg_clone._resolve_activation_ordering(desc_act_override, act_group_aware_override)
+
         tmp = QQQ(module=module, qcfg=qcfg_clone)
 
         if self.qcfg.mse > 0.0:
diff --git a/gptqmodel/quantization/config.py b/gptqmodel/quantization/config.py
index 708f71593..368b81df7 100644
--- a/gptqmodel/quantization/config.py
+++ b/gptqmodel/quantization/config.py
@@ -163,8 +163,8 @@ class QuantizeConfig():
     damp_percent: float = field(default=None)
     damp_auto_increment: float = field(default=None)
 
-    desc_act: bool = field(default=True)
-    act_group_aware: bool = field(default=False)
+    desc_act: Optional[bool] = field(default=None)
+    act_group_aware: Optional[bool] = field(default=None)
     static_groups: bool = field(default=False)
     sym: bool = field(default=True)
     true_sequential: bool = field(default=True)
@@ -321,6 +321,28 @@ def __post_init__(self):
         if self.hessian_chunk_bytes <= 0:
             raise ValueError("QuantizeConfig: `hessian_chunk_bytes` must be a positive integer amount of bytes.")
 
+        # resolve activation ordering compatibility and defaults
+        desc_act_user_value = self.desc_act
+        act_group_aware_user_value = self.act_group_aware
+
+        if desc_act_user_value is None:
+            # GPTQ defaults to higher quality ordering disabled, others retain legacy default
+            self.desc_act = False if self.quant_method == METHOD.GPTQ else True
+        elif isinstance(desc_act_user_value, bool):
+            self.desc_act = desc_act_user_value
+        else:
+            self.desc_act = bool(desc_act_user_value)
+
+        if act_group_aware_user_value is None:
+            # auto-enable for GPTQ unless user explicitly disables it
+            self.act_group_aware = self.quant_method == METHOD.GPTQ
+        elif isinstance(act_group_aware_user_value, bool):
+            self.act_group_aware = act_group_aware_user_value
+        else:
+            self.act_group_aware = bool(act_group_aware_user_value)
+
+        self._resolve_activation_ordering(desc_act_user_value, act_group_aware_user_value)
+
         # validate hybrid act order
         if self.act_group_aware and self.desc_act:
             raise ValueError("QuantizeConfig:: `act_group_aware` == `True` requires `desc_act` == `False`.")
@@ -352,6 +374,30 @@ def extension_set(self, key: str, value: Any):
 
         self.adapter[key.lower()] = value
 
+    def _resolve_activation_ordering(
+        self,
+        desc_act_user_value: Optional[bool],
+        act_group_aware_user_value: Optional[bool],
+    ) -> None:
+        """Normalize defaults and enforce compatibility between desc_act and act_group_aware."""
+
+        desc_act_enabled_by_user = bool(desc_act_user_value) if desc_act_user_value is not None else False
+        act_group_aware_enabled_by_user = (
+            bool(act_group_aware_user_value) if act_group_aware_user_value is not None else False
+        )
+
+        if desc_act_enabled_by_user and act_group_aware_user_value is not None and act_group_aware_enabled_by_user:
+            raise ValueError(
+                "QuantizeConfig:: `act_group_aware` == `True` requires `desc_act` == `False` when both are explicitly set."
+            )
+
+        if desc_act_enabled_by_user and act_group_aware_user_value is None and self.act_group_aware:
+            log.warn(
+                "QuantizeConfig: `desc_act=True` automatically disables `act_group_aware`. "
+                "Set `act_group_aware=False` explicitly to silence this warning."
+            )
+            self.act_group_aware = False
+
     def extension_get(self, key: str) -> Any:
         return self.adapter.get(key.lower()) if self.adapter else None
 
diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py
index ef8a957d1..6fbc37b74 100644
--- a/gptqmodel/quantization/gptq.py
+++ b/gptqmodel/quantization/gptq.py
@@ -448,13 +448,15 @@ def hf_quantize(
         group_size=-1,
         actorder=False,
         static_groups=False,
-        act_group_aware=False,
+        act_group_aware: Optional[bool] = None,
     ):
         self.qcfg.group_size = group_size
         self.qcfg.damp_percent = percdamp
         self.qcfg.damp_auto_increment = damp_auto_increment
         self.qcfg.desc_act = actorder
-        self.qcfg.act_group_aware = act_group_aware
+        if act_group_aware is not None:
+            self.qcfg.act_group_aware = act_group_aware
+        self.qcfg._resolve_activation_ordering(actorder, act_group_aware)
         self.qcfg.static_groups = static_groups
         (Q, scale, zero, g_idx, duration, avg_loss, damp_percent, nsamples) = self.quantize(blocksize=blocksize)
         self.module.weight.data = Q
diff --git a/tests/test_quantize_config_activation.py b/tests/test_quantize_config_activation.py
new file mode 100644
index 000000000..1e2b27123
--- /dev/null
+++ b/tests/test_quantize_config_activation.py
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+import pytest
+
+from gptqmodel.quantization import METHOD, QuantizeConfig
+
+
+def test_act_group_aware_enabled_by_default_for_gptq():
+    cfg = QuantizeConfig()
+    assert cfg.quant_method == METHOD.GPTQ
+    assert cfg.act_group_aware is True
+    assert cfg.desc_act is False
+
+
+def test_desc_act_enabling_auto_disables_act_group_aware(capfd):
+    cfg = QuantizeConfig(desc_act=True)
+    captured = capfd.readouterr()
+    assert cfg.act_group_aware is False
+    combined_output = f"{captured.out}\n{captured.err}".lower()
+    assert "automatically disables" in combined_output
+
+
+def test_explicit_desc_act_and_act_group_aware_raises():
+    with pytest.raises(ValueError, match="act_group_aware"):  # partial match
+        QuantizeConfig(desc_act=True, act_group_aware=True)