gptqmodel/looper/gptq_processor.py (9 changes: 8 additions & 1 deletion)
@@ -75,12 +75,19 @@ def preprocess(self, module: NamedModule, fail_safe: bool):
         qcfg_clone.mse = self.qcfg.dynamic_get(module.full_name, "mse", qcfg_clone.mse)

         qcfg_clone.group_size = self.qcfg.dynamic_get(module.full_name, "group_size", qcfg_clone.group_size)
-        qcfg_clone.desc_act = self.qcfg.dynamic_get(module.full_name, "desc_act", qcfg_clone.desc_act)
+        desc_act_override = self.qcfg.dynamic_get(module.full_name, "desc_act", None)
+        if desc_act_override is not None:
+            qcfg_clone.desc_act = desc_act_override
+        act_group_aware_override = self.qcfg.dynamic_get(module.full_name, "act_group_aware", None)
+        if act_group_aware_override is not None:
+            qcfg_clone.act_group_aware = act_group_aware_override
         qcfg_clone.damp_percent = self.qcfg.dynamic_get(module.full_name, "damp_percent", qcfg_clone.damp_percent)
         qcfg_clone.static_groups = self.qcfg.dynamic_get(module.full_name, "static_groups", qcfg_clone.static_groups)
         qcfg_clone.v2 = self.qcfg.dynamic_get(module.full_name, "v2", qcfg_clone.v2)
         qcfg_clone.v2_alpha = self.qcfg.dynamic_get(module.full_name, "v2_alpha", qcfg_clone.v2_alpha)

+        qcfg_clone._resolve_activation_ordering(desc_act_override, act_group_aware_override)
+
         # store last used qcfg_dynamic
         self.qcfg_dynamic = qcfg_clone
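The processor now asks `dynamic_get` for a `None` default instead of the current clone value, so only a per-module rule that explicitly sets `desc_act` or `act_group_aware` touches the clone; everything else keeps the defaults resolved in `QuantizeConfig`. A minimal standalone sketch of that sentinel pattern (the helper name is hypothetical, not part of this PR):

from typing import Optional

def apply_override(current: bool, override: Optional[bool]) -> bool:
    # None means "no per-module rule set this key"; only an explicit bool wins.
    return current if override is None else override

assert apply_override(True, None) is True     # unset rule keeps the resolved default
assert apply_override(True, False) is False   # an explicit per-module disable still applies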
gptqmodel/looper/qqq_processor.py (9 changes: 8 additions & 1 deletion)
@@ -69,10 +69,17 @@ def preprocess(self, module: NamedModule):
         qcfg_clone.mse = self.qcfg.dynamic_get(module.full_name, "mse", qcfg_clone.mse)

         qcfg_clone.group_size = self.qcfg.dynamic_get(module.full_name, "group_size", qcfg_clone.group_size)
-        qcfg_clone.desc_act = self.qcfg.dynamic_get(module.full_name, "desc_act", qcfg_clone.desc_act)
+        desc_act_override = self.qcfg.dynamic_get(module.full_name, "desc_act", None)
+        if desc_act_override is not None:
+            qcfg_clone.desc_act = desc_act_override
+        act_group_aware_override = self.qcfg.dynamic_get(module.full_name, "act_group_aware", None)
+        if act_group_aware_override is not None:
+            qcfg_clone.act_group_aware = act_group_aware_override
         qcfg_clone.damp_percent = self.qcfg.dynamic_get(module.full_name, "damp_percent", qcfg_clone.damp_percent)
         qcfg_clone.static_groups = self.qcfg.dynamic_get(module.full_name, "static_groups", qcfg_clone.static_groups)

+        qcfg_clone._resolve_activation_ordering(desc_act_override, act_group_aware_override)
+
         tmp = QQQ(module=module, qcfg=qcfg_clone)

         if self.qcfg.mse > 0.0:
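The QQQ processor receives the identical change, so per-module `desc_act` / `act_group_aware` overrides resolve the same way regardless of which quantizer handles a module.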
gptqmodel/quantization/config.py (50 changes: 48 additions & 2 deletions)
@@ -163,8 +163,8 @@ class QuantizeConfig():
     damp_percent: float = field(default=None)
     damp_auto_increment: float = field(default=None)

-    desc_act: bool = field(default=True)
-    act_group_aware: bool = field(default=False)
+    desc_act: Optional[bool] = field(default=None)
+    act_group_aware: Optional[bool] = field(default=None)
     static_groups: bool = field(default=False)
     sym: bool = field(default=True)
     true_sequential: bool = field(default=True)
@@ -321,6 +321,28 @@ def __post_init__(self):
         if self.hessian_chunk_bytes <= 0:
             raise ValueError("QuantizeConfig: `hessian_chunk_bytes` must be a positive integer amount of bytes.")

+        # resolve activation ordering compatibility and defaults
+        desc_act_user_value = self.desc_act
+        act_group_aware_user_value = self.act_group_aware
+
+        if desc_act_user_value is None:
+            # GPTQ defaults to higher quality ordering disabled, others retain legacy default
+            self.desc_act = False if self.quant_method == METHOD.GPTQ else True
+        elif isinstance(desc_act_user_value, bool):
+            self.desc_act = desc_act_user_value
+        else:
+            self.desc_act = bool(desc_act_user_value)
+
+        if act_group_aware_user_value is None:
+            # auto-enable for GPTQ unless user explicitly disables it
+            self.act_group_aware = self.quant_method == METHOD.GPTQ
+        elif isinstance(act_group_aware_user_value, bool):
+            self.act_group_aware = act_group_aware_user_value
+        else:
+            self.act_group_aware = bool(act_group_aware_user_value)
+
+        self._resolve_activation_ordering(desc_act_user_value, act_group_aware_user_value)
+
         # validate hybrid act order
         if self.act_group_aware and self.desc_act:
             raise ValueError("QuantizeConfig:: `act_group_aware` == `True` requires `desc_act` == `False`.")
@@ -352,6 +374,30 @@ def extension_set(self, key: str, value: Any):

         self.adapter[key.lower()] = value

+    def _resolve_activation_ordering(
+        self,
+        desc_act_user_value: Optional[bool],
+        act_group_aware_user_value: Optional[bool],
+    ) -> None:
+        """Normalize defaults and enforce compatibility between desc_act and act_group_aware."""
+
+        desc_act_enabled_by_user = bool(desc_act_user_value) if desc_act_user_value is not None else False
+        act_group_aware_enabled_by_user = (
+            bool(act_group_aware_user_value) if act_group_aware_user_value is not None else False
+        )
+
+        if desc_act_enabled_by_user and act_group_aware_user_value is not None and act_group_aware_enabled_by_user:
+            raise ValueError(
+                "QuantizeConfig:: `act_group_aware` == `True` requires `desc_act` == `False` when both are explicitly set."
+            )
+
+        if desc_act_enabled_by_user and act_group_aware_user_value is None and self.act_group_aware:
+            log.warn(
+                "QuantizeConfig: `desc_act=True` automatically disables `act_group_aware`. "
+                "Set `act_group_aware=False` explicitly to silence this warning."
+            )
+            self.act_group_aware = False
+
     def extension_get(self, key: str) -> Any:
         return self.adapter.get(key.lower()) if self.adapter else None

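Taken together, the new `Optional` defaults plus `_resolve_activation_ordering` give the following observable behavior for GPTQ configs; this is a usage sketch that mirrors the new tests at the bottom of the PR rather than any additional API:

from gptqmodel.quantization import QuantizeConfig

cfg = QuantizeConfig()                # GPTQ defaults: act_group_aware=True, desc_act=False
assert cfg.act_group_aware and not cfg.desc_act

cfg = QuantizeConfig(desc_act=True)   # explicit desc_act wins; act_group_aware is downgraded with a warning
assert cfg.desc_act and not cfg.act_group_aware

try:
    QuantizeConfig(desc_act=True, act_group_aware=True)  # both explicitly True is rejected
except ValueError:
    pass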
gptqmodel/quantization/gptq.py (6 changes: 4 additions & 2 deletions)
@@ -448,13 +448,15 @@ def hf_quantize(
         group_size=-1,
         actorder=False,
         static_groups=False,
-        act_group_aware=False,
+        act_group_aware: Optional[bool] = None,
     ):
         self.qcfg.group_size = group_size
         self.qcfg.damp_percent = percdamp
         self.qcfg.damp_auto_increment = damp_auto_increment
         self.qcfg.desc_act = actorder
-        self.qcfg.act_group_aware = act_group_aware
+        if act_group_aware is not None:
+            self.qcfg.act_group_aware = act_group_aware
+        self.qcfg._resolve_activation_ordering(actorder, act_group_aware)
         self.qcfg.static_groups = static_groups
         (Q, scale, zero, g_idx, duration, avg_loss, damp_percent, nsamples) = self.quantize(blocksize=blocksize)
         self.module.weight.data = Q
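On the Hugging Face integration path, `act_group_aware` now defaults to `None`, so a caller that only passes `actorder=True` flows through the same `_resolve_activation_ordering` downgrade as `QuantizeConfig(desc_act=True)`, while explicitly passing both flags as `True` raises the same `ValueError`.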
tests/test_quantize_config_activation.py (28 changes: 28 additions & 0 deletions)
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+import pytest
+
+from gptqmodel.quantization import METHOD, QuantizeConfig
+
+
+def test_act_group_aware_enabled_by_default_for_gptq():
+    cfg = QuantizeConfig()
+    assert cfg.quant_method == METHOD.GPTQ
+    assert cfg.act_group_aware is True
+    assert cfg.desc_act is False
+
+
+def test_desc_act_enabling_auto_disables_act_group_aware(capfd):
+    cfg = QuantizeConfig(desc_act=True)
+    captured = capfd.readouterr()
+    assert cfg.act_group_aware is False
+    combined_output = f"{captured.out}\n{captured.err}".lower()
+    assert "automatically disables" in combined_output
+
+
+def test_explicit_desc_act_and_act_group_aware_raises():
+    with pytest.raises(ValueError, match="act_group_aware"):  # partial match
+        QuantizeConfig(desc_act=True, act_group_aware=True)
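Note that the second test captures console output via `capfd` because the downgrade is reported through the logger rather than an exception; the file can be run on its own with `pytest tests/test_quantize_config_activation.py`.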