We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c0d7622 commit 9e54a65 (Copy full SHA for 9e54a65)
vllm/model_executor/models/qwen3_next.py
@@ -148,9 +148,11 @@ def __init__(
148
149
def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
150
# GPTQ configs do not have a list of ignored modules, however AutoGPTQ
151
- # seems to avoid gate quantization.
152
- # See: https://huggingface.co/Qwen/Qwen3-30B-A3B-GPTQ-Int4
153
- if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
+ # seems to avoid gate quantization while AutoRound does.
+ if isinstance(
+ quant_config,
154
+ (GPTQConfig,
155
+ GPTQMarlinConfig)) and not quant_config.autoround_version:
156
return None
157
return quant_config
158
0 commit comments