From 7fef7e4c957936298bdcbdb9ce0edbe5e5809e3b Mon Sep 17 00:00:00 2001
From: LRL2-ModelCloud
Date: Tue, 28 Oct 2025 13:24:16 +0800
Subject: [PATCH 1/4] check group_size

---
 gptqmodel/looper/tensorparallel_weight_processor.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/gptqmodel/looper/tensorparallel_weight_processor.py b/gptqmodel/looper/tensorparallel_weight_processor.py
index f88ed0d93..0d99412bd 100644
--- a/gptqmodel/looper/tensorparallel_weight_processor.py
+++ b/gptqmodel/looper/tensorparallel_weight_processor.py
@@ -38,9 +38,13 @@ def __init__(self, *args, **kwargs):
         kwargs.setdefault("require_fwd", False)
         kwargs.setdefault("fwd_after_process", False)
         super().__init__(*args, **kwargs)
+        self.qcfg = kwargs.pop("qcfg", None)
 
         self._target_multiple = math.lcm(*self._TP_TARGETS)
 
+        if self.qcfg and hasattr(self.qcfg, 'group_size') and self.qcfg.group_size > 0:
+            self._target_multiple = math.lcm(self._target_multiple, self.qcfg.group_size)
+
     def preprocess(self, module: NamedModule):  # pragma: no cover - simple hook
         # The processor operates on every eligible module; no setup required.
         pass
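
Note on the computation PATCH 1 introduces: the padding target is the LCM of the supported tensor-parallel shard counts, widened to the quantization group size when one is set, so the padded dimension stays a multiple of both. A minimal standalone sketch, assuming the default _TP_TARGETS of (2, 4, 8) noted in the test comments of PATCH 3 (the real class reads group_size from its QuantizeConfig rather than taking an int):

import math

TP_TARGETS = (2, 4, 8)  # default _TP_TARGETS, per the test comments in PATCH 3

def padding_target(group_size: int) -> int:
    # Every TP world size in TP_TARGETS must divide the padded dimension,
    # so the base target is their LCM (= 8).
    target = math.lcm(*TP_TARGETS)
    # PATCH 1 folds a positive group size into the LCM as well, presumably
    # so quantization groups stay aligned after padding.
    if group_size > 0:
        target = math.lcm(target, group_size)
    return target

assert padding_target(-1) == 8     # group_size <= 0: TP targets alone
assert padding_target(128) == 128  # math.lcm(8, 128)
assert padding_target(12) == 24    # math.lcm(8, 12); 12 is not a power of 2
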
From 0d0db7336368970b408a1396387070e28db862fc Mon Sep 17 00:00:00 2001
From: LRL2-ModelCloud
Date: Tue, 28 Oct 2025 13:37:24 +0800
Subject: [PATCH 2/4] use BACKEND.TORCH

---
 tests/models/test_longllama.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/models/test_longllama.py b/tests/models/test_longllama.py
index 4b6a23711..0b0cb7a3e 100644
--- a/tests/models/test_longllama.py
+++ b/tests/models/test_longllama.py
@@ -6,6 +6,8 @@
 from model_test import ModelTest
 
 from gptqmodel.utils.eval import EVAL
+from gptqmodel.utils.backend import BACKEND
+
 
 
 class TestLongLlama(ModelTest):
@@ -19,6 +21,7 @@ class TestLongLlama(ModelTest):
     }
     USE_VLLM = False
     USE_FLASH_ATTN = False
+    LOAD_BACKEND = BACKEND.TORCH
 
     def test_longllama(self):
         self.quant_lm_eval()

From 46996daa39bb16a792189fc1c43ee1e5ac6b5f97 Mon Sep 17 00:00:00 2001
From: LRL2-ModelCloud
Date: Tue, 28 Oct 2025 14:29:12 +0800
Subject: [PATCH 3/4] add unit test

---
 .../looper/tensorparallel_weight_processor.py |   4 +-
 tests/test_tensorparallel_weight_processor.py | 124 ++++++++++++++++++
 2 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/gptqmodel/looper/tensorparallel_weight_processor.py b/gptqmodel/looper/tensorparallel_weight_processor.py
index 0d99412bd..e4b0270c0 100644
--- a/gptqmodel/looper/tensorparallel_weight_processor.py
+++ b/gptqmodel/looper/tensorparallel_weight_processor.py
@@ -38,7 +38,9 @@ def __init__(self, *args, **kwargs):
         kwargs.setdefault("require_fwd", False)
         kwargs.setdefault("fwd_after_process", False)
         super().__init__(*args, **kwargs)
-        self.qcfg = kwargs.pop("qcfg", None)
+        qcfg_from_kwargs = kwargs.pop("qcfg", None)
+        if qcfg_from_kwargs is not None:
+            self.qcfg = qcfg_from_kwargs
 
         self._target_multiple = math.lcm(*self._TP_TARGETS)
 
diff --git a/tests/test_tensorparallel_weight_processor.py b/tests/test_tensorparallel_weight_processor.py
index f64507721..6e9be1603 100644
--- a/tests/test_tensorparallel_weight_processor.py
+++ b/tests/test_tensorparallel_weight_processor.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 
+import math
 import torch
 
 from gptqmodel.looper.named_module import NamedModule
@@ -62,3 +63,126 @@ def test_tensorparallel_pre_padding_applies_zero_pad_metadata():
     gptq.free()
 
     assert "tp_pad_info" not in named.state
+
+
+def test_tensorparallel_weight_processor_with_positive_group_size():
+    """Test that _target_multiple is correctly calculated when group_size > 0."""
+    linear = torch.nn.Linear(10, 7, bias=False)
+    named = NamedModule(linear, name="proj", full_name="layer.0.proj", layer_index=0)
+
+    qcfg = QuantizeConfig(bits=4, mock_quantization=True)
+    qcfg.group_size = 128  # Positive group_size
+    qcfg.desc_act = False
+    qcfg.act_group_aware = False
+
+    calibration_stub = [{"input_ids": torch.ones((1, 1), dtype=torch.long)}]
+
+    preprocessor = TensorParallelWeightProcessor(
+        tokenizer=None,
+        qcfg=qcfg,
+        calibration=calibration_stub,
+        prepare_dataset_func=_noop_prepare_dataset,
+        calibration_concat_size=None,
+        calibration_sort=None,
+        batch_size=1,
+        logger_board="",
+    )
+
+    # Verify that _target_multiple includes group_size in LCM calculation
+    # Default TP_TARGETS = (2, 4, 8), so math.lcm(2, 4, 8) = 8
+    # With group_size = 128, math.lcm(8, 128) = 128
+    expected_target_multiple = math.lcm(8, 128)
+    assert preprocessor._target_multiple == expected_target_multiple
+    assert preprocessor._target_multiple == 128
+
+
+def test_tensorparallel_weight_processor_with_negative_group_size():
+    """Test that _target_multiple uses default value when group_size < 0."""
+    linear = torch.nn.Linear(10, 7, bias=False)
+    named = NamedModule(linear, name="proj", full_name="layer.0.proj", layer_index=0)
+
+    qcfg = QuantizeConfig(bits=4, mock_quantization=True)
+    qcfg.group_size = -1  # Negative group_size
+    qcfg.desc_act = False
+    qcfg.act_group_aware = False
+
+    calibration_stub = [{"input_ids": torch.ones((1, 1), dtype=torch.long)}]
+
+    preprocessor = TensorParallelWeightProcessor(
+        tokenizer=None,
+        qcfg=qcfg,
+        calibration=calibration_stub,
+        prepare_dataset_func=_noop_prepare_dataset,
+        calibration_concat_size=None,
+        calibration_sort=None,
+        batch_size=1,
+        logger_board="",
+    )
+
+    # Verify that _target_multiple only uses TP_TARGETS when group_size < 0
+    # Default TP_TARGETS = (2, 4, 8), so math.lcm(2, 4, 8) = 8
+    expected_target_multiple = math.lcm(2, 4, 8)
+    assert preprocessor._target_multiple == expected_target_multiple
+    assert preprocessor._target_multiple == 8
+
+
+def test_tensorparallel_weight_processor_group_size_lcm_calculation():
+    """Test LCM calculation with various group_size values."""
+    linear = torch.nn.Linear(10, 7, bias=False)
+    named = NamedModule(linear, name="proj", full_name="layer.0.proj", layer_index=0)
+
+    calibration_stub = [{"input_ids": torch.ones((1, 1), dtype=torch.long)}]
+
+    # Test with group_size = 32
+    qcfg = QuantizeConfig(bits=4, mock_quantization=True)
+    qcfg.group_size = 32
+    qcfg.desc_act = False
+    qcfg.act_group_aware = False
+
+    preprocessor = TensorParallelWeightProcessor(
+        tokenizer=None,
+        qcfg=qcfg,
+        calibration=calibration_stub,
+        prepare_dataset_func=_noop_prepare_dataset,
+        calibration_concat_size=None,
+        calibration_sort=None,
+        batch_size=1,
+        logger_board="",
+    )
+
+    # math.lcm(8, 32) = 32
+    assert preprocessor._target_multiple == 32
+
+    # Test with group_size = 64
+    qcfg.group_size = 64
+    preprocessor = TensorParallelWeightProcessor(
+        tokenizer=None,
+        qcfg=qcfg,
+        calibration=calibration_stub,
+        prepare_dataset_func=_noop_prepare_dataset,
+        calibration_concat_size=None,
+        calibration_sort=None,
+        batch_size=1,
+        logger_board="",
+    )
+
+    # math.lcm(8, 64) = 64
+    assert preprocessor._target_multiple == 64
+
+    # Test with group_size = 12 (not a power of 2)
+    qcfg.group_size = 12
+    preprocessor = TensorParallelWeightProcessor(
+        tokenizer=None,
+        qcfg=qcfg,
+        calibration=calibration_stub,
+        prepare_dataset_func=_noop_prepare_dataset,
+        calibration_concat_size=None,
+        calibration_sort=None,
+        batch_size=1,
+        logger_board="",
+    )
+
+    # math.lcm(8, 12) = 24
+    expected_lcm = math.lcm(8, 12)
+    assert preprocessor._target_multiple == expected_lcm
+    assert preprocessor._target_multiple == 24
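
Note on the qcfg guard in PATCH 3: the unconditional self.qcfg = kwargs.pop("qcfg", None) from PATCH 1 resets qcfg to None whenever the kwarg is absent, which can discard a qcfg the parent constructor may already have set; the replacement only overrides when a value was actually supplied. A minimal sketch of the pattern, using a hypothetical Base as a stand-in for the real parent class (which is not shown in this series):

class Base:
    def __init__(self, **kwargs):
        # Hypothetical stand-in: the real parent may derive a qcfg of its own.
        self.qcfg = kwargs.get("qcfg") or "base-derived-qcfg"

class Processor(Base):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # PATCH 3: keep the inherited qcfg unless the caller passed a replacement.
        qcfg_from_kwargs = kwargs.pop("qcfg", None)
        if qcfg_from_kwargs is not None:
            self.qcfg = qcfg_from_kwargs

assert Processor().qcfg == "base-derived-qcfg"  # inherited value survives
assert Processor(qcfg="mine").qcfg == "mine"    # explicit kwarg still wins
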
From 2cde337b21dcf8aa684c61a29854644d9f49a626 Mon Sep 17 00:00:00 2001
From: LRL2-ModelCloud
Date: Tue, 28 Oct 2025 14:30:33 +0800
Subject: [PATCH 4/4] cleanup

---
 tests/test_tensorparallel_weight_processor.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/test_tensorparallel_weight_processor.py b/tests/test_tensorparallel_weight_processor.py
index 6e9be1603..9a3a80f27 100644
--- a/tests/test_tensorparallel_weight_processor.py
+++ b/tests/test_tensorparallel_weight_processor.py
@@ -35,7 +35,6 @@ def test_tensorparallel_pre_padding_applies_zero_pad_metadata():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     preprocessor.process(named)
@@ -85,7 +84,6 @@ def test_tensorparallel_weight_processor_with_positive_group_size():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     # Verify that _target_multiple includes group_size in LCM calculation
@@ -116,7 +114,6 @@ def test_tensorparallel_weight_processor_with_negative_group_size():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     # Verify that _target_multiple only uses TP_TARGETS when group_size < 0
@@ -147,7 +144,6 @@ def test_tensorparallel_weight_processor_group_size_lcm_calculation():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     # math.lcm(8, 32) = 32
@@ -163,7 +159,6 @@ def test_tensorparallel_weight_processor_group_size_lcm_calculation():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     # math.lcm(8, 64) = 64
@@ -179,7 +174,6 @@ def test_tensorparallel_weight_processor_group_size_lcm_calculation():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     # math.lcm(8, 12) = 24