From 7fef7e4c957936298bdcbdb9ce0edbe5e5809e3b Mon Sep 17 00:00:00 2001
From: LRL2-ModelCloud
Date: Tue, 28 Oct 2025 13:24:16 +0800
Subject: [PATCH 1/4] check group_size

---
 gptqmodel/looper/tensorparallel_weight_processor.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/gptqmodel/looper/tensorparallel_weight_processor.py b/gptqmodel/looper/tensorparallel_weight_processor.py
index f88ed0d93..0d99412bd 100644
--- a/gptqmodel/looper/tensorparallel_weight_processor.py
+++ b/gptqmodel/looper/tensorparallel_weight_processor.py
@@ -38,9 +38,13 @@ def __init__(self, *args, **kwargs):
         kwargs.setdefault("require_fwd", False)
         kwargs.setdefault("fwd_after_process", False)
         super().__init__(*args, **kwargs)
+        self.qcfg = kwargs.pop("qcfg", None)
 
         self._target_multiple = math.lcm(*self._TP_TARGETS)
 
+        if self.qcfg and hasattr(self.qcfg, 'group_size') and self.qcfg.group_size > 0:
+            self._target_multiple = math.lcm(self._target_multiple, self.qcfg.group_size)
+
     def preprocess(self, module: NamedModule):  # pragma: no cover - simple hook
         # The processor operates on every eligible module; no setup required.
         pass
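
Note on the computation PATCH 1 introduces: the padding target is the LCM of the supported tensor-parallel shard counts, widened to the quantization group size when one is set, so the padded dimension stays a multiple of both. A minimal standalone sketch, assuming the default _TP_TARGETS of (2, 4, 8) noted in the test comments of PATCH 3 (the real class reads group_size from its QuantizeConfig rather than taking an int):

import math

TP_TARGETS = (2, 4, 8)  # default _TP_TARGETS, per the test comments in PATCH 3

def padding_target(group_size: int) -> int:
    # Every TP world size in TP_TARGETS must divide the padded dimension,
    # so the base target is their LCM (= 8).
    target = math.lcm(*TP_TARGETS)
    # PATCH 1 folds a positive group size into the LCM as well, presumably
    # so quantization groups stay aligned after padding.
    if group_size > 0:
        target = math.lcm(target, group_size)
    return target

assert padding_target(-1) == 8     # group_size <= 0: TP targets alone
assert padding_target(128) == 128  # math.lcm(8, 128)
assert padding_target(12) == 24    # math.lcm(8, 12); 12 is not a power of 2
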
From 0d0db7336368970b408a1396387070e28db862fc Mon Sep 17 00:00:00 2001
From: LRL2-ModelCloud
Date: Tue, 28 Oct 2025 13:37:24 +0800
Subject: [PATCH 2/4] use BACKEND.TORCH

---
 tests/models/test_longllama.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/models/test_longllama.py b/tests/models/test_longllama.py
index 4b6a23711..0b0cb7a3e 100644
--- a/tests/models/test_longllama.py
+++ b/tests/models/test_longllama.py
@@ -6,6 +6,8 @@
 from model_test import ModelTest
 
 from gptqmodel.utils.eval import EVAL
+from gptqmodel.utils.backend import BACKEND
+
 
 
 class TestLongLlama(ModelTest):
@@ -19,6 +21,7 @@ class TestLongLlama(ModelTest):
     }
     USE_VLLM = False
     USE_FLASH_ATTN = False
+    LOAD_BACKEND = BACKEND.TORCH
 
     def test_longllama(self):
         self.quant_lm_eval()

From 46996daa39bb16a792189fc1c43ee1e5ac6b5f97 Mon Sep 17 00:00:00 2001
From: LRL2-ModelCloud
Date: Tue, 28 Oct 2025 14:29:12 +0800
Subject: [PATCH 3/4] add unit test

---
 .../looper/tensorparallel_weight_processor.py |   4 +-
 tests/test_tensorparallel_weight_processor.py | 124 ++++++++++++++++++
 2 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/gptqmodel/looper/tensorparallel_weight_processor.py b/gptqmodel/looper/tensorparallel_weight_processor.py
index 0d99412bd..e4b0270c0 100644
--- a/gptqmodel/looper/tensorparallel_weight_processor.py
+++ b/gptqmodel/looper/tensorparallel_weight_processor.py
@@ -38,7 +38,9 @@ def __init__(self, *args, **kwargs):
         kwargs.setdefault("require_fwd", False)
         kwargs.setdefault("fwd_after_process", False)
         super().__init__(*args, **kwargs)
-        self.qcfg = kwargs.pop("qcfg", None)
+        qcfg_from_kwargs = kwargs.pop("qcfg", None)
+        if qcfg_from_kwargs is not None:
+            self.qcfg = qcfg_from_kwargs
 
         self._target_multiple = math.lcm(*self._TP_TARGETS)
 
diff --git a/tests/test_tensorparallel_weight_processor.py b/tests/test_tensorparallel_weight_processor.py
index f64507721..6e9be1603 100644
--- a/tests/test_tensorparallel_weight_processor.py
+++ b/tests/test_tensorparallel_weight_processor.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 
+import math
 import torch
 
 from gptqmodel.looper.named_module import NamedModule
@@ -62,3 +63,126 @@ def test_tensorparallel_pre_padding_applies_zero_pad_metadata():
     gptq.free()
 
     assert "tp_pad_info" not in named.state
+
+
+def test_tensorparallel_weight_processor_with_positive_group_size():
+    """Test that _target_multiple is correctly calculated when group_size > 0."""
+    linear = torch.nn.Linear(10, 7, bias=False)
+    named = NamedModule(linear, name="proj", full_name="layer.0.proj", layer_index=0)
+
+    qcfg = QuantizeConfig(bits=4, mock_quantization=True)
+    qcfg.group_size = 128  # Positive group_size
+    qcfg.desc_act = False
+    qcfg.act_group_aware = False
+
+    calibration_stub = [{"input_ids": torch.ones((1, 1), dtype=torch.long)}]
+
+    preprocessor = TensorParallelWeightProcessor(
+        tokenizer=None,
+        qcfg=qcfg,
+        calibration=calibration_stub,
+        prepare_dataset_func=_noop_prepare_dataset,
+        calibration_concat_size=None,
+        calibration_sort=None,
+        batch_size=1,
+        logger_board="",
+    )
+
+    # Verify that _target_multiple includes group_size in LCM calculation
+    # Default TP_TARGETS = (2, 4, 8), so math.lcm(2, 4, 8) = 8
+    # With group_size = 128, math.lcm(8, 128) = 128
+    expected_target_multiple = math.lcm(8, 128)
+    assert preprocessor._target_multiple == expected_target_multiple
+    assert preprocessor._target_multiple == 128
+
+
+def test_tensorparallel_weight_processor_with_negative_group_size():
+    """Test that _target_multiple uses default value when group_size < 0."""
+    linear = torch.nn.Linear(10, 7, bias=False)
+    named = NamedModule(linear, name="proj", full_name="layer.0.proj", layer_index=0)
+
+    qcfg = QuantizeConfig(bits=4, mock_quantization=True)
+    qcfg.group_size = -1  # Negative group_size
+    qcfg.desc_act = False
+    qcfg.act_group_aware = False
+
+    calibration_stub = [{"input_ids": torch.ones((1, 1), dtype=torch.long)}]
+
+    preprocessor = TensorParallelWeightProcessor(
+        tokenizer=None,
+        qcfg=qcfg,
+        calibration=calibration_stub,
+        prepare_dataset_func=_noop_prepare_dataset,
+        calibration_concat_size=None,
+        calibration_sort=None,
+        batch_size=1,
+        logger_board="",
+    )
+
+    # Verify that _target_multiple only uses TP_TARGETS when group_size < 0
+    # Default TP_TARGETS = (2, 4, 8), so math.lcm(2, 4, 8) = 8
+    expected_target_multiple = math.lcm(2, 4, 8)
+    assert preprocessor._target_multiple == expected_target_multiple
+    assert preprocessor._target_multiple == 8
+
+
+def test_tensorparallel_weight_processor_group_size_lcm_calculation():
+    """Test LCM calculation with various group_size values."""
+    linear = torch.nn.Linear(10, 7, bias=False)
+    named = NamedModule(linear, name="proj", full_name="layer.0.proj", layer_index=0)
+
+    calibration_stub = [{"input_ids": torch.ones((1, 1), dtype=torch.long)}]
+
+    # Test with group_size = 32
+    qcfg = QuantizeConfig(bits=4, mock_quantization=True)
+    qcfg.group_size = 32
+    qcfg.desc_act = False
+    qcfg.act_group_aware = False
+
+    preprocessor = TensorParallelWeightProcessor(
+        tokenizer=None,
+        qcfg=qcfg,
+        calibration=calibration_stub,
+        prepare_dataset_func=_noop_prepare_dataset,
+        calibration_concat_size=None,
+        calibration_sort=None,
+        batch_size=1,
+        logger_board="",
+    )
+
+    # math.lcm(8, 32) = 32
+    assert preprocessor._target_multiple == 32
+
+    # Test with group_size = 64
+    qcfg.group_size = 64
+    preprocessor = TensorParallelWeightProcessor(
+        tokenizer=None,
+        qcfg=qcfg,
+        calibration=calibration_stub,
+        prepare_dataset_func=_noop_prepare_dataset,
+        calibration_concat_size=None,
+        calibration_sort=None,
+        batch_size=1,
+        logger_board="",
+    )
+
+    # math.lcm(8, 64) = 64
+    assert preprocessor._target_multiple == 64
+
+    # Test with group_size = 12 (not a power of 2)
+    qcfg.group_size = 12
+    preprocessor = TensorParallelWeightProcessor(
+        tokenizer=None,
+        qcfg=qcfg,
+        calibration=calibration_stub,
+        prepare_dataset_func=_noop_prepare_dataset,
+        calibration_concat_size=None,
+        calibration_sort=None,
+        batch_size=1,
+        logger_board="",
+    )
+
+    # math.lcm(8, 12) = 24
+    expected_lcm = math.lcm(8, 12)
+    assert preprocessor._target_multiple == expected_lcm
+    assert preprocessor._target_multiple == 24
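
Note on the qcfg guard in PATCH 3: the unconditional self.qcfg = kwargs.pop("qcfg", None) from PATCH 1 resets qcfg to None whenever the kwarg is absent, which can discard a qcfg the parent constructor may already have set; the replacement only overrides when a value was actually supplied. A minimal sketch of the pattern, using a hypothetical Base as a stand-in for the real parent class (which is not shown in this series):

class Base:
    def __init__(self, **kwargs):
        # Hypothetical stand-in: the real parent may derive a qcfg of its own.
        self.qcfg = kwargs.get("qcfg") or "base-derived-qcfg"

class Processor(Base):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # PATCH 3: keep the inherited qcfg unless the caller passed a replacement.
        qcfg_from_kwargs = kwargs.pop("qcfg", None)
        if qcfg_from_kwargs is not None:
            self.qcfg = qcfg_from_kwargs

assert Processor().qcfg == "base-derived-qcfg"  # inherited value survives
assert Processor(qcfg="mine").qcfg == "mine"    # explicit kwarg still wins
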
From 2cde337b21dcf8aa684c61a29854644d9f49a626 Mon Sep 17 00:00:00 2001
From: LRL2-ModelCloud
Date: Tue, 28 Oct 2025 14:30:33 +0800
Subject: [PATCH 4/4] cleanup

---
 tests/test_tensorparallel_weight_processor.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/test_tensorparallel_weight_processor.py b/tests/test_tensorparallel_weight_processor.py
index 6e9be1603..9a3a80f27 100644
--- a/tests/test_tensorparallel_weight_processor.py
+++ b/tests/test_tensorparallel_weight_processor.py
@@ -35,7 +35,6 @@ def test_tensorparallel_pre_padding_applies_zero_pad_metadata():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     preprocessor.process(named)
@@ -85,7 +84,6 @@ def test_tensorparallel_weight_processor_with_positive_group_size():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     # Verify that _target_multiple includes group_size in LCM calculation
@@ -116,7 +114,6 @@ def test_tensorparallel_weight_processor_with_negative_group_size():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     # Verify that _target_multiple only uses TP_TARGETS when group_size < 0
@@ -147,7 +144,6 @@ def test_tensorparallel_weight_processor_group_size_lcm_calculation():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     # math.lcm(8, 32) = 32
@@ -163,7 +159,6 @@ def test_tensorparallel_weight_processor_group_size_lcm_calculation():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     # math.lcm(8, 64) = 64
@@ -179,7 +174,6 @@ def test_tensorparallel_weight_processor_group_size_lcm_calculation():
         calibration_concat_size=None,
         calibration_sort=None,
         batch_size=1,
-        logger_board="",
     )
 
     # math.lcm(8, 12) = 24