From 6062b6ad84aa97c688931135e9a11c884dbd5ddd Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Fri, 17 Oct 2025 10:37:32 +0800
Subject: [PATCH] use original process_batch

Signed-off-by: ZX-ModelCloud
---
 gptqmodel/quantization/gptqv2.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/gptqmodel/quantization/gptqv2.py b/gptqmodel/quantization/gptqv2.py
index b40969c9f..77deea81f 100644
--- a/gptqmodel/quantization/gptqv2.py
+++ b/gptqmodel/quantization/gptqv2.py
@@ -33,6 +33,11 @@ def __init__(self, module: NamedModule, qcfg: Optional[QuantizeConfig] = None):
 
         self.native_inps = module.state.pop(NATIVE_INPUTS_STATE_KEY)
 
+    def add_batch(self, inp: torch.Tensor, out: torch.Tensor, batch_index: Optional[int] = None):
+        with self.lock:
+            self.fwd_counter += 1
+            self.process_batch(inp)
+
     # TODO FIXME: using v1 new process_batch kills v2 quantization quality, use original process_batch
     # sample counter based on batch request # instead of batched token #.
     # def process_batch(self, inp):
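
Note on the TODO above: the retained original process_batch grows its sample counter per batch request rather than per batched token. The sketch below is only a minimal, hypothetical illustration of that accumulation pattern, not the gptqv2.py implementation; the class name, `columns`, `H`, and `nsamples` are illustrative assumptions.

```python
import torch

class BatchCountingAccumulator:
    """Hypothetical sketch: a Hessian-style accumulator whose sample
    counter grows with the number of process_batch calls (batch requests),
    not with the number of tokens contained in each batch."""

    def __init__(self, columns: int):
        self.columns = columns
        self.H = torch.zeros((columns, columns), dtype=torch.float32)
        self.nsamples = 0  # counts batch requests, not tokens (assumption)

    def process_batch(self, inp: torch.Tensor) -> None:
        # Collapse any leading batch/sequence dims to (tokens, columns).
        x = inp.reshape(-1, self.columns).float()
        new_samples = 1  # one call == one sample under this interpretation
        # Rescale the running accumulator so earlier batches keep their weight.
        self.H *= self.nsamples / (self.nsamples + new_samples)
        self.nsamples += new_samples
        # Fold in the new batch: H == (2 / nsamples) * sum over batches of X^T X.
        x = x * (2.0 / self.nsamples) ** 0.5
        self.H += x.T @ x
```

By contrast, a token-based counter would use new_samples = x.shape[0], which reweights short versus long batches; per the TODO, switching to the v1-style counting degrades v2 quantization quality, hence this patch routes add_batch back to the original process_batch.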