From 0404c9a3a041f91f9f05fb5fe686c3850a1b5af2 Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Fri, 26 Sep 2025 01:19:33 +0000
Subject: [PATCH] remove prev thread fix, replaced by main changes

Signed-off-by: Qubitium
---
 gptqmodel/quantization/gptq.py | 47 +---------------------------------
 1 file changed, 1 insertion(+), 46 deletions(-)

diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py
index 37ef9d3e8..c5facd093 100644
--- a/gptqmodel/quantization/gptq.py
+++ b/gptqmodel/quantization/gptq.py
@@ -27,45 +27,6 @@
 
 log = setup_logger()
 
-# TODO: move this to a locking class
-# --------------------------------------------------------------------------------------
-# Per-device lock registry to guard device-specific critical sections (like tensor moves)
-# --------------------------------------------------------------------------------------
-_device_locks = {}  # {(device_type, index): threading.Lock()}
-_device_locks_guard = threading.Lock()  # guards the registry itself
-
-
-def _device_key(dev) -> tuple:
-    """
-    Normalize a device into a hashable (type, index) key.
-    Examples:
-        torch.device('cuda', 0) -> ('cuda', 0)
-        torch.device('xpu') -> ('xpu', -1)
-        'cuda:1' -> ('cuda', 1)
-        'cpu' -> ('cpu', -1)
-    """
-    if isinstance(dev, torch.device):
-        return (dev.type, dev.index if dev.index is not None else -1)
-    if isinstance(dev, str):
-        try:
-            d = torch.device(dev)
-            return _device_key(d)
-        except Exception:
-            return ("str", dev)  # last-resort string key
-    # Unknown type — stringify
-    return ("unknown", str(dev))
-
-
-def _get_device_lock(dev) -> threading.Lock:
-    key = _device_key(dev)
-    with _device_locks_guard:
-        lk = _device_locks.get(key)
-        if lk is None:
-            lk = threading.Lock()
-            _device_locks[key] = lk
-        return lk
-# --------------------------------------------------------------------------------------
-
 lock = threading.Lock()
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cudnn.allow_tf32 = False
@@ -616,13 +577,7 @@ def quantize(
         scale = torch.cat(scale, dim=1)
         zero = torch.cat(zero, dim=1)
 
-        target_device = self.module.weight.data.device
-
-        # limit one sync tensor move action per device due to cuda limits
-        if Q.device != target_device:
-            dev_lock = _get_device_lock(target_device)
-            with dev_lock:
-                Q = Q.to(device=target_device, non_blocking=False)
+        Q = Q.to(device=self.module.weight.data.device, non_blocking=False)
 
         duration = time.time() - start
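
For reference, below is a self-contained sketch of the per-device lock pattern this patch removes. The idea was to hand out one threading.Lock per (device_type, index) key so that only one synchronous tensor move targets a given device at a time; per the commit subject, equivalent serialization now lives in the main changes, so quantize() can call Q.to(...) directly. The guarded_move helper name is hypothetical and for illustration only; the registry mirrors the deleted _device_key/_get_device_lock code.

    import threading

    import torch

    _device_locks = {}                      # {(device_type, index): threading.Lock()}
    _device_locks_guard = threading.Lock()  # guards the registry itself

    def _device_key(dev) -> tuple:
        # Normalize a torch.device or string into a hashable (type, index) key,
        # using -1 when no index is given (e.g. torch.device('xpu')).
        if not isinstance(dev, torch.device):
            dev = torch.device(dev)
        return (dev.type, dev.index if dev.index is not None else -1)

    def _get_device_lock(dev) -> threading.Lock:
        # One lock per device; the guard lock protects the registry itself.
        with _device_locks_guard:
            return _device_locks.setdefault(_device_key(dev), threading.Lock())

    def guarded_move(t: torch.Tensor, target) -> torch.Tensor:
        # Pre-patch behavior: serialize synchronous moves onto `target`
        # so at most one blocking copy hits a device at a time.
        target = torch.device(target)
        if t.device != target:
            with _get_device_lock(target):
                t = t.to(device=target, non_blocking=False)
        return t

    if __name__ == "__main__":
        x = torch.zeros(2, 2)
        print(guarded_move(x, "cpu").device)  # already on cpu: no move, lock untouched

The trade-off the patch makes: the lock registry added bookkeeping and a global choke point per device, and once the caller (the main quantization path) already serializes moves, the unguarded one-liner is simpler and equivalent.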