From 888edcec17325201e403156d80e0f01a2e13e168 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Tue, 30 Sep 2025 07:30:26 +0000 Subject: [PATCH 1/3] fix auto gc thread not blocking on signal queue Signed-off-by: Qubitium --- gptqmodel/utils/threadx.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/gptqmodel/utils/threadx.py b/gptqmodel/utils/threadx.py index 2a7a1899f..989752ecc 100644 --- a/gptqmodel/utils/threadx.py +++ b/gptqmodel/utils/threadx.py @@ -1169,20 +1169,16 @@ def _janitor_loop(self): empty_cache() using the LIVE attribute if callable, otherwise the HARD COPY captured at import time. """ - WAIT_TIMEOUT = 0.1 while True: - if DEBUG_ON: log.debug("DP-Janitor: waiting for trigger…") - if self._stop_event.is_set(): - if DEBUG_ON: log.debug("DP-Janitor: stop event set before wait; exiting") - break - - triggered = self._gc_event.wait(timeout=WAIT_TIMEOUT) - if not triggered: - continue + if DEBUG_ON: + log.debug("DP-Janitor: waiting for trigger…") + self._gc_event.wait() self._gc_event.clear() + if self._stop_event.is_set(): - if DEBUG_ON: log.debug("DP-Janitor: stop event set after trigger; exiting") + if DEBUG_ON: + log.debug("DP-Janitor: stop event set; exiting") break # Debounce window: absorb additional triggers before deciding. @@ -1201,7 +1197,7 @@ def _janitor_loop(self): while self._auto_gc_disable_count > 0 and not self._stop_event.is_set(): if DEBUG_ON: log.debug("DP-Janitor: auto-GC disabled; waiting…") - self._auto_gc_disable_cv.wait(timeout=WAIT_TIMEOUT) + self._auto_gc_disable_cv.wait() if self._stop_event.is_set(): if DEBUG_ON: log.debug("DP-Janitor: stop event set during auto-GC wait; exiting") break From 633c1859d0a49cca6cea70a0b476513bfcedf902 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Tue, 30 Sep 2025 07:37:23 +0000 Subject: [PATCH 2/3] unify DEBUG=1 check Signed-off-by: Qubitium --- gptqmodel/__init__.py | 2 ++ gptqmodel/looper/module_looper.py | 9 +++++++++ gptqmodel/utils/threadx.py | 3 +-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/gptqmodel/__init__.py b/gptqmodel/__init__.py index 258080baa..40b1e6e57 100644 --- a/gptqmodel/__init__.py +++ b/gptqmodel/__init__.py @@ -5,6 +5,8 @@ import os +DEBUG_ON = str(os.environ.get("DEBUG", "")).lower() in ("1", "true", "yes", "on") + from .models import GPTQModel, get_best_device from .quantization import BaseQuantizeConfig, QuantizeConfig from .utils import BACKEND diff --git a/gptqmodel/looper/module_looper.py b/gptqmodel/looper/module_looper.py index 3b8b08bb8..6e4a44268 100644 --- a/gptqmodel/looper/module_looper.py +++ b/gptqmodel/looper/module_looper.py @@ -33,6 +33,7 @@ from ..models._const import SUPPORTS_MODULE_TYPES, DEVICE from ..nn_modules.hooked_linear import (STOP_FORWARD_EXCEPTION, HookedLinear, StopForward, replace_module_with_hooked_legacy) +from .. import DEBUG_ON from ..utils.attn_mask import apply_keep_mask_bt, normalize_seq_mask from ..utils.device import get_device, get_device_new from ..utils.logger import setup_logger @@ -193,18 +194,26 @@ def _select_forward_devices(self, base_device: Optional[torch.device]) -> List[t def _clone_module_for_devices(self, module: torch.nn.Module, devices: List[torch.device]) -> Dict[torch.device, torch.nn.Module]: clones: Dict[torch.device, torch.nn.Module] = {} + module_label = getattr(module, "full_name", module.__class__.__name__) + clone_timings = [] if DEBUG_ON else None cleared_attrs = self._clear_non_picklable_state(module) try: for dev in devices: + start_ts = time.perf_counter() if DEBUG_ON else None replica = copy.deepcopy(module) replica = replica.to(dev) replica.eval() _rehome_module_to_device(replica, dev, move_parameters=False, move_buffers=True) self._clear_non_picklable_state(replica) clones[dev] = replica + if clone_timings is not None and start_ts is not None: + clone_timings.append((dev, time.perf_counter() - start_ts)) finally: self._restore_non_picklable_state(cleared_attrs) + if clone_timings: + timing_str = ", ".join(f"{str(dev)}={duration * 1000:.2f}ms" for dev, duration in clone_timings) + log.debug(f"ModuleLooper: deepcopy {module_label} -> {timing_str}") return clones def _clear_non_picklable_state(self, module: torch.nn.Module): diff --git a/gptqmodel/utils/threadx.py b/gptqmodel/utils/threadx.py index 989752ecc..be0f1af08 100644 --- a/gptqmodel/utils/threadx.py +++ b/gptqmodel/utils/threadx.py @@ -6,7 +6,6 @@ from __future__ import annotations import contextlib -import os import queue import threading import time @@ -15,6 +14,7 @@ import torch +from .. import DEBUG_ON from ..utils.logger import setup_logger @@ -22,7 +22,6 @@ # Debug logging is very chatty and can alter timings subtly in tests. # We gate all extra diagnostics behind the DEBUG env (1/true/yes/on). -DEBUG_ON = str(os.environ.get("DEBUG", "")).lower() in ("1", "true", "yes", "on") # DeviceLike allows ergonomic call sites: 'cuda:0', 0, torch.device('cuda', 0), etc. DeviceLike = Union[str, int, torch.device] From 377ce8cfc8c3e8ac74061647e191af7d7b53524f Mon Sep 17 00:00:00 2001 From: Qubitium Date: Tue, 30 Sep 2025 07:40:19 +0000 Subject: [PATCH 3/3] format Signed-off-by: Qubitium --- gptqmodel/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gptqmodel/__init__.py b/gptqmodel/__init__.py index 40b1e6e57..373d7642b 100644 --- a/gptqmodel/__init__.py +++ b/gptqmodel/__init__.py @@ -5,6 +5,7 @@ import os + DEBUG_ON = str(os.environ.get("DEBUG", "")).lower() in ("1", "true", "yes", "on") from .models import GPTQModel, get_best_device