From 888edcec17325201e403156d80e0f01a2e13e168 Mon Sep 17 00:00:00 2001
From: Qubitium <Qubitium@modelcloud.ai>
Date: Tue, 30 Sep 2025 07:30:26 +0000
Subject: [PATCH 1/3] fix auto gc janitor thread polling with a timeout instead of blocking on its gc event

Signed-off-by: Qubitium <Qubitium@modelcloud.ai>
---
 gptqmodel/utils/threadx.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/gptqmodel/utils/threadx.py b/gptqmodel/utils/threadx.py
index 2a7a1899f..989752ecc 100644
--- a/gptqmodel/utils/threadx.py
+++ b/gptqmodel/utils/threadx.py
@@ -1169,20 +1169,16 @@ def _janitor_loop(self):
             empty_cache() using the LIVE attribute if callable, otherwise the
             HARD COPY captured at import time.
         """
-        WAIT_TIMEOUT = 0.1
         while True:
-            if DEBUG_ON: log.debug("DP-Janitor: waiting for trigger…")
-            if self._stop_event.is_set():
-                if DEBUG_ON: log.debug("DP-Janitor: stop event set before wait; exiting")
-                break
-
-            triggered = self._gc_event.wait(timeout=WAIT_TIMEOUT)
-            if not triggered:
-                continue
+            if DEBUG_ON:
+                log.debug("DP-Janitor: waiting for trigger…")
 
+            self._gc_event.wait()
             self._gc_event.clear()
+
             if self._stop_event.is_set():
-                if DEBUG_ON: log.debug("DP-Janitor: stop event set after trigger; exiting")
+                if DEBUG_ON:
+                    log.debug("DP-Janitor: stop event set; exiting")
                 break
 
             # Debounce window: absorb additional triggers before deciding.
@@ -1201,7 +1197,7 @@ def _janitor_loop(self):
                 while self._auto_gc_disable_count > 0 and not self._stop_event.is_set():
                     if DEBUG_ON:
                         log.debug("DP-Janitor: auto-GC disabled; waiting…")
-                    self._auto_gc_disable_cv.wait(timeout=WAIT_TIMEOUT)
+                    self._auto_gc_disable_cv.wait()
                 if self._stop_event.is_set():
                     if DEBUG_ON: log.debug("DP-Janitor: stop event set during auto-GC wait; exiting")
                     break

From 633c1859d0a49cca6cea70a0b476513bfcedf902 Mon Sep 17 00:00:00 2001
From: Qubitium <Qubitium@modelcloud.ai>
Date: Tue, 30 Sep 2025 07:37:23 +0000
Subject: [PATCH 2/3] unify DEBUG=1 check

Signed-off-by: Qubitium <Qubitium@modelcloud.ai>
---
 gptqmodel/__init__.py             | 2 ++
 gptqmodel/looper/module_looper.py | 9 +++++++++
 gptqmodel/utils/threadx.py        | 3 +--
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/gptqmodel/__init__.py b/gptqmodel/__init__.py
index 258080baa..40b1e6e57 100644
--- a/gptqmodel/__init__.py
+++ b/gptqmodel/__init__.py
@@ -5,6 +5,8 @@
 
 import os
 
+DEBUG_ON = str(os.environ.get("DEBUG", "")).lower() in ("1", "true", "yes", "on")
+
 from .models import GPTQModel, get_best_device
 from .quantization import BaseQuantizeConfig, QuantizeConfig
 from .utils import BACKEND
diff --git a/gptqmodel/looper/module_looper.py b/gptqmodel/looper/module_looper.py
index 3b8b08bb8..6e4a44268 100644
--- a/gptqmodel/looper/module_looper.py
+++ b/gptqmodel/looper/module_looper.py
@@ -33,6 +33,7 @@
 from ..models._const import SUPPORTS_MODULE_TYPES, DEVICE
 from ..nn_modules.hooked_linear import (STOP_FORWARD_EXCEPTION, HookedLinear,
                                         StopForward, replace_module_with_hooked_legacy)
+from .. import DEBUG_ON
 from ..utils.attn_mask import apply_keep_mask_bt, normalize_seq_mask
 from ..utils.device import get_device, get_device_new
 from ..utils.logger import setup_logger
@@ -193,18 +194,26 @@ def _select_forward_devices(self, base_device: Optional[torch.device]) -> List[t
 
     def _clone_module_for_devices(self, module: torch.nn.Module, devices: List[torch.device]) -> Dict[torch.device, torch.nn.Module]:
         clones: Dict[torch.device, torch.nn.Module] = {}
+        module_label = getattr(module, "full_name", module.__class__.__name__)
+        clone_timings = [] if DEBUG_ON else None
 
         cleared_attrs = self._clear_non_picklable_state(module)
         try:
             for dev in devices:
+                start_ts = time.perf_counter() if DEBUG_ON else None
                 replica = copy.deepcopy(module)
                 replica = replica.to(dev)
                 replica.eval()
                 _rehome_module_to_device(replica, dev, move_parameters=False, move_buffers=True)
                 self._clear_non_picklable_state(replica)
                 clones[dev] = replica
+                if clone_timings is not None and start_ts is not None:
+                    clone_timings.append((dev, time.perf_counter() - start_ts))
         finally:
             self._restore_non_picklable_state(cleared_attrs)
+        if clone_timings:
+            timing_str = ", ".join(f"{str(dev)}={duration * 1000:.2f}ms" for dev, duration in clone_timings)
+            log.debug(f"ModuleLooper: deepcopy {module_label} -> {timing_str}")
         return clones
 
     def _clear_non_picklable_state(self, module: torch.nn.Module):
diff --git a/gptqmodel/utils/threadx.py b/gptqmodel/utils/threadx.py
index 989752ecc..be0f1af08 100644
--- a/gptqmodel/utils/threadx.py
+++ b/gptqmodel/utils/threadx.py
@@ -6,7 +6,6 @@
 from __future__ import annotations
 
 import contextlib
-import os
 import queue
 import threading
 import time
@@ -15,6 +14,7 @@
 
 import torch
 
+from .. import DEBUG_ON
 from ..utils.logger import setup_logger
 
 
@@ -22,7 +22,6 @@
 
 # Debug logging is very chatty and can alter timings subtly in tests.
 # We gate all extra diagnostics behind the DEBUG env (1/true/yes/on).
-DEBUG_ON = str(os.environ.get("DEBUG", "")).lower() in ("1", "true", "yes", "on")
 
 # DeviceLike allows ergonomic call sites: 'cuda:0', 0, torch.device('cuda', 0), etc.
 DeviceLike = Union[str, int, torch.device]

From 377ce8cfc8c3e8ac74061647e191af7d7b53524f Mon Sep 17 00:00:00 2001
From: Qubitium <Qubitium@modelcloud.ai>
Date: Tue, 30 Sep 2025 07:40:19 +0000
Subject: [PATCH 3/3] format: add blank line after imports in gptqmodel/__init__.py

Signed-off-by: Qubitium <Qubitium@modelcloud.ai>
---
 gptqmodel/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gptqmodel/__init__.py b/gptqmodel/__init__.py
index 40b1e6e57..373d7642b 100644
--- a/gptqmodel/__init__.py
+++ b/gptqmodel/__init__.py
@@ -5,6 +5,7 @@
 
 import os
 
+
 DEBUG_ON = str(os.environ.get("DEBUG", "")).lower() in ("1", "true", "yes", "on")
 
 from .models import GPTQModel, get_best_device