From 785e7f8b40b2d96dcac95e7a13d5f8c75014bbd2 Mon Sep 17 00:00:00 2001
From: Qubitium <Qubitium@modelcloud.ai>
Date: Fri, 26 Sep 2025 00:31:21 +0000
Subject: [PATCH 1/3] fix cuda thread ctx

Signed-off-by: Qubitium <Qubitium@modelcloud.ai>
---
 gptqmodel/looper/gptq_processor.py |  2 +-
 gptqmodel/looper/module_looper.py  | 10 ++++++++++
 gptqmodel/utils/model.py           |  2 +-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/gptqmodel/looper/gptq_processor.py b/gptqmodel/looper/gptq_processor.py
index 1dc55034a..c4750837b 100644
--- a/gptqmodel/looper/gptq_processor.py
+++ b/gptqmodel/looper/gptq_processor.py
@@ -21,7 +21,7 @@
 from ..utils.logger import setup_logger
 from ..utils.model import create_quant_module, find_modules, move_to, pack_model, pack_module
 from ..utils.offload import undo_offload_to_disk
-from ..utils.torch import torch_streamCtx, torch_sync
+from ..utils.torch import torch_streamCtx, torch_sync, HAS_CUDA
 
 log = setup_logger()
 lock = threading.Lock()
diff --git a/gptqmodel/looper/module_looper.py b/gptqmodel/looper/module_looper.py
index 4206237f5..e2062546d 100644
--- a/gptqmodel/looper/module_looper.py
+++ b/gptqmodel/looper/module_looper.py
@@ -451,6 +451,11 @@ def loop(self, auto_gc=True, calibration_enable_gpu_cache=True, buffered_fwd=Fal
                             futures = []
 
                             def process_module(name, m):
+                                # prevent cuda sync memory ctx bugs
+                                m_device = get_device(m)
+                                if HAS_CUDA and m_device is not None and m_device.type == "cuda":
+                                    torch.cuda.set_device(module.weight.device)
+
                                 processor.process(module=m, auto_gc=auto_gc)
                                 return name, m
 
@@ -544,6 +549,11 @@ def process_module(name, m):
                     for reverse_p in reversed(self.processors):
                         for name in processed_subset:
                             def finalize_module(module):
+                                # prevent cuda sync memory ctx bugs
+                                m_device = get_device(module)
+                                if HAS_CUDA and m_device is not None and m_device.type == "cuda":
+                                    torch.cuda.set_device(module.weight.device)
+
                                 reverse_p.submodule_finalize(module, self.gptq_model)
 
                                 # checking for disk offloading
diff --git a/gptqmodel/utils/model.py b/gptqmodel/utils/model.py
index 3d0310d9c..6ad6735be 100644
--- a/gptqmodel/utils/model.py
+++ b/gptqmodel/utils/model.py
@@ -76,7 +76,7 @@ def recurse_setattr(module, name, value):
         recurse_setattr(getattr(module, name), rest, value)
 
 
-def get_device(obj: torch.Tensor | nn.Module):
+def get_device(obj: torch.Tensor | nn.Module) -> torch.device:
     if isinstance(obj, torch.Tensor):
         return obj.device
 

From 9185d96b3552b04e2f9e1cb12b6d5cf58624c96a Mon Sep 17 00:00:00 2001
From: Qubitium <Qubitium@modelcloud.ai>
Date: Fri, 26 Sep 2025 00:32:49 +0000
Subject: [PATCH 2/3] import

Signed-off-by: Qubitium <Qubitium@modelcloud.ai>
---
 gptqmodel/looper/module_looper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gptqmodel/looper/module_looper.py b/gptqmodel/looper/module_looper.py
index e2062546d..b87e370b4 100644
--- a/gptqmodel/looper/module_looper.py
+++ b/gptqmodel/looper/module_looper.py
@@ -28,7 +28,7 @@
 from ..utils.model import find_modules, get_device, get_module, get_module_by_name_prefix, move_to, nested_move_to
 from ..utils.offload import offload_to_disk
 from ..utils.structure import print_module_tree
-from ..utils.torch import (ALL_DEVICES, CPU, DEFAULT_BALANCE_STRATEGY, META, BalanceStrategy,
+from ..utils.torch import (HAS_CUDA, ALL_DEVICES, CPU, DEFAULT_BALANCE_STRATEGY, META, BalanceStrategy,
                            device_next, device_next_reset, torch_empty_cache, torch_sync)
 from .awq_processor import AWQProcessor
 

From 0b52143d11d190927f924b7a843ef0f065739b64 Mon Sep 17 00:00:00 2001
From: Qubitium <Qubitium@modelcloud.ai>
Date: Fri, 26 Sep 2025 00:41:29 +0000
Subject: [PATCH 3/3] fix param/buffers debug tree indent

Signed-off-by: Qubitium <Qubitium@modelcloud.ai>
---
 gptqmodel/looper/gptq_processor.py |  2 +-
 gptqmodel/looper/module_looper.py  |  2 +-
 gptqmodel/utils/structure.py       | 96 +++++++++++++++++++++++-------
 3 files changed, 76 insertions(+), 24 deletions(-)

diff --git a/gptqmodel/looper/gptq_processor.py b/gptqmodel/looper/gptq_processor.py
index c4750837b..0dc8eecf1 100644
--- a/gptqmodel/looper/gptq_processor.py
+++ b/gptqmodel/looper/gptq_processor.py
@@ -21,7 +21,7 @@
 from ..utils.logger import setup_logger
 from ..utils.model import create_quant_module, find_modules, move_to, pack_model, pack_module
 from ..utils.offload import undo_offload_to_disk
-from ..utils.torch import torch_streamCtx, torch_sync, HAS_CUDA
+from ..utils.torch import HAS_CUDA, torch_streamCtx, torch_sync
 
 log = setup_logger()
 lock = threading.Lock()
diff --git a/gptqmodel/looper/module_looper.py b/gptqmodel/looper/module_looper.py
index b87e370b4..1f50eacda 100644
--- a/gptqmodel/looper/module_looper.py
+++ b/gptqmodel/looper/module_looper.py
@@ -28,7 +28,7 @@
 from ..utils.model import find_modules, get_device, get_module, get_module_by_name_prefix, move_to, nested_move_to
 from ..utils.offload import offload_to_disk
 from ..utils.structure import print_module_tree
-from ..utils.torch import (HAS_CUDA, ALL_DEVICES, CPU, DEFAULT_BALANCE_STRATEGY, META, BalanceStrategy,
+from ..utils.torch import (ALL_DEVICES, CPU, DEFAULT_BALANCE_STRATEGY, HAS_CUDA, META, BalanceStrategy,
                            device_next, device_next_reset, torch_empty_cache, torch_sync)
 from .awq_processor import AWQProcessor
 
diff --git a/gptqmodel/utils/structure.py b/gptqmodel/utils/structure.py
index 331f8237b..be7c5e4d9 100644
--- a/gptqmodel/utils/structure.py
+++ b/gptqmodel/utils/structure.py
@@ -212,7 +212,24 @@ def print_module_tree(
     experts_regex: str = r"(^|\.)experts($|\.)",
     experts_show: int = 1,
 ):
-    _ = re.compile(filter_regex) if filter_regex else None  # reserved for future use
+    """
+    Pretty-print a module tree with sizes, devices, dtypes, and optional param/buffer details.
+    Each depth uses a distinct color for better readability.
+    """
+
+    # Color palette per depth (cycles if deeper)
+    DEPTH_COLORS = [
+        "\033[36m",  # cyan
+        "\033[33m",  # yellow
+        "\033[35m",  # magenta
+        "\033[32m",  # green
+        "\033[34m",  # blue
+    ]
+
+    def depth_color(depth: int) -> str:
+        return DEPTH_COLORS[depth % len(DEPTH_COLORS)]
+
+    _ = re.compile(filter_regex) if filter_regex else None
     experts_name_re = re.compile(experts_regex) if collapse_experts else None
     seen: Set[int] = set()
 
@@ -220,28 +237,51 @@ def print_module_tree(
     total_b = sum(b.numel() for b in model.buffers())
 
     def should_collapse(qual_name: str, container: nn.Module) -> bool:
-        if not experts_name_re: return False
-        if not experts_name_re.search(qual_name): return False
-        if not isinstance(container, (nn.ModuleList, nn.Sequential)): return False
+        if not experts_name_re:
+            return False
+        if not experts_name_re.search(qual_name):
+            return False
+        if not isinstance(container, (nn.ModuleList, nn.Sequential)):
+            return False
         names = [n for n, _ in container.named_children()]
-        if not names: return False
+        if not names:
+            return False
         return all(n.isdigit() for n in names) and len(names) > max(0, experts_show)
 
+    def _format_line(prefix: str, trunk: str, qual_name: str, mod: nn.Module,
+                     show_counts: bool, color: bool, depth: int) -> str:
+        cls = mod.__class__.__name__
+        left = _maybe(prefix + trunk, FG_GRAY, color=color)
+        # Apply depth-based color for the name
+        name = _maybe(qual_name, depth_color(depth), color=color)
+        klass = _maybe(cls, DIM, color=color)
+        if show_counts:
+            p, b = _counts_for_module(mod)
+            counts = _maybe(f"(P={human_count(p)} B={human_count(b)})", FG_YELLOW, color=color)
+            return f"{left}{name}: {klass}  {counts}"
+        else:
+            return f"{left}{name}: {klass}"
+
     def rec(mod: nn.Module, name: str, depth: int, prefix: str, is_last: bool):
-        if max_depth is not None and depth > max_depth: return
+        if max_depth is not None and depth > max_depth:
+            return
         mod_id = id(mod)
         shared = "" if mod_id not in seen else "  ↩ shared ref"
         seen.add(mod_id)
+
         trunk = "└─ " if is_last else "├─ "
-        line = _format_line(prefix, trunk, name, mod, show_counts=True, color=color)
+        line = _format_line(prefix, trunk, name, mod, show_counts=True, color=color, depth=depth)
         print(line + " " + _annotate(mod, color=color) + shared)
-        if shared: return
+        if shared:
+            return
 
         indent = prefix + ("   " if is_last else "│  ")
+        param_indent = indent + ("   " if is_last else "│  ")
+
         if show_all:
-            _print_params(indent, mod, include_buffers=True, color=color)
+            _print_params(param_indent, mod, include_buffers=True, color=color)
         elif show_params or show_buffers:
-            _print_params(indent, mod, include_buffers=show_buffers, color=color)
+            _print_params(param_indent, mod, include_buffers=show_buffers, color=color)
 
         children = list(mod.named_children())
         n = len(children)
@@ -249,8 +289,10 @@ def rec(mod: nn.Module, name: str, depth: int, prefix: str, is_last: bool):
             last = (i == n - 1)
             child_prefix = prefix + ("   " if is_last else "│  ")
             display_name = f"{name}.{child_name}" if name else child_name
+
             if should_collapse(display_name, child):
-                line2 = _format_line(child_prefix, "└─ " if last else "├─ ", display_name, child, True, color)
+                line2 = _format_line(child_prefix, "└─ " if last else "├─ ",
+                                     display_name, child, True, color, depth+1)
                 print(line2 + " " + _annotate(child, color=color))
                 sub_children = list(child.named_children())
                 total_k = len(sub_children)
@@ -259,26 +301,36 @@ def rec(mod: nn.Module, name: str, depth: int, prefix: str, is_last: bool):
                     sub_last = (j == k_show - 1) and (k_show == total_k)
                     sub_prefix = child_prefix + ("   " if last else "│  ")
                     sub_trunk = "└─ " if sub_last else "├─ "
-                    line3 = _format_line(sub_prefix, sub_trunk, f"{display_name}.{sub_name}", sub_mod, True, color)
+                    line3 = _format_line(sub_prefix, sub_trunk,
+                                         f"{display_name}.{sub_name}",
+                                         sub_mod, True, color, depth+2)
                     print(line3 + " " + _annotate(sub_mod, color=color))
-                    rec(sub_mod, f"{display_name}.{sub_name}", depth + 2, child_prefix + ("   " if last else "│  "), sub_last)
-                if k_show < total_k:
+                    rec(sub_mod, f"{display_name}.{sub_name}",
+                        depth + 2, child_prefix + ("   " if last else "│  "), sub_last)
+                if k_show < total_k and total_k > 0:
                     p_one, b_one = _param_summary(sub_children[0][1], recurse=True)
-                    collapsed = f"• … collapsed (repeats {k_show}..{total_k-1}, per-expert P={human_count(p_one)} B={human_count(b_one)})"
+                    collapsed = (
+                        f"• … collapsed (repeats {k_show}..{total_k-1}, "
+                        f"per-expert P={human_count(p_one)} B={human_count(b_one)})"
+                    )
                     print(_maybe(child_prefix + ("   " if last else "│  ") + collapsed, DIM, color=color))
                 continue
             rec(child, display_name, depth + 1, child_prefix, last)
 
-    print(_format_line("", "", root_name, model, show_counts=True, color=color) + " " + _annotate(model, color=color))
-    root_indent = "   "
+    # Print root
+    print(_format_line("", "", root_name, model, show_counts=True, color=color, depth=0)
+          + " " + _annotate(model, color=color))
+
+    root_trunk_indent = "   "
+    root_param_indent = root_trunk_indent + "   "
+
     if show_all:
-        _print_params(root_indent, model, include_buffers=True, color=color)
+        _print_params(root_param_indent, model, include_buffers=True, color=color)
     elif show_params or show_buffers:
-        _print_params(root_indent, model, include_buffers=show_buffers, color=color)
+        _print_params(root_param_indent, model, include_buffers=show_buffers, color=color)
 
-    children_root = list(model.named_children())
-    for i, (child_name, child) in enumerate(children_root):
-        last = (i == len(children_root) - 1)
+    for i, (child_name, child) in enumerate(model.named_children()):
+        last = (i == len(list(model.named_children())) - 1)
         rec(child, f"{root_name}.{child_name}", 1, "", last)
 
     print("\nTotal parameters:", human_count(total_p), " | Total buffers:", human_count(total_b))