ModelCloud · Qubitium · Oct 15, 2025 · Oct 15, 2025 · Oct 15, 2025 · Oct 15, 2025
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -4,6 +4,7 @@ recursive-include gptqmodel_ext/exllamav2 *.h *.cuh *.cu *.cpp
 recursive-include gptqmodel_ext/exllama_eora/eora *.h *.cuh *.cu *.cpp *.py
 recursive-include gptqmodel_ext/marlin *.h *.cuh *.cu *.cpp
 recursive-include gptqmodel_ext/qqq *.h *.cuh *.cu *.cpp
+include gptqmodel_ext/pack_block_cpu.cpp
 include gptqmodel_ext/marlin/generate_kernels.py
 recursive-exclude gptqmodel_ext __pycache__ *.pyc
 prune tests/

diff --git a/gptqmodel/nn_modules/qlinear/pack_block_ext.py b/gptqmodel/nn_modules/qlinear/pack_block_ext.py
@@ -4,64 +4,12 @@
 
 from __future__ import annotations
 
-import logging
-import os
-from pathlib import Path
-from typing import Optional, Tuple
+from typing import Tuple
 
 import torch
 from torch import Tensor
-from torch.utils.cpp_extension import load
 
-
-log = logging.getLogger(__name__)
-
-_EXTENSION = None
-_EXTENSION_INITIALISED = False
-
-
-def _load_extension() -> Optional[object]:
-    global _EXTENSION, _EXTENSION_INITIALISED
-    if hasattr(torch.ops.gptqmodel, "pack_block_cpu"):
-        _EXTENSION_INITIALISED = True
-        _EXTENSION = True
-        return _EXTENSION
-
-    if _EXTENSION_INITIALISED and _EXTENSION is not None:
-        return _EXTENSION
-
-    source_path = Path(__file__).resolve().parents[3] / "pack_block_cpu.cpp"
-    if not source_path.exists():
-        # Fallback to repository root/gptqmodel_ext
-        source_path = Path(__file__).resolve().parents[3] / "gptqmodel_ext" / "pack_block_cpu.cpp"
-    if not source_path.exists():
-        log.debug("pack_block_cpu extension source not found at %s", source_path)
-        _EXTENSION = None
-        _EXTENSION_INITIALISED = True
-        return None
-
-    extra_cflags = ["-O3", "-std=c++17"]
-    extra_ldflags = []
-
-    build_dir = os.environ.get("GPTQMODEL_EXT_BUILD", None)
-
-    try:
-        load(
-            name="gptqmodel_pack_block_cpu",
-            sources=[str(source_path)],
-            extra_cflags=extra_cflags,
-            extra_ldflags=extra_ldflags,
-            build_directory=build_dir,
-            verbose=False,
-            is_python_module=False,
-        )
-        log.debug("pack_block_cpu extension loaded from %s", source_path)
-        _EXTENSION = True
-    except Exception as exc:  # pragma: no cover - environment-specific
-        log.debug("pack_block_cpu extension build failed: %s", exc)
-        _EXTENSION = None
-    _EXTENSION_INITIALISED = True
-    return _EXTENSION
+from gptqmodel.utils.cpp import load_pack_block_extension
 
 
 def pack_block_cpu(
@@ -74,7 +22,7 @@ def pack_block_cpu(
     block_in: int,
     threads: int,
 ) -> Tuple[Tensor, Tensor]:
-    ext = _load_extension()
+    ext = load_pack_block_extension()
     if ext is None:
         raise RuntimeError("pack_block_cpu extension unavailable")
     return torch.ops.gptqmodel.pack_block_cpu(

diff --git a/gptqmodel/utils/cpp.py b/gptqmodel/utils/cpp.py
@@ -0,0 +1,72 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+from __future__ import annotations
+
+import logging
+import os
+from pathlib import Path
+from typing import Optional
+
+import torch
+from torch.utils.cpp_extension import load
+
+from .env import env_flag
+
+
+log = logging.getLogger(__name__)
+
+_PACK_BLOCK_EXTENSION: Optional[bool] = None  # True once the op is available; None if not loaded or the load failed
+_PACK_BLOCK_EXTENSION_INITIALISED = False  # set to True after the first load attempt finishes, whatever its outcome
+
+
+def load_pack_block_extension(*, verbose: bool = False) -> Optional[object]:
+    """Ensure the pack_block CPU extension is built and loaded.
+
+    The first outcome (success or failure) is cached in module globals, so a
+    missing source file or failed build is not re-attempted on every call.
+    ``verbose`` is forwarded to ``torch.utils.cpp_extension.load``; when falsy
+    it is replaced by ``env_flag("GPTQMODEL_EXT_VERBOSE", True)``. Returns
+    ``True`` if the op is registered or the build succeeds, else ``None``.
+    """
+    global _PACK_BLOCK_EXTENSION, _PACK_BLOCK_EXTENSION_INITIALISED
+    # The op may already be registered in this process; nothing to build then.
+    if hasattr(torch.ops.gptqmodel, "pack_block_cpu"):
+        _PACK_BLOCK_EXTENSION_INITIALISED = True
+        _PACK_BLOCK_EXTENSION = True
+        return _PACK_BLOCK_EXTENSION
+    # Reuse the cached outcome, including a cached failure (None).
+    if _PACK_BLOCK_EXTENSION_INITIALISED:
+        return _PACK_BLOCK_EXTENSION
+
+    project_root = Path(__file__).resolve().parents[2]
+    source_path = project_root / "pack_block_cpu.cpp"
+    if not source_path.exists():
+        source_path = project_root / "gptqmodel_ext" / "pack_block_cpu.cpp"
+    if not source_path.exists():
+        log.debug("pack_block_cpu extension source not found at %s", source_path)
+        _PACK_BLOCK_EXTENSION = None
+        _PACK_BLOCK_EXTENSION_INITIALISED = True
+        return None
+
+    if not verbose:
+        verbose = env_flag("GPTQMODEL_EXT_VERBOSE", True)
+    try:
+        load(
+            name="gptqmodel_pack_block_cpu",
+            sources=[str(source_path)],
+            extra_cflags=["-O3", "-std=c++17"],
+            extra_ldflags=[],
+            build_directory=os.getenv("GPTQMODEL_EXT_BUILD"),
+            verbose=verbose,
+            is_python_module=False,
+        )
+        log.debug("pack_block_cpu extension loaded from %s", source_path)
+        _PACK_BLOCK_EXTENSION = True
+    except Exception as exc:  # pragma: no cover - environment-specific
+        log.debug("pack_block_cpu extension build failed: %s", exc)
+        _PACK_BLOCK_EXTENSION = None
+    _PACK_BLOCK_EXTENSION_INITIALISED = True
+    return _PACK_BLOCK_EXTENSION
diff --git a/gptqmodel_ext/__init__.py b/gptqmodel_ext/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+"""Support files for GPTQModel native extensions."""
+
+__all__ = []
diff --git a/setup.py b/setup.py
@@ -664,6 +664,7 @@ def run(self):
 setup(
     version = gptqmodel_version,
     packages=find_packages(),
+    include_package_data=True,
     extras_require={
         "test": ["pytest>=8.2.2", "parameterized"],
         "quality": ["ruff==0.13.0", "isort==6.0.1"],