From 31fd19b4434bd54fe7c53e5db5d21086b52b9df1 Mon Sep 17 00:00:00 2001
From: Qubitium <qubitium@modelcloud.ai>
Date: Wed, 15 Oct 2025 04:56:41 +0000
Subject: [PATCH 1/4] emit compile logs

Signed-off-by: Qubitium <qubitium@modelcloud.ai>
---
 gptqmodel/nn_modules/qlinear/pack_block_ext.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gptqmodel/nn_modules/qlinear/pack_block_ext.py b/gptqmodel/nn_modules/qlinear/pack_block_ext.py
index 856afc4d7..159c91547 100644
--- a/gptqmodel/nn_modules/qlinear/pack_block_ext.py
+++ b/gptqmodel/nn_modules/qlinear/pack_block_ext.py
@@ -52,7 +52,7 @@ def _load_extension() -> Optional[object]:
             extra_cflags=extra_cflags,
             extra_ldflags=extra_ldflags,
             build_directory=build_dir,
-            verbose=False,
+            verbose=True,
             is_python_module=False,
         )
         log.debug("pack_block_cpu extension loaded from %s", source_path)

From 39b25aeaf5a2cc5d71ddadf7e20efc1c34349b8f Mon Sep 17 00:00:00 2001
From: Qubitium <qubitium@modelcloud.ai>
Date: Wed, 15 Oct 2025 05:04:05 +0000
Subject: [PATCH 2/4] refactor

Signed-off-by: Qubitium <qubitium@modelcloud.ai>
---
 .../nn_modules/qlinear/pack_block_ext.py      | 58 +-------------
 gptqmodel/utils/cpp.py                        | 76 +++++++++++++++++++
 2 files changed, 79 insertions(+), 55 deletions(-)
 create mode 100644 gptqmodel/utils/cpp.py

diff --git a/gptqmodel/nn_modules/qlinear/pack_block_ext.py b/gptqmodel/nn_modules/qlinear/pack_block_ext.py
index 159c91547..fdb3c6483 100644
--- a/gptqmodel/nn_modules/qlinear/pack_block_ext.py
+++ b/gptqmodel/nn_modules/qlinear/pack_block_ext.py
@@ -4,64 +4,12 @@
 
 from __future__ import annotations
 
-import logging
-import os
-from pathlib import Path
-from typing import Optional, Tuple
+from typing import Tuple
 
 import torch
 from torch import Tensor
-from torch.utils.cpp_extension import load
 
-
-log = logging.getLogger(__name__)
-
-_EXTENSION = None
-_EXTENSION_INITIALISED = False
-
-
-def _load_extension() -> Optional[object]:
-    global _EXTENSION, _EXTENSION_INITIALISED
-    if hasattr(torch.ops.gptqmodel, "pack_block_cpu"):
-        _EXTENSION_INITIALISED = True
-        _EXTENSION = True
-        return _EXTENSION
-
-    if _EXTENSION_INITIALISED and _EXTENSION is not None:
-        return _EXTENSION
-
-    source_path = Path(__file__).resolve().parents[3] / "pack_block_cpu.cpp"
-    if not source_path.exists():
-        # Fallback to repository root/gptqmodel_ext
-        source_path = Path(__file__).resolve().parents[3] / "gptqmodel_ext" / "pack_block_cpu.cpp"
-    if not source_path.exists():
-        log.debug("pack_block_cpu extension source not found at %s", source_path)
-        _EXTENSION = None
-        _EXTENSION_INITIALISED = True
-        return None
-
-    extra_cflags = ["-O3", "-std=c++17"]
-    extra_ldflags = []
-
-    build_dir = os.environ.get("GPTQMODEL_EXT_BUILD", None)
-
-    try:
-        load(
-            name="gptqmodel_pack_block_cpu",
-            sources=[str(source_path)],
-            extra_cflags=extra_cflags,
-            extra_ldflags=extra_ldflags,
-            build_directory=build_dir,
-            verbose=True,
-            is_python_module=False,
-        )
-        log.debug("pack_block_cpu extension loaded from %s", source_path)
-        _EXTENSION = True
-    except Exception as exc:  # pragma: no cover - environment-specific
-        log.debug("pack_block_cpu extension build failed: %s", exc)
-        _EXTENSION = None
-    _EXTENSION_INITIALISED = True
-    return _EXTENSION
+from gptqmodel.utils.cpp import load_pack_block_extension
 
 
 def pack_block_cpu(
@@ -74,7 +22,7 @@ def pack_block_cpu(
     block_in: int,
     threads: int,
 ) -> Tuple[Tensor, Tensor]:
-    ext = _load_extension()
+    ext = load_pack_block_extension()
     if ext is None:
         raise RuntimeError("pack_block_cpu extension unavailable")
     return torch.ops.gptqmodel.pack_block_cpu(
diff --git a/gptqmodel/utils/cpp.py b/gptqmodel/utils/cpp.py
new file mode 100644
index 000000000..bc9ea6f77
--- /dev/null
+++ b/gptqmodel/utils/cpp.py
@@ -0,0 +1,76 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+from __future__ import annotations
+
+import logging
+import os
+from pathlib import Path
+from typing import Optional
+
+import torch
+from torch.utils.cpp_extension import load
+
+from .env import env_flag
+
+
+log = logging.getLogger(__name__)
+
+_PACK_BLOCK_EXTENSION: Optional[bool] = None
+_PACK_BLOCK_EXTENSION_INITIALISED = False
+
+
+def load_pack_block_extension(*, verbose: bool = False) -> Optional[object]:
+    """Ensure the pack_block CPU extension is built and loaded.
+
+    Returns ``True`` when the extension is available, ``None`` otherwise.
+    The function is idempotent and caches its result to avoid repeated builds.
+    """
+
+    global _PACK_BLOCK_EXTENSION, _PACK_BLOCK_EXTENSION_INITIALISED
+
+    if hasattr(torch.ops.gptqmodel, "pack_block_cpu"):  # op already registered: skip the build
+        _PACK_BLOCK_EXTENSION_INITIALISED = True
+        _PACK_BLOCK_EXTENSION = True
+        return _PACK_BLOCK_EXTENSION
+
+    if _PACK_BLOCK_EXTENSION_INITIALISED and _PACK_BLOCK_EXTENSION is not None:
+        return _PACK_BLOCK_EXTENSION
+
+    project_root = Path(__file__).resolve().parents[2]
+    source_path = project_root / "pack_block_cpu.cpp"  # first candidate: directly under project_root
+    if not source_path.exists():  # second candidate: project_root/gptqmodel_ext/
+        source_path = project_root / "gptqmodel_ext" / "pack_block_cpu.cpp"
+    if not source_path.exists():  # neither candidate exists: report the extension as unavailable
+        log.debug("pack_block_cpu extension source not found at %s", source_path)
+        _PACK_BLOCK_EXTENSION = None
+        _PACK_BLOCK_EXTENSION_INITIALISED = True
+        return None
+
+    extra_cflags = ["-O3", "-std=c++17"]
+    extra_ldflags: list[str] = []
+
+    build_dir = os.getenv("GPTQMODEL_EXT_BUILD")  # None when the variable is unset
+
+    if not verbose:  # caller did not ask for verbose output: defer to GPTQMODEL_EXT_VERBOSE
+        verbose = env_flag("GPTQMODEL_EXT_VERBOSE", True)  # NOTE(review): assumes 2nd arg is the default when unset - confirm
+
+    try:
+        load(
+            name="gptqmodel_pack_block_cpu",
+            sources=[str(source_path)],
+            extra_cflags=extra_cflags,
+            extra_ldflags=extra_ldflags,
+            build_directory=build_dir,
+            verbose=verbose,
+            is_python_module=False,
+        )
+        log.debug("pack_block_cpu extension loaded from %s", source_path)
+        _PACK_BLOCK_EXTENSION = True
+    except Exception as exc:  # pragma: no cover - environment-specific
+        log.debug("pack_block_cpu extension build failed: %s", exc)  # failure is only logged at debug level, not raised
+        _PACK_BLOCK_EXTENSION = None  # the early-return check above skips None, so a later call attempts the build again
+    _PACK_BLOCK_EXTENSION_INITIALISED = True  # reached on both the success and the failure path
+    return _PACK_BLOCK_EXTENSION

From d296e60ce88204d39bf8a6f82303eb727fc94ea8 Mon Sep 17 00:00:00 2001
From: Qubitium <qubitium@modelcloud.ai>
Date: Wed, 15 Oct 2025 05:55:45 +0000
Subject: [PATCH 3/4] cleanup

Signed-off-by: Qubitium <qubitium@modelcloud.ai>
---
 gptqmodel/utils/cpp.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/gptqmodel/utils/cpp.py b/gptqmodel/utils/cpp.py
index bc9ea6f77..5de8e2ede 100644
--- a/gptqmodel/utils/cpp.py
+++ b/gptqmodel/utils/cpp.py
@@ -23,11 +23,7 @@
 
 
 def load_pack_block_extension(*, verbose: bool = False) -> Optional[object]:
-    """Ensure the pack_block CPU extension is built and loaded.
-
-    Returns ``True`` when the extension is available, ``None`` otherwise.
-    The function is idempotent and caches its result to avoid repeated builds.
-    """
+    """Ensure the pack_block CPU extension is built and loaded."""
 
     global _PACK_BLOCK_EXTENSION, _PACK_BLOCK_EXTENSION_INITIALISED
 
@@ -36,7 +32,7 @@ def load_pack_block_extension(*, verbose: bool = False) -> Optional[object]:
         _PACK_BLOCK_EXTENSION = True
         return _PACK_BLOCK_EXTENSION
 
-    if _PACK_BLOCK_EXTENSION_INITIALISED and _PACK_BLOCK_EXTENSION is not None:
+    if _PACK_BLOCK_EXTENSION_INITIALISED and _PACK_BLOCK_EXTENSION:
         return _PACK_BLOCK_EXTENSION
 
     project_root = Path(__file__).resolve().parents[2]

From 5e2afe0486f1513068555a5f90d48a9383e78284 Mon Sep 17 00:00:00 2001
From: Qubitium <qubitium@modelcloud.ai>
Date: Wed, 15 Oct 2025 06:04:02 +0000
Subject: [PATCH 4/4] make sure packer cpp is included

Signed-off-by: Qubitium <qubitium@modelcloud.ai>
---
 MANIFEST.in               | 1 +
 gptqmodel_ext/__init__.py | 7 +++++++
 setup.py                  | 1 +
 3 files changed, 9 insertions(+)
 create mode 100644 gptqmodel_ext/__init__.py

diff --git a/MANIFEST.in b/MANIFEST.in
index b3715a358..9efddd22b 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,6 +4,7 @@ recursive-include gptqmodel_ext/exllamav2 *.h *.cuh *.cu *.cpp
 recursive-include gptqmodel_ext/exllama_eora/eora *.h *.cuh *.cu *.cpp *.py
 recursive-include gptqmodel_ext/marlin *.h *.cuh *.cu *.cpp
 recursive-include gptqmodel_ext/qqq *.h *.cuh *.cu *.cpp
+include gptqmodel_ext/pack_block_cpu.cpp
 include gptqmodel_ext/marlin/generate_kernels.py
 recursive-exclude gptqmodel_ext __pycache__ *.pyc
 prune tests/
diff --git a/gptqmodel_ext/__init__.py b/gptqmodel_ext/__init__.py
new file mode 100644
index 000000000..2a40b7225
--- /dev/null
+++ b/gptqmodel_ext/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+"""Support files for GPTQModel native extensions."""
+
+__all__ = []
diff --git a/setup.py b/setup.py
index 820335b33..271ee8362 100644
--- a/setup.py
+++ b/setup.py
@@ -664,6 +664,7 @@ def run(self):
 setup(
     version = gptqmodel_version,
     packages=find_packages(),
+    include_package_data=True,
     extras_require={
         "test": ["pytest>=8.2.2", "parameterized"],
         "quality": ["ruff==0.13.0", "isort==6.0.1"],