From 31fd19b4434bd54fe7c53e5db5d21086b52b9df1 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Wed, 15 Oct 2025 04:56:41 +0000 Subject: [PATCH 1/4] emit compile logs Signed-off-by: Qubitium --- gptqmodel/nn_modules/qlinear/pack_block_ext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gptqmodel/nn_modules/qlinear/pack_block_ext.py b/gptqmodel/nn_modules/qlinear/pack_block_ext.py index 856afc4d7..159c91547 100644 --- a/gptqmodel/nn_modules/qlinear/pack_block_ext.py +++ b/gptqmodel/nn_modules/qlinear/pack_block_ext.py @@ -52,7 +52,7 @@ def _load_extension() -> Optional[object]: extra_cflags=extra_cflags, extra_ldflags=extra_ldflags, build_directory=build_dir, - verbose=False, + verbose=True, is_python_module=False, ) log.debug("pack_block_cpu extension loaded from %s", source_path) From 39b25aeaf5a2cc5d71ddadf7e20efc1c34349b8f Mon Sep 17 00:00:00 2001 From: Qubitium Date: Wed, 15 Oct 2025 05:04:05 +0000 Subject: [PATCH 2/4] refactor Signed-off-by: Qubitium --- .../nn_modules/qlinear/pack_block_ext.py | 58 +------------- gptqmodel/utils/cpp.py | 76 +++++++++++++++++++ 2 files changed, 79 insertions(+), 55 deletions(-) create mode 100644 gptqmodel/utils/cpp.py diff --git a/gptqmodel/nn_modules/qlinear/pack_block_ext.py b/gptqmodel/nn_modules/qlinear/pack_block_ext.py index 159c91547..fdb3c6483 100644 --- a/gptqmodel/nn_modules/qlinear/pack_block_ext.py +++ b/gptqmodel/nn_modules/qlinear/pack_block_ext.py @@ -4,64 +4,12 @@ from __future__ import annotations -import logging -import os -from pathlib import Path -from typing import Optional, Tuple +from typing import Tuple import torch from torch import Tensor -from torch.utils.cpp_extension import load - -log = logging.getLogger(__name__) - -_EXTENSION = None -_EXTENSION_INITIALISED = False - - -def _load_extension() -> Optional[object]: - global _EXTENSION, _EXTENSION_INITIALISED - if hasattr(torch.ops.gptqmodel, "pack_block_cpu"): - _EXTENSION_INITIALISED = True - _EXTENSION = True - return _EXTENSION 
- - if _EXTENSION_INITIALISED and _EXTENSION is not None: - return _EXTENSION - - source_path = Path(__file__).resolve().parents[3] / "pack_block_cpu.cpp" - if not source_path.exists(): - # Fallback to repository root/gptqmodel_ext - source_path = Path(__file__).resolve().parents[3] / "gptqmodel_ext" / "pack_block_cpu.cpp" - if not source_path.exists(): - log.debug("pack_block_cpu extension source not found at %s", source_path) - _EXTENSION = None - _EXTENSION_INITIALISED = True - return None - - extra_cflags = ["-O3", "-std=c++17"] - extra_ldflags = [] - - build_dir = os.environ.get("GPTQMODEL_EXT_BUILD", None) - - try: - load( - name="gptqmodel_pack_block_cpu", - sources=[str(source_path)], - extra_cflags=extra_cflags, - extra_ldflags=extra_ldflags, - build_directory=build_dir, - verbose=True, - is_python_module=False, - ) - log.debug("pack_block_cpu extension loaded from %s", source_path) - _EXTENSION = True - except Exception as exc: # pragma: no cover - environment-specific - log.debug("pack_block_cpu extension build failed: %s", exc) - _EXTENSION = None - _EXTENSION_INITIALISED = True - return _EXTENSION +from gptqmodel.utils.cpp import load_pack_block_extension def pack_block_cpu( @@ -74,7 +22,7 @@ def pack_block_cpu( block_in: int, threads: int, ) -> Tuple[Tensor, Tensor]: - ext = _load_extension() + ext = load_pack_block_extension() if ext is None: raise RuntimeError("pack_block_cpu extension unavailable") return torch.ops.gptqmodel.pack_block_cpu( diff --git a/gptqmodel/utils/cpp.py b/gptqmodel/utils/cpp.py new file mode 100644 index 000000000..bc9ea6f77 --- /dev/null +++ b/gptqmodel/utils/cpp.py @@ -0,0 +1,76 @@ +# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai +# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai +# SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium + +from __future__ import annotations + +import logging +import os +from pathlib import Path +from typing import Optional + +import torch +from 
torch.utils.cpp_extension import load + +from .env import env_flag + + +log = logging.getLogger(__name__) + +_PACK_BLOCK_EXTENSION: Optional[bool] = None +_PACK_BLOCK_EXTENSION_INITIALISED = False + + +def load_pack_block_extension(*, verbose: bool = False) -> Optional[object]: + """Ensure the pack_block CPU extension is built and loaded. + + Returns ``True`` when the extension is available, ``None`` otherwise. + The function is idempotent and caches its result to avoid repeated builds. + """ + + global _PACK_BLOCK_EXTENSION, _PACK_BLOCK_EXTENSION_INITIALISED + + if hasattr(torch.ops.gptqmodel, "pack_block_cpu"): + _PACK_BLOCK_EXTENSION_INITIALISED = True + _PACK_BLOCK_EXTENSION = True + return _PACK_BLOCK_EXTENSION + + if _PACK_BLOCK_EXTENSION_INITIALISED and _PACK_BLOCK_EXTENSION is not None: + return _PACK_BLOCK_EXTENSION + + project_root = Path(__file__).resolve().parents[2] + source_path = project_root / "pack_block_cpu.cpp" + if not source_path.exists(): + source_path = project_root / "gptqmodel_ext" / "pack_block_cpu.cpp" + if not source_path.exists(): + log.debug("pack_block_cpu extension source not found at %s", source_path) + _PACK_BLOCK_EXTENSION = None + _PACK_BLOCK_EXTENSION_INITIALISED = True + return None + + extra_cflags = ["-O3", "-std=c++17"] + extra_ldflags: list[str] = [] + + build_dir = os.getenv("GPTQMODEL_EXT_BUILD") + + if not verbose: + verbose = env_flag("GPTQMODEL_EXT_VERBOSE", True) + + try: + load( + name="gptqmodel_pack_block_cpu", + sources=[str(source_path)], + extra_cflags=extra_cflags, + extra_ldflags=extra_ldflags, + build_directory=build_dir, + verbose=verbose, + is_python_module=False, + ) + log.debug("pack_block_cpu extension loaded from %s", source_path) + _PACK_BLOCK_EXTENSION = True + except Exception as exc: # pragma: no cover - environment-specific + log.debug("pack_block_cpu extension build failed: %s", exc) + _PACK_BLOCK_EXTENSION = None + _PACK_BLOCK_EXTENSION_INITIALISED = True + return _PACK_BLOCK_EXTENSION From 
d296e60ce88204d39bf8a6f82303eb727fc94ea8 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Wed, 15 Oct 2025 05:55:45 +0000 Subject: [PATCH 3/4] cleanup Signed-off-by: Qubitium --- gptqmodel/utils/cpp.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/gptqmodel/utils/cpp.py b/gptqmodel/utils/cpp.py index bc9ea6f77..5de8e2ede 100644 --- a/gptqmodel/utils/cpp.py +++ b/gptqmodel/utils/cpp.py @@ -23,11 +23,7 @@ def load_pack_block_extension(*, verbose: bool = False) -> Optional[object]: - """Ensure the pack_block CPU extension is built and loaded. - - Returns ``True`` when the extension is available, ``None`` otherwise. - The function is idempotent and caches its result to avoid repeated builds. - """ + """Ensure the pack_block CPU extension is built and loaded.""" global _PACK_BLOCK_EXTENSION, _PACK_BLOCK_EXTENSION_INITIALISED @@ -36,7 +32,7 @@ def load_pack_block_extension(*, verbose: bool = False) -> Optional[object]: _PACK_BLOCK_EXTENSION = True return _PACK_BLOCK_EXTENSION - if _PACK_BLOCK_EXTENSION_INITIALISED and _PACK_BLOCK_EXTENSION is not None: + if _PACK_BLOCK_EXTENSION_INITIALISED and _PACK_BLOCK_EXTENSION: return _PACK_BLOCK_EXTENSION project_root = Path(__file__).resolve().parents[2] From 5e2afe0486f1513068555a5f90d48a9383e78284 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Wed, 15 Oct 2025 06:04:02 +0000 Subject: [PATCH 4/4] make sure packer cpp is included Signed-off-by: Qubitium --- MANIFEST.in | 1 + gptqmodel_ext/__init__.py | 7 +++++++ setup.py | 1 + 3 files changed, 9 insertions(+) create mode 100644 gptqmodel_ext/__init__.py diff --git a/MANIFEST.in b/MANIFEST.in index b3715a358..9efddd22b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,6 +4,7 @@ recursive-include gptqmodel_ext/exllamav2 *.h *.cuh *.cu *.cpp recursive-include gptqmodel_ext/exllama_eora/eora *.h *.cuh *.cu *.cpp *.py recursive-include gptqmodel_ext/marlin *.h *.cuh *.cu *.cpp recursive-include gptqmodel_ext/qqq *.h *.cuh *.cu *.cpp +include 
gptqmodel_ext/pack_block_cpu.cpp include gptqmodel_ext/marlin/generate_kernels.py recursive-exclude gptqmodel_ext __pycache__ *.pyc prune tests/ diff --git a/gptqmodel_ext/__init__.py b/gptqmodel_ext/__init__.py new file mode 100644 index 000000000..2a40b7225 --- /dev/null +++ b/gptqmodel_ext/__init__.py @@ -0,0 +1,7 @@ +# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai +# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai +# SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium +"""Support files for GPTQModel native extensions.""" + +__all__ = [] diff --git a/setup.py b/setup.py index 820335b33..271ee8362 100644 --- a/setup.py +++ b/setup.py @@ -664,6 +664,7 @@ def run(self): setup( version = gptqmodel_version, packages=find_packages(), + include_package_data=True, extras_require={ "test": ["pytest>=8.2.2", "parameterized"], "quality": ["ruff==0.13.0", "isort==6.0.1"],