diff --git a/MANIFEST.in b/MANIFEST.in
index b3715a358..9efddd22b 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,6 +4,7 @@ recursive-include gptqmodel_ext/exllamav2 *.h *.cuh *.cu *.cpp
 recursive-include gptqmodel_ext/exllama_eora/eora *.h *.cuh *.cu *.cpp *.py
 recursive-include gptqmodel_ext/marlin *.h *.cuh *.cu *.cpp
 recursive-include gptqmodel_ext/qqq *.h *.cuh *.cu *.cpp
+include gptqmodel_ext/pack_block_cpu.cpp
 include gptqmodel_ext/marlin/generate_kernels.py
 recursive-exclude gptqmodel_ext __pycache__ *.pyc
 prune tests/
diff --git a/gptqmodel/nn_modules/qlinear/pack_block_ext.py b/gptqmodel/nn_modules/qlinear/pack_block_ext.py
index 856afc4d7..fdb3c6483 100644
--- a/gptqmodel/nn_modules/qlinear/pack_block_ext.py
+++ b/gptqmodel/nn_modules/qlinear/pack_block_ext.py
@@ -4,64 +4,12 @@
 
 from __future__ import annotations
 
-import logging
-import os
-from pathlib import Path
-from typing import Optional, Tuple
+from typing import Tuple
 
 import torch
 from torch import Tensor
-from torch.utils.cpp_extension import load
-
-log = logging.getLogger(__name__)
-
-_EXTENSION = None
-_EXTENSION_INITIALISED = False
-
-
-def _load_extension() -> Optional[object]:
-    global _EXTENSION, _EXTENSION_INITIALISED
-    if hasattr(torch.ops.gptqmodel, "pack_block_cpu"):
-        _EXTENSION_INITIALISED = True
-        _EXTENSION = True
-        return _EXTENSION
-
-    if _EXTENSION_INITIALISED and _EXTENSION is not None:
-        return _EXTENSION
-
-    source_path = Path(__file__).resolve().parents[3] / "pack_block_cpu.cpp"
-    if not source_path.exists():
-        # Fallback to repository root/gptqmodel_ext
-        source_path = Path(__file__).resolve().parents[3] / "gptqmodel_ext" / "pack_block_cpu.cpp"
-    if not source_path.exists():
-        log.debug("pack_block_cpu extension source not found at %s", source_path)
-        _EXTENSION = None
-        _EXTENSION_INITIALISED = True
-        return None
-
-    extra_cflags = ["-O3", "-std=c++17"]
-    extra_ldflags = []
-
-    build_dir = os.environ.get("GPTQMODEL_EXT_BUILD", None)
-
-    try:
-        load(
-            name="gptqmodel_pack_block_cpu",
-            sources=[str(source_path)],
-            extra_cflags=extra_cflags,
-            extra_ldflags=extra_ldflags,
-            build_directory=build_dir,
-            verbose=False,
-            is_python_module=False,
-        )
-        log.debug("pack_block_cpu extension loaded from %s", source_path)
-        _EXTENSION = True
-    except Exception as exc:  # pragma: no cover - environment-specific
-        log.debug("pack_block_cpu extension build failed: %s", exc)
-        _EXTENSION = None
-    _EXTENSION_INITIALISED = True
-    return _EXTENSION
+from gptqmodel.utils.cpp import load_pack_block_extension
 
 
 def pack_block_cpu(
@@ -74,7 +22,7 @@ def pack_block_cpu(
     block_in: int,
     threads: int,
 ) -> Tuple[Tensor, Tensor]:
-    ext = _load_extension()
+    ext = load_pack_block_extension()
     if ext is None:
         raise RuntimeError("pack_block_cpu extension unavailable")
     return torch.ops.gptqmodel.pack_block_cpu(
diff --git a/gptqmodel/utils/cpp.py b/gptqmodel/utils/cpp.py
new file mode 100644
index 000000000..5de8e2ede
--- /dev/null
+++ b/gptqmodel/utils/cpp.py
@@ -0,0 +1,72 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+from __future__ import annotations
+
+import logging
+import os
+from pathlib import Path
+from typing import Optional
+
+import torch
+from torch.utils.cpp_extension import load
+
+from .env import env_flag
+
+
+log = logging.getLogger(__name__)
+
+_PACK_BLOCK_EXTENSION: Optional[bool] = None
+_PACK_BLOCK_EXTENSION_INITIALISED = False
+
+
+def load_pack_block_extension(*, verbose: bool = False) -> Optional[object]:
+    """Ensure the pack_block CPU extension is built and loaded."""
+
+    global _PACK_BLOCK_EXTENSION, _PACK_BLOCK_EXTENSION_INITIALISED
+
+    if hasattr(torch.ops.gptqmodel, "pack_block_cpu"):
+        _PACK_BLOCK_EXTENSION_INITIALISED = True
+        _PACK_BLOCK_EXTENSION = True
+        return _PACK_BLOCK_EXTENSION
+
+    if _PACK_BLOCK_EXTENSION_INITIALISED and _PACK_BLOCK_EXTENSION:
+        return _PACK_BLOCK_EXTENSION
+
+    project_root = Path(__file__).resolve().parents[2]
+    source_path = project_root / "pack_block_cpu.cpp"
+    if not source_path.exists():
+        source_path = project_root / "gptqmodel_ext" / "pack_block_cpu.cpp"
+    if not source_path.exists():
+        log.debug("pack_block_cpu extension source not found at %s", source_path)
+        _PACK_BLOCK_EXTENSION = None
+        _PACK_BLOCK_EXTENSION_INITIALISED = True
+        return None
+
+    extra_cflags = ["-O3", "-std=c++17"]
+    extra_ldflags: list[str] = []
+
+    build_dir = os.getenv("GPTQMODEL_EXT_BUILD")
+
+    if not verbose:
+        verbose = env_flag("GPTQMODEL_EXT_VERBOSE", True)
+
+    try:
+        load(
+            name="gptqmodel_pack_block_cpu",
+            sources=[str(source_path)],
+            extra_cflags=extra_cflags,
+            extra_ldflags=extra_ldflags,
+            build_directory=build_dir,
+            verbose=verbose,
+            is_python_module=False,
+        )
+        log.debug("pack_block_cpu extension loaded from %s", source_path)
+        _PACK_BLOCK_EXTENSION = True
+    except Exception as exc:  # pragma: no cover - environment-specific
+        log.debug("pack_block_cpu extension build failed: %s", exc)
+        _PACK_BLOCK_EXTENSION = None
+    _PACK_BLOCK_EXTENSION_INITIALISED = True
+    return _PACK_BLOCK_EXTENSION
diff --git a/gptqmodel_ext/__init__.py b/gptqmodel_ext/__init__.py
new file mode 100644
index 000000000..2a40b7225
--- /dev/null
+++ b/gptqmodel_ext/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+"""Support files for GPTQModel native extensions."""
+
+__all__ = []
diff --git a/setup.py b/setup.py
index 820335b33..271ee8362 100644
--- a/setup.py
+++ b/setup.py
@@ -664,6 +664,7 @@ def run(self):
 setup(
     version = gptqmodel_version,
     packages=find_packages(),
+    include_package_data=True,
     extras_require={
         "test": ["pytest>=8.2.2", "parameterized"],
         "quality": ["ruff==0.13.0", "isort==6.0.1"],
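
Note (not part of the patch): a minimal, hypothetical sketch of how a caller would exercise the relocated loader, assuming only what the diff above shows; the remaining arguments of the pack_block_cpu wrapper are not shown in the hunks and are therefore omitted here.

# Hypothetical caller sketch -- illustration only, not code from this diff.
import torch

from gptqmodel.utils.cpp import load_pack_block_extension

if load_pack_block_extension() is None:
    # pack_block_cpu.cpp was not found or the JIT build failed; a caller
    # would fall back to a pure-Python packing path here.
    print("pack_block_cpu extension unavailable")
else:
    # Because load() is invoked with is_python_module=False, the kernel is
    # reached through the torch.ops namespace rather than a Python import.
    assert hasattr(torch.ops.gptqmodel, "pack_block_cpu")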