Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ recursive-include gptqmodel_ext/exllamav2 *.h *.cuh *.cu *.cpp
recursive-include gptqmodel_ext/exllama_eora/eora *.h *.cuh *.cu *.cpp *.py
recursive-include gptqmodel_ext/marlin *.h *.cuh *.cu *.cpp
recursive-include gptqmodel_ext/qqq *.h *.cuh *.cu *.cpp
include gptqmodel_ext/pack_block_cpu.cpp
include gptqmodel_ext/marlin/generate_kernels.py
recursive-exclude gptqmodel_ext __pycache__ *.pyc
prune tests/
Expand Down
58 changes: 3 additions & 55 deletions gptqmodel/nn_modules/qlinear/pack_block_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,64 +4,12 @@

from __future__ import annotations

import logging
import os
from pathlib import Path
from typing import Optional, Tuple
from typing import Tuple

import torch
from torch import Tensor
from torch.utils.cpp_extension import load


# Module-level logger used for extension build/load diagnostics.
log = logging.getLogger(__name__)

# Outcome of the load attempt: True once the op is available, None otherwise.
_EXTENSION = None
# Becomes True after _load_extension() has finished an attempt (success or failure).
_EXTENSION_INITIALISED = False


def _load_extension() -> Optional[object]:
    """Build and load the pack_block CPU extension, caching the outcome.

    The result of the first attempt is remembered in the module-level
    ``_EXTENSION`` / ``_EXTENSION_INITIALISED`` pair so that a missing source
    file or a failed build is not retried on every call.

    Returns:
        ``True`` when ``torch.ops.gptqmodel.pack_block_cpu`` is available,
        ``None`` when the source file was not found or the build failed.
    """
    global _EXTENSION, _EXTENSION_INITIALISED

    # The op may already be registered; nothing to build in that case.
    if hasattr(torch.ops.gptqmodel, "pack_block_cpu"):
        _EXTENSION_INITIALISED = True
        _EXTENSION = True
        return _EXTENSION

    # Honour the cached outcome, including a cached failure (None). The
    # previous check required a non-None result, which made every call after
    # a failure repeat the source lookup and the build attempt.
    if _EXTENSION_INITIALISED:
        return _EXTENSION

    root = Path(__file__).resolve().parents[3]
    candidates = (
        root / "pack_block_cpu.cpp",
        # Fallback to repository root/gptqmodel_ext
        root / "gptqmodel_ext" / "pack_block_cpu.cpp",
    )
    source_path = next((path for path in candidates if path.exists()), None)
    if source_path is None:
        log.debug("pack_block_cpu extension source not found at %s", candidates[-1])
        _EXTENSION = None
        _EXTENSION_INITIALISED = True
        return None

    # Optional override for where torch places the build artefacts.
    build_dir = os.environ.get("GPTQMODEL_EXT_BUILD")

    try:
        load(
            name="gptqmodel_pack_block_cpu",
            sources=[str(source_path)],
            extra_cflags=["-O3", "-std=c++17"],
            extra_ldflags=[],
            build_directory=build_dir,
            verbose=False,
            is_python_module=False,
        )
    except Exception as exc:  # pragma: no cover - environment-specific
        # Best-effort: a failed build is recorded, not raised.
        log.debug("pack_block_cpu extension build failed: %s", exc)
        _EXTENSION = None
    else:
        log.debug("pack_block_cpu extension loaded from %s", source_path)
        _EXTENSION = True

    _EXTENSION_INITIALISED = True
    return _EXTENSION
from gptqmodel.utils.cpp import load_pack_block_extension


def pack_block_cpu(
Expand All @@ -74,7 +22,7 @@ def pack_block_cpu(
block_in: int,
threads: int,
) -> Tuple[Tensor, Tensor]:
ext = _load_extension()
ext = load_pack_block_extension()
if ext is None:
raise RuntimeError("pack_block_cpu extension unavailable")
return torch.ops.gptqmodel.pack_block_cpu(
Expand Down
72 changes: 72 additions & 0 deletions gptqmodel/utils/cpp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
# SPDX-License-Identifier: Apache-2.0
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from __future__ import annotations

import logging
import os
from pathlib import Path
from typing import Optional

import torch
from torch.utils.cpp_extension import load

from .env import env_flag


# Module-level logger used for extension build/load diagnostics.
log = logging.getLogger(__name__)

# Outcome of the load attempt: True once the op is available, None otherwise.
_PACK_BLOCK_EXTENSION: Optional[bool] = None
# Becomes True after load_pack_block_extension() has finished an attempt
# (whether it succeeded or failed).
_PACK_BLOCK_EXTENSION_INITIALISED: bool = False


def load_pack_block_extension(*, verbose: bool = False) -> Optional[object]:
    """Ensure the pack_block CPU extension is built and loaded.

    The outcome of the first attempt is cached in the module-level
    ``_PACK_BLOCK_EXTENSION`` / ``_PACK_BLOCK_EXTENSION_INITIALISED`` pair so
    that a missing source file or a failed build is not retried on every call.

    Args:
        verbose: Passed to ``torch.utils.cpp_extension.load``. When false, the
            value is taken from ``env_flag("GPTQMODEL_EXT_VERBOSE", True)``.

    Returns:
        ``True`` when ``torch.ops.gptqmodel.pack_block_cpu`` is available,
        ``None`` when the source file was not found or the build failed.
    """
    global _PACK_BLOCK_EXTENSION, _PACK_BLOCK_EXTENSION_INITIALISED

    # The op may already be registered; nothing to build in that case.
    if hasattr(torch.ops.gptqmodel, "pack_block_cpu"):
        _PACK_BLOCK_EXTENSION_INITIALISED = True
        _PACK_BLOCK_EXTENSION = True
        return _PACK_BLOCK_EXTENSION

    # Honour the cached outcome, including a cached failure (None). The
    # previous check required a truthy result, which made every call after a
    # failure repeat the source lookup and the build attempt.
    if _PACK_BLOCK_EXTENSION_INITIALISED:
        return _PACK_BLOCK_EXTENSION

    project_root = Path(__file__).resolve().parents[2]
    candidates = (
        project_root / "pack_block_cpu.cpp",
        project_root / "gptqmodel_ext" / "pack_block_cpu.cpp",
    )
    source_path = next((path for path in candidates if path.exists()), None)
    if source_path is None:
        log.debug("pack_block_cpu extension source not found at %s", candidates[-1])
        _PACK_BLOCK_EXTENSION = None
        _PACK_BLOCK_EXTENSION_INITIALISED = True
        return None

    extra_cflags = ["-O3", "-std=c++17"]
    extra_ldflags: list[str] = []

    # Optional override for where torch places the build artefacts.
    build_dir = os.getenv("GPTQMODEL_EXT_BUILD")

    # NOTE(review): the second argument looks like the fallback used when the
    # variable is unset, which would make the build verbose by default despite
    # ``verbose=False`` -- confirm against env_flag before changing.
    if not verbose:
        verbose = env_flag("GPTQMODEL_EXT_VERBOSE", True)

    try:
        load(
            name="gptqmodel_pack_block_cpu",
            sources=[str(source_path)],
            extra_cflags=extra_cflags,
            extra_ldflags=extra_ldflags,
            build_directory=build_dir,
            verbose=verbose,
            is_python_module=False,
        )
    except Exception as exc:  # pragma: no cover - environment-specific
        # Best-effort: a failed build is recorded, not raised.
        log.debug("pack_block_cpu extension build failed: %s", exc)
        _PACK_BLOCK_EXTENSION = None
    else:
        log.debug("pack_block_cpu extension loaded from %s", source_path)
        _PACK_BLOCK_EXTENSION = True

    _PACK_BLOCK_EXTENSION_INITIALISED = True
    return _PACK_BLOCK_EXTENSION
7 changes: 7 additions & 0 deletions gptqmodel_ext/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
# SPDX-License-Identifier: Apache-2.0
# Contact: qubitium@modelcloud.ai, x.com/qubitium
"""Support files for GPTQModel native extensions."""

# Nothing is exported: this module defines no public Python names.
__all__ = []
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ def run(self):
setup(
version = gptqmodel_version,
packages=find_packages(),
include_package_data=True,
extras_require={
"test": ["pytest>=8.2.2", "parameterized"],
"quality": ["ruff==0.13.0", "isort==6.0.1"],
Expand Down