From 1b57a67521acebdddd2aa6240633712e73a99f96 Mon Sep 17 00:00:00 2001
From: CSY-ModelCloud
Date: Wed, 22 Oct 2025 15:58:35 +0800
Subject: [PATCH 1/3] include package data

---
 setup.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 7fd343759..c84893a78 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,6 @@
 from setuptools import find_namespace_packages, find_packages, setup
 from setuptools.command.bdist_wheel import bdist_wheel as _bdist_wheel
 
-
 CUTLASS_VERSION = "3.5.0"
 CUTLASS_RELEASE_URL = f"https://github.com/NVIDIA/cutlass/archive/refs/tags/v{CUTLASS_VERSION}.tar.gz"
 
@@ -420,6 +419,7 @@ def _resolve_wheel_url(tag_name: str, wheel_name: str) -> str:
     # Fallback: default GitHub template
     return DEFAULT_WHEEL_URL_TEMPLATE.format(tag_name=tag_name, wheel_name=wheel_name)
+
 # Decide HAS_CUDA_V8 / HAS_CUDA_V9 without torch
 HAS_CUDA_V8 = False
 HAS_CUDA_V9 = False
@@ -526,7 +526,7 @@ def _env_enabled_any(names, default="1") -> bool:
     print(f"Using NVCC_THREADS={nvcc_threads} for per-invocation NVCC concurrency.")
 
 # Optional conda CUDA runtime headers
-#conda_cuda_include_dir = os.path.join(get_python_lib(), "nvidia/cuda_runtime/include")
+# conda_cuda_include_dir = os.path.join(get_python_lib(), "nvidia/cuda_runtime/include")
 # if os.path.isdir(conda_cuda_include_dir):
 #     include_dirs.append(conda_cuda_include_dir)
 #     print(f"appending conda cuda include dir {conda_cuda_include_dir}")
@@ -786,6 +786,8 @@ def _hipify_compile_flags(flags):
 
 additional_setup_kwargs = {
     "ext_modules": extensions,
+    "include_package_data": True,
+    "package_data": {"": ["build/lib/*.so"]},
     "cmdclass": {"build_ext": cpp_ext.BuildExtension.with_options(
         use_ninja=True,
         no_python_abi_suffix=True,
@@ -850,7 +852,7 @@ def run(self):
         _packages.append(_pkg)
 
 setup(
-    version = gptqmodel_version,
+    version=gptqmodel_version,
     packages=_packages,
     include_package_data=True,
     extras_require={

From 3517cc44206a135bbdd0bee779160afed0ffc232 Mon Sep 17 00:00:00 2001
From: CSY-ModelCloud
Date: Wed, 22 Oct 2025 16:46:14 +0800
Subject: [PATCH 2/3] disable build_lib

---
 setup.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/setup.py b/setup.py
index c84893a78..aefb3cd17 100644
--- a/setup.py
+++ b/setup.py
@@ -614,7 +614,7 @@ def _hipify_compile_flags(flags):
 # Extensions (gate marlin/qqq/eora/exllamav2 on CUDA sm_80+ and non-ROCm)
 if sys.platform != "win32":
     if not ROCM_VERSION and HAS_CUDA_V8:
-        if BUILD_MARLIN:
+        if False and BUILD_MARLIN:
             marlin_kernel_dir = Path("gptqmodel_ext/marlin")
             marlin_kernel_files = sorted(marlin_kernel_dir.glob("kernel_*.cu"))
 
@@ -645,7 +645,7 @@ def _hipify_compile_flags(flags):
                 )
             ]
 
-        if BUILD_MACHETE and HAS_CUDA_V9 and _version_geq(NVCC_VERSION, 12, 0):
+        if False and BUILD_MACHETE and HAS_CUDA_V9 and _version_geq(NVCC_VERSION, 12, 0):
             try:
                 result = subprocess.run(
                     [sys.executable, "gptqmodel_ext/machete/generate.py"],
@@ -686,7 +686,7 @@ def _hipify_compile_flags(flags):
                 )
             ]
 
-        if BUILD_QQQ:
+        if False and BUILD_QQQ:
             extensions += [
                 cpp_ext.CUDAExtension(
                     "gptqmodel_qqq_kernels",
@@ -699,7 +699,7 @@ def _hipify_compile_flags(flags):
                 )
             ]
 
-        if BUILD_EORA:
+        if False and BUILD_EORA:
             extensions += [
                 cpp_ext.CUDAExtension(
                     "gptqmodel_exllama_eora",
@@ -711,7 +711,7 @@ def _hipify_compile_flags(flags):
                 extra_compile_args=extra_compile_args,
             )
         ]
-        if BUILD_EXLLAMA_V2:
+        if False and BUILD_EXLLAMA_V2:
             extensions += [
                 cpp_ext.CUDAExtension(
                     "gptqmodel_exllamav2_kernels",
@@ -726,7 +726,7 @@ def _hipify_compile_flags(flags):
             ]
 
     # both CUDA and ROCm compatible
-    if BUILD_EXLLAMA_V1:
+    if True:
         extensions += [
             cpp_ext.CUDAExtension(
                 "gptqmodel_exllama_kernels",
@@ -742,7 +742,7 @@ def _hipify_compile_flags(flags):
             )
         ]
 
-    if BUILD_AWQ:
+    if False and BUILD_AWQ:
         if ROCM_VERSION:
             print("Skipping AWQ kernels on ROCm: inline PTX is CUDA-only.")
         else:
@@ -786,13 +786,13 @@ def _hipify_compile_flags(flags):
 
 additional_setup_kwargs = {
     "ext_modules": extensions,
-    "include_package_data": True,
-    "package_data": {"": ["build/lib/*.so"]},
+    # "include_package_data": True,
+    # "package_data": {"": ["build/lib/*.so"]},
     "cmdclass": {"build_ext": cpp_ext.BuildExtension.with_options(
         use_ninja=True,
         no_python_abi_suffix=True,
         build_temp="build/temp",
-        build_lib="build/lib",
+        # build_lib="build/lib",  TODO FIX ME why package_data doesn't work..
         clean_first=False  # keep intermediates for reuse
     )},
 }

From f4ba974de514c21b864ba838d2f07ad3cff21ea6 Mon Sep 17 00:00:00 2001
From: CSY-ModelCloud
Date: Wed, 22 Oct 2025 16:48:41 +0800
Subject: [PATCH 3/3] remove test code

---
 setup.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/setup.py b/setup.py
index aefb3cd17..9a83f75fc 100644
--- a/setup.py
+++ b/setup.py
@@ -614,7 +614,7 @@ def _hipify_compile_flags(flags):
 # Extensions (gate marlin/qqq/eora/exllamav2 on CUDA sm_80+ and non-ROCm)
 if sys.platform != "win32":
     if not ROCM_VERSION and HAS_CUDA_V8:
-        if False and BUILD_MARLIN:
+        if BUILD_MARLIN:
             marlin_kernel_dir = Path("gptqmodel_ext/marlin")
             marlin_kernel_files = sorted(marlin_kernel_dir.glob("kernel_*.cu"))
 
@@ -645,7 +645,7 @@ def _hipify_compile_flags(flags):
                 )
             ]
 
-        if False and BUILD_MACHETE and HAS_CUDA_V9 and _version_geq(NVCC_VERSION, 12, 0):
+        if BUILD_MACHETE and HAS_CUDA_V9 and _version_geq(NVCC_VERSION, 12, 0):
             try:
                 result = subprocess.run(
                     [sys.executable, "gptqmodel_ext/machete/generate.py"],
@@ -686,7 +686,7 @@ def _hipify_compile_flags(flags):
                 )
             ]
 
-        if False and BUILD_QQQ:
+        if BUILD_QQQ:
             extensions += [
                 cpp_ext.CUDAExtension(
                     "gptqmodel_qqq_kernels",
@@ -699,7 +699,7 @@ def _hipify_compile_flags(flags):
                 )
             ]
 
-        if False and BUILD_EORA:
+        if BUILD_EORA:
             extensions += [
                 cpp_ext.CUDAExtension(
                     "gptqmodel_exllama_eora",
@@ -711,7 +711,7 @@ def _hipify_compile_flags(flags):
                 extra_compile_args=extra_compile_args,
             )
         ]
-        if False and BUILD_EXLLAMA_V2:
+        if BUILD_EXLLAMA_V2:
             extensions += [
                 cpp_ext.CUDAExtension(
                     "gptqmodel_exllamav2_kernels",
@@ -726,7 +726,7 @@ def _hipify_compile_flags(flags):
             ]
 
     # both CUDA and ROCm compatible
-    if True:
+    if BUILD_EXLLAMA_V1:
         extensions += [
             cpp_ext.CUDAExtension(
                 "gptqmodel_exllama_kernels",
@@ -742,7 +742,7 @@ def _hipify_compile_flags(flags):
             )
         ]
 
-    if False and BUILD_AWQ:
+    if BUILD_AWQ:
         if ROCM_VERSION:
             print("Skipping AWQ kernels on ROCm: inline PTX is CUDA-only.")
         else:
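
A note on the "TODO FIX ME why package_data doesn't work" comment left behind by
patch 2: setuptools resolves package_data globs relative to each package's own
source directory, never the project root, so the pattern "build/lib/*.so" from
patch 1 (keyed to "", i.e. all packages) cannot match files under the top-level
build/ tree. Below is a minimal sketch of a configuration where package_data
does pick up .so files. It is illustrative only: the package name "gptqmodel" is
assumed from this repo, and "example" is a placeholder project name.

    # sketch_setup.py: a minimal sketch, not this project's actual setup.py
    from setuptools import setup

    setup(
        name="example",                  # placeholder project name
        packages=["gptqmodel"],          # package name assumed from this repo
        include_package_data=True,
        # package_data keys are package names; each glob is resolved relative
        # to that package's directory, so this pattern matches gptqmodel/*.so,
        # never <project-root>/build/lib/*.so.
        package_data={"gptqmodel": ["*.so"]},
    )

Separately, extensions declared in ext_modules are compiled by build_ext and
included in the wheel automatically, which is likely why commenting out the
build_lib override in patch 2 was sufficient and the package_data globs from
patch 1 were never needed.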