diff --git a/gptqmodel/cli/__init__.py b/gptqmodel/cli/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/gptqmodel/cli/env.py b/gptqmodel/cli/env.py
new file mode 100644
index 000000000..f276ad25b
--- /dev/null
+++ b/gptqmodel/cli/env.py
@@ -0,0 +1,1024 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+import argparse
+# ruff: noqa
+# code borrowed from https://github.com/pytorch/pytorch/blob/main/torch/utils/collect_env.py
+
+# mypy: allow-untyped-defs
+
+# Unlike the upstream PyTorch file, this copy is Python 3 only (it uses f-strings).
+# This script outputs relevant system environment info
+# Run it with the `gptqmodel env` console command
+import datetime
+import json
+import locale
+import os
+import re
+import subprocess
+import sys
+from collections import namedtuple
+from typing import cast as _cast
+
+
+try:
+    import torch
+
+    TORCH_AVAILABLE = True
+except (ImportError, NameError, AttributeError, OSError):
+    TORCH_AVAILABLE = False
+
+# System Environment Information
+SystemEnv = namedtuple(
+    "SystemEnv",
+    [
+        "torch_version",
+        "is_debug_build",
+        "cuda_compiled_version",
+        "gcc_version",
+        "clang_version",
+        "cmake_version",
+        "os",
+        "libc_version",
+        "python_version",
+        "python_platform",
+        "is_cuda_available",
+        "cuda_runtime_version",
+        "cuda_module_loading",
+        "nvidia_driver_version",
+        "nvidia_gpu_models",
+        "cudnn_version",
+        "is_xpu_available",
+        "pip_version",  # 'pip' or 'pip3'
+        "pip_packages",
+        "conda_packages",
+        "hip_compiled_version",
+        "hip_runtime_version",
+        "miopen_runtime_version",
+        "caching_allocator_config",
+        "is_xnnpack_available",
+        "cpu_info",
+    ],
+)
+
+COMMON_PATTERNS = [
+    "torch",
+    "numpy",
+    "triton",
+    "onnx",
+    "nccl",
+    "transformers",
+    "nvidia",
+    "flashinfer",
+    "bitblas",
+    "optimum",
+    "sglang",
+    "vllm"
+]
+
+NVIDIA_PATTERNS = [
+    "cuda-cudart",
+    "cuda-cupti",
+    "cuda-libraries",
+    "cuda-opencl",
+    "cuda-nvrtc",
+    "cuda-runtime",
+    "cublas",
+    "cudnn",
+    "cufft",
+    "curand",
+    "cusolver",
+    "cusparse",
+    "nccl",
+    "nvjitlink",
+    "nvtx",
+]
+
+ONEAPI_PATTERNS = [
+    "dpcpp-cpp-rt",
+    "intel-cmplr-lib-rt",
+    "intel-cmplr-lib-ur",
+    "intel-cmplr-lic-rt",
+    "intel-opencl-rt",
+    "intel-sycl-rt",
+    "mkl",
+    "onemkl-sycl-blas",
+    "onemkl-sycl-dft",
+    "onemkl-sycl-lapack",
+    "onemkl-sycl-rng",
+    "onemkl-sycl-sparse",
+    "intel-openmp",
+    "tbb",
+    "impi-rt",
+    "impi-devel",
+    "oneccl",
+    "oneccl-devel",
+    "intel-pti",
+    "umf",
+    "tcmlib",
+]
+
+CONDA_PATTERNS = []
+
+PIP_PATTERNS = []
+
+
+def run(command):
+    """Return (return-code, stdout, stderr)."""
+    shell = type(command) is str
+    p = subprocess.Popen(
+        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell
+    )
+    raw_output, raw_err = p.communicate()
+    rc = p.returncode
+    if get_platform() == "win32":
+        enc = "oem"
+    else:
+        enc = locale.getpreferredencoding()
+    output = raw_output.decode(enc)
+    err = raw_err.decode(enc)
+    return rc, output.strip(), err.strip()
+
+
+def run_and_read_all(run_lambda, command):
+    """Run command using run_lambda; reads and returns entire output if rc is 0."""
+    rc, out, _ = run_lambda(command)
+    if rc != 0:
+        return None
+    return out
+
+
+def run_and_parse_first_match(run_lambda, command, regex):
+    """Run command using run_lambda, returns the first regex match if it exists."""
+    rc, out, _ = run_lambda(command)
+    if rc != 0:
+        return None
+    match = re.search(regex, out)
+    if match is None:
+        return None
+    return match.group(1)
+
+
+def run_and_return_first_line(run_lambda, command):
+    """Run command using run_lambda and returns first line if output is not empty."""
+    rc, out, _ = run_lambda(command)
+    if rc != 0:
+        return None
+    return out.split("\n")[0]
+
+
+def get_conda_packages(run_lambda, patterns=None):
+    """Return the `conda list` lines that contain any pattern, or None if the command failed."""
+    if patterns is None:
+        patterns = CONDA_PATTERNS + COMMON_PATTERNS + NVIDIA_PATTERNS + ONEAPI_PATTERNS
+    conda = os.environ.get("CONDA_EXE", "conda")
+    out = run_and_read_all(run_lambda, "{} list".format(conda))
+    if out is None:
+        return out
+
+    return "\n".join(
+        line
+        for line in out.splitlines()
+        if not line.startswith("#") and any(name in line for name in patterns)
+    )
+
+
+def get_gcc_version(run_lambda):
+    """Return the version text parsed from `gcc --version`, or None."""
+    return run_and_parse_first_match(run_lambda, "gcc --version", r"gcc (.*)")
+
+
+def get_clang_version(run_lambda):
+    """Return the version text parsed from `clang --version`, or None."""
+    return run_and_parse_first_match(
+        run_lambda, "clang --version", r"clang version (.*)"
+    )
+
+
+def get_cmake_version(run_lambda):
+    """Return the version text parsed from `cmake --version`, or None."""
+    return run_and_parse_first_match(run_lambda, "cmake --version", r"cmake (.*)")
+
+
+def get_nvidia_driver_version(run_lambda):
+    """Return the NVIDIA driver version from kextstat (macOS) or nvidia-smi, or None."""
+    if get_platform() == "darwin":
+        cmd = "kextstat | grep -i cuda"
+        return run_and_parse_first_match(
+            run_lambda, cmd, r"com[.]nvidia[.]CUDA [(](.*?)[)]"
+        )
+    smi = get_nvidia_smi()
+    return run_and_parse_first_match(run_lambda, smi, r"Driver Version: (.*?) ")
+
+
+def get_gpu_info(run_lambda):
+    """
+    Collect and return GPU/XPU device information.
+    - Supports CUDA and Intel XPU coexistence.
+    - Reports device name and total memory in GiB.
+    """
+
+    def mib_to_gib(mib: float) -> float:
+        """Convert MiB to GiB."""
+        return mib / 1024.0  # 1 GiB = 1024 MiB
+
+    info_lines = []
+
+    # ─────────────────────────────────────────────
+    # CUDA devices (NVIDIA)
+    # ─────────────────────────────────────────────
+    # TORCH_AVAILABLE first: the name `torch` is unbound when the guarded import failed
+    if TORCH_AVAILABLE and hasattr(torch, "cuda") and torch.cuda.is_available():
+        try:
+            smi = get_nvidia_smi()
+            uuid_regex = re.compile(r" \(UUID: .+?\)")
+            rc, out, _ = run_lambda(smi + " -L")
+
+            if rc == 0 and out:
+                out = re.sub(uuid_regex, "", out)
+                # Query GPU memory (MiB)
+                rc2, mem_info, _ = run_lambda(
+                    smi + " --query-gpu=memory.total --format=csv,noheader,nounits"
+                )
+                if rc2 == 0 and mem_info:
+                    mems = [
+                        f"{mib_to_gib(float(m.strip())):.1f} GiB"
+                        for m in mem_info.strip().splitlines()
+                    ]
+                    out_lines = out.strip().splitlines()
+                    info_lines.append("CUDA devices:")
+                    for i, line in enumerate(out_lines):
+                        mem_str = mems[i] if i < len(mems) else "?"
+                        info_lines.append(f"  - {line.strip()} ({mem_str})")
+                else:
+                    info_lines.append("CUDA devices detected, but memory info unavailable.")
+            else:
+                # fallback to torch cuda properties
+                info_lines.append("CUDA devices:")
+                for i in range(torch.cuda.device_count()):
+                    prop = torch.cuda.get_device_properties(i)
+                    mem_gib = prop.total_memory / (1024**3)
+                    info_lines.append(f"  - GPU {i}: {prop.name} ({mem_gib:.1f} GiB)")
+        except Exception as e:
+            info_lines.append(f"CUDA info error: {e}")
+    else:
+        info_lines.append("CUDA stats not collected (CUDA not available)")
+
+    # ─────────────────────────────────────────────
+    # XPU devices (Intel GPU / NPU)
+    # ─────────────────────────────────────────────
+    if TORCH_AVAILABLE and hasattr(torch, "xpu") and torch.xpu.is_available():
+        try:
+            info_lines.append("\nXPU devices:")
+            for i in range(torch.xpu.device_count()):
+                dev_name = torch.xpu.get_device_name(i)
+                dev_prop = torch.xpu.get_device_properties(i)
+                mem_gib = getattr(dev_prop, "total_memory", 0) / (1024**3)
+                info_lines.append(f"  - XPU {i}: {dev_name} ({mem_gib:.1f} GiB)")
+        except Exception as e:
+            info_lines.append(f"XPU detected, but unable to read properties ({e})")
+    else:
+        info_lines.append("XPU stats not collected (XPU not available)")
+
+    return "\n".join(info_lines)
+
+
+def get_running_cuda_version(run_lambda):
+    """Return the release version parsed from `nvcc --version`, or None."""
+    return run_and_parse_first_match(run_lambda, "nvcc --version", r"release .+ V(.*)")
+
+
+def get_cudnn_version(run_lambda):
+    """Return a list of libcudnn.so; it's hard to tell which one is being used."""
+    if get_platform() == "win32":
+        system_root = os.environ.get("SYSTEMROOT", "C:\\Windows")
+        cuda_path = os.environ.get("CUDA_PATH", "%CUDA_PATH%")
+        where_cmd = os.path.join(system_root, "System32", "where")
+        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
+    elif get_platform() == "darwin":
+        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
+        # https://docs.nvidia.com/cuda/archive/9.0/cuda-installation-guide-mac-os-x/index.html#installation
+        # https://docs.nvidia.com/deeplearning/cudnn/installation/latest/
+        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
+        cudnn_cmd = "ls /usr/local/cuda/lib/libcudnn*"
+    else:
+        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'
+    rc, out, _ = run_lambda(cudnn_cmd)
+    # find will return 1 if there are permission errors or if not found
+    if len(out) == 0 or (rc != 1 and rc != 0):
+        cudnn_lib = os.environ.get("CUDNN_LIBRARY")
+        if cudnn_lib is not None and os.path.isfile(cudnn_lib):
+            return os.path.realpath(cudnn_lib)
+        return None
+    files_set = set()
+    for fn in out.split("\n"):
+        fn = os.path.realpath(fn)  # eliminate symbolic links
+        if os.path.isfile(fn):
+            files_set.add(fn)
+    if not files_set:
+        return None
+    # Alphabetize the result because the order is non-deterministic otherwise
+    files = sorted(files_set)
+    if len(files) == 1:
+        return files[0]
+    result = "\n".join(files)
+    return "Probably one of the following:\n{}".format(result)
+
+
+def get_nvidia_smi():
+    """Return the nvidia-smi command; on Windows, a quoted full path when one exists."""
+    # Note: nvidia-smi is currently available only on Windows and Linux
+    smi = "nvidia-smi"
+    if get_platform() == "win32":
+        system_root = os.environ.get("SYSTEMROOT", "C:\\Windows")
+        program_files_root = os.environ.get("PROGRAMFILES", "C:\\Program Files")
+        legacy_path = os.path.join(
+            program_files_root, "NVIDIA Corporation", "NVSMI", smi
+        )
+        new_path = os.path.join(system_root, "System32", smi)
+        smis = [new_path, legacy_path]
+        for candidate_smi in smis:
+            if os.path.exists(candidate_smi):
+                smi = '"{}"'.format(candidate_smi)
+                break
+    return smi
+
+
+def _detect_linux_pkg_manager():
+    """Return the first of dpkg/dnf/yum/zypper found via `which`, else "N/A"."""
+    if get_platform() != "linux":
+        return "N/A"
+    for mgr_name in ["dpkg", "dnf", "yum", "zypper"]:
+        rc, _, _ = run(f"which {mgr_name}")
+        if rc == 0:
+            return mgr_name
+    return "N/A"
+
+
+def get_linux_pkg_version(run_lambda, pkg_name):
+    """Return pkg_name's version as reported by the detected package manager, or "N/A"."""
+    pkg_mgr = _detect_linux_pkg_manager()
+    if pkg_mgr == "N/A":
+        return "N/A"
+
+    grep_version = {
+        "dpkg": {
+            "field_index": 2,
+            "command": "dpkg -l | grep {}",
+        },
+        "dnf": {
+            "field_index": 1,
+            "command": "dnf list | grep {}",
+        },
+        "yum": {
+            "field_index": 1,
+            "command": "yum list | grep {}",
+        },
+        "zypper": {
+            "field_index": 2,
+            "command": "zypper info {} | grep Version",
+        },
+    }
+
+    field_index: int = int(_cast(int, grep_version[pkg_mgr]["field_index"]))
+    cmd: str = str(grep_version[pkg_mgr]["command"])
+    cmd = cmd.format(pkg_name)
+    ret = run_and_read_all(run_lambda, cmd)
+    if ret is None or ret == "":
+        return "N/A"
+    lst = re.sub(" +", " ", ret).split(" ")
+    if len(lst) <= field_index:
+        return "N/A"
+    return lst[field_index]
+
+
+def get_intel_gpu_driver_version(run_lambda):
+    """Return newline-joined Intel GPU driver package/driver versions for Linux or Windows."""
+    lst = []
+    platform = get_platform()
+    if platform == "linux":
+        pkgs = {  # type: ignore[var-annotated]
+            "dpkg": {
+                "intel-opencl-icd",
+                "libze1",
+                "level-zero",
+            },
+            "dnf": {
+                "intel-opencl",
+                "level-zero",
+            },
+            "yum": {
+                "intel-opencl",
+                "level-zero",
+            },
+            "zypper": {
+                "intel-opencl",
+                "level-zero",
+            },
+        }.get(_detect_linux_pkg_manager(), {})
+        for pkg in pkgs:
+            ver = get_linux_pkg_version(run_lambda, pkg)
+            if ver != "N/A":
+                lst.append(f"* {pkg}:\t{ver}")
+    if platform in ["win32", "cygwin"]:
+        txt = run_and_read_all(
+            run_lambda,
+            'powershell.exe "gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\"DISPLAY\\"\
+            -and $_.Manufacturer -match \\"Intel\\"} | Select-Object -Property DeviceName,DriverVersion,DriverDate\
+            | ConvertTo-Json"',
+        )
+        # txt is None when the command failed; json.loads(None) raises TypeError, not ValueError
+        if txt:
+            try:
+                obj = json.loads(txt)
+                if isinstance(obj, list):
+                    for o in obj:
+                        lst.append(
+                            f'* {o["DeviceName"]}: {o["DriverVersion"]} ({o["DriverDate"]})'
+                        )
+                else:
+                    lst.append(f'* {obj["DriverVersion"]} ({obj["DriverDate"]})')
+            except ValueError as e:
+                lst.append(txt)
+                lst.append(str(e))
+        else:
+            lst.append("N/A")
+    return "\n".join(lst)
+
+
+def get_intel_gpu_onboard(run_lambda):
+    """Return newline-joined Intel GPU device names from xpu-smi (Linux) or PowerShell (Windows)."""
+    lst: list[str] = []
+    platform = get_platform()
+    if platform == "linux":
+        txt = run_and_read_all(run_lambda, "xpu-smi discovery -j")
+        if txt:
+            try:
+                obj = json.loads(txt)
+                device_list = obj.get("device_list", [])
+                if isinstance(device_list, list) and device_list:
+                    lst.extend(f'* {device["device_name"]}' for device in device_list)
+                else:
+                    lst.append("N/A")
+            except (ValueError, TypeError) as e:
+                lst.append(txt)
+                lst.append(str(e))
+        else:
+            lst.append("N/A")
+    if platform in ["win32", "cygwin"]:
+        txt = run_and_read_all(
+            run_lambda,
+            'powershell.exe "gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\"DISPLAY\\"\
+            -and $_.Manufacturer -match \\"Intel\\"} | Select-Object -Property DeviceName | ConvertTo-Json"',
+        )
+        if txt:
+            try:
+                obj = json.loads(txt)
+                if isinstance(obj, list) and obj:
+                    lst.extend(f'* {device["DeviceName"]}' for device in obj)
+                else:
+                    lst.append(f'* {obj.get("DeviceName", "N/A")}')
+            except ValueError as e:
+                lst.append(txt)
+                lst.append(str(e))
+        else:
+            lst.append("N/A")
+    return "\n".join(lst)
+
+
+def get_intel_gpu_detected(run_lambda):
+    """Return one line per XPU device that torch reports, or "N/A" when there are none."""
+    if not TORCH_AVAILABLE or not hasattr(torch, "xpu"):
+        return "N/A"
+
+    device_count = torch.xpu.device_count()
+    if device_count == 0:
+        return "N/A"
+
+    devices = [
+        f"* [{i}] {torch.xpu.get_device_properties(i)}" for i in range(device_count)
+    ]
+    return "\n".join(devices)
+
+
+# example outputs of CPU infos
+#  * linux
+#    Architecture:            x86_64
+#      CPU op-mode(s):        32-bit, 64-bit
+#      Address sizes:         46 bits physical, 48 bits virtual
+#      Byte Order:            Little Endian
+#    CPU(s):                  128
+#      On-line CPU(s) list:   0-127
+#    Vendor ID:               GenuineIntel
+#      Model name:            Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+#        CPU family:          6
+#        Model:               106
+#        Thread(s) per core:  2
+#        Core(s) per socket:  32
+#        Socket(s):           2
+#        Stepping:            6
+#        BogoMIPS:            5799.78
+#        Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr
+#                             sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl
+#                             xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16
+#                             pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
+#                             hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced
+#                             fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap
+#                             avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1
+#                             xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq
+#                             avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities
+#    Virtualization features:
+#      Hypervisor vendor:     KVM
+#      Virtualization type:   full
+#    Caches (sum of all):
+#      L1d:                   3 MiB (64 instances)
+#      L1i:                   2 MiB (64 instances)
+#      L2:                    80 MiB (64 instances)
+#      L3:                    108 MiB (2 instances)
+#    NUMA:
+#      NUMA node(s):          2
+#      NUMA node0 CPU(s):     0-31,64-95
+#      NUMA node1 CPU(s):     32-63,96-127
+#    Vulnerabilities:
+#      Itlb multihit:         Not affected
+#      L1tf:                  Not affected
+#      Mds:                   Not affected
+#      Meltdown:              Not affected
+#      Mmio stale data:       Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
+#      Retbleed:              Not affected
+#      Spec store bypass:     Mitigation; Speculative Store Bypass disabled via prctl and seccomp
+#      Spectre v1:            Mitigation; usercopy/swapgs barriers and __user pointer sanitization
+#      Spectre v2:            Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
+#      Srbds:                 Not affected
+#      Tsx async abort:       Not affected
+#  * win32
+#    Architecture=9
+#    CurrentClockSpeed=2900
+#    DeviceID=CPU0
+#    Family=179
+#    L2CacheSize=40960
+#    L2CacheSpeed=
+#    Manufacturer=GenuineIntel
+#    MaxClockSpeed=2900
+#    Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+#    ProcessorType=3
+#    Revision=27142
+#
+#    Architecture=9
+#    CurrentClockSpeed=2900
+#    DeviceID=CPU1
+#    Family=179
+#    L2CacheSize=40960
+#    L2CacheSpeed=
+#    Manufacturer=GenuineIntel
+#    MaxClockSpeed=2900
+#    Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+#    ProcessorType=3
+#    Revision=27142
+
+
+def get_cpu_info(run_lambda):
+    """Return CPU details from lscpu, PowerShell or sysctl; the command's stderr on failure."""
+    rc, out, err = 0, "", ""
+    if get_platform() == "linux":
+        rc, out, err = run_lambda("lscpu")
+    elif get_platform() == "win32":
+        rc, out, err = run_lambda(
+            'powershell.exe "gwmi -Class Win32_Processor | Select-Object -Property Name,Manufacturer,Family,\
+            Architecture,ProcessorType,DeviceID,CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision\
+            | ConvertTo-Json"'
+        )
+        if rc == 0:
+            lst = []
+            try:
+                obj = json.loads(out)
+                if isinstance(obj, list):
+                    for o in obj:
+                        lst.append("----------------------")
+                        lst.extend([f"{k}: {v}" for (k, v) in o.items()])
+                else:
+                    lst.extend([f"{k}: {v}" for (k, v) in obj.items()])
+            except ValueError as e:
+                lst.append(out)
+                lst.append(str(e))
+            out = "\n".join(lst)
+    elif get_platform() == "darwin":
+        rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string")
+    cpu_info = "None"
+    if rc == 0:
+        cpu_info = out
+    else:
+        cpu_info = err
+    return cpu_info
+
+
+def get_platform():
+    """Return sys.platform normalized to "linux", "win32", "cygwin" or "darwin" when it matches."""
+    if sys.platform.startswith("linux"):
+        return "linux"
+    elif sys.platform.startswith("win32"):
+        return "win32"
+    elif sys.platform.startswith("cygwin"):
+        return "cygwin"
+    elif sys.platform.startswith("darwin"):
+        return "darwin"
+    else:
+        return sys.platform
+
+
+def get_mac_version(run_lambda):
+    """Return the output of `sw_vers -productVersion`, or None if the command failed."""
+    return run_and_parse_first_match(run_lambda, "sw_vers -productVersion", r"(.*)")
+
+
+def get_windows_version(run_lambda):
+    """Return "Caption (Version OSArchitecture)" from PowerShell, or None if the query failed."""
+    ret = run_and_read_all(
+        run_lambda,
+        'powershell.exe "gwmi -Class Win32_OperatingSystem | Select-Object -Property Caption,\
+        OSArchitecture,Version | ConvertTo-Json"',
+    )
+    # run_and_read_all returns None on a non-zero exit code; json.loads(None) would raise TypeError
+    if ret is None:
+        return None
+    try:
+        obj = json.loads(ret)
+        ret = f'{obj["Caption"]} ({obj["Version"]} {obj["OSArchitecture"]})'
+    except ValueError as e:
+        ret += f"\n{str(e)}"
+    return ret
+
+
+def get_lsb_version(run_lambda):
+    """Return the Description field of `lsb_release -a`, or None."""
+    return run_and_parse_first_match(
+        run_lambda, "lsb_release -a", r"Description:\t(.*)"
+    )
+
+
+def check_release_file(run_lambda):
+    """Return PRETTY_NAME parsed from /etc/*-release, or None."""
+    return run_and_parse_first_match(
+        run_lambda, "cat /etc/*-release", r'PRETTY_NAME="(.*)"'
+    )
+
+
+def get_os(run_lambda):
+    """Return a description of the operating system for the current platform."""
+    from platform import machine
+
+    platform = get_platform()
+
+    if platform in ["win32", "cygwin"]:
+        return get_windows_version(run_lambda)
+
+    if platform == "darwin":
+        version = get_mac_version(run_lambda)
+        if version is None:
+            return None
+        return "macOS {} ({})".format(version, machine())
+
+    if platform == "linux":
+        # Ubuntu/Debian based
+        desc = get_lsb_version(run_lambda)
+        if desc is not None:
+            return "{} ({})".format(desc, machine())
+
+        # Try reading /etc/*-release
+        desc = check_release_file(run_lambda)
+        if desc is not None:
+            return "{} ({})".format(desc, machine())
+
+        return "{} ({})".format(platform, machine())
+
+    # Unknown platform
+    return platform
+
+
+def get_python_platform():
+    """Return platform.platform()."""
+    import platform
+
+    return platform.platform()
+
+
+def get_libc_version():
+    """Return the "-"-joined platform.libc_ver() on Linux, "N/A" elsewhere."""
+    import platform
+
+    if get_platform() != "linux":
+        return "N/A"
+    return "-".join(platform.libc_ver())
+
+
+def get_pip_packages(run_lambda, patterns=None):
+    """Return `pip list` output. Note: will also find conda-installed pytorch and numpy packages."""
+    if patterns is None:
+        patterns = PIP_PATTERNS + COMMON_PATTERNS + NVIDIA_PATTERNS + ONEAPI_PATTERNS
+
+    pip_version = "pip3" if sys.version_info.major == 3 else "pip"
+
+    os.environ["PIP_DISABLE_PIP_VERSION_CHECK"] = "1"
+    # People generally have pip as `pip` or `pip3`
+    # But here it is invoked as `python -mpip`
+    out = run_and_read_all(
+        run_lambda, [sys.executable, "-mpip", "list", "--format=freeze"]
+    )
+    if out is None:
+        return pip_version, out
+
+    filtered_out = "\n".join(
+        line for line in out.splitlines() if any(name in line for name in patterns)
+    )
+
+    return pip_version, filtered_out
+
+
+def get_cachingallocator_config():
+    """Return PYTORCH_CUDA_ALLOC_CONF, falling back to PYTORCH_HIP_ALLOC_CONF, else ""."""
+    ca_config = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
+    if not ca_config:
+        ca_config = os.environ.get("PYTORCH_HIP_ALLOC_CONF", "")
+    return ca_config
+
+
+def get_cuda_module_loading_config():
+    """Return CUDA_MODULE_LOADING after torch.cuda.init(), or "N/A" without usable CUDA."""
+    if TORCH_AVAILABLE and torch.cuda.is_available():
+        torch.cuda.init()
+        config = os.environ.get("CUDA_MODULE_LOADING", "")
+        return config
+    else:
+        return "N/A"
+
+
+def is_xnnpack_available():
+    """Return str(torch.backends.xnnpack.enabled), or "N/A" when torch is unavailable."""
+    if TORCH_AVAILABLE:
+        import torch.backends.xnnpack
+
+        return str(torch.backends.xnnpack.enabled)  # type: ignore[attr-defined]
+    else:
+        return "N/A"
+
+
+def get_env_info():
+    """
+    Collects environment information to aid in debugging.
+
+    The returned environment information contains details on torch version, is debug build
+    or not, cuda compiled version, gcc version, clang version, cmake version, operating
+    system, libc version, python version, python platform, CUDA availability, CUDA
+    runtime version, CUDA module loading config, GPU model and configuration, Nvidia
+    driver version, cuDNN version, pip version and versions of relevant pip and
+    conda packages, HIP runtime version, MIOpen runtime version,
+    Caching allocator config, XNNPACK availability and CPU information.
+
+    Returns:
+        SystemEnv (namedtuple): A tuple containing various environment details
+            and system information.
+    """
+    run_lambda = run
+    pip_version, pip_list_output = get_pip_packages(run_lambda)
+
+    if TORCH_AVAILABLE:
+        version_str = torch.__version__
+        debug_mode_str = str(torch.version.debug)
+        cuda_available_str = str(torch.cuda.is_available())
+        cuda_version_str = torch.version.cuda
+        # hasattr check matches the other XPU probes in this file (torch.xpu may be absent)
+        xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()
+        xpu_available_str = str(xpu_available)
+        if xpu_available:
+            xpu_available_str = (
+                f"{xpu_available_str}\n"
+                + f"XPU used to build PyTorch: {torch.version.xpu}\n"
+                + f"Intel GPU driver version:\n{get_intel_gpu_driver_version(run_lambda)}\n"
+                + f"Intel GPU models onboard:\n{get_intel_gpu_onboard(run_lambda)}\n"
+                + f"Intel GPU models detected:\n{get_intel_gpu_detected(run_lambda)}"
+            )
+        if (
+            not hasattr(torch.version, "hip") or torch.version.hip is None
+        ):  # cuda version
+            hip_compiled_version = hip_runtime_version = miopen_runtime_version = "N/A"
+        else:  # HIP version
+
+            def get_version_or_na(cfg, prefix):
+                _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s]
+                return _lst[0] if _lst else "N/A"
+
+            cfg = torch._C._show_config().split("\n")
+            hip_runtime_version = get_version_or_na(cfg, "HIP Runtime")
+            miopen_runtime_version = get_version_or_na(cfg, "MIOpen")
+            cuda_version_str = "N/A"
+            hip_compiled_version = torch.version.hip
+    else:
+        version_str = debug_mode_str = cuda_available_str = cuda_version_str = xpu_available_str = "N/A"  # type: ignore[assignment]
+        hip_compiled_version = hip_runtime_version = miopen_runtime_version = "N/A"
+
+    sys_version = sys.version.replace("\n", " ")
+
+    conda_packages = get_conda_packages(run_lambda)
+
+    return SystemEnv(
+        torch_version=version_str,
+        is_debug_build=debug_mode_str,
+        python_version="{} ({}-bit runtime)".format(
+            sys_version, sys.maxsize.bit_length() + 1
+        ),
+        python_platform=get_python_platform(),
+        is_cuda_available=cuda_available_str,
+        cuda_compiled_version=cuda_version_str,
+        cuda_runtime_version=get_running_cuda_version(run_lambda),
+        cuda_module_loading=get_cuda_module_loading_config(),
+        nvidia_gpu_models=get_gpu_info(run_lambda),
+        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
+        cudnn_version=get_cudnn_version(run_lambda),
+        is_xpu_available=xpu_available_str,
+        hip_compiled_version=hip_compiled_version,
+        hip_runtime_version=hip_runtime_version,
+        miopen_runtime_version=miopen_runtime_version,
+        pip_version=pip_version,
+        pip_packages=pip_list_output,
+        conda_packages=conda_packages,
+        os=get_os(run_lambda),
+        libc_version=get_libc_version(),
+        gcc_version=get_gcc_version(run_lambda),
+        clang_version=get_clang_version(run_lambda),
+        cmake_version=get_cmake_version(run_lambda),
+        caching_allocator_config=get_cachingallocator_config(),
+        is_xnnpack_available=is_xnnpack_available(),
+        cpu_info=get_cpu_info(run_lambda),
+    )
+
+
+env_info_fmt = """
+PyTorch version: {torch_version}
+Is debug build: {is_debug_build}
+CUDA used to build PyTorch: {cuda_compiled_version}
+ROCM used to build PyTorch: {hip_compiled_version}
+
+OS: {os}
+GCC version: {gcc_version}
+Clang version: {clang_version}
+CMake version: {cmake_version}
+Libc version: {libc_version}
+
+Python version: {python_version}
+Python platform: {python_platform}
+Is CUDA available: {is_cuda_available}
+CUDA runtime version: {cuda_runtime_version}
+CUDA_MODULE_LOADING set to: {cuda_module_loading}
+GPU models and configuration: {nvidia_gpu_models}
+Nvidia driver version: {nvidia_driver_version}
+cuDNN version: {cudnn_version}
+Is XPU available: {is_xpu_available}
+HIP runtime version: {hip_runtime_version}
+MIOpen runtime version: {miopen_runtime_version}
+Is XNNPACK available: {is_xnnpack_available}
+
+CPU:
+{cpu_info}
+
+Versions of relevant libraries:
+{pip_packages}
+{conda_packages}
+""".strip()
+
+
+def pretty_str(envinfo):
+    """Render a SystemEnv tuple through env_info_fmt, substituting placeholders for missing values."""
+    def replace_nones(dct, replacement="Could not collect"):
+        for key in dct:
+            if dct[key] is not None:
+                continue
+            dct[key] = replacement
+        return dct
+
+    def replace_bools(dct, true="Yes", false="No"):
+        for key in dct:
+            if dct[key] is True:
+                dct[key] = true
+            elif dct[key] is False:
+                dct[key] = false
+        return dct
+
+    def prepend(text, tag="[prepend]"):
+        lines = text.split("\n")
+        updated_lines = [tag + line for line in lines]
+        return "\n".join(updated_lines)
+
+    def replace_if_empty(text, replacement="No relevant packages"):
+        if text is not None and len(text) == 0:
+            return replacement
+        return text
+
+    def maybe_start_on_next_line(string):
+        # If `string` is multiline, prepend a \n to it.
+        if string is not None and len(string.split("\n")) > 1:
+            return "\n{}\n".format(string)
+        return string
+
+    mutable_dict = envinfo._asdict()
+
+    # If nvidia_gpu_models is multiline, start on the next line
+    mutable_dict["nvidia_gpu_models"] = maybe_start_on_next_line(
+        envinfo.nvidia_gpu_models
+    )
+
+    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
+    dynamic_cuda_fields = [
+        "cuda_runtime_version",
+        "nvidia_gpu_models",
+        "nvidia_driver_version",
+    ]
+    all_cuda_fields = dynamic_cuda_fields + ["cudnn_version"]
+    all_dynamic_cuda_fields_missing = all(
+        mutable_dict[field] is None for field in dynamic_cuda_fields
+    )
+    if (
+        TORCH_AVAILABLE
+        and not torch.cuda.is_available()
+        and all_dynamic_cuda_fields_missing
+    ):
+        for field in all_cuda_fields:
+            mutable_dict[field] = "No CUDA"
+        if envinfo.cuda_compiled_version is None:
+            mutable_dict["cuda_compiled_version"] = "None"
+
+    # Replace True with Yes, False with No
+    mutable_dict = replace_bools(mutable_dict)
+
+    # Replace all None objects with 'Could not collect'
+    mutable_dict = replace_nones(mutable_dict)
+
+    # If either of these are '', replace with 'No relevant packages'
+    mutable_dict["pip_packages"] = replace_if_empty(mutable_dict["pip_packages"])
+    mutable_dict["conda_packages"] = replace_if_empty(mutable_dict["conda_packages"])
+
+    # Tag conda and pip packages with a prefix
+    # If they were previously None, they'll show up as ie '[conda] Could not collect'
+    if mutable_dict["pip_packages"]:
+        mutable_dict["pip_packages"] = prepend(
+            mutable_dict["pip_packages"], "[{}] ".format(envinfo.pip_version)
+        )
+    if mutable_dict["conda_packages"]:
+        mutable_dict["conda_packages"] = prepend(
+            mutable_dict["conda_packages"], "[conda] "
+        )
+    mutable_dict["cpu_info"] = envinfo.cpu_info
+    return env_info_fmt.format(**mutable_dict)
+
+
+def get_pretty_env_info():
+    """
+    Returns a pretty string of environment information.
+
+    This function retrieves environment information by calling the `get_env_info` function
+    and then formats the information into a human-readable string. The retrieved environment
+    information is listed in the document of `get_env_info`.
+    This function is used in `python collect_env.py` that should be executed when reporting a bug.
+
+    Returns:
+        str: A pretty string of the environment information.
+    """
+    return pretty_str(get_env_info())
+
+
+def collect_env() -> None:
+    """Print the environment report to stdout and point out the newest minidump, if any."""
+    print("Collecting environment information...")
+    output = get_pretty_env_info()
+    print(output)
+
+    if (
+        TORCH_AVAILABLE
+        and hasattr(torch, "utils")
+        and hasattr(torch.utils, "_crash_handler")
+    ):
+        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
+        if sys.platform == "linux" and os.path.exists(minidump_dir):
+            dumps = [
+                os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)
+            ]
+            # max() raises ValueError on an empty sequence (directory exists but holds no dumps)
+            if not dumps:
+                return
+            latest = max(dumps, key=os.path.getctime)
+            ctime = os.path.getctime(latest)
+            creation_time = datetime.datetime.fromtimestamp(ctime).strftime(
+                "%Y-%m-%d %H:%M:%S"
+            )
+            msg = (
+                "\n*** Detected a minidump at {} created on {}, ".format(
+                    latest, creation_time
+                )
+                + "if this is related to your bug please include it when you file a report ***"
+            )
+            print(msg, file=sys.stderr)
+
+
+def _handle_env_command(_args: argparse.Namespace) -> None:
+    """Entry point for the `env` subcommand; the parsed arguments are not used."""
+    collect_env()
diff --git a/gptqmodel/cli/gptqmodel.py b/gptqmodel/cli/gptqmodel.py
new file mode 100644
index 000000000..d175a9d4c
--- /dev/null
+++ b/gptqmodel/cli/gptqmodel.py
@@ -0,0 +1,30 @@
+import argparse
+from typing import Iterable
+
+from gptqmodel.cli.env import _handle_env_command
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Build the `gptqmodel` argument parser with its `env` subcommand."""
+    parser = argparse.ArgumentParser(prog="gptqmodel", description="Utilities for GPTQModel")
+    subparsers = parser.add_subparsers(dest="command")
+
+    env_parser = subparsers.add_parser("env", help="Inspect the local GPTQModel runtime environment")
+    env_parser.set_defaults(func=_handle_env_command)
+
+    return parser
+
+
+def main(argv: Iterable[str] | None = None) -> None:
+    """Parse argv (sys.argv when None) and run the chosen subcommand, or print help if none was given."""
+    parser = build_parser()
+    args = parser.parse_args(list(argv) if argv is not None else None)
+
+    if hasattr(args, "func"):
+        args.func(args)
+    else:
+        parser.print_help()
+
+
+if __name__ == "__main__":  # pragma: no cover - manual execution entrypoint
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index aed30f355..eada03051 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,9 @@ dependencies = [
     # "flash-attn>=2.8.3", <-- install for lower vram usage
 ]
 
+[project.scripts]
+gptqmodel = "gptqmodel.cli.gptqmodel:main"
+
 [project.urls]
 Homepage = "https://github.com/ModelCloud/GPTQModel"
 
diff --git a/tests/test_kernel_output_torch_fused.py b/tests/test_kernel_output_torch_fused.py
index aa1ddff02..3d03016e9 100644
--- a/tests/test_kernel_output_torch_fused.py
+++ b/tests/test_kernel_output_torch_fused.py
@@ -8,8 +8,8 @@ import torch
 
 from logbar import LogBar
 from parameterized import parameterized
-from torch import Tensor
 from tabulate import tabulate
+from torch import Tensor
 
 from gptqmodel import BACKEND, GPTQModel
 from gptqmodel.nn_modules.qlinear.torch import TorchQuantLinear