diff --git a/gptqmodel/cli/__init__.py b/gptqmodel/cli/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/gptqmodel/cli/env.py b/gptqmodel/cli/env.py
new file mode 100644
index 000000000..f276ad25b
--- /dev/null
+++ b/gptqmodel/cli/env.py
@@ -0,0 +1,984 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+import argparse
+# ruff: noqa
+# code borrowed from https://github.com/pytorch/pytorch/blob/main/torch/utils/collect_env.py
+
+# mypy: allow-untyped-defs
+
+# Unlike the rest of the PyTorch this file must be python2 compliant.
+# This script outputs relevant system environment info
+# Run it with `python collect_env.py` or `python -m torch.utils.collect_env`
+import datetime
+import json
+import locale
+import os
+import re
+import subprocess
+import sys
+from collections import namedtuple
+from typing import cast as _cast
+
+
+try:
+    import torch
+
+    TORCH_AVAILABLE = True
+except (ImportError, NameError, AttributeError, OSError):
+    TORCH_AVAILABLE = False
+
+# System Environment Information
+SystemEnv = namedtuple(
+    "SystemEnv",
+    [
+        "torch_version",
+        "is_debug_build",
+        "cuda_compiled_version",
+        "gcc_version",
+        "clang_version",
+        "cmake_version",
+        "os",
+        "libc_version",
+        "python_version",
+        "python_platform",
+        "is_cuda_available",
+        "cuda_runtime_version",
+        "cuda_module_loading",
+        "nvidia_driver_version",
+        "nvidia_gpu_models",
+        "cudnn_version",
+        "is_xpu_available",
+        "pip_version",  # 'pip' or 'pip3'
+        "pip_packages",
+        "conda_packages",
+        "hip_compiled_version",
+        "hip_runtime_version",
+        "miopen_runtime_version",
+        "caching_allocator_config",
+        "is_xnnpack_available",
+        "cpu_info",
+    ],
+)
+
+COMMON_PATTERNS = [
+    "torch",
+    "numpy",
+    "triton",
+    "onnx",
+    "nccl",
+    "transformers",
+    "nvidia",
+    "flashinfer",
+    "bitblas",
+    "optimum",
+    "sglang",
+    "vllm"
+]
+
+NVIDIA_PATTERNS = [
+    "cuda-cudart",
+    "cuda-cupti",
+    "cuda-libraries",
+    "cuda-opencl",
+    "cuda-nvrtc",
+    "cuda-runtime",
+    "cublas",
+    "cudnn",
+    "cufft",
+    "curand",
+    "cusolver",
+    "cusparse",
+    "nccl",
+    "nvjitlink",
+    "nvtx",
+]
+
+ONEAPI_PATTERNS = [
+    "dpcpp-cpp-rt",
+    "intel-cmplr-lib-rt",
+    "intel-cmplr-lib-ur",
+    "intel-cmplr-lic-rt",
+    "intel-opencl-rt",
+    "intel-sycl-rt",
+    "mkl",
+    "onemkl-sycl-blas",
+    "onemkl-sycl-dft",
+    "onemkl-sycl-lapack",
+    "onemkl-sycl-rng",
+    "onemkl-sycl-sparse",
+    "intel-openmp",
+    "tbb",
+    "impi-rt",
+    "impi-devel",
+    "oneccl",
+    "oneccl-devel",
+    "intel-pti",
+    "umf",
+    "tcmlib",
+]
+
+CONDA_PATTERNS = []
+
+PIP_PATTERNS = []
+
+
+def run(command):
+    """Return (return-code, stdout, stderr)."""
+    shell = type(command) is str
+    p = subprocess.Popen(
+        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell
+    )
+    raw_output, raw_err = p.communicate()
+    rc = p.returncode
+    if get_platform() == "win32":
+        enc = "oem"
+    else:
+        enc = locale.getpreferredencoding()
+    output = raw_output.decode(enc)
+    err = raw_err.decode(enc)
+    return rc, output.strip(), err.strip()
+
+
+def run_and_read_all(run_lambda, command):
+    """Run command using run_lambda; reads and returns entire output if rc is 0."""
+    rc, out, _ = run_lambda(command)
+    if rc != 0:
+        return None
+    return out
+
+
+def run_and_parse_first_match(run_lambda, command, regex):
+    """Run command using run_lambda, returns the first regex match if it exists."""
+    rc, out, _ = run_lambda(command)
+    if rc != 0:
+        return None
+    match = re.search(regex, out)
+    if match is None:
+        return None
+    return match.group(1)
+
+
+def run_and_return_first_line(run_lambda, command):
+    """Run command using run_lambda and returns first line if output is not empty."""
+    rc, out, _ = run_lambda(command)
+    if rc != 0:
+        return None
+    return out.split("\n")[0]
+
+
+def get_conda_packages(run_lambda, patterns=None):
+    if patterns is None:
+        patterns = CONDA_PATTERNS + COMMON_PATTERNS + NVIDIA_PATTERNS + ONEAPI_PATTERNS
+    conda = os.environ.get("CONDA_EXE", "conda")
+    out = run_and_read_all(run_lambda, "{} list".format(conda))
+    if out is None:
+        return out
+
+    return "\n".join(
+        line
+        for line in out.splitlines()
+        if not line.startswith("#") and any(name in line for name in patterns)
+    )
+
+
+def get_gcc_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, "gcc --version", r"gcc (.*)")
+
+
+def get_clang_version(run_lambda):
+    return run_and_parse_first_match(
+        run_lambda, "clang --version", r"clang version (.*)"
+    )
+
+
+def get_cmake_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, "cmake --version", r"cmake (.*)")
+
+
+def get_nvidia_driver_version(run_lambda):
+    if get_platform() == "darwin":
+        cmd = "kextstat | grep -i cuda"
+        return run_and_parse_first_match(
+            run_lambda, cmd, r"com[.]nvidia[.]CUDA [(](.*?)[)]"
+        )
+    smi = get_nvidia_smi()
+    return run_and_parse_first_match(run_lambda, smi, r"Driver Version: (.*?) ")
+
+
+def get_gpu_info(run_lambda):
+    """
+    Collect and return GPU/XPU device information.
+    - Supports CUDA and Intel XPU coexistence.
+    - Reports device name and total memory in GiB.
+    """
+
+    def mib_to_gib(mib: float) -> float:
+        """Convert MiB to GiB."""
+        return mib / 1024.0  # 1 GiB = 1024 MiB
+
+    info_lines = []
+
+    # ─────────────────────────────────────────────
+    # CUDA devices (NVIDIA)
+    # ─────────────────────────────────────────────
+    if hasattr(torch, "cuda") and torch.cuda.is_available():
+        try:
+            smi = get_nvidia_smi()
+            uuid_regex = re.compile(r" \(UUID: .+?\)")
+            rc, out, _ = run_lambda(smi + " -L")
+
+            if rc == 0 and out:
+                out = re.sub(uuid_regex, "", out)
+                # Query GPU memory (MiB)
+                rc2, mem_info, _ = run_lambda(
+                    smi + " --query-gpu=memory.total --format=csv,noheader,nounits"
+                )
+                if rc2 == 0 and mem_info:
+                    mems = [
+                        f"{mib_to_gib(float(m.strip())):.1f} GiB"
+                        for m in mem_info.strip().splitlines()
+                    ]
+                    out_lines = out.strip().splitlines()
+                    info_lines.append("CUDA devices:")
+                    for i, line in enumerate(out_lines):
+                        mem_str = mems[i] if i < len(mems) else "?"
+                        info_lines.append(f" - {line.strip()} ({mem_str})")
+                else:
+                    info_lines.append("CUDA devices detected, but memory info unavailable.")
+            else:
+                # fallback to torch cuda properties
+                info_lines.append("CUDA devices:")
+                for i in range(torch.cuda.device_count()):
+                    prop = torch.cuda.get_device_properties(i)
+                    mem_gib = prop.total_memory / (1024**3)
+                    info_lines.append(f" - GPU {i}: {prop.name} ({mem_gib:.1f} GiB)")
+        except Exception as e:
+            info_lines.append(f"CUDA info error: {e}")
+    else:
+        info_lines.append("CUDA stats not collected (CUDA not available)")
+
+    # ─────────────────────────────────────────────
+    # XPU devices (Intel GPU / NPU)
+    # ─────────────────────────────────────────────
+    if hasattr(torch, "xpu") and torch.xpu.is_available():
+        try:
+            info_lines.append("\nXPU devices:")
+            for i in range(torch.xpu.device_count()):
+                dev_name = torch.xpu.get_device_name(i)
+                dev_prop = torch.xpu.get_device_properties(i)
+                mem_gib = getattr(dev_prop, "total_memory", 0) / (1024**3)
+                info_lines.append(f" - XPU {i}: {dev_name} ({mem_gib:.1f} GiB)")
+        except Exception as e:
+            info_lines.append(f"XPU detected, but unable to read properties ({e})")
+    else:
+        info_lines.append("XPU stats not collected (XPU not available)")
+
+    return "\n".join(info_lines)
+
+
+def get_running_cuda_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, "nvcc --version", r"release .+ V(.*)")
+
+
+def get_cudnn_version(run_lambda):
+    """Return a list of libcudnn.so; it's hard to tell which one is being used."""
+    if get_platform() == "win32":
+        system_root = os.environ.get("SYSTEMROOT", "C:\\Windows")
+        cuda_path = os.environ.get("CUDA_PATH", "%CUDA_PATH%")
+        where_cmd = os.path.join(system_root, "System32", "where")
+        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
+    elif get_platform() == "darwin":
+        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
+        # https://docs.nvidia.com/cuda/archive/9.0/cuda-installation-guide-mac-os-x/index.html#installation
+        # https://docs.nvidia.com/deeplearning/cudnn/installation/latest/
+        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
+        cudnn_cmd = "ls /usr/local/cuda/lib/libcudnn*"
+    else:
+        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'
+    rc, out, _ = run_lambda(cudnn_cmd)
+    # find will return 1 if there are permission errors or if not found
+    if len(out) == 0 or (rc != 1 and rc != 0):
+        l = os.environ.get("CUDNN_LIBRARY")
+        if l is not None and os.path.isfile(l):
+            return os.path.realpath(l)
+        return None
+    files_set = set()
+    for fn in out.split("\n"):
+        fn = os.path.realpath(fn)  # eliminate symbolic links
+        if os.path.isfile(fn):
+            files_set.add(fn)
+    if not files_set:
+        return None
+    # Alphabetize the result because the order is non-deterministic otherwise
+    files = sorted(files_set)
+    if len(files) == 1:
+        return files[0]
+    result = "\n".join(files)
+    return "Probably one of the following:\n{}".format(result)
+
+
+def get_nvidia_smi():
+    # Note: nvidia-smi is currently available only on Windows and Linux
+    smi = "nvidia-smi"
+    if get_platform() == "win32":
+        system_root = os.environ.get("SYSTEMROOT", "C:\\Windows")
+        program_files_root = os.environ.get("PROGRAMFILES", "C:\\Program Files")
+        legacy_path = os.path.join(
+            program_files_root, "NVIDIA Corporation", "NVSMI", smi
+        )
+        new_path = os.path.join(system_root, "System32", smi)
+        smis = [new_path, legacy_path]
+        for candidate_smi in smis:
+            if os.path.exists(candidate_smi):
+                smi = '"{}"'.format(candidate_smi)
+                break
+    return smi
+
+
+def _detect_linux_pkg_manager():
+    if get_platform() != "linux":
+        return "N/A"
+    for mgr_name in ["dpkg", "dnf", "yum", "zypper"]:
+        rc, _, _ = run(f"which {mgr_name}")
+        if rc == 0:
+            return mgr_name
+    return "N/A"
+
+
+def get_linux_pkg_version(run_lambda, pkg_name):
+    pkg_mgr = _detect_linux_pkg_manager()
+    if pkg_mgr == "N/A":
+        return "N/A"
+
+    grep_version = {
+        "dpkg": {
+            "field_index": 2,
+            "command": "dpkg -l | grep {}",
+        },
+        "dnf": {
+            "field_index": 1,
+            "command": "dnf list | grep {}",
+        },
+        "yum": {
+            "field_index": 1,
+            "command": "yum list | grep {}",
+        },
+        "zypper": {
+            "field_index": 2,
+            "command": "zypper info {} | grep Version",
+        },
+    }
+
+    field_index: int = int(_cast(int, grep_version[pkg_mgr]["field_index"]))
+    cmd: str = str(grep_version[pkg_mgr]["command"])
+    cmd = cmd.format(pkg_name)
+    ret = run_and_read_all(run_lambda, cmd)
+    if ret is None or ret == "":
+        return "N/A"
+    lst = re.sub(" +", " ", ret).split(" ")
+    if len(lst) <= field_index:
+        return "N/A"
+    return lst[field_index]
+
+
+def get_intel_gpu_driver_version(run_lambda):
+    lst = []
+    platform = get_platform()
+    if platform == "linux":
+        pkgs = {  # type: ignore[var-annotated]
+            "dpkg": {
+                "intel-opencl-icd",
+                "libze1",
+                "level-zero",
+            },
+            "dnf": {
+                "intel-opencl",
+                "level-zero",
+            },
+            "yum": {
+                "intel-opencl",
+                "level-zero",
+            },
+            "zypper": {
+                "intel-opencl",
+                "level-zero",
+            },
+        }.get(_detect_linux_pkg_manager(), {})
+        for pkg in pkgs:
+            ver = get_linux_pkg_version(run_lambda, pkg)
+            if ver != "N/A":
+                lst.append(f"* {pkg}:\t{ver}")
+    if platform in ["win32", "cygwin"]:
+        txt = run_and_read_all(
+            run_lambda,
+            'powershell.exe "gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\"DISPLAY\\"\
+            -and $_.Manufacturer -match \\"Intel\\"} | Select-Object -Property DeviceName,DriverVersion,DriverDate\
+            | ConvertTo-Json"',
+        )
+        try:
+            obj = json.loads(txt)
+            if type(obj) is list:
+                for o in obj:
+                    lst.append(
+                        f'* {o["DeviceName"]}: {o["DriverVersion"]} ({o["DriverDate"]})'
+                    )
+            else:
+                lst.append(f'* {obj["DriverVersion"]} ({obj["DriverDate"]})')
+        except ValueError as e:
+            lst.append(txt)
+            lst.append(str(e))
+    return "\n".join(lst)
+
+
+def get_intel_gpu_onboard(run_lambda):
+    lst: list[str] = []
+    platform = get_platform()
+    if platform == "linux":
+        txt = run_and_read_all(run_lambda, "xpu-smi discovery -j")
+        if txt:
+            try:
+                obj = json.loads(txt)
+                device_list = obj.get("device_list", [])
+                if isinstance(device_list, list) and device_list:
+                    lst.extend(f'* {device["device_name"]}' for device in device_list)
+                else:
+                    lst.append("N/A")
+            except (ValueError, TypeError) as e:
+                lst.append(txt)
+                lst.append(str(e))
+        else:
+            lst.append("N/A")
+    if platform in ["win32", "cygwin"]:
+        txt = run_and_read_all(
+            run_lambda,
+            'powershell.exe "gwmi -Class Win32_PnpSignedDriver | where{$_.DeviceClass -eq \\"DISPLAY\\"\
+            -and $_.Manufacturer -match \\"Intel\\"} | Select-Object -Property DeviceName | ConvertTo-Json"',
+        )
+        if txt:
+            try:
+                obj = json.loads(txt)
+                if isinstance(obj, list) and obj:
+                    lst.extend(f'* {device["DeviceName"]}' for device in obj)
+                else:
+                    lst.append(f'* {obj.get("DeviceName", "N/A")}')
+            except ValueError as e:
+                lst.append(txt)
+                lst.append(str(e))
+        else:
+            lst.append("N/A")
+    return "\n".join(lst)
+
+
+def get_intel_gpu_detected(run_lambda):
+    if not TORCH_AVAILABLE or not hasattr(torch, "xpu"):
+        return "N/A"
+
+    device_count = torch.xpu.device_count()
+    if device_count == 0:
+        return "N/A"
+
+    devices = [
+        f"* [{i}] {torch.xpu.get_device_properties(i)}" for i in range(device_count)
+    ]
+    return "\n".join(devices)
+
+
+# example outputs of CPU infos
+#  * linux
+#    Architecture:            x86_64
+#      CPU op-mode(s):        32-bit, 64-bit
+#      Address sizes:         46 bits physical, 48 bits virtual
+#      Byte Order:            Little Endian
+#    CPU(s):                  128
+#      On-line CPU(s) list:   0-127
+#    Vendor ID:               GenuineIntel
+#      Model name:            Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+#        CPU family:          6
+#        Model:               106
+#        Thread(s) per core:  2
+#        Core(s) per socket:  32
+#        Socket(s):           2
+#        Stepping:            6
+#        BogoMIPS:            5799.78
+#        Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr
+#                             sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl
+#                             xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16
+#                             pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
+#                             hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced
+#                             fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap
+#                             avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1
+#                             xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq
+#                             avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities
+#    Virtualization features:
+#      Hypervisor vendor:     KVM
+#      Virtualization type:   full
+#    Caches (sum of all):
+#      L1d:                   3 MiB (64 instances)
+#      L1i:                   2 MiB (64 instances)
+#      L2:                    80 MiB (64 instances)
+#      L3:                    108 MiB (2 instances)
+#    NUMA:
+#      NUMA node(s):          2
+#      NUMA node0 CPU(s):     0-31,64-95
+#      NUMA node1 CPU(s):     32-63,96-127
+#    Vulnerabilities:
+#      Itlb multihit:         Not affected
+#      L1tf:                  Not affected
+#      Mds:                   Not affected
+#      Meltdown:              Not affected
+#      Mmio stale data:       Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
+#      Retbleed:              Not affected
+#      Spec store bypass:     Mitigation; Speculative Store Bypass disabled via prctl and seccomp
+#      Spectre v1:            Mitigation; usercopy/swapgs barriers and __user pointer sanitization
+#      Spectre v2:            Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
+#      Srbds:                 Not affected
+#      Tsx async abort:       Not affected
+#  * win32
+#    Architecture=9
+#    CurrentClockSpeed=2900
+#    DeviceID=CPU0
+#    Family=179
+#    L2CacheSize=40960
+#    L2CacheSpeed=
+#    Manufacturer=GenuineIntel
+#    MaxClockSpeed=2900
+#    Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+#    ProcessorType=3
+#    Revision=27142
+#
+#    Architecture=9
+#    CurrentClockSpeed=2900
+#    DeviceID=CPU1
+#    Family=179
+#    L2CacheSize=40960
+#    L2CacheSpeed=
+#    Manufacturer=GenuineIntel
+#    MaxClockSpeed=2900
+#    Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+#    ProcessorType=3
+#    Revision=27142
+
+
+def get_cpu_info(run_lambda):
+    rc, out, err = 0, "", ""
+    if get_platform() == "linux":
+        rc, out, err = run_lambda("lscpu")
+    elif get_platform() == "win32":
+        rc, out, err = run_lambda(
+            'powershell.exe "gwmi -Class Win32_Processor | Select-Object -Property Name,Manufacturer,Family,\
+            Architecture,ProcessorType,DeviceID,CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision\
+            | ConvertTo-Json"'
+        )
+        if rc == 0:
+            lst = []
+            try:
+                obj = json.loads(out)
+                if type(obj) is list:
+                    for o in obj:
+                        lst.append("----------------------")
+                        lst.extend([f"{k}: {v}" for (k, v) in o.items()])
+                else:
+                    lst.extend([f"{k}: {v}" for (k, v) in obj.items()])
+            except ValueError as e:
+                lst.append(out)
+                lst.append(str(e))
+            out = "\n".join(lst)
+    elif get_platform() == "darwin":
+        rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string")
+    cpu_info = "None"
+    if rc == 0:
+        cpu_info = out
+    else:
+        cpu_info = err
+    return cpu_info
+
+
+def get_platform():
+    if sys.platform.startswith("linux"):
+        return "linux"
+    elif sys.platform.startswith("win32"):
+        return "win32"
+    elif sys.platform.startswith("cygwin"):
+        return "cygwin"
+    elif sys.platform.startswith("darwin"):
+        return "darwin"
+    else:
+        return sys.platform
+
+
+def get_mac_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, "sw_vers -productVersion", r"(.*)")
+
+
+def get_windows_version(run_lambda):
+    ret = run_and_read_all(
+        run_lambda,
+        'powershell.exe "gwmi -Class Win32_OperatingSystem | Select-Object -Property Caption,\
+        OSArchitecture,Version | ConvertTo-Json"',
+    )
+    try:
+        obj = json.loads(ret)
+        ret = f'{obj["Caption"]} ({obj["Version"]} {obj["OSArchitecture"]})'
+    except ValueError as e:
+        ret += f"\n{str(e)}"
+    return ret
+
+
+def get_lsb_version(run_lambda):
+    return run_and_parse_first_match(
+        run_lambda, "lsb_release -a", r"Description:\t(.*)"
+    )
+
+
+def check_release_file(run_lambda):
+    return run_and_parse_first_match(
+        run_lambda, "cat /etc/*-release", r'PRETTY_NAME="(.*)"'
+    )
+
+
+def get_os(run_lambda):
+    from platform import machine
+
+    platform = get_platform()
+
+    if platform in ["win32", "cygwin"]:
+        return get_windows_version(run_lambda)
+
+    if platform == "darwin":
+        version = get_mac_version(run_lambda)
+        if version is None:
+            return None
+        return "macOS {} ({})".format(version, machine())
+
+    if platform == "linux":
+        # Ubuntu/Debian based
+        desc = get_lsb_version(run_lambda)
+        if desc is not None:
+            return "{} ({})".format(desc, machine())
+
+        # Try reading /etc/*-release
+        desc = check_release_file(run_lambda)
+        if desc is not None:
+            return "{} ({})".format(desc, machine())
+
+        return "{} ({})".format(platform, machine())
+
+    # Unknown platform
+    return platform
+
+
+def get_python_platform():
+    import platform
+
+    return platform.platform()
+
+
+def get_libc_version():
+    import platform
+
+    if get_platform() != "linux":
+        return "N/A"
+    return "-".join(platform.libc_ver())
+
+
+def get_pip_packages(run_lambda, patterns=None):
+    """Return `pip list` output. Note: will also find conda-installed pytorch and numpy packages."""
+    if patterns is None:
+        patterns = PIP_PATTERNS + COMMON_PATTERNS + NVIDIA_PATTERNS + ONEAPI_PATTERNS
+
+    pip_version = "pip3" if sys.version_info.major == 3 else "pip"
+
+    os.environ["PIP_DISABLE_PIP_VERSION_CHECK"] = "1"
+    # People generally have pip as `pip` or `pip3`
+    # But here it is invoked as `python -mpip`
+    out = run_and_read_all(
+        run_lambda, [sys.executable, "-mpip", "list", "--format=freeze"]
+    )
+    if out is None:
+        return pip_version, out
+
+    filtered_out = "\n".join(
+        line for line in out.splitlines() if any(name in line for name in patterns)
+    )
+
+    return pip_version, filtered_out
+
+
+def get_cachingallocator_config():
+    ca_config = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
+    if not ca_config:
+        ca_config = os.environ.get("PYTORCH_HIP_ALLOC_CONF", "")
+    return ca_config
+
+
+def get_cuda_module_loading_config():
+    if TORCH_AVAILABLE and torch.cuda.is_available():
+        torch.cuda.init()
+        config = os.environ.get("CUDA_MODULE_LOADING", "")
+        return config
+    else:
+        return "N/A"
+
+
+def is_xnnpack_available():
+    if TORCH_AVAILABLE:
+        import torch.backends.xnnpack
+
+        return str(torch.backends.xnnpack.enabled)  # type: ignore[attr-defined]
+    else:
+        return "N/A"
+
+
+def get_env_info():
+    """
+    Collects environment information to aid in debugging.
+
+    The returned environment information contains details on torch version, is debug build
+    or not, cuda compiled version, gcc version, clang version, cmake version, operating
+    system, libc version, python version, python platform, CUDA availability, CUDA
+    runtime version, CUDA module loading config, GPU model and configuration, Nvidia
+    driver version, cuDNN version, pip version and versions of relevant pip and
+    conda packages, HIP runtime version, MIOpen runtime version,
+    Caching allocator config, XNNPACK availability and CPU information.
+
+    Returns:
+        SystemEnv (namedtuple): A tuple containing various environment details
+            and system information.
+    """
+    run_lambda = run
+    pip_version, pip_list_output = get_pip_packages(run_lambda)
+
+    if TORCH_AVAILABLE:
+        version_str = torch.__version__
+        debug_mode_str = str(torch.version.debug)
+        cuda_available_str = str(torch.cuda.is_available())
+        cuda_version_str = torch.version.cuda
+        xpu_available_str = str(torch.xpu.is_available())
+        if torch.xpu.is_available():
+            xpu_available_str = (
+                f"{xpu_available_str}\n"
+                + f"XPU used to build PyTorch: {torch.version.xpu}\n"
+                + f"Intel GPU driver version:\n{get_intel_gpu_driver_version(run_lambda)}\n"
+                + f"Intel GPU models onboard:\n{get_intel_gpu_onboard(run_lambda)}\n"
+                + f"Intel GPU models detected:\n{get_intel_gpu_detected(run_lambda)}"
+            )
+        if (
+            not hasattr(torch.version, "hip") or torch.version.hip is None
+        ):  # cuda version
+            hip_compiled_version = hip_runtime_version = miopen_runtime_version = "N/A"
+        else:  # HIP version
+
+            def get_version_or_na(cfg, prefix):
+                _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s]
+                return _lst[0] if _lst else "N/A"
+
+            cfg = torch._C._show_config().split("\n")
+            hip_runtime_version = get_version_or_na(cfg, "HIP Runtime")
+            miopen_runtime_version = get_version_or_na(cfg, "MIOpen")
+            cuda_version_str = "N/A"
+            hip_compiled_version = torch.version.hip
+    else:
+        version_str = debug_mode_str = cuda_available_str = cuda_version_str = xpu_available_str = "N/A"  # type: ignore[assignment]
+        hip_compiled_version = hip_runtime_version = miopen_runtime_version = "N/A"
+
+    sys_version = sys.version.replace("\n", " ")
+
+    conda_packages = get_conda_packages(run_lambda)
+
+    return SystemEnv(
+        torch_version=version_str,
+        is_debug_build=debug_mode_str,
+        python_version="{} ({}-bit runtime)".format(
+            sys_version, sys.maxsize.bit_length() + 1
+        ),
+        python_platform=get_python_platform(),
+        is_cuda_available=cuda_available_str,
+        cuda_compiled_version=cuda_version_str,
+        cuda_runtime_version=get_running_cuda_version(run_lambda),
+        cuda_module_loading=get_cuda_module_loading_config(),
+        nvidia_gpu_models=get_gpu_info(run_lambda),
+        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
+        cudnn_version=get_cudnn_version(run_lambda),
+        is_xpu_available=xpu_available_str,
+        hip_compiled_version=hip_compiled_version,
+        hip_runtime_version=hip_runtime_version,
+        miopen_runtime_version=miopen_runtime_version,
+        pip_version=pip_version,
+        pip_packages=pip_list_output,
+        conda_packages=conda_packages,
+        os=get_os(run_lambda),
+        libc_version=get_libc_version(),
+        gcc_version=get_gcc_version(run_lambda),
+        clang_version=get_clang_version(run_lambda),
+        cmake_version=get_cmake_version(run_lambda),
+        caching_allocator_config=get_cachingallocator_config(),
+        is_xnnpack_available=is_xnnpack_available(),
+        cpu_info=get_cpu_info(run_lambda),
+    )
+
+
+env_info_fmt = """
+PyTorch version: {torch_version}
+Is debug build: {is_debug_build}
+CUDA used to build PyTorch: {cuda_compiled_version}
+ROCM used to build PyTorch: {hip_compiled_version}
+
+OS: {os}
+GCC version: {gcc_version}
+Clang version: {clang_version}
+CMake version: {cmake_version}
+Libc version: {libc_version}
+
+Python version: {python_version}
+Python platform: {python_platform}
+Is CUDA available: {is_cuda_available}
+CUDA runtime version: {cuda_runtime_version}
+CUDA_MODULE_LOADING set to: {cuda_module_loading}
+GPU models and configuration: {nvidia_gpu_models}
+Nvidia driver version: {nvidia_driver_version}
+cuDNN version: {cudnn_version}
+Is XPU available: {is_xpu_available}
+HIP runtime version: {hip_runtime_version}
+MIOpen runtime version: {miopen_runtime_version}
+Is XNNPACK available: {is_xnnpack_available}
+
+CPU:
+{cpu_info}
+
+Versions of relevant libraries:
+{pip_packages}
+{conda_packages}
+""".strip()
+
+
+def pretty_str(envinfo):
+    def replace_nones(dct, replacement="Could not collect"):
+        for key in dct:
+            if dct[key] is not None:
+                continue
+            dct[key] = replacement
+        return dct
+
+    def replace_bools(dct, true="Yes", false="No"):
+        for key in dct:
+            if dct[key] is True:
+                dct[key] = true
+            elif dct[key] is False:
+                dct[key] = false
+        return dct
+
+    def prepend(text, tag="[prepend]"):
+        lines = text.split("\n")
+        updated_lines = [tag + line for line in lines]
+        return "\n".join(updated_lines)
+
+    def replace_if_empty(text, replacement="No relevant packages"):
+        if text is not None and len(text) == 0:
+            return replacement
+        return text
+
+    def maybe_start_on_next_line(string):
+        # If `string` is multiline, prepend a \n to it.
+        if string is not None and len(string.split("\n")) > 1:
+            return "\n{}\n".format(string)
+        return string
+
+    mutable_dict = envinfo._asdict()
+
+    # If nvidia_gpu_models is multiline, start on the next line
+    mutable_dict["nvidia_gpu_models"] = maybe_start_on_next_line(
+        envinfo.nvidia_gpu_models
+    )
+
+    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
+    dynamic_cuda_fields = [
+        "cuda_runtime_version",
+        "nvidia_gpu_models",
+        "nvidia_driver_version",
+    ]
+    all_cuda_fields = dynamic_cuda_fields + ["cudnn_version"]
+    all_dynamic_cuda_fields_missing = all(
+        mutable_dict[field] is None for field in dynamic_cuda_fields
+    )
+    if (
+        TORCH_AVAILABLE
+        and not torch.cuda.is_available()
+        and all_dynamic_cuda_fields_missing
+    ):
+        for field in all_cuda_fields:
+            mutable_dict[field] = "No CUDA"
+        if envinfo.cuda_compiled_version is None:
+            mutable_dict["cuda_compiled_version"] = "None"
+
+    # Replace True with Yes, False with No
+    mutable_dict = replace_bools(mutable_dict)
+
+    # Replace all None objects with 'Could not collect'
+    mutable_dict = replace_nones(mutable_dict)
+
+    # If either of these are '', replace with 'No relevant packages'
+    mutable_dict["pip_packages"] = replace_if_empty(mutable_dict["pip_packages"])
+    mutable_dict["conda_packages"] = replace_if_empty(mutable_dict["conda_packages"])
+
+    # Tag conda and pip packages with a prefix
+    # If they were previously None, they'll show up as ie '[conda] Could not collect'
+    if mutable_dict["pip_packages"]:
+        mutable_dict["pip_packages"] = prepend(
+            mutable_dict["pip_packages"], "[{}] ".format(envinfo.pip_version)
+        )
+    if mutable_dict["conda_packages"]:
+        mutable_dict["conda_packages"] = prepend(
+            mutable_dict["conda_packages"], "[conda] "
+        )
+    mutable_dict["cpu_info"] = envinfo.cpu_info
+    return env_info_fmt.format(**mutable_dict)
+
+
+def get_pretty_env_info():
+    """
+    Returns a pretty string of environment information.
+
+    This function retrieves environment information by calling the `get_env_info` function
+    and then formats the information into a human-readable string. The retrieved environment
+    information is listed in the document of `get_env_info`.
+    This function is used in `python collect_env.py` that should be executed when reporting a bug.
+
+    Returns:
+        str: A pretty string of the environment information.
+    """
+    return pretty_str(get_env_info())
+
+
+def collect_env() -> None:
+    print("Collecting environment information...")
+    output = get_pretty_env_info()
+    print(output)
+
+    if (
+        TORCH_AVAILABLE
+        and hasattr(torch, "utils")
+        and hasattr(torch.utils, "_crash_handler")
+    ):
+        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
+        if sys.platform == "linux" and os.path.exists(minidump_dir):
+            dumps = [
+                os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)
+            ]
+            latest = max(dumps, key=os.path.getctime)
+            ctime = os.path.getctime(latest)
+            creation_time = datetime.datetime.fromtimestamp(ctime).strftime(
+                "%Y-%m-%d %H:%M:%S"
+            )
+            msg = (
+                "\n*** Detected a minidump at {} created on {}, ".format(
+                    latest, creation_time
+                )
+                + "if this is related to your bug please include it when you file a report ***"
+            )
+            print(msg, file=sys.stderr)
+
+
+def _handle_env_command(_args: argparse.Namespace) -> None:
+    collect_env()
diff --git a/gptqmodel/cli/gptqmodel.py b/gptqmodel/cli/gptqmodel.py
new file mode 100644
index 000000000..d175a9d4c
--- /dev/null
+++ b/gptqmodel/cli/gptqmodel.py
@@ -0,0 +1,28 @@
+import argparse
+from typing import Iterable
+
+from gptqmodel.cli.env import _handle_env_command
+
+
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(prog="gptqmodel", description="Utilities for GPTQModel")
+    subparsers = parser.add_subparsers(dest="command")
+
+    env_parser = subparsers.add_parser("env", help="Inspect the local GPTQModel runtime environment")
+    env_parser.set_defaults(func=_handle_env_command)
+
+    return parser
+
+
+def main(argv: Iterable[str] | None = None) -> None:
+    parser = build_parser()
+    args = parser.parse_args(list(argv) if argv is not None else None)
+
+    if hasattr(args, "func"):
+        args.func(args)
+    else:
+        parser.print_help()
+
+
+if __name__ == "__main__":  # pragma: no cover - manual execution entrypoint
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index aed30f355..eada03051 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,6 +62,9 @@ dependencies = [
 # "flash-attn>=2.8.3", <-- install for lower vram usage
 ]
 
+[project.scripts]
+gptqmodel = "gptqmodel.cli.gptqmodel:main"
+
 [project.urls]
 Homepage = "https://github.com/ModelCloud/GPTQModel"
 
diff --git a/tests/test_kernel_output_torch_fused.py b/tests/test_kernel_output_torch_fused.py
index aa1ddff02..3d03016e9 100644
--- a/tests/test_kernel_output_torch_fused.py
+++ b/tests/test_kernel_output_torch_fused.py
@@ -8,8 +8,8 @@
 import torch
 from logbar import LogBar
 from parameterized import parameterized
-from torch import Tensor
 from tabulate import tabulate
+from torch import Tensor
 
 from gptqmodel import BACKEND, GPTQModel
 from gptqmodel.nn_modules.qlinear.torch import TorchQuantLinear