diff --git a/gptqmodel/utils/nogil_patcher.py b/gptqmodel/utils/nogil_patcher.py index 7caf904fd..88a3118e1 100644 --- a/gptqmodel/utils/nogil_patcher.py +++ b/gptqmodel/utils/nogil_patcher.py @@ -5,8 +5,8 @@ """Straightforward monkey patch helpers for nogil runtimes.""" -import time import threading +import time from .safe import ThreadSafe @@ -33,8 +33,8 @@ def patch_safetensors_save_file() -> None: def patch_triton_autotuner() -> None: try: - from triton.runtime import autotuner as module import triton + from triton.runtime import autotuner as module except ImportError: return diff --git a/gptqmodel/utils/stream.py b/gptqmodel/utils/stream.py index 16a0327d8..38a48bc58 100644 --- a/gptqmodel/utils/stream.py +++ b/gptqmodel/utils/stream.py @@ -6,15 +6,16 @@ from __future__ import annotations import threading +from concurrent.futures import Future from dataclasses import dataclass from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple import torch -from concurrent.futures import Future from .logger import setup_logger from .threadx import DeviceThreadPool + log = setup_logger() diff --git a/gptqmodel/utils/threadx.py b/gptqmodel/utils/threadx.py index c55098688..226b078cd 100644 --- a/gptqmodel/utils/threadx.py +++ b/gptqmodel/utils/threadx.py @@ -13,12 +13,13 @@ import threading import time import traceback -from datetime import datetime, timezone from concurrent.futures import Future +from datetime import datetime, timezone from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Union import torch + try: from device_smi import Device # type: ignore except Exception: # pragma: no cover - defensive: optional dependency may be unavailable diff --git a/setup.py b/setup.py index a52e681d5..28e2fc6b5 100644 --- a/setup.py +++ b/setup.py @@ -3,11 +3,11 @@ # SPDX-License-Identifier: Apache-2.0 # Contact: qubitium@modelcloud.ai, x.com/qubitium import os +import re import subprocess import sys from pathlib import Path -import pcre as re from setuptools import find_packages, setup from setuptools.command.bdist_wheel import bdist_wheel as _bdist_wheel diff --git a/tests/models/model_test.py b/tests/models/model_test.py index ce1a8f067..7a8626b4b 100644 --- a/tests/models/model_test.py +++ b/tests/models/model_test.py @@ -22,8 +22,8 @@ # -- end do not touch -from pathlib import Path # noqa: E402 from enum import Enum # noqa: E402 +from pathlib import Path # noqa: E402 from typing import Dict, List # noqa: E402 from logbar import LogBar # noqa: E402 diff --git a/tests/models/test_act_group_aware.py b/tests/models/test_act_group_aware.py index 66e5009bc..6263b2882 100644 --- a/tests/models/test_act_group_aware.py +++ b/tests/models/test_act_group_aware.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_apertus.py b/tests/models/test_apertus.py index a609c8b8f..a842fced4 100644 --- a/tests/models/test_apertus.py +++ b/tests/models/test_apertus.py @@ -4,9 +4,9 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest -from gptqmodel.utils.eval import EVAL from gptqmodel import BACKEND +from gptqmodel.utils.eval import EVAL class TestApertus(ModelTest): diff --git a/tests/models/test_cohere.py b/tests/models/test_cohere.py index 35261d42a..ef8e2d730 100644 --- a/tests/models/test_cohere.py +++ b/tests/models/test_cohere.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_cohere2.py b/tests/models/test_cohere2.py index 6b25bf6e6..ba087db36 100644 --- a/tests/models/test_cohere2.py +++ b/tests/models/test_cohere2.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_deci.py b/tests/models/test_deci.py index 9fb81818f..98be7587f 100644 --- a/tests/models/test_deci.py +++ b/tests/models/test_deci.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_dream.py b/tests/models/test_dream.py index bb053d1be..7af5f7ebc 100644 --- a/tests/models/test_dream.py +++ b/tests/models/test_dream.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_falcon.py b/tests/models/test_falcon.py index 99d696646..07c3392f8 100644 --- a/tests/models/test_falcon.py +++ b/tests/models/test_falcon.py @@ -5,6 +5,7 @@ import torch # noqa: E402from tests.model_test import ModelTest from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_glm.py b/tests/models/test_glm.py index c315a0301..27d8139c7 100644 --- a/tests/models/test_glm.py +++ b/tests/models/test_glm.py @@ -4,8 +4,10 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL + # | Metric | MARLIN | # |--------------------------------|----------| # | arc_challenge :: acc,none | 0.5154 | diff --git a/tests/models/test_glm4_moe.py b/tests/models/test_glm4_moe.py index a53942fb4..bf2e32561 100644 --- a/tests/models/test_glm4_moe.py +++ b/tests/models/test_glm4_moe.py @@ -4,8 +4,10 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL + class TestGlm4Moe(ModelTest): NATIVE_MODEL_ID = "/monster/data/model/GLM-4.6/" DELETE_QUANTIZED_MODEL = False diff --git a/tests/models/test_gpt_oss.py b/tests/models/test_gpt_oss.py index 38675d188..d7e3f99f2 100644 --- a/tests/models/test_gpt_oss.py +++ b/tests/models/test_gpt_oss.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_granite.py b/tests/models/test_granite.py index 4ea23c751..2b22089eb 100644 --- a/tests/models/test_granite.py +++ b/tests/models/test_granite.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_hymba.py b/tests/models/test_hymba.py index c829f7b0f..e4f1d51df 100644 --- a/tests/models/test_hymba.py +++ b/tests/models/test_hymba.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_ling.py b/tests/models/test_ling.py index 65e6650dd..1af8547b4 100644 --- a/tests/models/test_ling.py +++ b/tests/models/test_ling.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_llama3_2.py b/tests/models/test_llama3_2.py index 5833808d8..5860f3133 100644 --- a/tests/models/test_llama3_2.py +++ b/tests/models/test_llama3_2.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_llama3_2_awq.py b/tests/models/test_llama3_2_awq.py index 4d7919158..14e371d58 100644 --- a/tests/models/test_llama3_2_awq.py +++ b/tests/models/test_llama3_2_awq.py @@ -4,9 +4,9 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest -from gptqmodel.utils.eval import EVAL from gptqmodel.quantization import FORMAT, METHOD +from gptqmodel.utils.eval import EVAL # a100:0 diff --git a/tests/models/test_llama4.py b/tests/models/test_llama4.py index f04410d2a..26cd5c0ef 100644 --- a/tests/models/test_llama4.py +++ b/tests/models/test_llama4.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_longllama.py b/tests/models/test_longllama.py index ea9992553..4b6a23711 100644 --- a/tests/models/test_longllama.py +++ b/tests/models/test_longllama.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_mimo.py b/tests/models/test_mimo.py index 68f140aed..1485cbfc4 100644 --- a/tests/models/test_mimo.py +++ b/tests/models/test_mimo.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_nemotron_ultra.py b/tests/models/test_nemotron_ultra.py index 44eba9a54..023a68bc1 100644 --- a/tests/models/test_nemotron_ultra.py +++ b/tests/models/test_nemotron_ultra.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_qwen2_5.py b/tests/models/test_qwen2_5.py index 672103c1a..02a7d3639 100644 --- a/tests/models/test_qwen2_5.py +++ b/tests/models/test_qwen2_5.py @@ -4,8 +4,10 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL + # | Metric | MARLIN | # |--------------------------------|----------| # | arc_challenge :: acc,none | 0.2884 | diff --git a/tests/models/test_qwen2_5_omni.py b/tests/models/test_qwen2_5_omni.py index 26d5dca15..2da963cf4 100644 --- a/tests/models/test_qwen2_5_omni.py +++ b/tests/models/test_qwen2_5_omni.py @@ -6,9 +6,9 @@ import soundfile as sf from model_test import ModelTest -from gptqmodel.utils.eval import EVAL from gptqmodel.models.definitions.qwen2_5_omni import Qwen2_5_OmniGPTQ +from gptqmodel.utils.eval import EVAL class TestQwen2_5_Omni(ModelTest): diff --git a/tests/models/test_qwen2_5_vl.py b/tests/models/test_qwen2_5_vl.py index 75ba08f16..b21c629fe 100644 --- a/tests/models/test_qwen2_5_vl.py +++ b/tests/models/test_qwen2_5_vl.py @@ -4,9 +4,9 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest -from gptqmodel.utils.eval import EVAL from gptqmodel.models.definitions.qwen2_vl import Qwen2VLQModel +from gptqmodel.utils.eval import EVAL class TestQwen2_VL(ModelTest): diff --git a/tests/models/test_qwen2_moe_quant.py b/tests/models/test_qwen2_moe_quant.py index c5ff402f2..8b81c84e5 100644 --- a/tests/models/test_qwen2_moe_quant.py +++ b/tests/models/test_qwen2_moe_quant.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_qwen2_vl.py b/tests/models/test_qwen2_vl.py index b6297f5fd..40eb0ae58 100644 --- a/tests/models/test_qwen2_vl.py +++ b/tests/models/test_qwen2_vl.py @@ -4,9 +4,9 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest -from gptqmodel.utils.eval import EVAL from gptqmodel.models.definitions.qwen2_vl import Qwen2VLQModel +from gptqmodel.utils.eval import EVAL class TestQwen2_VL(ModelTest): diff --git a/tests/models/test_qwen3_moe.py b/tests/models/test_qwen3_moe.py index d398b3c24..ba80a66ba 100644 --- a/tests/models/test_qwen3_moe.py +++ b/tests/models/test_qwen3_moe.py @@ -4,8 +4,10 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL + # | Metric | MARLIN | # |--------------------------------|----------| # | arc_challenge :: acc,none | 0.5094 | diff --git a/tests/models/test_qwen3_next.py b/tests/models/test_qwen3_next.py index 882473af6..8e81ddd6f 100644 --- a/tests/models/test_qwen3_next.py +++ b/tests/models/test_qwen3_next.py @@ -4,8 +4,10 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL + # | Metric | MARLIN | # |--------------------------------|----------| # | arc_challenge :: acc,none | 0.6271 | diff --git a/tests/models/test_qwen3_omni.py b/tests/models/test_qwen3_omni.py index 76e6a2881..ecad2e3e2 100644 --- a/tests/models/test_qwen3_omni.py +++ b/tests/models/test_qwen3_omni.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_seed_oss.py b/tests/models/test_seed_oss.py index ab2a0c3eb..fe9933cd4 100644 --- a/tests/models/test_seed_oss.py +++ b/tests/models/test_seed_oss.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_stablelm.py b/tests/models/test_stablelm.py index ca4b2fac2..ffd9e859d 100644 --- a/tests/models/test_stablelm.py +++ b/tests/models/test_stablelm.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/models/test_xverse.py b/tests/models/test_xverse.py index a1de853f5..7d77ed9df 100644 --- a/tests/models/test_xverse.py +++ b/tests/models/test_xverse.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/test_gptqv2.py b/tests/test_gptqv2.py index ee96572d0..284c1dfea 100644 --- a/tests/test_gptqv2.py +++ b/tests/test_gptqv2.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from models.model_test import ModelTest + from gptqmodel.utils.eval import EVAL diff --git a/tests/test_lm_head.py b/tests/test_lm_head.py index 3319a308d..98bada2ca 100644 --- a/tests/test_lm_head.py +++ b/tests/test_lm_head.py @@ -13,10 +13,10 @@ os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # -- end do not touch from models.model_test import ModelTest # noqa: E402 -from gptqmodel.utils.eval import EVAL # noqa: E402 from gptqmodel import GPTQModel, QuantizeConfig # noqa: E402 from gptqmodel.nn_modules.qlinear import BaseQuantLinear # noqa: E402 +from gptqmodel.utils.eval import EVAL # noqa: E402 class TestLmHeadLoad(ModelTest):