diff --git a/pyproject.toml b/pyproject.toml index 480b944fd..cb219ee9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ dependencies = [ "pyarrow>=21.0", "dill>=0.3.8", # datasets requirements "pypcre>=0.2.4", - "torchao>=0.14.0", # fix bad transformers 4.57.1 breaking torchao compat + "torchao>=0.14.1", # fix bad transformers 4.57.1 breaking torchao compat # "cython>=3.1.4", # required by hf-xet/hf-transfer # "flash-attn>=2.8.3", <-- install for lower vram usage ] diff --git a/requirements.txt b/requirements.txt index 697b032cb..128a701a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,4 +19,4 @@ datasets>=3.6.0 pyarrow>=21.0 dill>=0.3.8 pypcre>=0.2.4 -torchao>=0.14.0 +torchao>=0.14.1 diff --git a/tests/models/model_test.py b/tests/models/model_test.py index 7d0ee4892..e2b75776c 100644 --- a/tests/models/model_test.py +++ b/tests/models/model_test.py @@ -109,6 +109,7 @@ class ModelTest(unittest.TestCase): ACT_GROUP_AWARE = True FAIL_SAFE = True EORA = None + DAMP_PERCENT = 0.05 SAVE_PATH = None # default is temp folder @@ -719,6 +720,7 @@ def quantModel(self, model_id_or_path, trust_remote_code=False, dtype="auto", ne adapter=self.EORA, pack_impl="cpu", vram_strategy=self.VRAM_STRATEGY, + damp_percent=self.DAMP_PERCENT, ) log.info(f"Quant config: {quantize_config}")