diff --git a/tests/test_ipex_xpu.py b/tests/test_ipex_xpu.py index 3eca87307..e21c56175 100644 --- a/tests/test_ipex_xpu.py +++ b/tests/test_ipex_xpu.py @@ -27,7 +27,7 @@ def test(self): device=DEVICE.XPU, ) tokenizer = self.load_tokenizer(self.NATIVE_MODEL_ID) - calibration_dataset = self.load_dataset(tokenizer) + calibration_dataset = self.load_dataset(tokenizer, self.DATASET_SIZE) origin_model.quantize(calibration_dataset, backend=BACKEND.TORCH_FUSED) with tempfile.TemporaryDirectory() as tmpdir: origin_model.save(tmpdir) diff --git a/tests/test_mlx.py b/tests/test_mlx.py index 32ca4125f..f854aa67d 100644 --- a/tests/test_mlx.py +++ b/tests/test_mlx.py @@ -20,7 +20,7 @@ class TestExport(ModelTest): @classmethod def setUpClass(self): self.tokenizer = AutoTokenizer.from_pretrained(self.NATIVE_MODEL_ID, use_fast=True) - self.calibration_dataset = self.load_dataset(self.tokenizer) + self.calibration_dataset = self.load_dataset(self.tokenizer, self.DATASET_SIZE) def test_export_mlx(self): with tempfile.TemporaryDirectory() as export_dir: diff --git a/tests/test_parameter_count.py b/tests/test_parameter_count.py index 599c5823a..f74d1996f 100644 --- a/tests/test_parameter_count.py +++ b/tests/test_parameter_count.py @@ -47,7 +47,7 @@ def test_parameter_count(self): def test_parameter_count_with_quant(self): model_id = "/monster/data/model/Llama-3.2-1B-Instruct" # meta-llama/Llama-3.2-1B-Instruct - calibration_dataset = self.load_dataset(self.load_tokenizer(model_id)) + calibration_dataset = self.load_dataset(self.load_tokenizer(model_id), self.DATASET_SIZE) quant_config = QuantizeConfig(bits=4, group_size=128) diff --git a/tests/test_quant_batch.py b/tests/test_quant_batch.py index 2cf0b5c0c..92440c1b1 100644 --- a/tests/test_quant_batch.py +++ b/tests/test_quant_batch.py @@ -45,7 +45,7 @@ def setUpClass(self): if not self.tokenizer.pad_token_id: self.tokenizer.pad_token_id = self.tokenizer.eos_token_id - self.calibration_dataset = self.load_dataset(self.tokenizer) + self.calibration_dataset = self.load_dataset(self.tokenizer, self.DATASET_SIZE) def test_diff_batch(self): quantize_config = QuantizeConfig( diff --git a/tests/test_quant_trust_remote.py b/tests/test_quant_trust_remote.py index dc4f10868..f6e65d76f 100644 --- a/tests/test_quant_trust_remote.py +++ b/tests/test_quant_trust_remote.py @@ -26,7 +26,7 @@ def setUpClass(self): if not self.tokenizer.pad_token_id: self.tokenizer.pad_token_id = self.tokenizer.eos_token_id - self.calibration_dataset = self.load_dataset(self.tokenizer) + self.calibration_dataset = self.load_dataset(self.tokenizer, self.DATASET_SIZE) def test_diff_batch(self): quantize_config = QuantizeConfig( diff --git a/tests/test_triton_xpu.py b/tests/test_triton_xpu.py index 9edde5360..625c367a6 100644 --- a/tests/test_triton_xpu.py +++ b/tests/test_triton_xpu.py @@ -27,7 +27,7 @@ def test(self): device=DEVICE.XPU, ) tokenizer = self.load_tokenizer(self.NATIVE_MODEL_ID) - calibration_dataset = self.load_dataset(tokenizer) + calibration_dataset = self.load_dataset(tokenizer, self.DATASET_SIZE) origin_model.quantize(calibration_dataset, backend=BACKEND.TRITON) with tempfile.TemporaryDirectory() as tmpdir: origin_model.save(tmpdir) diff --git a/tests/test_vllm.py b/tests/test_vllm.py index ea5b449f0..4e7f85528 100644 --- a/tests/test_vllm.py +++ b/tests/test_vllm.py @@ -76,7 +76,7 @@ def test_dynamic(self): if not tokenizer.pad_token_id: tokenizer.pad_token_id = tokenizer.eos_token_id - calibration_dataset = self.load_dataset(tokenizer) + calibration_dataset = self.load_dataset(tokenizer, self.DATASET_SIZE) # support dynamic override of bits, group_size, desc_act, sym for each layer/module match #