From 521894b95fda49ba56c14d68ca81de4154dcc259 Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Sat, 25 Oct 2025 03:52:29 +0000
Subject: [PATCH 1/3] log model save dir early

---
 tests/models/model_test.py | 138 +++++++++++++++++++++++++------------
 1 file changed, 93 insertions(+), 45 deletions(-)

diff --git a/tests/models/model_test.py b/tests/models/model_test.py
index e2b75776c..da834c62b 100644
--- a/tests/models/model_test.py
+++ b/tests/models/model_test.py
@@ -519,6 +519,49 @@ def walk(directory: Path, prefix: str, depth: int) -> None:
             print(f"\n{colorize(f'Index file: {rel_name}', 0, False)}")
             print(json.dumps(content, indent=2, sort_keys=True))
 
+    def _prepare_quant_save_destination(self, need_eval):
+        if self.SAVE_PATH:
+            return contextlib.nullcontext(self.SAVE_PATH), self.SAVE_PATH, None
+
+        if need_eval:
+            tmp_dir = tempfile.mkdtemp()
+            return contextlib.nullcontext(tmp_dir), tmp_dir, lambda: shutil.rmtree(tmp_dir, ignore_errors=True)
+
+        tmp_context = tempfile.TemporaryDirectory()
+        return tmp_context, tmp_context.name, tmp_context.cleanup
+
+    def _resolve_quantized_model_path(self, model_candidate):
+        if model_candidate is None:
+            return None
+        if isinstance(model_candidate, (list, tuple)):
+            model_candidate = model_candidate[0]
+        if isinstance(model_candidate, str):
+            return model_candidate
+        return getattr(model_candidate, "model_local_path", None)
+
+    def _cleanup_quantized_model(self, model_candidate, enabled=True):
+        if not enabled:
+            return False
+        target_path = self._resolve_quantized_model_path(model_candidate)
+        if not target_path or not isinstance(target_path, str):
+            return False
+
+        temp_root = os.path.realpath(tempfile.gettempdir())
+        candidate_path = os.path.realpath(target_path)
+        if not candidate_path.startswith(temp_root):
+            return False
+        if not os.path.exists(candidate_path):
+            return False
+
+        try:
+            shutil.rmtree(candidate_path)
+        except OSError as exc:
+            log.warn(f"Failed to delete temp model `{candidate_path}`: {exc}")
+            return False
+
+        log.info(f"Deleting temp model: {candidate_path}")
+        return True
+
     @staticmethod
     def _colorize(text, matched):
         color = "\033[92m" if matched else "\033[91m"
@@ -764,43 +807,56 @@ def quantModel(self, model_id_or_path, trust_remote_code=False, dtype="auto", ne
         is_ovis_model = model.__class__.__name__ == "OvisGPTQ"
         need_create_processor = is_image_to_text_model and not is_ovis_model
         if not is_quantized:
-            model.quantize(calibration_dataset, calibration_sort=self.DATASET_SORT, backend=self.QUANT_BACKEND, batch_size=batch_size)
-
-            self.check_kernel(model, self.KERNEL_QUANT)
-
-            # TODO: make into shared method
-            with (contextlib.nullcontext(self.SAVE_PATH) if self.SAVE_PATH else contextlib.nullcontext(tempfile.mkdtemp()) if need_eval else tempfile.TemporaryDirectory()) as path:
-                os.makedirs(path, exist_ok=True)
-                self.clear_directory(path)
-
-                model.save(path)
-                tokenizer.save_pretrained(path)
-                self._print_post_quant_artifacts(path)
-                log.info(f"Quantized Model saved to tmp dir: {path}")
-
-                reuse_candidates, eval_records = self.perform_post_quant_validation(path, trust_remote_code=trust_remote_code)
-                self._post_quant_eval_records = eval_records
-                target_backend = self._current_load_backend()
-
-                q_model = reuse_candidates.pop(target_backend, None)
-                if q_model is None:
-                    # Ensure the post-quant reload stays on a single CUDA device when available.
-                    use_cuda_map = torch.cuda.is_available() and target_backend != BACKEND.TORCH_FUSED
-                    if use_cuda_map:
-                        q_model = self.loadQuantModel(
-                            path,
-                            trust_remote_code=trust_remote_code,
-                            backend=target_backend,
-                            device_map={"": "cuda:0"},
-                        )
-                    else:
-                        q_model = self.loadQuantModel(path, trust_remote_code=trust_remote_code, backend=target_backend)
-                else:
-                    log.info(f"Reusing post-quant validation model for backend `{target_backend.name}`")
-
-                q_tokenizer = q_model.tokenizer or self.load_tokenizer(path, trust_remote_code=trust_remote_code)
-                if need_create_processor:
-                    processor = AutoProcessor.from_pretrained(path)
+            save_context = None
+            planned_save_path = None
+            cleanup_callback = None
+            try:
+                save_context, planned_save_path, cleanup_callback = self._prepare_quant_save_destination(need_eval)
+                log.info(f"Quantized model artifacts will be saved to: {planned_save_path}")
+                model.quantize(calibration_dataset, calibration_sort=self.DATASET_SORT, backend=self.QUANT_BACKEND, batch_size=batch_size)
+
+                self.check_kernel(model, self.KERNEL_QUANT)
+
+                # TODO: make into shared method
+                with save_context as path:
+                    cleanup_callback = None
+                    os.makedirs(path, exist_ok=True)
+                    self.clear_directory(path)
+
+                    model.save(path)
+                    tokenizer.save_pretrained(path)
+                    self._print_post_quant_artifacts(path)
+
+                    reuse_candidates, eval_records = self.perform_post_quant_validation(path, trust_remote_code=trust_remote_code)
+                    self._post_quant_eval_records = eval_records
+                    target_backend = self._current_load_backend()
+
+                    q_model = reuse_candidates.pop(target_backend, None)
+                    if q_model is None:
+                        # Ensure the post-quant reload stays on a single CUDA device when available.
+                        use_cuda_map = torch.cuda.is_available() and target_backend != BACKEND.TORCH_FUSED
+                        if use_cuda_map:
+                            q_model = self.loadQuantModel(
+                                path,
+                                trust_remote_code=trust_remote_code,
+                                backend=target_backend,
+                                device_map={"": "cuda:0"},
+                            )
+                        else:
+                            q_model = self.loadQuantModel(path, trust_remote_code=trust_remote_code, backend=target_backend)
+                    else:
+                        log.info(f"Reusing post-quant validation model for backend `{target_backend.name}`")
+
+                    q_tokenizer = q_model.tokenizer or self.load_tokenizer(path, trust_remote_code=trust_remote_code)
+                    if need_create_processor:
+                        processor = AutoProcessor.from_pretrained(path)
+            except Exception:
+                if cleanup_callback is not None:
+                    try:
+                        cleanup_callback()
+                    except Exception:
+                        pass
+                raise
 
         else:
             if need_create_processor:
@@ -949,16 +1005,7 @@ def lm_eval(self, model, apply_chat_template=False, trust_remote_code=False, del
                 aggregated_results[normalized_task_name] = filtered_metrics
                 print({normalized_task_name: filtered_metrics})
 
-            # only delete tmp folders
-            model_local_path = getattr(model, "model_local_path", "")
-            if (
-                delete_quantized_model
-                and isinstance(model_local_path, str)
-                and model_local_path.startswith("/tmp")
-                and os.path.exists(model_local_path)
-            ):
-                log.info(f"Deleting temp model: {model_local_path}")
-                shutil.rmtree(model_local_path)
+            self._cleanup_quantized_model(model, enabled=delete_quantized_model)
 
             return aggregated_results
         except BaseException as e:
             if isinstance(e, torch.OutOfMemoryError):
@@ -1012,6 +1059,7 @@ def quant_lm_eval(self):
             delete_quantized_model=self.DELETE_QUANTIZED_MODEL,
         )
         self.check_results(task_results)
+        self._cleanup_quantized_model(self.model, enabled=self.DELETE_QUANTIZED_MODEL)
 
     def check_results(self, task_results):
         baselines = self.get_eval_tasks()
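
The sketch below is illustrative only and is not part of PATCH 1. Assuming the (context manager, path, cleanup callback) return shape added above, it shows how a caller is expected to consume _prepare_quant_save_destination: the destination is logged before quantization starts (the point of this commit), and the cleanup callback fires only if an exception escapes before the with block takes ownership of the directory. prepare_destination is a hypothetical standalone stand-in, not a function from the repository.

# Illustrative sketch of the save-destination contract (hypothetical stand-in names).
import contextlib
import shutil
import tempfile


def prepare_destination(save_path=None, need_eval=False):
    if save_path:
        # Caller-provided directory: nothing to clean up afterwards.
        return contextlib.nullcontext(save_path), save_path, None
    if need_eval:
        # Directory must outlive the `with` block, so return an explicit cleanup callback.
        tmp_dir = tempfile.mkdtemp()
        return contextlib.nullcontext(tmp_dir), tmp_dir, lambda: shutil.rmtree(tmp_dir, ignore_errors=True)
    # Short-lived directory: TemporaryDirectory removes it when the context exits.
    tmp_ctx = tempfile.TemporaryDirectory()
    return tmp_ctx, tmp_ctx.name, tmp_ctx.cleanup


save_context, planned_path, cleanup = prepare_destination(need_eval=True)
print(f"Quantized model artifacts will be saved to: {planned_path}")  # logged before any work starts
try:
    with save_context as path:
        cleanup = None  # from here on, the later eval/cleanup code owns the directory
        print(f"pretend to quantize and save artifacts under {path}")
except Exception:
    if cleanup is not None:
        cleanup()  # only delete the temp dir if we failed before taking ownership
    raise
finally:
    shutil.rmtree(planned_path, ignore_errors=True)  # tidy up after the demo run
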
From 8325cf7f710da445d9f5dcafc9c599a829043645 Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Sat, 25 Oct 2025 05:02:49 +0000
Subject: [PATCH 2/3] ascii logo

---
 gptqmodel/models/auto.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
index 2e5578034..69905983d 100644
--- a/gptqmodel/models/auto.py
+++ b/gptqmodel/models/auto.py
@@ -12,6 +12,18 @@
 
 log = setup_logger()
 
+ASCII_LOGO = r"""
+_____/\\\\\\\\\\\\__/\\\\\\\\\\\\\____/\\\\\\\\\\\\\\\______________________/\\\________/\\\\____________/\\\\_______________________/\\\__________________/\\\\\\____
+ ___/\\\//////////__\/\\\/////////\\\_\///////\\\/////____________________/\\\\/\\\\____\/\\\\\\________/\\\\\\______________________\/\\\_________________\////\\\____
+  __/\\\_____________\/\\\_______\/\\\_______\/\\\_______________________/\\\//\////\\\__\/\\\//\\\____/\\\//\\\______________________\/\\\____________________\/\\\____
+   _\/\\\____/\\\\\\\_\/\\\\\\\\\\\\\/________\/\\\________/\\\\\\\\\\\__/\\\______\//\\\_\/\\\\///\\\/\\\/_\/\\\_____/\\\\\___________\/\\\______/\\\\\\\\_____\/\\\____
+    _\/\\\___\/////\\\_\/\\\/////////__________\/\\\_______\///////////__\//\\\______/\\\__\/\\\__\///\\\/___\/\\\___/\\\///\\\____/\\\\\\\\\____/\\\/////\\\____\/\\\____
+     _\/\\\_______\/\\\_\/\\\___________________\/\\\______________________\///\\\\/\\\\/___\/\\\____\///_____\/\\\__/\\\__\//\\\__/\\\////\\\___/\\\\\\\\\\\_____\/\\\____
+      _\/\\\_______\/\\\_\/\\\___________________\/\\\________________________\////\\\//_____\/\\\_____________\/\\\_\//\\\__/\\\__\/\\\__\/\\\__\//\\///////______\/\\\____
+       _\//\\\\\\\\\\\\/__\/\\\___________________\/\\\___________________________\///\\\\\\__\/\\\_____________\/\\\__\///\\\\\/___\//\\\\\\\/\\__\//\\\\\\\\\\__/\\\\\\\\\_
+        __\////////////____\///____________________\///______________________________\//////___\///______________\///_____\/////______\///////\//____\//////////__\/////////__
+"""
+
 # if not os.environ.get("PYTHON_GIL", None):
 #     os.environ["PYTHON_GIL"] = '0'
 #     log.info("ENV: Auto disable GIL and use free-threading mode when applicable: Python 3.13t+. You must install the -t edition of Python.")
@@ -249,6 +261,8 @@ def load(
         trust_remote_code: bool = False,
         **kwargs,
     ):
+        log.info("\n%s", ASCII_LOGO)
+
         if isinstance(model_id_or_path, str):
             model_id_or_path = model_id_or_path.strip()

From 280bfeb49b5f1b3d47a161801f284c8c42bf6298 Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Sat, 25 Oct 2025 10:22:57 +0000
Subject: [PATCH 3/3] move log to init

---
 gptqmodel/__init__.py    | 4 ++++
 gptqmodel/models/auto.py | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gptqmodel/__init__.py b/gptqmodel/__init__.py
index d6670c849..f3f2c0504 100644
--- a/gptqmodel/__init__.py
+++ b/gptqmodel/__init__.py
@@ -14,6 +14,7 @@
     patch_triton_autotuner()
 
 from .utils.env import env_flag
+from .utils.logger import setup_logger
 
 DEBUG_ON = env_flag("DEBUG")
 
@@ -41,11 +42,14 @@
 )
 
 from .models import GPTQModel, get_best_device
+from .models.auto import ASCII_LOGO
 from .quantization import BaseQuantizeConfig, QuantizeConfig
 from .utils import BACKEND
 from .utils.exllama import exllama_set_max_input_length
 from .version import __version__
 
+setup_logger().info("\n%s", ASCII_LOGO)
+
 
 if os.getenv('GPTQMODEL_USE_MODELSCOPE', 'False').lower() in ['true', '1']:
     try:
diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
index 69905983d..9af1cca4c 100644
--- a/gptqmodel/models/auto.py
+++ b/gptqmodel/models/auto.py
@@ -261,8 +261,6 @@ def load(
         trust_remote_code: bool = False,
         **kwargs,
     ):
-        log.info("\n%s", ASCII_LOGO)
-
         if isinstance(model_id_or_path, str):
             model_id_or_path = model_id_or_path.strip()
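
The snippet below is likewise illustrative and not part of PATCH 2/3. It reproduces the banner-logging pattern with only the standard logging module (the real code goes through gptqmodel's setup_logger): the banner is passed as a lazy %s argument, and because PATCH 3 emits it at package-import time it prints once per process instead of once per GPTQModel.load() call. The banner string and logger name here are stand-ins.

# Illustrative sketch of the import-time banner pattern (stand-in names, stdlib logging only).
import logging

# Stand-in banner; the real ASCII_LOGO lives in gptqmodel/models/auto.py.
BANNER = "\n".join([
    "+------------------+",
    "|  GPTQModel demo  |",
    "+------------------+",
])

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("gptqmodel.demo")

# Same call shape as setup_logger().info("\n%s", ASCII_LOGO): the leading newline keeps the
# first banner row off the log-prefix line, and %s defers formatting until INFO is enabled.
log.info("\n%s", BANNER)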