diff --git a/gptqmodel/looper/awq_processor.py b/gptqmodel/looper/awq_processor.py
index 6ad426586..3ad6b8a51 100644
--- a/gptqmodel/looper/awq_processor.py
+++ b/gptqmodel/looper/awq_processor.py
@@ -709,12 +709,18 @@ def _apply_quant(self, module, named_linears: Dict[str, NamedModule], start_time
 
         # records
         duration = time.time() - start_time
-        avg_loss = 999999999
+        avg_loss_value = None
         for _, layer_names, _, loss in scales_list:
             if any(named_module.name in layer_name for layer_name in layer_names):
-                avg_loss = loss
+                avg_loss_value = loss
                 break
 
+        if avg_loss_value is None:
+            # Scaling not applied for this layer in AWQ; no meaningful loss metric.
+            loss_summary = "not applicable"
+        else:
+            loss_summary = f"{avg_loss_value:.10f}"
+
         # TODO "loss" and "nsamples" may not be consistent with the semantics of gptq quantization.
         stat = {
             PROCESS_LOG_NAME: self.name(),
@@ -722,7 +728,7 @@ def _apply_quant(self, module, named_linears: Dict[str, NamedModule], start_time
             PROCESS_LOG_MODULE: named_module.name,
             MODULE_FEATURE_COLUMN: self.module_feature_summary(named_module),
             DTYPE_SIZE_COLUMN: self.module_dtype_size_summary(named_module),
-            QUANT_LOG_LOSS: f"{avg_loss:.10f}",
+            QUANT_LOG_LOSS: loss_summary,
             QUANT_LOG_NSAMPLES: f"{self.nsamples}",
             # QUANT_LOG_DAMP: f"{damp_percent:.5f}",
             PROCESS_LOG_TIME: f"{duration:.3f}",
diff --git a/tests/models/test_multi_vs_single_gpu.py b/tests/models/test_multi_vs_single_gpu.py
index 0a58bd8ca..2a8d4254d 100644
--- a/tests/models/test_multi_vs_single_gpu.py
+++ b/tests/models/test_multi_vs_single_gpu.py
@@ -9,7 +9,7 @@
 import sys
 from contextlib import ExitStack
 from dataclasses import dataclass
-from decimal import Decimal
+from decimal import Decimal, InvalidOperation
 from typing import Dict, Iterable, List, Tuple
 from unittest import mock
 
@@ -262,9 +262,14 @@ def _extract_layer_metrics(
         if loss_value is None or sample_value is None:
             continue
 
+        try:
+            loss_decimal = Decimal(loss_value)
+        except (InvalidOperation, TypeError, ValueError):
+            continue
+
         per_layer = layer_metrics.setdefault(layer_index, {})
         per_layer[module_name] = LayerMetrics(
-            loss=Decimal(loss_value),
+            loss=loss_decimal,
             samples=int(sample_value),
         )
     return layer_metrics
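
Note (not part of the patch): a minimal standalone sketch, using hypothetical log rows, of why the test-side guard is needed. After the awq_processor change, the QUANT_LOG_LOSS column can carry the literal string "not applicable" instead of a formatted float; Decimal() rejects that string with decimal.InvalidOperation, and _extract_layer_metrics now simply skips such rows instead of raising.

# Standalone sketch (hypothetical rows, not part of the patch): mirrors the
# guard added to _extract_layer_metrics above.
from decimal import Decimal, InvalidOperation

rows = [
    {"layer": 0, "loss": "0.0000012345", "samples": "128"},    # numeric loss
    {"layer": 1, "loss": "not applicable", "samples": "128"},  # AWQ layer without a scaling loss
]

metrics = {}
for row in rows:
    try:
        loss = Decimal(row["loss"])
    except (InvalidOperation, TypeError, ValueError):
        continue  # non-numeric loss entries are skipped, not treated as errors
    metrics[row["layer"]] = (loss, int(row["samples"]))

print(metrics)  # only the numeric row survives: {0: (Decimal('0.0000012345'), 128)}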