From fab1126992482cb3af61699d34f3029b4bc10b77 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Sun, 5 Oct 2025 05:17:17 +0000 Subject: [PATCH 1/5] update progressbar for module finalization (packing and offloading) Signed-off-by: Qubitium --- gptqmodel/looper/module_looper.py | 60 ++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/gptqmodel/looper/module_looper.py b/gptqmodel/looper/module_looper.py index 06f1bd0a2..2f733b322 100644 --- a/gptqmodel/looper/module_looper.py +++ b/gptqmodel/looper/module_looper.py @@ -925,37 +925,57 @@ def _process_on_worker(proc: LoopProcessor, nm: NamedModule): torch_sync() # Gather finalize tasks (can offload to disk); run them via the pool - finalize_futures = [] + finalize_tasks = [] for reverse_p in reversed(self.processors): - for name in processed_subset: - @torch.inference_mode() - def finalize_module(process, module): - process.submodule_finalize(module, self.gptq_model) + for module in processed_subset.values(): + target_dev = get_device_new(module, recursive=True, assert_mode=True, expected="cpu") + module_label = getattr(module, "full_name", getattr(module, "name", "")) + finalize_tasks.append((reverse_p, module, module_label, target_dev)) - # Disk offload (lifecycle TODO note preserved) - if isinstance(process, (GPTQProcessor, QQQProcessor, AWQProcessor)): - offload_to_disk( - model=self.gptq_model.model, - module=self.gptq_model.model.get_submodule(module.full_name), - disk_path=self.gptq_model.quantize_config.offload_to_disk_path, - ) + finalize_count = len(finalize_tasks) + if finalize_count: + quant_modules_pb.subtitle( + f"Finalizing submodules ({finalize_count})" + ).draw() - module = processed_subset[name] + finalize_futures = [] - target_dev = get_device_new(module, recursive=True, assert_mode=True, expected="cpu") + @torch.inference_mode() + def _finalize_on_worker(process, module, idx, total, module_label): + quant_modules_pb.subtitle( + f"{process.name()}: finalizing 
{idx}/{total} ({module_label})" + ).draw() + + process.submodule_finalize(module, self.gptq_model) + + # Disk offload (lifecycle TODO note preserved) + if isinstance(process, (GPTQProcessor, QQQProcessor, AWQProcessor)): + offload_to_disk( + model=self.gptq_model.model, + module=self.gptq_model.model.get_submodule(module.full_name), + disk_path=self.gptq_model.quantize_config.offload_to_disk_path, + ) - # Submit on the module's device thread (safe & deterministic) - finalize_futures.append( - DEVICE_THREAD_POOL.submit_serial( - target_dev, finalize_module, reverse_p, module - ) + for index, (process, module, module_label, target_dev) in enumerate(finalize_tasks, start=1): + finalize_futures.append( + DEVICE_THREAD_POOL.submit_serial( + target_dev, + _finalize_on_worker, + process, + module, + index, + finalize_count, + module_label, ) + ) - # If any finalize tasks were queued, wait for them for fut in finalize_futures: fut.result() + if finalize_count: + quant_modules_pb.subtitle("").draw() + # LifeCycle: All sub-modules have finalized meaning quantization work is complete # Ensure ANY remaining tasks the looper submitted have drained DEVICE_THREAD_POOL.wait() # same as wait('all') From f486f8191d4d440d9baa1a2539729f13973fb779 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Sun, 5 Oct 2025 05:51:21 +0000 Subject: [PATCH 2/5] merge offloaded files into 1 single dat file instead of 1 per param/buffer Signed-off-by: Qubitium --- gptqmodel/utils/model.py | 19 ++++- gptqmodel/utils/offload.py | 156 +++++++++++++++++++++++++++++++++++- tests/test_offload_files.py | 61 ++++++++++++++ 3 files changed, 232 insertions(+), 4 deletions(-) create mode 100644 tests/test_offload_files.py diff --git a/gptqmodel/utils/model.py b/gptqmodel/utils/model.py index 3ef446fd2..f928c0605 100644 --- a/gptqmodel/utils/model.py +++ b/gptqmodel/utils/model.py @@ -1261,6 +1261,20 @@ def _resolve_offload_entry( data_offsets=offsets, ) + filename = entry.get("filename") + if filename: + path = 
filename if os.path.isabs(filename) else os.path.join(module_dir, filename) + start = int(entry.get("offset", 0)) + end = start + (_torch_dtype_num_bytes(resolved_dtype) * math.prod(shape or (1,))) + return OffloadTensorRef( + path=os.path.abspath(path), + dtype=resolved_dtype, + shape=shape, + format="dat", + weight_name=None, + data_offsets=(start, end), + ) + data_path = os.path.join(module_dir, f"{leaf}.dat") if not os.path.isfile(data_path): return None @@ -1450,7 +1464,10 @@ def _write_shard_file(path: str, entries: List[TensorSource], metadata: Dict[str if isinstance(source, OffloadTensorRef): if source.format == "dat": # print("offload tensor io buffered transfer DAT") - _copy_file_stream(source.path, out, entry.num_bytes) + start = 0 + if source.data_offsets is not None: + start = source.data_offsets[0] + _copy_file_stream(source.path, out, entry.num_bytes, offset=start) elif source.format == "safetensors" and source.data_offsets is not None: # print("offload tensor io buffered transfer SAFETENSOR stream") start, end = source.data_offsets diff --git a/gptqmodel/utils/offload.py b/gptqmodel/utils/offload.py index 37f7c765e..a40a6055b 100644 --- a/gptqmodel/utils/offload.py +++ b/gptqmodel/utils/offload.py @@ -4,17 +4,21 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium import contextlib +import json import os import shutil from threading import Lock from typing import Iterable, List, Optional, Set, Tuple import accelerate +import accelerate.utils.offload as accelerate_offload import torch +import numpy as np # move base_module tensors to disk from accelerate import disk_offload from accelerate.hooks import remove_hook_from_module, remove_hook_from_submodules +from accelerate.utils import OffloadedWeightsLoader as _AccelerateOffloadedWeightsLoader from accelerate.utils import align_module_device, has_offloaded_params from torch import nn @@ -65,6 +69,146 @@ def is_meta_module(m: nn.Module) -> bool: _OFFLOAD_LOCK = Lock() +_ORIGINAL_OFFLOADED_LOADER = 
accelerate_offload.OffloadedWeightsLoader +_ORIGINAL_LOAD_WEIGHT = accelerate_offload.load_offloaded_weight + + +class _ModuleBundledWeightsLoader(_AccelerateOffloadedWeightsLoader): + def __getitem__(self, key: str): + weight_info = self.index.get(key) + bundle_file = None if weight_info is None else weight_info.get("filename") + if bundle_file: + file_path = bundle_file if os.path.isabs(bundle_file) else os.path.join(self.save_folder, bundle_file) + storage_dtype = weight_info.get("storage_dtype", weight_info.get("dtype")) + offset = weight_info.get("offset", 0) + shape = tuple(weight_info.get("shape", ())) + squeezed = bool(weight_info.get("squeezed", False)) + + memmap_shape = shape if len(shape) > 0 else (1,) + np_dtype = np.dtype(storage_dtype) + mm = np.memmap(file_path, dtype=np_dtype, mode="r", offset=offset, shape=memmap_shape, order="C") + array = np.array(mm, copy=True) + del mm + + tensor = torch.from_numpy(array) + target_dtype = weight_info.get("dtype") + if target_dtype == "bfloat16": + tensor = tensor.view(torch.bfloat16) + elif target_dtype is not None and target_dtype != storage_dtype: + tensor = tensor.to(getattr(torch, target_dtype)) + + if squeezed and len(shape) == 0: + tensor = tensor.reshape(()) + return tensor + + return super().__getitem__(key) + + +def _load_offloaded_weight_with_bundle(weight_file, weight_info): + bundle_file = weight_info.get("filename") if isinstance(weight_info, dict) else None + if bundle_file: + folder = os.path.dirname(weight_file) + file_path = bundle_file if os.path.isabs(bundle_file) else os.path.join(folder, bundle_file) + storage_dtype = weight_info.get("storage_dtype", weight_info.get("dtype")) + offset = weight_info.get("offset", 0) + shape = tuple(weight_info.get("shape", ())) + squeezed = bool(weight_info.get("squeezed", False)) + + memmap_shape = shape if len(shape) > 0 else (1,) + np_dtype = np.dtype(storage_dtype) + mm = np.memmap(file_path, dtype=np_dtype, mode="r", offset=offset, shape=memmap_shape, 
order="C") + array = np.array(mm, copy=True) + del mm + + tensor = torch.from_numpy(array) + target_dtype = weight_info.get("dtype") + if target_dtype == "bfloat16": + tensor = tensor.view(torch.bfloat16) + elif target_dtype is not None and target_dtype != storage_dtype: + tensor = tensor.to(getattr(torch, target_dtype)) + + if squeezed and len(shape) == 0: + tensor = tensor.reshape(()) + return tensor + + return _ORIGINAL_LOAD_WEIGHT(weight_file, weight_info) + + +_ACCELERATE_PATCHED = False + + +def _ensure_accelerate_bundle_patch(): + global _ACCELERATE_PATCHED + if _ACCELERATE_PATCHED: + return + + accelerate.utils.OffloadedWeightsLoader = _ModuleBundledWeightsLoader + accelerate_offload.OffloadedWeightsLoader = _ModuleBundledWeightsLoader + accelerate.utils.load_offloaded_weight = _load_offloaded_weight_with_bundle + accelerate_offload.load_offloaded_weight = _load_offloaded_weight_with_bundle + _ACCELERATE_PATCHED = True + + +def _prepare_offload_directory(target_dir: str) -> None: + if os.path.isdir(target_dir): + shutil.rmtree(target_dir) + os.makedirs(target_dir, exist_ok=True) + + +def _bundle_module_state_dict(module: nn.Module, offload_dir: str) -> dict: + bundle_path = os.path.join(offload_dir, "module.dat") + index: dict[str, dict] = {} + offset = 0 + + with open(bundle_path, "wb") as bundle_fp: + state_items = module.state_dict() + + for key, tensor in state_items.items(): + with torch.no_grad(): + tensor_cpu = tensor.detach().to("cpu") + + storage_tensor = tensor_cpu + storage_dtype = storage_tensor.dtype + target_dtype = tensor_cpu.dtype + + if target_dtype == torch.bfloat16: + storage_tensor = storage_tensor.view(torch.int16) + storage_dtype = torch.int16 + + storage_tensor = storage_tensor.contiguous() + array = storage_tensor.numpy() + squeezed = False + if array.ndim == 0: + array = array.reshape(1) + squeezed = True + + bundle_fp.write(array.tobytes(order="C")) + + dtype_str = "bfloat16" if target_dtype == torch.bfloat16 else str(array.dtype) 
+ entry = { + "dtype": dtype_str, + "shape": list(tensor.shape), + "filename": "module.dat", + "offset": offset, + } + + if squeezed: + entry["squeezed"] = True + + storage_dtype_str = str(array.dtype) + if storage_dtype_str != dtype_str: + entry["storage_dtype"] = storage_dtype_str + + index[key] = entry + offset += array.nbytes + + index_path = os.path.join(offload_dir, "index.json") + with open(index_path, "w", encoding="utf-8") as fp: + json.dump(index, fp, indent=2) + + return index + + def offload_to_disk(module: List[str] | nn.Module, model: nn.Module, disk_path: str = "."): with _OFFLOAD_LOCK: _offload_to_disk_impl(module=module, model=model, disk_path=disk_path) @@ -119,11 +263,17 @@ def _offload_disk(module: nn.Module, name: str, disk_path: str = "."): if not has_params and not has_buffers: return + module_offload_dir = os.path.join(disk_path, name) + + _prepare_offload_directory(module_offload_dir) + _bundle_module_state_dict(module, module_offload_dir) + + _ensure_accelerate_bundle_patch() + _ = disk_offload( module, - # device_map={ "" : "disk" }, # only touch this subtree - offload_dir=f"{disk_path}/{name}", - offload_buffers=True, # needed for buffers + offload_dir=module_offload_dir, + offload_buffers=True, execution_device=m_device, ) diff --git a/tests/test_offload_files.py b/tests/test_offload_files.py new file mode 100644 index 000000000..d118ce6bc --- /dev/null +++ b/tests/test_offload_files.py @@ -0,0 +1,61 @@ +# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai +# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai +# SPDX-License-Identifier: Apache-2.0 +# Contact: qubitium@modelcloud.ai, x.com/qubitium + +import json + +import torch +from tabulate import tabulate +from torch import nn + +from gptqmodel.utils.offload import offload_to_disk, undo_offload_to_disk + + +class _LinearWithBuffers(nn.Module): + def __init__(self, in_features: int, out_features: int): + super().__init__() + self.linear = nn.Linear(in_features, out_features, 
bias=True) + self.register_buffer("scale_buffer", torch.linspace(0.0, 1.0, out_features)) + self.register_buffer("mask_buffer", torch.randint(0, 2, (out_features, in_features)).bool()) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.linear(x * self.mask_buffer.float()) * self.scale_buffer + + +def _clone_state_dict(module: nn.Module) -> dict[str, torch.Tensor]: + return {k: v.detach().clone() for k, v in module.state_dict().items()} + + +def test_offload_to_disk_writes_single_dat_file(tmp_path): + model = _LinearWithBuffers(in_features=128, out_features=96) + original_state = _clone_state_dict(model.linear) + + offload_root = tmp_path / "offload_root" + offload_to_disk(module=model.linear, model=model, disk_path=str(offload_root)) + + module_dir = offload_root / "linear" + assert module_dir.is_dir(), "Expected per-module directory to exist" + + files = sorted(module_dir.iterdir(), key=lambda p: p.name) + rows = [(path.name, path.stat().st_size) for path in files] + print(tabulate(rows, headers=["file", "bytes"], tablefmt="github")) + + dat_files = [path for path in files if path.suffix == ".dat"] + assert len(dat_files) == 1, "offload_to_disk should produce exactly one .dat file per module" + assert dat_files[0].name == "module.dat" + + with open(module_dir / "index.json", encoding="utf-8") as fp: + index = json.load(fp) + + expected_keys = set(model.linear.state_dict().keys()) + assert set(index.keys()) == expected_keys + assert all(entry.get("filename") == "module.dat" for entry in index.values()) + + offsets = [entry["offset"] for entry in index.values()] + assert offsets == sorted(offsets), "Offsets should be monotonically increasing" + + # Materialize the module back and ensure values match the snapshot captured before offload. 
+ undo_offload_to_disk(model.linear, delete_offload_folders=False) + for name, tensor in model.linear.state_dict().items(): + torch.testing.assert_close(tensor, original_state[name]) From 0a9cc4f80c5930f4ff1da48e525a5b10c15c9660 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Sun, 5 Oct 2025 05:58:43 +0000 Subject: [PATCH 3/5] use safetensors instead of manual numpy mmap dump Signed-off-by: Qubitium --- gptqmodel/utils/offload.py | 149 +++++++----------------------------- tests/test_offload_files.py | 13 ++-- 2 files changed, 32 insertions(+), 130 deletions(-) diff --git a/gptqmodel/utils/offload.py b/gptqmodel/utils/offload.py index a40a6055b..cd4db3590 100644 --- a/gptqmodel/utils/offload.py +++ b/gptqmodel/utils/offload.py @@ -7,20 +7,19 @@ import json import os import shutil +import struct from threading import Lock from typing import Iterable, List, Optional, Set, Tuple import accelerate -import accelerate.utils.offload as accelerate_offload import torch -import numpy as np # move base_module tensors to disk from accelerate import disk_offload from accelerate.hooks import remove_hook_from_module, remove_hook_from_submodules -from accelerate.utils import OffloadedWeightsLoader as _AccelerateOffloadedWeightsLoader from accelerate.utils import align_module_device, has_offloaded_params from torch import nn +from safetensors.torch import save_file as safetensors_save_file from ..looper.named_module import NamedModule from .device import get_device @@ -69,86 +68,6 @@ def is_meta_module(m: nn.Module) -> bool: _OFFLOAD_LOCK = Lock() -_ORIGINAL_OFFLOADED_LOADER = accelerate_offload.OffloadedWeightsLoader -_ORIGINAL_LOAD_WEIGHT = accelerate_offload.load_offloaded_weight - - -class _ModuleBundledWeightsLoader(_AccelerateOffloadedWeightsLoader): - def __getitem__(self, key: str): - weight_info = self.index.get(key) - bundle_file = None if weight_info is None else weight_info.get("filename") - if bundle_file: - file_path = bundle_file if os.path.isabs(bundle_file) else 
os.path.join(self.save_folder, bundle_file) - storage_dtype = weight_info.get("storage_dtype", weight_info.get("dtype")) - offset = weight_info.get("offset", 0) - shape = tuple(weight_info.get("shape", ())) - squeezed = bool(weight_info.get("squeezed", False)) - - memmap_shape = shape if len(shape) > 0 else (1,) - np_dtype = np.dtype(storage_dtype) - mm = np.memmap(file_path, dtype=np_dtype, mode="r", offset=offset, shape=memmap_shape, order="C") - array = np.array(mm, copy=True) - del mm - - tensor = torch.from_numpy(array) - target_dtype = weight_info.get("dtype") - if target_dtype == "bfloat16": - tensor = tensor.view(torch.bfloat16) - elif target_dtype is not None and target_dtype != storage_dtype: - tensor = tensor.to(getattr(torch, target_dtype)) - - if squeezed and len(shape) == 0: - tensor = tensor.reshape(()) - return tensor - - return super().__getitem__(key) - - -def _load_offloaded_weight_with_bundle(weight_file, weight_info): - bundle_file = weight_info.get("filename") if isinstance(weight_info, dict) else None - if bundle_file: - folder = os.path.dirname(weight_file) - file_path = bundle_file if os.path.isabs(bundle_file) else os.path.join(folder, bundle_file) - storage_dtype = weight_info.get("storage_dtype", weight_info.get("dtype")) - offset = weight_info.get("offset", 0) - shape = tuple(weight_info.get("shape", ())) - squeezed = bool(weight_info.get("squeezed", False)) - - memmap_shape = shape if len(shape) > 0 else (1,) - np_dtype = np.dtype(storage_dtype) - mm = np.memmap(file_path, dtype=np_dtype, mode="r", offset=offset, shape=memmap_shape, order="C") - array = np.array(mm, copy=True) - del mm - - tensor = torch.from_numpy(array) - target_dtype = weight_info.get("dtype") - if target_dtype == "bfloat16": - tensor = tensor.view(torch.bfloat16) - elif target_dtype is not None and target_dtype != storage_dtype: - tensor = tensor.to(getattr(torch, target_dtype)) - - if squeezed and len(shape) == 0: - tensor = tensor.reshape(()) - return tensor - - 
return _ORIGINAL_LOAD_WEIGHT(weight_file, weight_info) - - -_ACCELERATE_PATCHED = False - - -def _ensure_accelerate_bundle_patch(): - global _ACCELERATE_PATCHED - if _ACCELERATE_PATCHED: - return - - accelerate.utils.OffloadedWeightsLoader = _ModuleBundledWeightsLoader - accelerate_offload.OffloadedWeightsLoader = _ModuleBundledWeightsLoader - accelerate.utils.load_offloaded_weight = _load_offloaded_weight_with_bundle - accelerate_offload.load_offloaded_weight = _load_offloaded_weight_with_bundle - _ACCELERATE_PATCHED = True - - def _prepare_offload_directory(target_dir: str) -> None: if os.path.isdir(target_dir): shutil.rmtree(target_dir) @@ -156,51 +75,37 @@ def _prepare_offload_directory(target_dir: str) -> None: def _bundle_module_state_dict(module: nn.Module, offload_dir: str) -> dict: - bundle_path = os.path.join(offload_dir, "module.dat") + bundle_path = os.path.join(offload_dir, "module.safetensors") index: dict[str, dict] = {} - offset = 0 - - with open(bundle_path, "wb") as bundle_fp: - state_items = module.state_dict() - - for key, tensor in state_items.items(): - with torch.no_grad(): - tensor_cpu = tensor.detach().to("cpu") - - storage_tensor = tensor_cpu - storage_dtype = storage_tensor.dtype - target_dtype = tensor_cpu.dtype + tensors: dict[str, torch.Tensor] = {} - if target_dtype == torch.bfloat16: - storage_tensor = storage_tensor.view(torch.int16) - storage_dtype = torch.int16 - - storage_tensor = storage_tensor.contiguous() - array = storage_tensor.numpy() - squeezed = False - if array.ndim == 0: - array = array.reshape(1) - squeezed = True - - bundle_fp.write(array.tobytes(order="C")) - - dtype_str = "bfloat16" if target_dtype == torch.bfloat16 else str(array.dtype) + with torch.inference_mode(): + for key, tensor in module.state_dict().items(): + cpu_tensor = tensor.detach().to("cpu") + tensors[key] = cpu_tensor.contiguous() entry = { - "dtype": dtype_str, - "shape": list(tensor.shape), - "filename": "module.dat", - "offset": offset, + 
"dtype": str(cpu_tensor.dtype).replace("torch.", ""), + "shape": list(cpu_tensor.shape), + "safetensors_file": os.path.abspath(bundle_path), + "weight_name": key, } + index[key] = entry - if squeezed: - entry["squeezed"] = True + safetensors_save_file(tensors, bundle_path) - storage_dtype_str = str(array.dtype) - if storage_dtype_str != dtype_str: - entry["storage_dtype"] = storage_dtype_str + with open(bundle_path, "rb") as fh: + header_len = struct.unpack(" Date: Sun, 5 Oct 2025 06:26:56 +0000 Subject: [PATCH 4/5] use safetensors instead of numpy mmap dump Signed-off-by: Qubitium --- gptqmodel/looper/loop_processor.py | 50 +++++++++++++++++++++--------- gptqmodel/utils/offload.py | 4 ++- tests/test_offload_files.py | 21 +++++++++++++ 3 files changed, 59 insertions(+), 16 deletions(-) diff --git a/gptqmodel/looper/loop_processor.py b/gptqmodel/looper/loop_processor.py index e177434d4..7c6fc4ceb 100644 --- a/gptqmodel/looper/loop_processor.py +++ b/gptqmodel/looper/loop_processor.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium import json import queue +import re import threading from datetime import datetime from typing import Any, Callable, Dict, List, Optional, Set, Tuple @@ -26,6 +27,8 @@ # global level lock PROCESSOR_GLOBAL_LOCK = threading.Lock() +ANSI_ESCAPE_RE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]") + # LoopProcessor is a singleton(), not per module instance class LoopProcessor: def __init__( @@ -191,41 +194,58 @@ def loss_color(self, loss_value): else: return "\033[91m" # Red + def _strip_ansi(self, text: Any) -> str: + return ANSI_ESCAPE_RE.sub("", str(text)) + + def _visible_length(self, text: Any) -> int: + return len(self._strip_ansi(text)) + + def _ljust_visible(self, text: str, width: int) -> str: + padding = max(width - self._visible_length(text), 0) + if padding: + return f"{text}{' ' * padding}" + return text + def log_new_row(self, stat): self.log_call_count += 1 self.log_save_async(stat) # write to temp log file # Update 
max_widths with the new row's column widths for key, value in stat.items(): - current_width = max(len(str(key)), len(str(value))) + 4 # 4 is for padding + key_str = str(key) + value_str = str(value) + current_width = max(self._visible_length(key_str), self._visible_length(value_str)) + 4 # 4 is for padding if key not in self.log_max_widths or current_width > self.log_max_widths[key]: self.log_max_widths[key] = current_width if self.log_call_count % 20 == 1: # Format the header row - header_row = "| " + " | ".join( - str(key).ljust(self.log_max_widths[key]) for key in self.log_max_widths.keys()) + " |" + header_cells = [ + self._ljust_visible(str(key), self.log_max_widths[key]) for key in self.log_max_widths.keys() + ] + header_row = "| " + " | ".join(header_cells) + " |" + header_separator = "-" * self._visible_length(header_row) if self.log_call_count == 1: - log.info(len(header_row) * "-") + log.info(header_separator) log.info(header_row) - log.info(len(header_row) * "-") + log.info(header_separator) - formatted_row = "| " + row_cells = [] for key in self.log_max_widths.keys(): value = stat.get(key, "") - if key == "loss": - color_code = self.loss_color(float(value)) - formatted_value = f"{color_code}{value}\033[0m" + value_str = str(value) + if key == "loss" and value_str: + color_code = self.loss_color(float(value_str)) + formatted_value = f"{color_code}{value_str}\033[0m" else: - formatted_value = str(value) - formatted_row += formatted_value.ljust(self.log_max_widths[key]) + " | " - - # formatted_row = "| " + " | ".join( - # str(stat.get(key, "")).ljust(self.log_max_widths[key]) for key in self.log_max_widths.keys()) + " |" + formatted_value = value_str + row_cells.append(self._ljust_visible(formatted_value, self.log_max_widths[key])) + formatted_row = "| " + " | ".join(row_cells) + " |" + row_separator = "-" * self._visible_length(formatted_row) log.info(formatted_row) - log.info(len(formatted_row) * "-") + log.info(row_separator) def 
_init_device_smi_handles(self) -> Dict[str, Device]: handles: Dict[str, Device] = {} diff --git a/gptqmodel/utils/offload.py b/gptqmodel/utils/offload.py index cd4db3590..4075df08d 100644 --- a/gptqmodel/utils/offload.py +++ b/gptqmodel/utils/offload.py @@ -96,6 +96,7 @@ def _bundle_module_state_dict(module: nn.Module, offload_dir: str) -> dict: with open(bundle_path, "rb") as fh: header_len = struct.unpack(" dict: continue offsets = tensor_meta.get("data_offsets") if offsets is not None: - entry["data_offsets"] = offsets + start, end = (int(offsets[0]), int(offsets[1])) + entry["data_offsets"] = [data_offset_base + start, data_offset_base + end] index_path = os.path.join(offload_dir, "index.json") with open(index_path, "w", encoding="utf-8") as fp: diff --git a/tests/test_offload_files.py b/tests/test_offload_files.py index 29964299b..adf79e520 100644 --- a/tests/test_offload_files.py +++ b/tests/test_offload_files.py @@ -9,8 +9,10 @@ import torch from tabulate import tabulate from torch import nn +from safetensors import safe_open from gptqmodel.utils.offload import offload_to_disk, undo_offload_to_disk +from gptqmodel.utils.model import get_state_dict_for_save, streaming_state_dict_to_shards class _LinearWithBuffers(nn.Module): @@ -54,6 +56,25 @@ def test_offload_to_disk_writes_single_dat_file(tmp_path): assert all(Path(entry.get("safetensors_file")).name == "module.safetensors" for entry in index.values()) assert all(entry.get("data_offsets") is not None for entry in index.values()) + save_dir = tmp_path / "saved" + save_dir.mkdir() + state_dict = get_state_dict_for_save(model, offload_root=str(offload_root)) + expected_files, tensor_to_filename, _ = streaming_state_dict_to_shards( + state_dict, + save_dir=str(save_dir), + model_base_name="model", + single_file_name="model.safetensors", + metadata={}, + max_shard_size=None, + ) + + assert len(expected_files) == 1 + shard_path = save_dir / expected_files[0] + with safe_open(str(shard_path), framework="pt", 
device="cpu") as handler: + for name, tensor in original_state.items(): + saved = handler.get_tensor(f"linear.{name}") + torch.testing.assert_close(saved, tensor) + # Materialize the module back and ensure values match the snapshot captured before offload. undo_offload_to_disk(model.linear, delete_offload_folders=False) for name, tensor in model.linear.state_dict().items(): From 599e1311caf62e35fad1f0dddfe9199c1dee80c9 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Sun, 5 Oct 2025 06:28:55 +0000 Subject: [PATCH 5/5] ruff Signed-off-by: Qubitium --- gptqmodel/utils/offload.py | 2 +- tests/test_offload_files.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gptqmodel/utils/offload.py b/gptqmodel/utils/offload.py index 4075df08d..c84285d75 100644 --- a/gptqmodel/utils/offload.py +++ b/gptqmodel/utils/offload.py @@ -18,8 +18,8 @@ from accelerate import disk_offload from accelerate.hooks import remove_hook_from_module, remove_hook_from_submodules from accelerate.utils import align_module_device, has_offloaded_params -from torch import nn from safetensors.torch import save_file as safetensors_save_file +from torch import nn from ..looper.named_module import NamedModule from .device import get_device diff --git a/tests/test_offload_files.py b/tests/test_offload_files.py index adf79e520..80288f17b 100644 --- a/tests/test_offload_files.py +++ b/tests/test_offload_files.py @@ -7,12 +7,12 @@ from pathlib import Path import torch +from safetensors import safe_open from tabulate import tabulate from torch import nn -from safetensors import safe_open -from gptqmodel.utils.offload import offload_to_disk, undo_offload_to_disk from gptqmodel.utils.model import get_state_dict_for_save, streaming_state_dict_to_shards +from gptqmodel.utils.offload import offload_to_disk, undo_offload_to_disk class _LinearWithBuffers(nn.Module):