5 changes: 4 additions & 1 deletion gptqmodel/looper/module_looper.py
@@ -493,7 +493,8 @@ def store_input_hook(module, args, kwargs):
layer_inputs.append(layer_input)

# Keyword arguments.
-if kwargs.get("attention_mask") is not None and self.gptq_model.ATTENTION_MASKS_REQUIRED_FOR_INPUT:
+# Always capture attention_mask so downstream masking can drop padded tokens
+if kwargs.get("attention_mask") is not None:
attention_masks.append(kwargs["attention_mask"].to(device=data_device))
else:
attention_masks.append(None)
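The hook now captures the mask unconditionally instead of only when `ATTENTION_MASKS_REQUIRED_FOR_INPUT` is set. A minimal sketch of the kind of downstream masking this enables, assuming the usual convention of 1 = real token and 0 = padding; `drop_padded_tokens` is a hypothetical helper for illustration, not a function in this PR:

import torch

def drop_padded_tokens(hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # hidden_states: [batch, seq_len, hidden_dim]
    # attention_mask: [batch, seq_len], 1 for real tokens, 0 for padding (assumed convention)
    keep = attention_mask.to(dtype=torch.bool)
    # Boolean indexing over the first two dims keeps only unpadded positions
    return hidden_states[keep]  # -> [num_real_tokens, hidden_dim]

x = torch.randn(2, 4, 8)
mask = torch.tensor([[1, 1, 1, 0],
                     [1, 1, 0, 0]])
print(drop_padded_tokens(x, mask).shape)  # torch.Size([5, 8])

Filtering like this keeps calibration statistics (e.g. the GPTQ Hessian accumulation) from being skewed by padded positions.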
@@ -632,6 +633,8 @@ def loop(self, fail_safe: bool = False, **kwargs):

layer_modules = self.gptq_model.simple_layer_modules(model_config=self.gptq_model.model.config, quantize_config=self.gptq_model.quantize_config)

+# true_sequential replays the quantized activations after each subset is quantized, so the next subset is calibrated on them
+# this should always be true for gptq: disabling it yields a lower but misleading error_loss and a worse post-quantized model
if not self.gptq_model.quantize_config.true_sequential:
layer_modules = [sum(layer_modules, [])]
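When `true_sequential` is disabled, the per-layer module subsets are merged into one pass, which is exactly what `[sum(layer_modules, [])]` does. A quick illustration with hypothetical module names (the real names come from `simple_layer_modules`):

layer_modules = [["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj"],
                 ["self_attn.o_proj"],
                 ["mlp.up_proj", "mlp.gate_proj"],
                 ["mlp.down_proj"]]

# sum(..., []) concatenates the inner lists; wrapping in [...] keeps the
# "list of subsets" shape expected by the loop, now with a single subset
flattened = [sum(layer_modules, [])]
print(flattened)
# [['self_attn.q_proj', 'self_attn.k_proj', 'self_attn.v_proj', 'self_attn.o_proj',
#   'mlp.up_proj', 'mlp.gate_proj', 'mlp.down_proj']]

With one merged subset there is no replay between subsets, so every module sees the original (unquantized) activations: hence the lower but misleading error_loss.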

7 changes: 7 additions & 0 deletions tests/models/model_test.py
@@ -13,6 +13,13 @@
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

+# The following makes test results more deterministic but much slower
+# # the CUBLAS env is required for use_deterministic_algorithms
+# os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
+#
+# import torch
+# torch.use_deterministic_algorithms(True)

# -- end do not touch

from pathlib import Path # noqa: E402
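For reference, a sketch of what enabling that commented-out block amounts to in a test entry point (assumed usage, not part of this diff): the CUBLAS workspace variable must be set before any CUDA context is created, and `torch.use_deterministic_algorithms(True)` makes PyTorch raise on nondeterministic ops instead of silently running them.

import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"  # must precede CUDA init; required by cuBLAS for determinism

import torch
torch.manual_seed(0)                      # fix RNG so test inputs are reproducible (illustrative addition)
torch.use_deterministic_algorithms(True)  # error out on kernels with no deterministic implementation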