Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gptqmodel/nn_modules/qlinear/awq_gemv.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class AwqGEMVQuantLinear(AWQuantLinear):
SUPPORTS_DTYPES = [torch.float16, torch.bfloat16]

# for transformers/optimum tests compat
- QUANT_TYPE = "awq_gemm"
+ QUANT_TYPE = "awq_gemv"

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion gptqmodel/nn_modules/qlinear/awq_gemv_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class AwqGEMVFastQuantLinear(AWQuantLinear):
SUPPORTS_DTYPES = [torch.float16]

# for transformers/optimum tests compat
- QUANT_TYPE = "awq_gemm"
+ QUANT_TYPE = "awq_gemv_fast"

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion gptqmodel/nn_modules/qlinear/awq_marlin.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class AwqMarlinQuantLinear(AWQuantLinear):
REQUIRES_FORMAT_V2 = False

# for transformers/optimum tests compat
- QUANT_TYPE = "marlin"
+ QUANT_TYPE = "awq_marlin"

# num_bits -> type
TYPE_MAP = {
Expand Down
2 changes: 1 addition & 1 deletion gptqmodel/nn_modules/qlinear/torch_fused.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class TorchFusedQuantLinear(PackableQuantLinear):
REQUIRES_FORMAT_V2 = True

# for transformers/optimum tests compat
- QUANT_TYPE = "torch"
+ QUANT_TYPE = "torch_fused"

def __init__(
self,
Expand Down