From a9996d00c514a2978ba77f3dd1fc688188c902c5 Mon Sep 17 00:00:00 2001
From: "s.malakhov"
Date: Thu, 21 May 2026 09:19:12 +0300
Subject: [PATCH] [quantization] Change default bits for `lm_head`

This PR changes the default of `lm_head_weight_bits` from 4 to 8 so that it
is equal to `embedding_weight_bits`, and adjusts the help string accordingly.

TICO-DCO-1.0-Signed-off-by: s.malakhov
---
 .../wrapq/examples/quantize_full_qmodel_with_gptq.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py b/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py
index 80968a18..4a55c978 100644
--- a/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py
+++ b/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py
@@ -251,8 +251,11 @@ def parse_args():
     parser.add_argument(
         "--lm_head_weight_bits",
         type=int,
-        default=4,
-        help="Number of bits to be used to quantize lm_head",
+        default=8,
+        help=(
+            "Number of bits to be used to quantize lm_head."
+            "For tied embedding/lm_head it must be the same as embedding_weight_bits."
+        ),
     )
     parser.add_argument(
         "--spin_rotation_weight_bits",