diff --git a/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py b/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py
index 80968a18..4a55c978 100644
--- a/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py
+++ b/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py
@@ -251,8 +251,12 @@ def parse_args():
     parser.add_argument(
         "--lm_head_weight_bits",
         type=int,
-        default=4,
-        help="Number of bits to be used to quantize lm_head",
+        default=8,
+        help=(
+            # Adjacent literals are joined with no separator; keep the trailing space.
+            "Number of bits to be used to quantize lm_head. "
+            "For tied embedding/lm_head it must be the same as embedding_weight_bits."
+        ),
     )
     parser.add_argument(
         "--spin_rotation_weight_bits",