Add support for DeciLM models. (#481)
LaaZa committed Dec 13, 2023
1 parent e7c8fbe commit 97ba340
Showing 4 changed files with 36 additions and 1 deletion.
3 changes: 2 additions & 1 deletion auto_gptq/modeling/__init__.py
@@ -15,4 +15,5 @@
from .qwen import *
from .mistral import *
from .yi import *
from .xverse import *
from .xverse import *
from .decilm import *
1 change: 1 addition & 0 deletions auto_gptq/modeling/_const.py
@@ -22,6 +22,7 @@
"internlm",
"qwen",
"xverse",
"deci_lm",

RajdeepBorgohain Dec 22, 2023

@LaaZa Can you change it from "deci_lm" to "deci"? I'm getting this error, please check the screenshot. Right now I have to edit the json and set it to "deci_lm" to make it work.
[screenshot of the error]

LaaZa Dec 22, 2023


I think they didn't have model_type in the config before, so it was "deci_lm", but they have since added "deci" to the config.

RajdeepBorgohain Dec 26, 2023

Yes, correct, they have added "deci" to the config. Right now, after I quantize the model, I have to change the config.json to make it work with AutoGPTQ. Can you make the change to the model_type name?

Here's a screenshot I took after changing it to "deci_lm":
[screenshot]

LaaZa Dec 26, 2023


I have already made this change in #491; merges might be a bit slow during the holidays. Though you'll still need to change the model type to "deci_lm" if you want people on 0.6.0 to be able to load it; for quantization itself the value doesn't matter. I would recommend using transformers for inference anyway.
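
A minimal sketch of the config.json edit described in this thread, assuming a locally saved quantized checkpoint (the directory name is a placeholder):

# Sketch only: rewrite model_type so AutoGPTQ 0.6.0 recognizes the checkpoint.
import json
from pathlib import Path

config_path = Path("DeciLM-7B-GPTQ/config.json")  # placeholder path to the quantized model
config = json.loads(config_path.read_text())

config["model_type"] = "deci_lm"  # newer upstream configs ship "deci"; 0.6.0 expects "deci_lm"
config_path.write_text(json.dumps(config, indent=2))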

RajdeepBorgohain Dec 27, 2023

Yes, I have been using transformers for inference.
The quantized model has high latency and a long loading time.

]
if compare_transformers_version("v4.28.0", op="ge"):
    SUPPORTED_MODELS.append("llama")
2 changes: 2 additions & 0 deletions auto_gptq/modeling/auto.py
@@ -19,6 +19,7 @@
from .mistral import MistralGPTQForCausalLM
from .yi import YiGPTQForCausalLM
from .xverse import XverseGPTQForCausalLM
from .decilm import DeciLMGPTQForCausalLM

GPTQ_CAUSAL_LM_MODEL_MAP = {
"bloom": BloomGPTQForCausalLM,
Expand All @@ -39,6 +40,7 @@
"mistral": MistralGPTQForCausalLM,
"Yi": YiGPTQForCausalLM,
"xverse": XverseGPTQForCausalLM,
"deci_lm": DeciLMGPTQForCausalLM,
}


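The entry added above is the key the loader uses to pick a class from a checkpoint's model_type, which is why the "deci" vs. "deci_lm" mismatch discussed in the comments above breaks loading. The snippet below is an illustrative sketch of that lookup, not AutoGPTQ's actual loader code; resolve_gptq_class is a hypothetical helper.

# Illustrative sketch of the model_type-to-class lookup; not AutoGPTQ's own code.
from transformers import AutoConfig

from auto_gptq.modeling._const import SUPPORTED_MODELS
from auto_gptq.modeling.auto import GPTQ_CAUSAL_LM_MODEL_MAP


def resolve_gptq_class(model_name_or_path: str):
    # DeciLM checkpoints ship custom modeling code, hence trust_remote_code.
    config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
    if config.model_type not in SUPPORTED_MODELS:
        raise TypeError(f"{config.model_type} isn't supported yet.")
    # e.g. "deci_lm" -> DeciLMGPTQForCausalLM after this commit
    return GPTQ_CAUSAL_LM_MODEL_MAP[config.model_type]
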
31 changes: 31 additions & 0 deletions auto_gptq/modeling/decilm.py
@@ -0,0 +1,31 @@
from logging import getLogger

from ._base import *
from ..utils.import_utils import compare_transformers_version

if compare_transformers_version("v4.28.0", op="ge"):
    from ..nn_modules.fused_llama_attn import FusedLlamaAttentionForQuantizedModel
    from ..nn_modules.fused_llama_mlp import FusedLlamaMLPForQuantizedModel
else:
    FusedLlamaAttentionForQuantizedModel = None
    FusedLlamaMLPForQuantizedModel = None

logger = getLogger(__name__)


class DeciLMGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "DeciLMDecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"]
    ]

    fused_attn_module_type = FusedLlamaAttentionForQuantizedModel
    fused_mlp_module_type = FusedLlamaMLPForQuantizedModel


__all__ = ["DeciLMGPTQForCausalLM"]
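
A hedged end-to-end sketch of exercising the new class through AutoGPTQ's auto classes, following the library's documented quantize/save/load flow. The model id, output directory, and calibration sentence are placeholders, and DeciLM is assumed to require trust_remote_code.

# Sketch only: quantize a DeciLM checkpoint and reload the quantized weights.
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from transformers import AutoTokenizer

pretrained_model_dir = "Deci/DeciLM-7B"   # placeholder model id
quantized_model_dir = "DeciLM-7B-GPTQ"    # placeholder output directory

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, trust_remote_code=True)
examples = [tokenizer("A few calibration samples are needed for GPTQ quantization.")]

quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
model = AutoGPTQForCausalLM.from_pretrained(
    pretrained_model_dir, quantize_config, trust_remote_code=True
)
model.quantize(examples)
model.save_quantized(quantized_model_dir)

# Reloading: 0.6.0 only knows "deci_lm", so the saved config.json may need the
# model_type edit discussed in the comments above.
model = AutoGPTQForCausalLM.from_quantized(
    quantized_model_dir, device="cuda:0", trust_remote_code=True
)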
