Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
296 changes: 240 additions & 56 deletions tests/models/model_test.py

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions tests/models/test_act_group_aware.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestHybridActOrder(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/Llama-3.2-1B-Instruct" # "meta-llama/Llama-3.2-1B-Instruct"
NATIVE_ARC_CHALLENGE_ACC = 0.3140 # A100
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3439 # A100
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.10
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.3140, "floor_pct": 0.05},
"acc_norm": {"value": 0.3439, "floor_pct": 0.05},
},
}
APPLY_CHAT_TEMPLATE = True
V2 = False
ACT_GROUP_AWARE = True
Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_apertus.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL

from gptqmodel import BACKEND


class TestApertus(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/Apertus-8B-Instruct-2509/"
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.2
NATIVE_ARC_CHALLENGE_ACC = 0.5145
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5256
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.5145, "floor_pct": 0.2},
"acc_norm": {"value": 0.5256, "floor_pct": 0.2},
},
}
TRUST_REMOTE_CODE = False
APPLY_CHAT_TEMPLATE = True
EVAL_BATCH_SIZE = 6
Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_cohere.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestCohere(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/aya-expanse-8b" # "CohereForAI/aya-expanse-8b"
NATIVE_ARC_CHALLENGE_ACC = 0.5401
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5640
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.20
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.5401, "floor_pct": 0.20},
"acc_norm": {"value": 0.5640, "floor_pct": 0.20},
},
}
EVAL_BATCH_SIZE = 4

def test_cohere(self):
Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_cohere2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestCohere2(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/c4ai-command-r7b-12-2024"
NATIVE_ARC_CHALLENGE_ACC = 0.4680
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.4693
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.15
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.4680, "floor_pct": 0.15},
"acc_norm": {"value": 0.4693, "floor_pct": 0.15},
},
}
EVAL_BATCH_SIZE = 4
USE_FLASH_ATTN = False

Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_deci.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestDeci(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/DeciLM-7B-instruct" # "Deci/DeciLM-7B-instruct"
NATIVE_ARC_CHALLENGE_ACC = 0.5239
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5222
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.8
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.5239, "floor_pct": 0.8},
"acc_norm": {"value": 0.5222, "floor_pct": 0.8},
},
}
TRUST_REMOTE_CODE = True
USE_VLLM = False
EVAL_BATCH_SIZE = 6
Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_dream.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestDream(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/Dream-v0-Instruct-7B"
NATIVE_ARC_CHALLENGE_ACC = 0.3567
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3805
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.36
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.3567, "floor_pct": 0.36},
"acc_norm": {"value": 0.3805, "floor_pct": 0.36},
},
}
APPLY_CHAT_TEMPLATE = True
TRUST_REMOTE_CODE = True
EVAL_BATCH_SIZE = 1
Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_falcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,20 @@

import torch  # noqa: E402
from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestFalcon(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/falcon-7b-instruct" # "tiiuae/falcon-7b-instruct"
NATIVE_ARC_CHALLENGE_ACC = 0.3993
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.4292
APPLY_CHAT_TEMPLATE = True
TRUST_REMOTE_CODE = False
TORCH_DTYPE = torch.float16
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.52
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.3993, "floor_pct": 0.52},
"acc_norm": {"value": 0.4292, "floor_pct": 0.52},
},
}
EVAL_BATCH_SIZE = 6
USE_VLLM = False

Expand Down
21 changes: 15 additions & 6 deletions tests/models/test_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,25 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


# | Metric | MARLIN |
# |--------------------------------|----------|
# | arc_challenge :: acc,none | 0.5026 |
# | arc_challenge :: acc_norm,none | 0.5171 |
# | mmlu :: acc,none | 0.6362 |
class TestGlm(ModelTest):
# real: THUDM/glm-4-9b-chat-hf
NATIVE_MODEL_ID = "/monster/data/model/glm-4-9b-chat-hf"
NATIVE_ARC_CHALLENGE_ACC = 0.5154
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5316
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.2
USE_VLLM = False
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.5026, "floor_pct": 0.04},
"acc_norm": {"value": 0.5171, "floor_pct": 0.04},
},
EVAL.LM_EVAL.MMLU: {
"acc": {"value": 0.6362, "floor_pct": 0.04},
},
}

def test_glm(self):
self.quant_lm_eval()

10 changes: 7 additions & 3 deletions tests/models/test_gpt_oss.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestGPTOSS(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/gpt-oss-20b-BF16/"
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.2
NATIVE_ARC_CHALLENGE_ACC = 0.4411
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.4718
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.4411, "floor_pct": 0.2},
"acc_norm": {"value": 0.4718, "floor_pct": 0.2},
},
}
TRUST_REMOTE_CODE = False
APPLY_CHAT_TEMPLATE = False
EVAL_BATCH_SIZE = 6
Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_granite.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestGranite(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/granite-3.0-2b-instruct" # "ibm-granite/granite-3.0-2b-instruct"
NATIVE_ARC_CHALLENGE_ACC = 0.4505
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.4770
APPLY_CHAT_TEMPLATE = True
TRUST_REMOTE_CODE = True
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.2
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.4505, "floor_pct": 0.2},
"acc_norm": {"value": 0.4770, "floor_pct": 0.2},
},
}

def test_granite(self):
self.quant_lm_eval()
10 changes: 7 additions & 3 deletions tests/models/test_hymba.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestHymba(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/Hymba-1.5B-Instruct/" # "nvidia/Hymba-1.5B-Instruct"
NATIVE_ARC_CHALLENGE_ACC = 0.2073
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.2713
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.75
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.2073, "floor_pct": 0.75},
"acc_norm": {"value": 0.2713, "floor_pct": 0.75},
},
}
MODEL_MAX_LEN = 8192
TRUST_REMOTE_CODE = True
APPLY_CHAT_TEMPLATE = True
Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_ling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestLing(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/Ling-mini-2.0/"
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.2
NATIVE_ARC_CHALLENGE_ACC = 0.5009
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5137
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.5009, "floor_pct": 0.2},
"acc_norm": {"value": 0.5137, "floor_pct": 0.2},
},
}
TRUST_REMOTE_CODE = True
APPLY_CHAT_TEMPLATE = True
# EVAL_BATCH_SIZE = 6
Expand Down
38 changes: 28 additions & 10 deletions tests/models/test_llama3_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


# a100:7, MARLIN kernel
Expand All @@ -12,20 +13,37 @@
# desc_act = True, REGRESSION 0.3191/0.3601
# a100:6+7: MARLIN kernel
# desc_act = False, act_group_aware = True 0.3217/0.3643
# | Metric | MARLIN |
# |--------------------------------|----------|
# | arc_challenge :: acc,none | 0.3174 |
# | arc_challenge :: acc_norm,none | 0.3601 |
# | mmlu :: acc,none | 0.3186 |
class TestLlama3_2(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/Llama-3.2-1B-Instruct" # "meta-llama/Llama-3.2-1B-Instruct"
NATIVE_ARC_CHALLENGE_ACC = 0.3268
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3558
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.04
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {
"value": 0.3174,
"floor_pct": 0.04,
"ceil_pct": 0.10,
},
"acc_norm": {
"value": 0.3601,
"floor_pct": 0.04,
"ceil_pct": 0.10,
},
},
EVAL.LM_EVAL.MMLU: {
"acc": {
"value": 0.3186,
"floor_pct": 0.04,
"ceil_pct": 0.10,
},
},
}
APPLY_CHAT_TEMPLATE = True
V2 = False
DEBUG = True
ACT_GROUP_AWARE = True
DESC_ACT = False
DATASET_SIZE = 1024
DATASET_SORT = "desc"
QUANT_BATCH_SIZE = 4
USE_FLASH_ATTN = True

# EORA = Lora(
# # for quant, path is save path. for load, it is loading path
# path="./eora_test",
Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_llama3_2_awq.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL

from gptqmodel.quantization import FORMAT, METHOD

Expand All @@ -14,9 +15,12 @@
# desc_act = True, 0.3089/0.3328
class TestLlama3_2(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/Llama-3.2-1B-Instruct" # "meta-llama/Llama-3.2-1B-Instruct"
NATIVE_ARC_CHALLENGE_ACC = 0.3234
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3524
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.36
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.3234, "floor_pct": 0.36},
"acc_norm": {"value": 0.3524, "floor_pct": 0.36},
},
}
APPLY_CHAT_TEMPLATE = True
V2 = False
DEBUG = True
Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_llama4.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestLlama4(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/Llama-4-Scout-17B-16E-Instruct" # "meta-llama/Llama-4-Scout-17B-16E-Instruct"
NATIVE_ARC_CHALLENGE_ACC = 0.3567
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3805
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.36
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.3567, "floor_pct": 0.36},
"acc_norm": {"value": 0.3805, "floor_pct": 0.36},
},
}
APPLY_CHAT_TEMPLATE = True
TRUST_REMOTE_CODE = False

Expand Down
10 changes: 7 additions & 3 deletions tests/models/test_longllama.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,18 @@
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from model_test import ModelTest
from gptqmodel.utils.eval import EVAL


class TestLongLlama(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/long_llama_3b_instruct" # "syzymon/long_llama_3b_instruct"
NATIVE_ARC_CHALLENGE_ACC = 0.3515
NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3652
TRUST_REMOTE_CODE = True
QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.5
EVAL_TASKS = {
EVAL.LM_EVAL.ARC_CHALLENGE: {
"acc": {"value": 0.3515, "floor_pct": 0.5},
"acc_norm": {"value": 0.3652, "floor_pct": 0.5},
},
}
USE_VLLM = False
USE_FLASH_ATTN = False

Expand Down
Loading