From 324086a3d99a43dc9fb6cf1d5be1767891241793 Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Thu, 30 Oct 2025 07:06:36 +0000
Subject: [PATCH 1/2] update marin scores

---
 tests/models/test_marin.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/models/test_marin.py b/tests/models/test_marin.py
index e701a4d5e..4ad9dca07 100644
--- a/tests/models/test_marin.py
+++ b/tests/models/test_marin.py
@@ -14,8 +14,17 @@ class TestMarin(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/marin-32b-base"
-    VRAM_STRATEGY = VRAMStrategy.BALANCED
+    # VRAM_STRATEGY = VRAMStrategy.BALANCED
 
     # Marin inherits Qwen3's backbone with QK-Norm attention.
+    EVAL_TASKS = {
+        EVAL.LM_EVAL.ARC_CHALLENGE: {
+            "acc": {"value": 0.5725, "floor_pct": 0.04},
+            "acc_norm": {"value": 0.6007, "floor_pct": 0.04},
+        },
+        EVAL.LM_EVAL.MMLU_STEM: {
+            "acc": {"value": 0.6670, "floor_pct": 0.04},
+        },
+    }
 
     def test_marin_module_tree(self):
         config = AutoConfig.from_pretrained(self.NATIVE_MODEL_ID, trust_remote_code=True)

From 83a6ef6835de52ec0a1f5159ff8e50813c788bf5 Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Thu, 30 Oct 2025 07:11:50 +0000
Subject: [PATCH 2/2] update news

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 2ca3f80ea..5fbd14aa8 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@

 ## Latest News
+* 10/30/2025 5.1.0-dev: +Marin model. +AWQ Torch reference kernel. Fix AWQ Marlin kernel for bf16. Fix GLM 4.5/4.6 MoE missing `mtp` layers on model save (HF bug). Modular refactor.
 * 10/28/2025 5.1.0-dev: Minimax M2 support with [ModelCloud BF16 M2 Model](https://huggingface.co/ModelCloud/MiniMax-M2-BF16). New `VramStrategy.Balanced` quantization property for reduced memory usage for large MoE on multi-3090 (24GB) devices.
 * 10/24/2025 [5.0.0](https://github.com/ModelCloud/GPTQModel/releases/tag/v5.0.0): 🎉 Data-parallel quant support for `MoE` models on multi-gpu using `nogil` Python. `offload_to_disk` support enabled by default to massively reduce `cpu` ram usage. New `Intel` and `AMD` cpu hw accelerated `TorchFused` kernel. Packing stage is now 4x faster and now inlined with quantization. `Vram` pressure for large models reduced during quantization.
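
For readers skimming the first patch, each new `EVAL_TASKS` entry pairs a reference score (`value`) with a relative tolerance (`floor_pct`). Below is a minimal sketch of how such a floor check could be evaluated, assuming `floor_pct` means "the measured score may not drop more than this fraction below the reference"; the `eval_floor_ok` helper and the plain-string task keys are hypothetical stand-ins, not GPTQModel's actual `ModelTest` harness:

```python
# Minimal sketch (assumption): a measured metric passes when it stays within
# `floor_pct` of the recorded reference, i.e. measured >= value * (1 - floor_pct).
EVAL_TASKS = {
    "arc_challenge": {
        "acc": {"value": 0.5725, "floor_pct": 0.04},
        "acc_norm": {"value": 0.6007, "floor_pct": 0.04},
    },
    "mmlu_stem": {
        "acc": {"value": 0.6670, "floor_pct": 0.04},
    },
}

def eval_floor_ok(task: str, metric: str, measured: float) -> bool:
    """Return True if `measured` is at or above the allowed floor for task/metric."""
    ref = EVAL_TASKS[task][metric]
    floor = ref["value"] * (1.0 - ref["floor_pct"])
    return measured >= floor

# Example: ARC-Challenge acc floor is 0.5725 * 0.96 = 0.5496, so 0.56 passes and 0.54 fails.
assert eval_floor_ok("arc_challenge", "acc", 0.56)
assert not eval_floor_ok("arc_challenge", "acc", 0.54)
```

A relative floor rather than an exact-score match keeps such tests stable across kernels and hardware while still catching real accuracy regressions in the quantized Marin model.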