From 324086a3d99a43dc9fb6cf1d5be1767891241793 Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Thu, 30 Oct 2025 07:06:36 +0000
Subject: [PATCH 1/2] update marin scores

---
 tests/models/test_marin.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/models/test_marin.py b/tests/models/test_marin.py
index e701a4d5e..4ad9dca07 100644
--- a/tests/models/test_marin.py
+++ b/tests/models/test_marin.py
@@ -14,8 +14,17 @@ class TestMarin(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/marin-32b-base"
-    VRAM_STRATEGY = VRAMStrategy.BALANCED
+    # VRAM_STRATEGY = VRAMStrategy.BALANCED
 
     # Marin inherits Qwen3's backbone with QK-Norm attention.
+    EVAL_TASKS = {
+        EVAL.LM_EVAL.ARC_CHALLENGE: {
+            "acc": {"value": 0.5725, "floor_pct": 0.04},
+            "acc_norm": {"value": 0.6007, "floor_pct": 0.04},
+        },
+        EVAL.LM_EVAL.MMLU_STEM: {
+            "acc": {"value": 0.6670, "floor_pct": 0.04},
+        },
+    }
 
     def test_marin_module_tree(self):
         config = AutoConfig.from_pretrained(self.NATIVE_MODEL_ID, trust_remote_code=True)

From 83a6ef6835de52ec0a1f5159ff8e50813c788bf5 Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Thu, 30 Oct 2025 07:11:50 +0000
Subject: [PATCH 2/2] update news

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 2ca3f80ea..5fbd14aa8 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@

 ## Latest News
+* 10/30/2025 5.1.0-dev: +Marin model. +AWQ Torch reference kernel. Fix AWQ Marlin kernel for bf16. Fix GLM 4.5/4.6 MoE missing `mtp` layers on model save (HF bug). Modular refactor.
 * 10/28/2025 5.1.0-dev: Minimax M2 support with [ModelCloud BF16 M2 Model](https://huggingface.co/ModelCloud/MiniMax-M2-BF16). New `VramStrategy.Balanced` quantization property for reduced memory usage for large MoE on multi-3090 (24GB) devices.
 * 10/24/2025 [5.0.0](https://github.com/ModelCloud/GPTQModel/releases/tag/v5.0.0): 🎉 Data-parallel quant support for `MoE` models on multi-gpu using `nogil` Python. `offload_to_disk` support enabled by default to massively reduce `cpu` ram usage. New `Intel` and `AMD` cpu hw accelerated `TorchFused` kernel. Packing stage is now 4x faster and now inlined with quantization. `Vram` pressure for large models reduced during quantization.
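
For readers skimming the first patch, each new `EVAL_TASKS` entry pairs a reference score (`value`) with a relative tolerance (`floor_pct`). Below is a minimal sketch of how such a floor check could be evaluated, assuming `floor_pct` means "the measured score may not drop more than this fraction below the reference"; the `eval_floor_ok` helper and the plain-string task keys are hypothetical stand-ins, not GPTQModel's actual `ModelTest` harness:

```python
# Minimal sketch (assumption): a measured metric passes when it stays within
# `floor_pct` of the recorded reference, i.e. measured >= value * (1 - floor_pct).
EVAL_TASKS = {
    "arc_challenge": {
        "acc": {"value": 0.5725, "floor_pct": 0.04},
        "acc_norm": {"value": 0.6007, "floor_pct": 0.04},
    },
    "mmlu_stem": {
        "acc": {"value": 0.6670, "floor_pct": 0.04},
    },
}

def eval_floor_ok(task: str, metric: str, measured: float) -> bool:
    """Return True if `measured` is at or above the allowed floor for task/metric."""
    ref = EVAL_TASKS[task][metric]
    floor = ref["value"] * (1.0 - ref["floor_pct"])
    return measured >= floor

# Example: ARC-Challenge acc floor is 0.5725 * 0.96 = 0.5496, so 0.56 passes and 0.54 fails.
assert eval_floor_ok("arc_challenge", "acc", 0.56)
assert not eval_floor_ok("arc_challenge", "acc", 0.54)
```

A relative floor rather than an exact-score match keeps such tests stable across kernels and hardware while still catching real accuracy regressions in the quantized Marin model.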