From 5bf62f67dc2c1418def52497300df22759e07550 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Thu, 16 Oct 2025 09:24:32 +0000 Subject: [PATCH 1/3] update scores --- tests/models/test_qwen3_moe.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/models/test_qwen3_moe.py b/tests/models/test_qwen3_moe.py index 4916361f6..a36f4e480 100644 --- a/tests/models/test_qwen3_moe.py +++ b/tests/models/test_qwen3_moe.py @@ -6,11 +6,12 @@ from model_test import ModelTest + class TestQwen3Moe(ModelTest): NATIVE_MODEL_ID = "/monster/data/model/Qwen3-30B-A3B" - QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.2 - NATIVE_ARC_CHALLENGE_ACC = 0.3700 - NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3700 + QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.04 + NATIVE_ARC_CHALLENGE_ACC = 0.3788 # a100 4,5,6,7 + NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3899 # a100 4,5,6,7 # TRUST_REMOTE_CODE = False APPLY_CHAT_TEMPLATE = True # EVAL_BATCH_SIZE = 6 From f1e1ebe564df86d69716c9c9c33babaa53f12cf2 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Thu, 16 Oct 2025 17:12:42 +0000 Subject: [PATCH 2/3] cleanup --- gptqmodel/__init__.py | 2 +- tests/models/test_qwen3_next.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/gptqmodel/__init__.py b/gptqmodel/__init__.py index 10559ead4..b54284928 100644 --- a/gptqmodel/__init__.py +++ b/gptqmodel/__init__.py @@ -16,7 +16,7 @@ DEVICE_THREAD_POOL = DeviceThreadPool( inference_mode=True, workers={ - "cuda:per": 2, + "cuda:per": 4, "xpu:per": 1, "mps": 8, "cpu": 8, diff --git a/tests/models/test_qwen3_next.py b/tests/models/test_qwen3_next.py index 2c45f9dc7..dcb960d14 100644 --- a/tests/models/test_qwen3_next.py +++ b/tests/models/test_qwen3_next.py @@ -8,13 +8,21 @@ class TestQwen3Next(ModelTest): NATIVE_MODEL_ID = "/monster/data/model/Qwen3-Next-80B-A3B-Instruct" - QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.05 + QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.04 NATIVE_ARC_CHALLENGE_ACC = 0.3900 NATIVE_ARC_CHALLENGE_ACC_NORM = 0.3900 TRUST_REMOTE_CODE 
= True APPLY_CHAT_TEMPLATE = True - EVAL_BATCH_SIZE = 6 - #DATASET_SIZE = 1024 + EVAL_BATCH_SIZE = 4 + V2 = False + DEBUG = True + ACT_GROUP_AWARE = True + DESC_ACT = False + DATASET_SIZE = 1024 + DATASET_SORT = "desc" + QUANT_BATCH_SIZE = 4 + CALIB_NOISE_MODE = "unseen" + CALIB_NOISE_PERCENT = 0.025 def test_mimo(self): self.quant_lm_eval() From 0d3f45995886c0325e3d57ad290d6ae47d6f4abd Mon Sep 17 00:00:00 2001 From: Qubitium Date: Thu, 16 Oct 2025 17:14:41 +0000 Subject: [PATCH 3/3] cleanup --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9365145a4..5611545b1 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,8 @@

## Latest News -* 09/30/2025 5.0.0-dev `main`: 👀: New Data Parallel + Multi-GPU + Python 3.13g (PYTHON_GIL=0) equals 80%+ overall quant time reduction of large MoE models va v4.2.5. +* 10/17/2025 5.0.0-dev `main`: 👀: EoRA now multi-gpu compatible. Fixed both quality stability of multi-gpu quants and vram usage. New LFM and Ling models support. +* 09/30/2025 5.0.0-dev `main`: 👀: New Data Parallel + Multi-GPU + Python 3.13T (PYTHON_GIL=0) equals 80%+ overall quant time reduction of large MoE models vs v4.2.5. +* 09/29/2025 5.0.0-dev `main`: 🎉 New Qwen3 Omni model support. AWQ Marlin kernel integrated + many disk offload, threading, and memory usage fixes. +* 09/24/2025 5.0.0-dev `main`: 🎉 Up to 90% cpu mem saving for large MoE models with faster/inline packing! 26% quant time reduction for Qwen3 MoE! AWQ Marlin kernel added. AWQ Gemm loading bug fixes. `act_group_aware` now faster and auto enabled for GPTQ when `desc_act` is False for higher quality recovery. +* 09/19/2025 5.0.0-dev `main`: 👀 Cpu memory saving of ~73.5% during quantization stage with new `offload_to_disk` quantization config property default to `True`.