From a9ce5759344ff4dd5330b5ba38f52b1e3ce5e90f Mon Sep 17 00:00:00 2001
From: Liu Yiqun <liuyiqun01@baidu.com>
Date: Sat, 11 Oct 2025 19:43:57 +0800
Subject: [PATCH 1/2] Restrict the running_var parameter of BatchNorm to be
 non-negative.

---
 graph_net/paddle/test_compiler.py | 36 ++++++++++++++++++-------
 graph_net/test_compiler_util.py   | 10 ++++---
 graph_net/torch/test_compiler.py  | 44 +++++++++++--------------------
 graph_net/torch/utils.py          |  6 ++++-
 4 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/graph_net/paddle/test_compiler.py b/graph_net/paddle/test_compiler.py
index 5f51600c3..5f15a39d0 100644
--- a/graph_net/paddle/test_compiler.py
+++ b/graph_net/paddle/test_compiler.py
@@ -180,7 +180,11 @@ def measure_performance(model_call, args, synchronizer_func, profile=False):
             duration_box = test_compiler_util.DurationBox(-1)
             with test_compiler_util.naive_timer(duration_box, synchronizer_func):
                 model_call()
-            print(f"Trial {i + 1}: e2e={duration_box.value:.4f} ms")
+            print(
+                f"Trial {i + 1}: e2e={duration_box.value:.4f} ms",
+                file=sys.stderr,
+                flush=True,
+            )
             e2e_times.append(duration_box.value)
         stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times)
 
@@ -256,26 +260,34 @@ def test_single_model(args):
     # Run on eager mode
     eager_success = False
     try:
-        print("Run model in eager mode.")
+        print("Run model in eager mode.", file=sys.stderr, flush=True)
         static_model = get_static_model(args, model)
         expected_out, eager_time_stats = measure_performance(
             lambda: static_model(**input_dict), args, synchronizer_func, profile=False
         )
         eager_success = True
     except Exception as e:
-        print(f"Run model in eager mode failed: {str(e)}\n{traceback.format_exc()}")
+        print(
+            f"Run model in eager mode failed: {str(e)}\n{traceback.format_exc()}",
+            file=sys.stderr,
+            flush=True,
+        )
 
     # Run on compiling mode
     compiled_success = False
     try:
-        print("Run model in compiled mode.")
+        print("Run model in compiled mode.", file=sys.stderr, flush=True)
         compiled_model = get_compiled_model(args, model)
         compiled_out, compiled_time_stats = measure_performance(
             lambda: compiled_model(**input_dict), args, synchronizer_func, profile=False
         )
         compiled_success = True
     except Exception as e:
-        print(f"Run model in compiled mode failed: {str(e)}\n{traceback.format_exc()}")
+        print(
+            f"Run model in compiled mode failed: {str(e)}\n{traceback.format_exc()}",
+            file=sys.stderr,
+            flush=True,
+        )
 
     test_compiler_util.print_running_status(args, eager_success, compiled_success)
     if eager_success and compiled_success:
@@ -358,7 +370,7 @@ def test_multi_models(args):
     if args.verified_samples_list_path is not None:
         assert os.path.isfile(args.verified_samples_list_path)
         graphnet_root = path_utils.get_graphnet_root()
-        print(f"graphnet_root: {graphnet_root}")
+        print(f"graphnet_root: {graphnet_root}", file=sys.stderr, flush=True)
         verified_samples = []
         with open(args.verified_samples_list_path, "r") as f:
             for line in f.readlines():
@@ -368,7 +380,11 @@ def test_multi_models(args):
     failed_samples = []
     for model_path in path_utils.get_recursively_model_path(args.model_path):
         if verified_samples is None or os.path.abspath(model_path) in verified_samples:
-            print(f"[{sample_idx}] test_compiler, model_path: {model_path}")
+            print(
+                f"[{sample_idx}] test_compiler, model_path: {model_path}",
+                file=sys.stderr,
+                flush=True,
+            )
             cmd = " ".join(
                 [
                     sys.executable,
@@ -388,10 +404,12 @@ def test_multi_models(args):
             sample_idx += 1
 
     print(
-        f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples."
+        f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples.",
+        file=sys.stderr,
+        flush=True,
     )
     for model_path in failed_samples:
-        print(f"- {model_path}")
+        print(f"- {model_path}", file=sys.stderr, flush=True)
 
 
 def main(args):
diff --git a/graph_net/test_compiler_util.py b/graph_net/test_compiler_util.py
index 2b4d083b2..84b0aba21 100644
--- a/graph_net/test_compiler_util.py
+++ b/graph_net/test_compiler_util.py
@@ -204,14 +204,16 @@ def check_allclose(
     cmp_all_close_func,
     cmp_max_diff_func,
     cmp_mean_diff_func,
-    cmp_max_relative_diff_func,
-    cmp_mean_relative_diff_func,
+    cmp_max_relative_diff_func=None,
+    cmp_mean_relative_diff_func=None,
 ):
     cmp_configs = generate_allclose_configs(cmp_all_close_func)
     cmp_configs.append(("[max_diff]", cmp_max_diff_func, {}))
     cmp_configs.append(("[mean_diff]", cmp_mean_diff_func, {}))
-    cmp_configs.append(("[max_relative_diff]", cmp_max_relative_diff_func, {}))
-    cmp_configs.append(("[mean_relative_diff]", cmp_mean_relative_diff_func, {}))
+    if cmp_max_relative_diff_func is not None:
+        cmp_configs.append(("[max_relative_diff]", cmp_max_relative_diff_func, {}))
+    if cmp_mean_relative_diff_func is not None:
+        cmp_configs.append(("[mean_relative_diff]", cmp_mean_relative_diff_func, {}))
 
     for key, func, kwargs in cmp_configs:
         print_and_store_cmp(
diff --git a/graph_net/torch/test_compiler.py b/graph_net/torch/test_compiler.py
index f99b3354e..034cdf297 100644
--- a/graph_net/torch/test_compiler.py
+++ b/graph_net/torch/test_compiler.py
@@ -21,7 +21,7 @@
 from graph_net.torch.backend.tensorrt_backend import TensorRTBackend
 from graph_net.torch.backend.blade_disc_backend import BladeDISCBackend
 from graph_net.torch.backend.nope_backend import NopeBackend
-from graph_net.test_compiler_util import generate_allclose_configs
+from graph_net import test_compiler_util
 
 registry_backend = {
     "tvm": TvmBackend(),
@@ -374,33 +374,21 @@ def print_and_store_cmp(key, cmp_func, args, expected_out, compiled_out, **kwarg
 
 
 def compare_correctness(expected_out, compiled_out, args):
-    # cmp_configs = [
-    #     ("[equal]", get_cmp_equal, {}),
-    #     ("[all_close_atol8_rtol8]", get_cmp_all_close, {"atol": 1e-8, "rtol": 1e-8}),
-    #     ("[all_close_atol8_rtol5]", get_cmp_all_close, {"atol": 1e-8, "rtol": 1e-5}),
-    #     ("[all_close_atol5_rtol5]", get_cmp_all_close, {"atol": 1e-5, "rtol": 1e-5}),
-    #     ("[all_close_atol3_rtol2]", get_cmp_all_close, {"atol": 1e-3, "rtol": 1e-2}),
-    #     ("[all_close_atol2_rtol1]", get_cmp_all_close, {"atol": 1e-2, "rtol": 1e-1}),
-    #     ("[max_diff]", get_cmp_max_diff, {}),
-    #     ("[mean_diff]", get_cmp_mean_diff, {}),
-    #     ("[diff_count_atol8_rtol8]", get_cmp_diff_count, {"atol": 1e-8, "rtol": 1e-8}),
-    #     ("[diff_count_atol8_rtol5]", get_cmp_diff_count, {"atol": 1e-8, "rtol": 1e-5}),
-    #     ("[diff_count_atol5_rtol5]", get_cmp_diff_count, {"atol": 1e-5, "rtol": 1e-5}),
-    #     ("[diff_count_atol3_rtol2]", get_cmp_diff_count, {"atol": 1e-3, "rtol": 1e-2}),
-    #     ("[diff_count_atol2_rtol1]", get_cmp_diff_count, {"atol": 1e-2, "rtol": 1e-1}),
-    # ]
-    cmp_configs = generate_allclose_configs(get_cmp_all_close)
-    cmp_configs.append(("[equal]", get_cmp_equal, {}))
-
-    for key, func, kwargs in cmp_configs:
-        print_and_store_cmp(
-            key=key,
-            cmp_func=func,
-            args=args,
-            expected_out=expected_out,
-            compiled_out=compiled_out,
-            **kwargs,
-        )
+    test_compiler_util.check_equal(
+        args,
+        expected_out,
+        compiled_out,
+        cmp_equal_func=get_cmp_equal,
+    )
+
+    test_compiler_util.check_allclose(
+        args,
+        expected_out,
+        compiled_out,
+        cmp_all_close_func=get_cmp_all_close,
+        cmp_max_diff_func=get_cmp_max_diff,
+        cmp_mean_diff_func=get_cmp_mean_diff,
+    )
 
 
 def get_cmp_equal(expected_out, compiled_out):
diff --git a/graph_net/torch/utils.py b/graph_net/torch/utils.py
index a0a05fc73..97a3c26ab 100644
--- a/graph_net/torch/utils.py
+++ b/graph_net/torch/utils.py
@@ -260,6 +260,7 @@ def extract_dynamic_shapes(example_inputs):
 
 
 def replay_tensor(info):
+    name = info["name"]
     device = info["info"]["device"]
     dtype = info["info"]["dtype"]
     shape = info["info"]["shape"]
@@ -270,7 +271,10 @@ def replay_tensor(info):
         return info["data"].to(device)
     if dtype is torch.bool:
         return (torch.randn(size=shape) > 0.5).to(dtype).to(device)
-    return torch.randn(size=shape).to(dtype).to(device) * std * 0.2 + mean
+    tensor = torch.randn(size=shape).to(dtype).to(device) * std * 0.2 + mean
+    if name.startswith("L_self_modules") and "buffers_running_var" in name:
+        tensor = torch.clip(tensor, min=0)
+    return tensor
 
 
 def modify_code_by_device(code, new_device_str):

From 3b549da1409099f33a82bbaf0a74154be7da0367 Mon Sep 17 00:00:00 2001
From: Liu Yiqun <liuyiqun01@baidu.com>
Date: Mon, 13 Oct 2025 11:20:10 +0800
Subject: [PATCH 2/2] Add TODO.

---
 graph_net/torch/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/graph_net/torch/utils.py b/graph_net/torch/utils.py
index 97a3c26ab..13f1f342e 100644
--- a/graph_net/torch/utils.py
+++ b/graph_net/torch/utils.py
@@ -272,6 +272,7 @@ def replay_tensor(info):
     if dtype is torch.bool:
         return (torch.randn(size=shape) > 0.5).to(dtype).to(device)
     tensor = torch.randn(size=shape).to(dtype).to(device) * std * 0.2 + mean
+    # TODO(Xreki): remove this name-based clip (it keeps running_var non-negative) and fix the weight_meta instead.
     if name.startswith("L_self_modules") and "buffers_running_var" in name:
         tensor = torch.clip(tensor, min=0)
     return tensor