From a9ce5759344ff4dd5330b5ba38f52b1e3ce5e90f Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Sat, 11 Oct 2025 19:43:57 +0800 Subject: [PATCH 1/2] Restrict the running_var parameter of BatchNorm to be greater than 0. --- graph_net/paddle/test_compiler.py | 36 ++++++++++++++++++------- graph_net/test_compiler_util.py | 10 ++++--- graph_net/torch/test_compiler.py | 44 +++++++++++-------------------- graph_net/torch/utils.py | 6 ++++- 4 files changed, 54 insertions(+), 42 deletions(-) diff --git a/graph_net/paddle/test_compiler.py b/graph_net/paddle/test_compiler.py index 5f51600c3..5f15a39d0 100644 --- a/graph_net/paddle/test_compiler.py +++ b/graph_net/paddle/test_compiler.py @@ -180,7 +180,11 @@ def measure_performance(model_call, args, synchronizer_func, profile=False): duration_box = test_compiler_util.DurationBox(-1) with test_compiler_util.naive_timer(duration_box, synchronizer_func): model_call() - print(f"Trial {i + 1}: e2e={duration_box.value:.4f} ms") + print( + f"Trial {i + 1}: e2e={duration_box.value:.4f} ms", + file=sys.stderr, + flush=True, + ) e2e_times.append(duration_box.value) stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times) @@ -256,26 +260,34 @@ def test_single_model(args): # Run on eager mode eager_success = False try: - print("Run model in eager mode.") + print("Run model in eager mode.", file=sys.stderr, flush=True) static_model = get_static_model(args, model) expected_out, eager_time_stats = measure_performance( lambda: static_model(**input_dict), args, synchronizer_func, profile=False ) eager_success = True except Exception as e: - print(f"Run model in eager mode failed: {str(e)}\n{traceback.format_exc()}") + print( + f"Run model in eager mode failed: {str(e)}\n{traceback.format_exc()}", + file=sys.stderr, + flush=True, + ) # Run on compiling mode compiled_success = False try: - print("Run model in compiled mode.") + print("Run model in compiled mode.", file=sys.stderr, flush=True) compiled_model = get_compiled_model(args, model) compiled_out, compiled_time_stats = measure_performance( lambda: compiled_model(**input_dict), args, synchronizer_func, profile=False ) compiled_success = True except Exception as e: - print(f"Run model in compiled mode failed: {str(e)}\n{traceback.format_exc()}") + print( + f"Run model in compiled mode failed: {str(e)}\n{traceback.format_exc()}", + file=sys.stderr, + flush=True, + ) test_compiler_util.print_running_status(args, eager_success, compiled_success) if eager_success and compiled_success: @@ -358,7 +370,7 @@ def test_multi_models(args): if args.verified_samples_list_path is not None: assert os.path.isfile(args.verified_samples_list_path) graphnet_root = path_utils.get_graphnet_root() - print(f"graphnet_root: {graphnet_root}") + print(f"graphnet_root: {graphnet_root}", file=sys.stderr, flush=True) verified_samples = [] with open(args.verified_samples_list_path, "r") as f: for line in f.readlines(): @@ -368,7 +380,11 @@ def test_multi_models(args): failed_samples = [] for model_path in path_utils.get_recursively_model_path(args.model_path): if verified_samples is None or os.path.abspath(model_path) in verified_samples: - print(f"[{sample_idx}] test_compiler, model_path: {model_path}") + print( + f"[{sample_idx}] test_compiler, model_path: {model_path}", + file=sys.stderr, + flush=True, + ) cmd = " ".join( [ sys.executable, @@ -388,10 +404,12 @@ def test_multi_models(args): sample_idx += 1 print( - f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples." + f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples.", + file=sys.stderr, + flush=True, ) for model_path in failed_samples: - print(f"- {model_path}") + print(f"- {model_path}", file=sys.stderr, flush=True) def main(args): diff --git a/graph_net/test_compiler_util.py b/graph_net/test_compiler_util.py index 2b4d083b2..84b0aba21 100644 --- a/graph_net/test_compiler_util.py +++ b/graph_net/test_compiler_util.py @@ -204,14 +204,16 @@ def check_allclose( cmp_all_close_func, cmp_max_diff_func, cmp_mean_diff_func, - cmp_max_relative_diff_func, - cmp_mean_relative_diff_func, + cmp_max_relative_diff_func=None, + cmp_mean_relative_diff_func=None, ): cmp_configs = generate_allclose_configs(cmp_all_close_func) cmp_configs.append(("[max_diff]", cmp_max_diff_func, {})) cmp_configs.append(("[mean_diff]", cmp_mean_diff_func, {})) - cmp_configs.append(("[max_relative_diff]", cmp_max_relative_diff_func, {})) - cmp_configs.append(("[mean_relative_diff]", cmp_mean_relative_diff_func, {})) + if cmp_max_relative_diff_func is not None: + cmp_configs.append(("[max_relative_diff]", cmp_max_relative_diff_func, {})) + if cmp_mean_relative_diff_func is not None: + cmp_configs.append(("[mean_relative_diff]", cmp_mean_relative_diff_func, {})) for key, func, kwargs in cmp_configs: print_and_store_cmp( diff --git a/graph_net/torch/test_compiler.py b/graph_net/torch/test_compiler.py index f99b3354e..034cdf297 100644 --- a/graph_net/torch/test_compiler.py +++ b/graph_net/torch/test_compiler.py @@ -21,7 +21,7 @@ from graph_net.torch.backend.tensorrt_backend import TensorRTBackend from graph_net.torch.backend.blade_disc_backend import BladeDISCBackend from graph_net.torch.backend.nope_backend import NopeBackend -from graph_net.test_compiler_util import generate_allclose_configs +from graph_net import test_compiler_util registry_backend = { "tvm": TvmBackend(), @@ -374,33 +374,21 @@ def print_and_store_cmp(key, cmp_func, args, expected_out, compiled_out, **kwarg def compare_correctness(expected_out, compiled_out, args): - # cmp_configs = [ - # ("[equal]", get_cmp_equal, {}), - # ("[all_close_atol8_rtol8]", get_cmp_all_close, {"atol": 1e-8, "rtol": 1e-8}), - # ("[all_close_atol8_rtol5]", get_cmp_all_close, {"atol": 1e-8, "rtol": 1e-5}), - # ("[all_close_atol5_rtol5]", get_cmp_all_close, {"atol": 1e-5, "rtol": 1e-5}), - # ("[all_close_atol3_rtol2]", get_cmp_all_close, {"atol": 1e-3, "rtol": 1e-2}), - # ("[all_close_atol2_rtol1]", get_cmp_all_close, {"atol": 1e-2, "rtol": 1e-1}), - # ("[max_diff]", get_cmp_max_diff, {}), - # ("[mean_diff]", get_cmp_mean_diff, {}), - # ("[diff_count_atol8_rtol8]", get_cmp_diff_count, {"atol": 1e-8, "rtol": 1e-8}), - # ("[diff_count_atol8_rtol5]", get_cmp_diff_count, {"atol": 1e-8, "rtol": 1e-5}), - # ("[diff_count_atol5_rtol5]", get_cmp_diff_count, {"atol": 1e-5, "rtol": 1e-5}), - # ("[diff_count_atol3_rtol2]", get_cmp_diff_count, {"atol": 1e-3, "rtol": 1e-2}), - # ("[diff_count_atol2_rtol1]", get_cmp_diff_count, {"atol": 1e-2, "rtol": 1e-1}), - # ] - cmp_configs = generate_allclose_configs(get_cmp_all_close) - cmp_configs.append(("[equal]", get_cmp_equal, {})) - - for key, func, kwargs in cmp_configs: - print_and_store_cmp( - key=key, - cmp_func=func, - args=args, - expected_out=expected_out, - compiled_out=compiled_out, - **kwargs, - ) + test_compiler_util.check_equal( + args, + expected_out, + compiled_out, + cmp_equal_func=get_cmp_equal, + ) + + test_compiler_util.check_allclose( + args, + expected_out, + compiled_out, + cmp_all_close_func=get_cmp_all_close, + cmp_max_diff_func=get_cmp_max_diff, + cmp_mean_diff_func=get_cmp_mean_diff, + ) def get_cmp_equal(expected_out, compiled_out): diff --git a/graph_net/torch/utils.py b/graph_net/torch/utils.py index a0a05fc73..97a3c26ab 100644 --- a/graph_net/torch/utils.py +++ b/graph_net/torch/utils.py @@ -260,6 +260,7 @@ def extract_dynamic_shapes(example_inputs): def replay_tensor(info): + name = info["name"] device = info["info"]["device"] dtype = info["info"]["dtype"] shape = info["info"]["shape"] @@ -270,7 +271,10 @@ def replay_tensor(info): return info["data"].to(device) if dtype is torch.bool: return (torch.randn(size=shape) > 0.5).to(dtype).to(device) - return torch.randn(size=shape).to(dtype).to(device) * std * 0.2 + mean + tensor = torch.randn(size=shape).to(dtype).to(device) * std * 0.2 + mean + if name.startswith("L_self_modules") and "buffers_running_var" in name: + tensor = torch.clip(tensor, min=0) + return tensor def modify_code_by_device(code, new_device_str): From 3b549da1409099f33a82bbaf0a74154be7da0367 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 13 Oct 2025 11:20:10 +0800 Subject: [PATCH 2/2] Add TODO. --- graph_net/torch/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/graph_net/torch/utils.py b/graph_net/torch/utils.py index 97a3c26ab..13f1f342e 100644 --- a/graph_net/torch/utils.py +++ b/graph_net/torch/utils.py @@ -272,6 +272,7 @@ def replay_tensor(info): if dtype is torch.bool: return (torch.randn(size=shape) > 0.5).to(dtype).to(device) tensor = torch.randn(size=shape).to(dtype).to(device) * std * 0.2 + mean + # TODO(Xreki): remove this ugly code, and change the weight_meta instead. if name.startswith("L_self_modules") and "buffers_running_var" in name: tensor = torch.clip(tensor, min=0) return tensor