From d2afb65b148243a60dac4b09e4645fbc8c4bb444 Mon Sep 17 00:00:00 2001 From: wooway777 Date: Thu, 13 Nov 2025 15:49:08 +0800 Subject: [PATCH 1/3] issue/593 - added verbose test mode --- test/infinicore/framework/base.py | 60 ++++++++++++------------- test/infinicore/framework/config.py | 9 +++- test/infinicore/framework/runner.py | 1 + test/infinicore/run.py | 68 +++++++++++++++++++++++++++-- 4 files changed, 102 insertions(+), 36 deletions(-) diff --git a/test/infinicore/framework/base.py b/test/infinicore/framework/base.py index 95d4a55f2..022f10c75 100644 --- a/test/infinicore/framework/base.py +++ b/test/infinicore/framework/base.py @@ -1,5 +1,6 @@ import torch import infinicore +import traceback # Add import for traceback from abc import ABC, abstractmethod from typing import List, Dict, Any, Optional @@ -216,14 +217,19 @@ def __str__(self): class TestConfig: """Test configuration""" - def __init__(self, debug=False, bench=False, num_prerun=10, num_iterations=1000): + def __init__( + self, + debug=False, + bench=False, + num_prerun=10, + num_iterations=1000, + verbose=False, + ): self.debug = debug self.bench = bench self.num_prerun = num_prerun self.num_iterations = num_iterations - - -# In base.py - update the TestRunner class + self.verbose = verbose class TestRunner: @@ -273,16 +279,11 @@ def run_tests(self, devices, test_func, test_type="Test"): # Test was skipped due to both operators not being implemented skip_msg = f"{test_case} - {InfiniDeviceNames[device]} - Both operators not implemented" self.skipped_tests.append(skip_msg) - print( - f"\033[93m⚠\033[0m Skipped - both operators not implemented" - ) elif status == "partial": # Test was partially executed (one operator not implemented) partial_msg = f"{test_case} - {InfiniDeviceNames[device]} - One operator not implemented" self.partial_tests.append(partial_msg) - print( - f"\033[93m⚠\033[0m Partial - one operator not implemented" - ) + # Failed tests are handled in the exception handler below except Exception as e: @@ -291,6 +292,12 @@ def run_tests(self, devices, test_func, test_type="Test"): ) print(f"\033[91m✗\033[0m {error_msg}") self.failed_tests.append(error_msg) + + # In verbose mode, print full traceback and stop execution + if self.config.verbose: + traceback.print_exc() + return False # Stop test execution immediately + if self.config.debug: raise @@ -312,34 +319,16 @@ def print_summary(self): print(f"\n{'='*60}") print("TEST SUMMARY") - print(f"{'='*60}") print(f"Total tests: {total_tests}") print(f"\033[92mPassed: {passed_count}\033[0m") - # Display partial tests (one operator not implemented) - if self.partial_tests: - print( - f"\033[93mPartial (one operator not implemented): {partial_count}\033[0m" - ) - for test in self.partial_tests: - print(f" - {test}") - - # Display skipped tests (both operators not implemented) - if self.skipped_tests: - print( - f"\033[93mSkipped (both operators not implemented): {skipped_count}\033[0m" - ) - for test in self.skipped_tests: - print(f" - {test}") - + result = True # Display failed tests if self.failed_tests: print(f"\033[91mFailed: {failed_count}\033[0m") - for failure in self.failed_tests: - print(f" - {failure}") # Return False only if there are actual test failures - return False + result = False else: # Calculate success rate based on actual executed tests executed_tests = passed_count + partial_count + failed_count @@ -352,10 +341,11 @@ def print_summary(self): print( f"\n\033[93mTests completed with some implementations missing\033[0m" ) - return True # 
Skipped/partial tests don't count as failures else: print(f"\n\033[92mAll tests passed!\033[0m") - return True + + print(f"{'='*60}") + return result class BaseOperatorTest(ABC): @@ -537,6 +527,9 @@ def run_test(self, device, test_case, config): if torch_result is None: torch_implemented = False except NotImplementedError: + if config.verbose: + traceback.print_exc() + return False # Stop test execution immediately torch_implemented = False torch_result = None @@ -545,6 +538,9 @@ def run_test(self, device, test_case, config): if infini_result is None: infini_implemented = False except NotImplementedError: + if config.verbose: + traceback.print_exc() + return False # Stop test execution immediately infini_implemented = False infini_result = None diff --git a/test/infinicore/framework/config.py b/test/infinicore/framework/config.py index 8b09ea90c..9c87774e4 100644 --- a/test/infinicore/framework/config.py +++ b/test/infinicore/framework/config.py @@ -1,7 +1,6 @@ import argparse from .devices import InfiniDeviceEnum -# hardware_info.py """ Shared hardware platform information for the InfiniCore testing framework """ @@ -61,6 +60,9 @@ def get_args(): # Run with debug mode on multiple devices python test_operator.py --cpu --nvidia --debug + # Run with verbose mode to stop on first error with full traceback + python test_operator.py --cpu --nvidia --verbose + # Run performance profiling with custom iterations python test_operator.py --nvidia --bench --num_prerun 50 --num_iterations 5000 @@ -90,6 +92,11 @@ def get_args(): action="store_true", help="Enable debug mode for detailed tensor comparison", ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable verbose mode to stop on first error with full traceback", + ) # Device options using shared hardware info hardware_group = get_hardware_args_group(parser) diff --git a/test/infinicore/framework/runner.py b/test/infinicore/framework/runner.py index 336686824..4858fecdc 100644 --- a/test/infinicore/framework/runner.py +++ b/test/infinicore/framework/runner.py @@ -28,6 +28,7 @@ def run(self): bench=self.args.bench, num_prerun=self.args.num_prerun, num_iterations=self.args.num_iterations, + verbose=self.args.verbose, # Pass verbose flag to TestConfig ) runner = TestRunner(self.operator_test.test_cases, config) diff --git a/test/infinicore/run.py b/test/infinicore/run.py index 32d52bfc6..a4303ce78 100644 --- a/test/infinicore/run.py +++ b/test/infinicore/run.py @@ -122,6 +122,9 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): results = {} + # Check if verbose mode is enabled + verbose_mode = extra_args and "--verbose" in extra_args + for test_file in operator_test_files: test_name = test_file.stem @@ -199,14 +202,27 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): f"{status_icon} {test_name}: {status_text} (return code: {returncode})" ) + # In verbose mode, stop execution on first failure + if verbose_mode and not success and returncode not in [-2, -3]: + break + except Exception as e: print(f"💥 {test_name}: ERROR - {str(e)}") results[test_name] = (False, -1, "", str(e)) + # In verbose mode, stop execution on any exception + if verbose_mode: + print(f"\n{'!'*60}") + print( + f"VERBOSE MODE: Stopping execution due to exception in {test_name}" + ) + print(f"{'!'*60}") + break + return results -def print_summary(results): +def print_summary(results, verbose_mode=False, total_expected_tests=0): """Print a comprehensive summary of test results.""" print(f"\n{'='*80}") 
print("CUMULATIVE TEST SUMMARY") @@ -242,7 +258,11 @@ def print_summary(results): total = len(results) - print(f"Total tests: {total}") + print(f"Total tests run: {total}") + if total_expected_tests > 0 and total < total_expected_tests: + print(f"Total tests expected: {total_expected_tests}") + print(f"Tests not executed: {total_expected_tests - total}") + print(f"Passed: {passed}") print(f"Failed: {failed}") @@ -290,6 +310,10 @@ def print_summary(results): success_rate = passed / executed_tests * 100 print(f"\nSuccess rate: {success_rate:.1f}%") + if verbose_mode and total < total_expected_tests: + print(f"\n💡 Verbose mode: Execution stopped after first failure") + print(f" {total_expected_tests - total} tests were not executed") + if failed == 0: if skipped > 0 or partial > 0: print(f"\n⚠️ Tests completed with some operators not implemented") @@ -358,6 +382,11 @@ def generate_help_epilog(ops_dir): epilog_parts.append(" # Run with debug mode on multiple devices") epilog_parts.append(" python run.py --cpu --nvidia --debug") epilog_parts.append("") + epilog_parts.append( + " # Run with verbose mode to stop on first error with full traceback" + ) + epilog_parts.append(" python run.py --cpu --nvidia --verbose") + epilog_parts.append("") epilog_parts.append(" # List available tests without running") epilog_parts.append(" python run.py --list") epilog_parts.append("") @@ -386,6 +415,12 @@ def generate_help_epilog(ops_dir): epilog_parts.append( " - --bench option is disabled in batch mode (run individual tests for benchmarking)" ) + epilog_parts.append( + " - --verbose mode stops execution on first error and shows full traceback" + ) + epilog_parts.append( + " - In verbose mode, subsequent tests are skipped after first failure" + ) return "\n".join(epilog_parts) @@ -413,6 +448,11 @@ def main(): action="store_true", help="List all available test files without running them", ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable verbose mode to stop on first error with full traceback (passed to individual tests)", + ) from framework import get_hardware_args_group @@ -442,6 +482,10 @@ def main(): print(f"Error: Ops directory '{ops_dir}' does not exist.") sys.exit(1) + # Add verbose flag to extra arguments if specified + if args.verbose and "--verbose" not in unknown_args: + unknown_args.append("--verbose") + # Show what extra arguments will be passed if unknown_args: print(f"Passing extra arguments to test scripts: {unknown_args}") @@ -453,6 +497,9 @@ def main(): print(f"Operating directory: {ops_dir}") print(f"Available operators: {len(available_operators)}") + if args.verbose: + print(f"Verbose mode: ENABLED (will stop on first error with full traceback)") + if args.ops: # Validate requested operators valid_ops = [] @@ -469,10 +516,13 @@ def main(): if valid_ops: print(f"Testing operators: {', '.join(valid_ops)}") + total_expected_tests = len(valid_ops) else: print("No valid operators specified. 
Running all available tests.") + total_expected_tests = len(available_operators) else: print("Testing all available operators") + total_expected_tests = len(available_operators) print() @@ -484,7 +534,7 @@ def main(): ) # Print summary and exit with appropriate code - all_passed = print_summary(results) + all_passed = print_summary(results, args.verbose, total_expected_tests) # Check if there were any tests with missing implementations has_missing_implementations = any( @@ -495,6 +545,18 @@ def main(): print(f"\n⚠️ Note: Some operators are not fully implemented") print(f" Run individual tests for details on missing implementations") + if args.verbose and not all_passed: + print( + f"\n💡 Verbose mode tip: Use individual test commands for detailed debugging:" + ) + failed_ops = [ + name + for name, (success, _, _, _) in results.items() + if not success and name in results + ] + for op in failed_ops[:3]: # Show first 3 failed operators + print(f" python {ops_dir / (op + '.py')} --verbose") + sys.exit(0 if all_passed else 1) From 5c88cbbda5368289fa57136666d7761e9d7d76b4 Mon Sep 17 00:00:00 2001 From: wooway777 Date: Thu, 13 Nov 2025 20:53:01 +0800 Subject: [PATCH 2/3] issue/594 - cumulating total time in tests --- test/infinicore/framework/base.py | 55 +++++++++++++++++++--- test/infinicore/framework/runner.py | 4 ++ test/infinicore/framework/utils.py | 12 +++-- test/infinicore/run.py | 72 +++++++++++++++++++++++++---- 4 files changed, 126 insertions(+), 17 deletions(-) diff --git a/test/infinicore/framework/base.py b/test/infinicore/framework/base.py index 022f10c75..105a033a3 100644 --- a/test/infinicore/framework/base.py +++ b/test/infinicore/framework/base.py @@ -1,6 +1,6 @@ import torch import infinicore -import traceback # Add import for traceback +import traceback from abc import ABC, abstractmethod from typing import List, Dict, Any, Optional @@ -12,8 +12,6 @@ create_test_comparator, infinicore_tensor_from_torch, profile_operation, - synchronize_device, - convert_infinicore_to_torch, ) @@ -244,6 +242,12 @@ def __init__(self, test_cases, test_config): self.passed_tests = ( [] ) # Track passed tests (both operators implemented and passed) + # Add benchmark timing statistics + self.benchmark_times = { + "torch_total": 0.0, + "infinicore_total": 0.0, + "per_test_case": {}, # Store timing per test case + } def run_tests(self, devices, test_func, test_type="Test"): """ @@ -344,9 +348,35 @@ def print_summary(self): else: print(f"\n\033[92mAll tests passed!\033[0m") + # Print benchmark summary if benchmarking was enabled + if self.config.bench and ( + self.benchmark_times["torch_total"] > 0 + or self.benchmark_times["infinicore_total"] > 0 + ): + self._print_benchmark_summary() + print(f"{'='*60}") return result + def _print_benchmark_summary(self): + """Print benchmark timing summary""" + print(f"{'-'*60}") + print("BENCHMARK SUMMARY") + + torch_total = self.benchmark_times["torch_total"] + infinicore_total = self.benchmark_times["infinicore_total"] + + if torch_total > 0: + print(f"PyTorch Total Time: {torch_total * 1000:.3f} ms") + if infinicore_total > 0: + print(f"InfiniCore Total Time: {infinicore_total * 1000:.3f} ms") + + if torch_total > 0 and infinicore_total > 0: + speedup = ( + torch_total / infinicore_total if infinicore_total > 0 else float("inf") + ) + print(f"Speedup (PyTorch/InfiniCore): {speedup:.2f}x") + class BaseOperatorTest(ABC): """Base operator test""" @@ -711,8 +741,13 @@ def _run_benchmarking( comparison_target, ): """ - Unified benchmarking logic + Unified 
benchmarking logic with timing accumulation """ + + # Initialize timing variables + torch_time = 0.0 + infini_time = 0.0 + if torch_implemented: if output_count > 1: # For multiple outputs, just call the operator @@ -735,12 +770,13 @@ def torch_op(): else inputs[comparison_target] ) - profile_operation( + torch_time = profile_operation( "PyTorch ", torch_op, device_str, config.num_prerun, config.num_iterations, + total=True, ) if infini_implemented: @@ -759,10 +795,17 @@ def infini_op(): else infini_inputs[comparison_target] ) - profile_operation( + infini_time = profile_operation( "InfiniCore", infini_op, device_str, config.num_prerun, config.num_iterations, + total=True, ) + + # Store timing information in the test runner + if hasattr(config, "_test_runner") and config._test_runner: + # Accumulate total times + config._test_runner.benchmark_times["torch_total"] += torch_time + config._test_runner.benchmark_times["infinicore_total"] += infini_time diff --git a/test/infinicore/framework/runner.py b/test/infinicore/framework/runner.py index 4858fecdc..0b86396b5 100644 --- a/test/infinicore/framework/runner.py +++ b/test/infinicore/framework/runner.py @@ -32,6 +32,10 @@ def run(self): ) runner = TestRunner(self.operator_test.test_cases, config) + + # Pass the test runner instance to config for benchmark timing accumulation + config._test_runner = runner + devices = get_test_devices(self.args) # Run unified tests - returns True if no tests failed diff --git a/test/infinicore/framework/utils.py b/test/infinicore/framework/utils.py index 2448e3857..051a30321 100644 --- a/test/infinicore/framework/utils.py +++ b/test/infinicore/framework/utils.py @@ -22,10 +22,12 @@ def timed_op(func, num_iterations, device): for _ in range(num_iterations): func() synchronize_device(device) - return (time.time() - start) / num_iterations + return time.time() - start -def profile_operation(desc, func, torch_device, num_prerun, num_iterations): +def profile_operation( + desc, func, torch_device, num_prerun, num_iterations, total=False +): """ Performance profiling workflow """ @@ -35,7 +37,11 @@ def profile_operation(desc, func, torch_device, num_prerun, num_iterations): # Timed execution elapsed = timed_op(lambda: func(), num_iterations, torch_device) - print(f" {desc} time: {elapsed * 1000 :6f} ms") + print(f" {desc} time: {elapsed / num_iterations * 1000 :6f} ms") + if total: + return elapsed + else: + return elapsed / num_iterations def debug(actual, desired, atol=0, rtol=1e-2, equal_nan=False, verbose=True): diff --git a/test/infinicore/run.py b/test/infinicore/run.py index a4303ce78..18a0b4801 100644 --- a/test/infinicore/run.py +++ b/test/infinicore/run.py @@ -125,6 +125,14 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): # Check if verbose mode is enabled verbose_mode = extra_args and "--verbose" in extra_args + # Check if bench mode is enabled for cumulative timing + bench_mode = extra_args and "--bench" in extra_args + cumulative_timing = { + "total_torch_time": 0.0, + "total_infinicore_time": 0.0, + "operators_tested": 0, + } + for test_file in operator_test_files: test_name = test_file.stem @@ -157,7 +165,7 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): # Both operators not implemented - skipped test success = False # Not a failure, but skipped returncode = -2 # Special code for skipped - elif "one operator not implemented" in stdout_lower: + elif "operator not implemented" in stdout_lower: # One operator not implemented - partial test success = 
False # Not fully successful returncode = -3 # Special code for partial @@ -202,6 +210,34 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): f"{status_icon} {test_name}: {status_text} (return code: {returncode})" ) + # Extract benchmark timing if in bench mode + if bench_mode and success: + # Look for benchmark summary in stdout + lines = result.stdout.split("\n") + torch_time = 0.0 + infini_time = 0.0 + + for line in lines: + if "PyTorch Total Time:" in line: + try: + # Extract time value (e.g., "PyTorch Total Time: 123.456 ms") + torch_time = ( + float(line.split(":")[1].strip().split()[0]) / 1000.0 + ) # Convert to seconds + except: + pass + elif "InfiniCore Total Time:" in line: + try: + infini_time = ( + float(line.split(":")[1].strip().split()[0]) / 1000.0 + ) # Convert to seconds + except: + pass + + cumulative_timing["total_torch_time"] += torch_time + cumulative_timing["total_infinicore_time"] += infini_time + cumulative_timing["operators_tested"] += 1 + # In verbose mode, stop execution on first failure if verbose_mode and not success and returncode not in [-2, -3]: break @@ -219,11 +255,13 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): print(f"{'!'*60}") break - return results + return results, cumulative_timing -def print_summary(results, verbose_mode=False, total_expected_tests=0): - """Print a comprehensive summary of test results.""" +def print_summary( + results, verbose_mode=False, total_expected_tests=0, cumulative_timing=None +): + """Print a comprehensive summary of test results including benchmark data.""" print(f"\n{'='*80}") print("CUMULATIVE TEST SUMMARY") print(f"{'='*80}") @@ -272,6 +310,19 @@ def print_summary(results, verbose_mode=False, total_expected_tests=0): if partial > 0: print(f"Partial: {partial}") + # Print benchmark summary if cumulative_timing data is available + if cumulative_timing and cumulative_timing["operators_tested"] > 0: + print(f"{'-'*40}") + print("BENCHMARK SUMMARY:") + print(f" Operators Tested: {cumulative_timing['operators_tested']}") + print( + f" Total PyTorch Time: {cumulative_timing['total_torch_time'] * 1000:.3f} ms" + ) + print( + f" Total InfiniCore Time: {cumulative_timing['total_infinicore_time'] * 1000:.3f} ms" + ) + print(f"{'-'*40}") + # Display passed operators if passed_operators: print(f"\n✅ PASSED OPERATORS ({len(passed_operators)}):") @@ -304,7 +355,7 @@ def print_summary(results, verbose_mode=False, total_expected_tests=0): print(" " + ", ".join(line_ops)) if total > 0: - # Calculate success rate based on executed tests only + # Calculate success rate based on actual executed tests executed_tests = passed + failed + partial if executed_tests > 0: success_rate = passed / executed_tests * 100 @@ -387,6 +438,9 @@ def generate_help_epilog(ops_dir): ) epilog_parts.append(" python run.py --cpu --nvidia --verbose") epilog_parts.append("") + epilog_parts.append(" # Run with benchmarking to get cumulative timing") + epilog_parts.append(" python run.py --cpu --bench") + epilog_parts.append("") epilog_parts.append(" # List available tests without running") epilog_parts.append(" python run.py --list") epilog_parts.append("") @@ -413,7 +467,7 @@ def generate_help_epilog(ops_dir): " - Operators are automatically discovered from the ops directory" ) epilog_parts.append( - " - --bench option is disabled in batch mode (run individual tests for benchmarking)" + " - --bench mode now shows cumulative timing across all operators" ) epilog_parts.append( " - --verbose mode stops execution 
on first error and shows full traceback" @@ -527,14 +581,16 @@ def main(): print() # Run all tests - results = run_all_op_tests( + results, cumulative_timing = run_all_op_tests( ops_dir=ops_dir, specific_ops=args.ops, extra_args=unknown_args, ) # Print summary and exit with appropriate code - all_passed = print_summary(results, args.verbose, total_expected_tests) + all_passed = print_summary( + results, args.verbose, total_expected_tests, cumulative_timing + ) # Check if there were any tests with missing implementations has_missing_implementations = any( From 1e6ccdc9dbccf9bf87685e7dff94e7f1cc27d41a Mon Sep 17 00:00:00 2001 From: wooway777 Date: Thu, 13 Nov 2025 20:53:01 +0800 Subject: [PATCH 3/3] issue/598 - optimize run.py performance --- README.md | 4 +- test/infinicore/framework/base.py | 117 +++++--- test/infinicore/framework/config.py | 3 +- test/infinicore/framework/runner.py | 8 +- test/infinicore/ops/elu.py | 6 +- test/infinicore/ops/multi_margin_loss.py | 8 +- test/infinicore/run.py | 352 ++++++++++++++--------- 7 files changed, 318 insertions(+), 180 deletions(-) diff --git a/README.md b/README.md index 10ec103ae..7bef3ba08 100644 --- a/README.md +++ b/README.md @@ -182,9 +182,9 @@ pip install . -e ```bash # 测试单算子 -python test/infinicore/ops/[operator].py [--bench | --debug] [--cpu | --nvidia | --cambricon | --ascend | --iluvatar | --metax | --moore | --kunlun | --Hygon] +python test/infinicore/ops/[operator].py [--bench | --debug | --verbose] [--cpu | --nvidia | --cambricon | --ascend | --iluvatar | --metax | --moore | --kunlun | --Hygon] # 测试全部算子 -python test/infinicore/run.py [--bench | --debug] [--cpu | --nvidia | --cambricon | --ascend | --iluvatar | --metax | --moore | --kunlun] +python test/infinicore/run.py [--bench | --debug | --verbose] [--cpu | --nvidia | --cambricon | --ascend | --iluvatar | --metax | --moore | --kunlun] ``` 使用 -h 查看更多参数。 diff --git a/test/infinicore/framework/base.py b/test/infinicore/framework/base.py index 105a033a3..ebb889244 100644 --- a/test/infinicore/framework/base.py +++ b/test/infinicore/framework/base.py @@ -1,9 +1,9 @@ import torch import infinicore import traceback - +from dataclasses import dataclass from abc import ABC, abstractmethod -from typing import List, Dict, Any, Optional +from typing import List, Dict, Any, Optional, Tuple from .datatypes import to_torch_dtype, to_infinicore_dtype from .devices import InfiniDeviceNames, torch_device_map @@ -15,6 +15,18 @@ ) +@dataclass +class TestResult: + """Test result data structure""" + success: bool + return_code: int # 0: success, -1: failure, -2: skipped, -3: partial + torch_time: float = 0.0 + infini_time: float = 0.0 + error_message: str = "" + test_case: Any = None + device: Any = None + + class TestCase: """Test case with all configuration included""" @@ -23,11 +35,11 @@ def __init__( inputs, kwargs=None, output_spec=None, + output_specs=None, comparison_target=None, description="", tolerance=None, output_count=1, - output_specs=None, ): """ Initialize a test case with complete configuration @@ -248,6 +260,8 @@ def __init__(self, test_cases, test_config): "infinicore_total": 0.0, "per_test_case": {}, # Store timing per test case } + # Store test results + self.test_results = [] def run_tests(self, devices, test_func, test_type="Test"): """ @@ -270,25 +284,30 @@ def run_tests(self, devices, test_func, test_type="Test"): try: print(f"{test_case}") - # Execute test and get result status - success, status = test_func(device, test_case, self.config) + # Execute test and get TestResult 
object + test_result = test_func(device, test_case, self.config) + self.test_results.append(test_result) - # Handle different test statuses - if status == "passed": + # Handle different test statuses based on return_code + if test_result.return_code == 0: # Success self.passed_tests.append( f"{test_case} - {InfiniDeviceNames[device]}" ) print(f"\033[92m✓\033[0m Passed") - elif status == "skipped": - # Test was skipped due to both operators not being implemented + elif test_result.return_code == -1: + fail_msg = f"{test_case} - {InfiniDeviceNames[device]} - Test terminated in verbose mode." + self.failed_tests.append(fail_msg) + elif test_result.return_code == -2: # Skipped skip_msg = f"{test_case} - {InfiniDeviceNames[device]} - Both operators not implemented" self.skipped_tests.append(skip_msg) - elif status == "partial": - # Test was partially executed (one operator not implemented) + print(f"\033[93m⚠\033[0m Both operators not implemented - test skipped") + elif test_result.return_code == -3: # Partial partial_msg = f"{test_case} - {InfiniDeviceNames[device]} - One operator not implemented" self.partial_tests.append(partial_msg) + print(f"\033[93m⚠\033[0m One operator not implemented - running single operator without comparison") - # Failed tests are handled in the exception handler below + if self.config.verbose and test_result.return_code != 0: + return False except Exception as e: error_msg = ( @@ -296,7 +315,16 @@ def run_tests(self, devices, test_func, test_type="Test"): ) print(f"\033[91m✗\033[0m {error_msg}") self.failed_tests.append(error_msg) - + + # Create a failed TestResult + failed_result = TestResult( + success=False, + return_code=-1, + error_message=str(e), + test_case=test_case, + device=device + ) + self.test_results.append(failed_result) # In verbose mode, print full traceback and stop execution if self.config.verbose: traceback.print_exc() @@ -305,8 +333,7 @@ def run_tests(self, devices, test_func, test_type="Test"): if self.config.debug: raise - # Return True if no tests failed (skipped/partial tests don't count as failures) - return len(self.failed_tests) == 0 + return len(self.failed_tests) == 0 and len(self.skipped_tests) == 0 and len(self.partial_tests) == 0 def print_summary(self): """ @@ -377,6 +404,10 @@ def _print_benchmark_summary(self): ) print(f"Speedup (PyTorch/InfiniCore): {speedup:.2f}x") + def get_test_results(self): + """Get all test results""" + return self.test_results + class BaseOperatorTest(ABC): """Base operator test""" @@ -480,11 +511,17 @@ def run_test(self, device, test_case, config): config: Test configuration Returns: - tuple: (success, status) where: - success: bool indicating if test passed - status: str describing test status ("passed", "skipped", "partial") + TestResult: Test result object containing status and timing information """ device_str = torch_device_map[device] + + # Initialize test result + test_result = TestResult( + success=False, + return_code=-1, # Default to failure + test_case=test_case, + device=device + ) # Prepare inputs and kwargs with actual tensors inputs, kwargs = self.prepare_inputs_and_kwargs(test_case, device) @@ -559,7 +596,10 @@ def run_test(self, device, test_case, config): except NotImplementedError: if config.verbose: traceback.print_exc() - return False # Stop test execution immediately + # Return test result immediately in verbose mode + test_result.return_code = -1 + test_result.error_message = "torch_operator not implemented" + return test_result torch_implemented = False torch_result = None @@ 
-570,26 +610,24 @@ def run_test(self, device, test_case, config): except NotImplementedError: if config.verbose: traceback.print_exc() - return False # Stop test execution immediately + # Return test result immediately in verbose mode + test_result.return_code = -1 + test_result.error_message = "infinicore_operator not implemented" + return test_result infini_implemented = False infini_result = None # Skip if neither operator is implemented if not torch_implemented and not infini_implemented: - print(f"\033[93m⚠\033[0m Both operators not implemented - test skipped") - return False, "skipped" + test_result.return_code = -2 # Skipped + return test_result # Single operator execution without comparison if not torch_implemented or not infini_implemented: - missing_op = ( - "torch_operator" if not torch_implemented else "infinicore_operator" - ) - print( - f"\033[93m⚠\033[0m {missing_op} not implemented - running single operator without comparison" - ) - + test_result.return_code = -3 # Partial + # Run benchmarking for partial tests if enabled if config.bench: - self._run_benchmarking( + torch_time, infini_time = self._run_benchmarking( config, device_str, torch_implemented, @@ -601,8 +639,9 @@ def run_test(self, device, test_case, config): test_case.output_count, comparison_target, ) - return False, "partial" - + test_result.torch_time = torch_time + test_result.infini_time = infini_time + return test_result # ========================================================================== # MULTIPLE OUTPUTS COMPARISON LOGIC # ========================================================================== @@ -711,7 +750,7 @@ def run_test(self, device, test_case, config): # UNIFIED BENCHMARKING LOGIC # ========================================================================== if config.bench: - self._run_benchmarking( + torch_time, infini_time = self._run_benchmarking( config, device_str, True, @@ -723,9 +762,13 @@ def run_test(self, device, test_case, config): test_case.output_count, comparison_target, ) + test_result.torch_time = torch_time + test_result.infini_time = infini_time # Test passed successfully - return True, "passed" + test_result.success = True + test_result.return_code = 0 + return test_result def _run_benchmarking( self, @@ -742,8 +785,10 @@ def _run_benchmarking( ): """ Unified benchmarking logic with timing accumulation - """ + Returns: + tuple: (torch_time, infini_time) timing results + """ # Initialize timing variables torch_time = 0.0 infini_time = 0.0 @@ -809,3 +854,5 @@ def infini_op(): # Accumulate total times config._test_runner.benchmark_times["torch_total"] += torch_time config._test_runner.benchmark_times["infinicore_total"] += infini_time + + return torch_time, infini_time diff --git a/test/infinicore/framework/config.py b/test/infinicore/framework/config.py index 9c87774e4..ccbff88e6 100644 --- a/test/infinicore/framework/config.py +++ b/test/infinicore/framework/config.py @@ -100,8 +100,9 @@ def get_args(): # Device options using shared hardware info hardware_group = get_hardware_args_group(parser) + args, unknown = parser.parse_known_args() - return parser.parse_args() + return args def get_test_devices(args): diff --git a/test/infinicore/framework/runner.py b/test/infinicore/framework/runner.py index 0b86396b5..c0de4a7f9 100644 --- a/test/infinicore/framework/runner.py +++ b/test/infinicore/framework/runner.py @@ -21,7 +21,9 @@ def run(self): """Execute the complete test suite Returns: - bool: True if all tests passed or were skipped/partial, False if any tests failed + 
tuple: (success, test_runner) where: + success: bool indicating if all tests passed or were skipped/partial + test_runner: TestRunner instance with test results """ config = TestConfig( debug=self.args.debug, @@ -51,7 +53,7 @@ def run(self): # Both conditions must be True for overall success # - has_no_failures: no test failures during execution # - summary_passed: summary confirms no failures - return has_no_failures and summary_passed + return (has_no_failures and summary_passed), runner def run_and_exit(self): """Run tests and exit with appropriate status code @@ -60,5 +62,5 @@ def run_and_exit(self): 0: All tests passed or were skipped/partial (no failures) 1: One or more tests failed """ - success = self.run() + success, runner = self.run() sys.exit(0 if success else 1) diff --git a/test/infinicore/ops/elu.py b/test/infinicore/ops/elu.py index 48cd846c0..92d2072d3 100644 --- a/test/infinicore/ops/elu.py +++ b/test/infinicore/ops/elu.py @@ -133,9 +133,9 @@ def torch_operator(self, *args, **kwargs): """PyTorch ELU implementation""" return torch.nn.functional.elu(*args, **kwargs) - def infinicore_operator(self, x, alpha=1.0, out=None, **kwargs): - """InfiniCore ELU implementation""" - return None + # def infinicore_operator(self, x, alpha=1.0, out=None, **kwargs): + # """InfiniCore ELU implementation""" + # return None def main(): diff --git a/test/infinicore/ops/multi_margin_loss.py b/test/infinicore/ops/multi_margin_loss.py index d4620f109..cc8f0da5c 100644 --- a/test/infinicore/ops/multi_margin_loss.py +++ b/test/infinicore/ops/multi_margin_loss.py @@ -103,7 +103,7 @@ def parse_test_cases(): return test_cases -class MultiMarginLossOpTest(BaseOperatorTest): +class OpTest(BaseOperatorTest): """MultiMarginLoss operator test with device handling""" def __init__(self): @@ -116,9 +116,9 @@ def torch_operator(self, *args, **kwargs): """PyTorch multi_margin_loss implementation with device handling""" return F.multi_margin_loss(*args, **kwargs) - def infinicore_operator(self, *args, **kwargs): - """InfiniCore multi_margin_loss implementation""" - return None + # def infinicore_operator(self, *args, **kwargs): + # """InfiniCore multi_margin_loss implementation""" + # return None def main(): diff --git a/test/infinicore/run.py b/test/infinicore/run.py index 18a0b4801..ff642b8db 100644 --- a/test/infinicore/run.py +++ b/test/infinicore/run.py @@ -1,9 +1,10 @@ import os import sys -import subprocess import argparse from pathlib import Path -from typing import Dict, Tuple, List +import importlib.util + +from framework import get_hardware_args_group def find_ops_directory(location=None): @@ -58,9 +59,59 @@ def get_available_operators(ops_dir): return sorted(operators) -def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): +def import_operator_test(test_file_path): """ - Run all operator test scripts in the ops directory. + Import an operator test module and return the test class instance. 
+ + Args: + test_file_path: Path to the test file + + Returns: + tuple: (success, test_instance_or_error) + """ + try: + # Create a unique module name + module_name = f"op_test_{test_file_path.stem}" + + # Load the module from file + spec = importlib.util.spec_from_file_location(module_name, test_file_path) + if spec is None or spec.loader is None: + return False, f"Could not load module from {test_file_path}" + + module = importlib.util.module_from_spec(spec) + + # Add the module to sys.modules + sys.modules[module_name] = module + + # Execute the module + spec.loader.exec_module(module) + + # Find the test class (usually named OpTest) + test_class = None + for attr_name in dir(module): + attr = getattr(module, attr_name) + if ( + isinstance(attr, type) + and hasattr(attr, "__bases__") + and any("BaseOperatorTest" in str(base) for base in attr.__bases__) + ): + test_class = attr + break + + if test_class is None: + return False, f"No test class found in {test_file_path}" + + # Create an instance + test_instance = test_class() + return True, test_instance + + except Exception as e: + return False, f"Error importing {test_file_path}: {str(e)}" + + +def run_all_op_tests(ops_dir=None, specific_ops=None, bench=False, verbose=False): + """ + Run all operator test scripts in the ops directory using direct import. Args: ops_dir (str, optional): Path to the ops directory. If None, uses auto-detection. @@ -68,7 +119,7 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): extra_args (list, optional): Extra command line arguments to pass to test scripts. Returns: - dict: Results dictionary with test names as keys and (success, return_code, stdout, stderr) as values. + dict: Results dictionary with test names as keys and (success, test_runner, stdout, stderr) as values. 
""" if ops_dir is None: ops_dir = find_ops_directory() @@ -122,11 +173,6 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): results = {} - # Check if verbose mode is enabled - verbose_mode = extra_args and "--verbose" in extra_args - - # Check if bench mode is enabled for cumulative timing - bench_mode = extra_args and "--bench" in extra_args cumulative_timing = { "total_torch_time": 0.0, "total_infinicore_time": 0.0, @@ -137,117 +183,160 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): test_name = test_file.stem try: - # Run the test script - use the absolute path and run from current directory - cmd = [sys.executable, str(test_file.absolute())] + # Import and run the test directly + success, test_instance_or_error = import_operator_test(test_file) + + if not success: + print(f"💥 {test_name}: ERROR - {test_instance_or_error}") + results[test_name] = { + "success": False, + "return_code": -1, + "torch_time": 0.0, + "infini_time": 0.0, + "error_message": test_instance_or_error, + "test_runner": None, + "stdout": "", + "stderr": test_instance_or_error, + } + continue + + # Get the test runner class from the module + test_module = sys.modules[f"op_test_{test_file.stem}"] + if not hasattr(test_module, "GenericTestRunner"): + print(f"💥 {test_name}: ERROR - No GenericTestRunner found") + results[test_name] = { + "success": False, + "return_code": -1, + "torch_time": 0.0, + "infini_time": 0.0, + "error_message": "No GenericTestRunner found", + "test_runner": None, + "stdout": "", + "stderr": "No GenericTestRunner found", + } + continue + + # Create and run the test runner + test_runner_class = test_module.GenericTestRunner + runner_instance = test_runner_class(test_instance_or_error.__class__) + + # Temporarily redirect stdout to capture output + from io import StringIO + + stdout_capture = StringIO() + stderr_capture = StringIO() + + old_stdout = sys.stdout + old_stderr = sys.stderr + sys.stdout = stdout_capture + sys.stderr = stderr_capture + + try: + # Run the test + test_success, test_runner = runner_instance.run() + + # Get captured output + stdout_output = stdout_capture.getvalue() + stderr_output = stderr_capture.getvalue() + + # Restore stdout/stderr + sys.stdout = old_stdout + sys.stderr = old_stderr + + # Print the captured output + if stdout_output: + print(stdout_output.rstrip()) + if stderr_output: + print("\nSTDERR:") + print(stderr_output.rstrip()) + + # Analyze test results + test_results = test_runner.get_test_results() if test_runner else [] + + # Determine overall test status + if test_success: + return_code = 0 + status_icon = "✅" + status_text = "PASSED" + else: + # Check if there are any failed tests + has_failures = any( + result.return_code == -1 for result in test_results + ) + has_partial = any( + result.return_code == -3 for result in test_results + ) + has_skipped = any( + result.return_code == -2 for result in test_results + ) + + if has_failures: + return_code = -1 + status_icon = "❌" + status_text = "FAILED" + elif has_partial: + return_code = -3 + status_icon = "⚠️" + status_text = "PARTIAL" + elif has_skipped: + return_code = -2 + status_icon = "⏭️" + status_text = "SKIPPED" + else: + return_code = -1 + status_icon = "❌" + status_text = "FAILED" + + # Calculate timing + torch_time = sum(result.torch_time for result in test_results) + infini_time = sum(result.infini_time for result in test_results) + + results[test_name] = { + "success": test_success, + "return_code": return_code, + "torch_time": torch_time, + 
"infini_time": infini_time, + "error_message": "", + "test_runner": test_runner, + "stdout": stdout_output, + "stderr": stderr_output, + } - # Add extra arguments if provided - if extra_args: - cmd.extend(extra_args) - - result = subprocess.run( - cmd, - capture_output=True, # Capture output to analyze - text=True, - ) - - # Analyze output to determine test status - stdout_lower = result.stdout.lower() - stderr_lower = result.stderr.lower() - - # Check for operator not implemented patterns - if ( - "all tests passed!" in stdout_lower - and "success rate: 100.0%" in stdout_lower - ): - success = True - returncode = 0 - elif "both operators not implemented" in stdout_lower: - # Both operators not implemented - skipped test - success = False # Not a failure, but skipped - returncode = -2 # Special code for skipped - elif "operator not implemented" in stdout_lower: - # One operator not implemented - partial test - success = False # Not fully successful - returncode = -3 # Special code for partial - else: - success = False - returncode = -1 - - results[test_name] = ( - success, - returncode, - result.stdout, - result.stderr, - ) - - # Print the output from the test script - print(f"\n{'='*60}") - print(f"TEST: {test_name}") - print(f"{'='*60}") - - if result.stdout: - print(result.stdout.rstrip()) - - if result.stderr: - print("\nSTDERR:") - print(result.stderr.rstrip()) - - # Enhanced status display - if returncode == -2: - status_icon = "⏭️" - status_text = "SKIPPED" - elif returncode == -3: - status_icon = "⚠️" - status_text = "PARTIAL" - elif success: - status_icon = "✅" - status_text = "PASSED" - else: - status_icon = "❌" - status_text = "FAILED" + print( + f"{status_icon} {test_name}: {status_text} (return code: {return_code})" + ) - print( - f"{status_icon} {test_name}: {status_text} (return code: {returncode})" - ) + # Extract benchmark timing if in bench mode + if bench and test_success and return_code == 0: + cumulative_timing["total_torch_time"] += torch_time + cumulative_timing["total_infinicore_time"] += infini_time + cumulative_timing["operators_tested"] += 1 - # Extract benchmark timing if in bench mode - if bench_mode and success: - # Look for benchmark summary in stdout - lines = result.stdout.split("\n") - torch_time = 0.0 - infini_time = 0.0 - - for line in lines: - if "PyTorch Total Time:" in line: - try: - # Extract time value (e.g., "PyTorch Total Time: 123.456 ms") - torch_time = ( - float(line.split(":")[1].strip().split()[0]) / 1000.0 - ) # Convert to seconds - except: - pass - elif "InfiniCore Total Time:" in line: - try: - infini_time = ( - float(line.split(":")[1].strip().split()[0]) / 1000.0 - ) # Convert to seconds - except: - pass - - cumulative_timing["total_torch_time"] += torch_time - cumulative_timing["total_infinicore_time"] += infini_time - cumulative_timing["operators_tested"] += 1 + except Exception as e: + # Restore stdout/stderr in case of exception + sys.stdout = old_stdout + sys.stderr = old_stderr + raise e # In verbose mode, stop execution on first failure - if verbose_mode and not success and returncode not in [-2, -3]: + if verbose and not test_success and return_code != 0: break except Exception as e: print(f"💥 {test_name}: ERROR - {str(e)}") - results[test_name] = (False, -1, "", str(e)) + results[test_name] = { + "success": False, + "return_code": -1, + "torch_time": 0.0, + "infini_time": 0.0, + "error_message": str(e), + "test_runner": None, + "stdout": "", + "stderr": str(e), + } # In verbose mode, stop execution on any exception - if 
verbose_mode: + if verbose: print(f"\n{'!'*60}") print( f"VERBOSE MODE: Stopping execution due to exception in {test_name}" @@ -259,7 +348,7 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): def print_summary( - results, verbose_mode=False, total_expected_tests=0, cumulative_timing=None + results, verbose=False, total_expected_tests=0, cumulative_timing=None ): """Print a comprehensive summary of test results including benchmark data.""" print(f"\n{'='*80}") @@ -280,14 +369,15 @@ def print_summary( skipped_operators = [] # Store skipped operator names partial_operators = [] # Store partial operator names - for test_name, (success, returncode, stdout, stderr) in results.items(): - if success: + for test_name, result_data in results.items(): + return_code = result_data["return_code"] + if return_code == 0: passed += 1 passed_operators.append(test_name) - elif returncode == -2: # Special code for skipped tests + elif return_code == -2: # Special code for skipped tests skipped += 1 skipped_operators.append(test_name) - elif returncode == -3: # Special code for partial tests + elif return_code == -3: # Special code for partial tests partial += 1 partial_operators.append(test_name) else: @@ -316,10 +406,10 @@ def print_summary( print("BENCHMARK SUMMARY:") print(f" Operators Tested: {cumulative_timing['operators_tested']}") print( - f" Total PyTorch Time: {cumulative_timing['total_torch_time'] * 1000:.3f} ms" + f" PyTorch Total Time: {cumulative_timing['total_torch_time'] * 1000:12.3f} ms" ) print( - f" Total InfiniCore Time: {cumulative_timing['total_infinicore_time'] * 1000:.3f} ms" + f" InfiniCore Total Time: {cumulative_timing['total_infinicore_time'] * 1000:12.3f} ms" ) print(f"{'-'*40}") @@ -361,7 +451,7 @@ def print_summary( success_rate = passed / executed_tests * 100 print(f"\nSuccess rate: {success_rate:.1f}%") - if verbose_mode and total < total_expected_tests: + if verbose and total < total_expected_tests: print(f"\n💡 Verbose mode: Execution stopped after first failure") print(f" {total_expected_tests - total} tests were not executed") @@ -505,17 +595,18 @@ def main(): parser.add_argument( "--verbose", action="store_true", - help="Enable verbose mode to stop on first error with full traceback (passed to individual tests)", + help="Enable verbose mode to stop on first error with full traceback", + ) + parser.add_argument( + "--bench", + action="store_true", + help="Enable bench mode to show performance data", ) - from framework import get_hardware_args_group - - if "-h" in sys.argv or "--help" in sys.argv: - get_hardware_args_group(parser) + get_hardware_args_group(parser) # Parse known args first, leave the rest for the test scripts args, unknown_args = parser.parse_known_args() - get_hardware_args_group(parser) # Handle list command if args.list: @@ -536,10 +627,6 @@ def main(): print(f"Error: Ops directory '{ops_dir}' does not exist.") sys.exit(1) - # Add verbose flag to extra arguments if specified - if args.verbose and "--verbose" not in unknown_args: - unknown_args.append("--verbose") - # Show what extra arguments will be passed if unknown_args: print(f"Passing extra arguments to test scripts: {unknown_args}") @@ -584,7 +671,8 @@ def main(): results, cumulative_timing = run_all_op_tests( ops_dir=ops_dir, specific_ops=args.ops, - extra_args=unknown_args, + bench=args.bench, + verbose=args.verbose, ) # Print summary and exit with appropriate code @@ -594,7 +682,7 @@ def main(): # Check if there were any tests with missing implementations 
has_missing_implementations = any( - returncode in [-2, -3] for _, (_, returncode, _, _) in results.items() + result_data["return_code"] in [-2, -3] for result_data in results.values() ) if all_passed and has_missing_implementations: @@ -607,8 +695,8 @@ def main(): ) failed_ops = [ name - for name, (success, _, _, _) in results.items() - if not success and name in results + for name, result_data in results.items() + if result_data["return_code"] == -1 ] for op in failed_ops[:3]: # Show first 3 failed operators print(f" python {ops_dir / (op + '.py')} --verbose")
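
Usage sketch for the flags introduced across these three patches, based on the README and help-text changes above (a minimal example; `elu` stands in for any operator discovered under `test/infinicore/ops/`):

```bash
# Patch 1: stop on the first error and print the full traceback
python test/infinicore/ops/elu.py --cpu --verbose

# Patch 2: accumulate total PyTorch/InfiniCore timing across test cases
python test/infinicore/run.py --cpu --bench

# Patch 3: batch run with the in-process runner; --bench and --verbose are
# now parsed by run.py itself instead of being forwarded as extra arguments
# to each test script
python test/infinicore/run.py --cpu --nvidia --bench --verbose
```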