diff --git a/README.md b/README.md index 10ec103ae..7bef3ba08 100644 --- a/README.md +++ b/README.md @@ -182,9 +182,9 @@ pip install . -e ```bash # 测试单算子 -python test/infinicore/ops/[operator].py [--bench | --debug] [--cpu | --nvidia | --cambricon | --ascend | --iluvatar | --metax | --moore | --kunlun | --Hygon] +python test/infinicore/ops/[operator].py [--bench | --debug | --verbose] [--cpu | --nvidia | --cambricon | --ascend | --iluvatar | --metax | --moore | --kunlun | --Hygon] # 测试全部算子 -python test/infinicore/run.py [--bench | --debug] [--cpu | --nvidia | --cambricon | --ascend | --iluvatar | --metax | --moore | --kunlun] +python test/infinicore/run.py [--bench | --debug | --verbose] [--cpu | --nvidia | --cambricon | --ascend | --iluvatar | --metax | --moore | --kunlun] ``` 使用 -h 查看更多参数。 diff --git a/test/infinicore/framework/base.py b/test/infinicore/framework/base.py index 95d4a55f2..ebb889244 100644 --- a/test/infinicore/framework/base.py +++ b/test/infinicore/framework/base.py @@ -1,8 +1,9 @@ import torch import infinicore - +import traceback +from dataclasses import dataclass from abc import ABC, abstractmethod -from typing import List, Dict, Any, Optional +from typing import List, Dict, Any, Optional, Tuple from .datatypes import to_torch_dtype, to_infinicore_dtype from .devices import InfiniDeviceNames, torch_device_map @@ -11,11 +12,21 @@ create_test_comparator, infinicore_tensor_from_torch, profile_operation, - synchronize_device, - convert_infinicore_to_torch, ) +@dataclass +class TestResult: + """Test result data structure""" + success: bool + return_code: int # 0: success, -1: failure, -2: skipped, -3: partial + torch_time: float = 0.0 + infini_time: float = 0.0 + error_message: str = "" + test_case: Any = None + device: Any = None + + class TestCase: """Test case with all configuration included""" @@ -24,11 +35,11 @@ def __init__( inputs, kwargs=None, output_spec=None, + output_specs=None, comparison_target=None, description="", tolerance=None, output_count=1, - output_specs=None, ): """ Initialize a test case with complete configuration @@ -216,14 +227,19 @@ def __str__(self): class TestConfig: """Test configuration""" - def __init__(self, debug=False, bench=False, num_prerun=10, num_iterations=1000): + def __init__( + self, + debug=False, + bench=False, + num_prerun=10, + num_iterations=1000, + verbose=False, + ): self.debug = debug self.bench = bench self.num_prerun = num_prerun self.num_iterations = num_iterations - - -# In base.py - update the TestRunner class + self.verbose = verbose class TestRunner: @@ -238,6 +254,14 @@ def __init__(self, test_cases, test_config): self.passed_tests = ( [] ) # Track passed tests (both operators implemented and passed) + # Add benchmark timing statistics + self.benchmark_times = { + "torch_total": 0.0, + "infinicore_total": 0.0, + "per_test_case": {}, # Store timing per test case + } + # Store test results + self.test_results = [] def run_tests(self, devices, test_func, test_type="Test"): """ @@ -260,30 +284,30 @@ def run_tests(self, devices, test_func, test_type="Test"): try: print(f"{test_case}") - # Execute test and get result status - success, status = test_func(device, test_case, self.config) + # Execute test and get TestResult object + test_result = test_func(device, test_case, self.config) + self.test_results.append(test_result) - # Handle different test statuses - if status == "passed": + # Handle different test statuses based on return_code + if test_result.return_code == 0: # Success self.passed_tests.append( f"{test_case} - {InfiniDeviceNames[device]}" ) print(f"\033[92m✓\033[0m Passed") - elif status == "skipped": - # Test was skipped due to both operators not being implemented + elif test_result.return_code == -1: + fail_msg = f"{test_case} - {InfiniDeviceNames[device]} - Test terminated in verbose mode." + self.failed_tests.append(fail_msg) + elif test_result.return_code == -2: # Skipped skip_msg = f"{test_case} - {InfiniDeviceNames[device]} - Both operators not implemented" self.skipped_tests.append(skip_msg) - print( - f"\033[93m⚠\033[0m Skipped - both operators not implemented" - ) - elif status == "partial": - # Test was partially executed (one operator not implemented) + print(f"\033[93m⚠\033[0m Both operators not implemented - test skipped") + elif test_result.return_code == -3: # Partial partial_msg = f"{test_case} - {InfiniDeviceNames[device]} - One operator not implemented" self.partial_tests.append(partial_msg) - print( - f"\033[93m⚠\033[0m Partial - one operator not implemented" - ) - # Failed tests are handled in the exception handler below + print(f"\033[93m⚠\033[0m One operator not implemented - running single operator without comparison") + + if self.config.verbose and test_result.return_code != 0: + return False except Exception as e: error_msg = ( @@ -291,11 +315,25 @@ def run_tests(self, devices, test_func, test_type="Test"): ) print(f"\033[91m✗\033[0m {error_msg}") self.failed_tests.append(error_msg) + + # Create a failed TestResult + failed_result = TestResult( + success=False, + return_code=-1, + error_message=str(e), + test_case=test_case, + device=device + ) + self.test_results.append(failed_result) + # In verbose mode, print full traceback and stop execution + if self.config.verbose: + traceback.print_exc() + return False # Stop test execution immediately + if self.config.debug: raise - # Return True if no tests failed (skipped/partial tests don't count as failures) - return len(self.failed_tests) == 0 + return len(self.failed_tests) == 0 and len(self.skipped_tests) == 0 and len(self.partial_tests) == 0 def print_summary(self): """ @@ -312,34 +350,16 @@ def print_summary(self): print(f"\n{'='*60}") print("TEST SUMMARY") - print(f"{'='*60}") print(f"Total tests: {total_tests}") print(f"\033[92mPassed: {passed_count}\033[0m") - # Display partial tests (one operator not implemented) - if self.partial_tests: - print( - f"\033[93mPartial (one operator not implemented): {partial_count}\033[0m" - ) - for test in self.partial_tests: - print(f" - {test}") - - # Display skipped tests (both operators not implemented) - if self.skipped_tests: - print( - f"\033[93mSkipped (both operators not implemented): {skipped_count}\033[0m" - ) - for test in self.skipped_tests: - print(f" - {test}") - + result = True # Display failed tests if self.failed_tests: print(f"\033[91mFailed: {failed_count}\033[0m") - for failure in self.failed_tests: - print(f" - {failure}") # Return False only if there are actual test failures - return False + result = False else: # Calculate success rate based on actual executed tests executed_tests = passed_count + partial_count + failed_count @@ -352,10 +372,41 @@ def print_summary(self): print( f"\n\033[93mTests completed with some implementations missing\033[0m" ) - return True # Skipped/partial tests don't count as failures else: print(f"\n\033[92mAll tests passed!\033[0m") - return True + + # Print benchmark summary if benchmarking was enabled + if self.config.bench and ( + self.benchmark_times["torch_total"] > 0 + or self.benchmark_times["infinicore_total"] > 0 + ): + self._print_benchmark_summary() + + print(f"{'='*60}") + return result + + def _print_benchmark_summary(self): + """Print benchmark timing summary""" + print(f"{'-'*60}") + print("BENCHMARK SUMMARY") + + torch_total = self.benchmark_times["torch_total"] + infinicore_total = self.benchmark_times["infinicore_total"] + + if torch_total > 0: + print(f"PyTorch Total Time: {torch_total * 1000:.3f} ms") + if infinicore_total > 0: + print(f"InfiniCore Total Time: {infinicore_total * 1000:.3f} ms") + + if torch_total > 0 and infinicore_total > 0: + speedup = ( + torch_total / infinicore_total if infinicore_total > 0 else float("inf") + ) + print(f"Speedup (PyTorch/InfiniCore): {speedup:.2f}x") + + def get_test_results(self): + """Get all test results""" + return self.test_results class BaseOperatorTest(ABC): @@ -460,11 +511,17 @@ def run_test(self, device, test_case, config): config: Test configuration Returns: - tuple: (success, status) where: - success: bool indicating if test passed - status: str describing test status ("passed", "skipped", "partial") + TestResult: Test result object containing status and timing information """ device_str = torch_device_map[device] + + # Initialize test result + test_result = TestResult( + success=False, + return_code=-1, # Default to failure + test_case=test_case, + device=device + ) # Prepare inputs and kwargs with actual tensors inputs, kwargs = self.prepare_inputs_and_kwargs(test_case, device) @@ -537,6 +594,12 @@ def run_test(self, device, test_case, config): if torch_result is None: torch_implemented = False except NotImplementedError: + if config.verbose: + traceback.print_exc() + # Return test result immediately in verbose mode + test_result.return_code = -1 + test_result.error_message = "torch_operator not implemented" + return test_result torch_implemented = False torch_result = None @@ -545,25 +608,26 @@ def run_test(self, device, test_case, config): if infini_result is None: infini_implemented = False except NotImplementedError: + if config.verbose: + traceback.print_exc() + # Return test result immediately in verbose mode + test_result.return_code = -1 + test_result.error_message = "infinicore_operator not implemented" + return test_result infini_implemented = False infini_result = None # Skip if neither operator is implemented if not torch_implemented and not infini_implemented: - print(f"\033[93m⚠\033[0m Both operators not implemented - test skipped") - return False, "skipped" + test_result.return_code = -2 # Skipped + return test_result # Single operator execution without comparison if not torch_implemented or not infini_implemented: - missing_op = ( - "torch_operator" if not torch_implemented else "infinicore_operator" - ) - print( - f"\033[93m⚠\033[0m {missing_op} not implemented - running single operator without comparison" - ) - + test_result.return_code = -3 # Partial + # Run benchmarking for partial tests if enabled if config.bench: - self._run_benchmarking( + torch_time, infini_time = self._run_benchmarking( config, device_str, torch_implemented, @@ -575,8 +639,9 @@ def run_test(self, device, test_case, config): test_case.output_count, comparison_target, ) - return False, "partial" - + test_result.torch_time = torch_time + test_result.infini_time = infini_time + return test_result # ========================================================================== # MULTIPLE OUTPUTS COMPARISON LOGIC # ========================================================================== @@ -685,7 +750,7 @@ def run_test(self, device, test_case, config): # UNIFIED BENCHMARKING LOGIC # ========================================================================== if config.bench: - self._run_benchmarking( + torch_time, infini_time = self._run_benchmarking( config, device_str, True, @@ -697,9 +762,13 @@ def run_test(self, device, test_case, config): test_case.output_count, comparison_target, ) + test_result.torch_time = torch_time + test_result.infini_time = infini_time # Test passed successfully - return True, "passed" + test_result.success = True + test_result.return_code = 0 + return test_result def _run_benchmarking( self, @@ -715,8 +784,15 @@ def _run_benchmarking( comparison_target, ): """ - Unified benchmarking logic + Unified benchmarking logic with timing accumulation + + Returns: + tuple: (torch_time, infini_time) timing results """ + # Initialize timing variables + torch_time = 0.0 + infini_time = 0.0 + if torch_implemented: if output_count > 1: # For multiple outputs, just call the operator @@ -739,12 +815,13 @@ def torch_op(): else inputs[comparison_target] ) - profile_operation( + torch_time = profile_operation( "PyTorch ", torch_op, device_str, config.num_prerun, config.num_iterations, + total=True, ) if infini_implemented: @@ -763,10 +840,19 @@ def infini_op(): else infini_inputs[comparison_target] ) - profile_operation( + infini_time = profile_operation( "InfiniCore", infini_op, device_str, config.num_prerun, config.num_iterations, + total=True, ) + + # Store timing information in the test runner + if hasattr(config, "_test_runner") and config._test_runner: + # Accumulate total times + config._test_runner.benchmark_times["torch_total"] += torch_time + config._test_runner.benchmark_times["infinicore_total"] += infini_time + + return torch_time, infini_time diff --git a/test/infinicore/framework/config.py b/test/infinicore/framework/config.py index 8b09ea90c..ccbff88e6 100644 --- a/test/infinicore/framework/config.py +++ b/test/infinicore/framework/config.py @@ -1,7 +1,6 @@ import argparse from .devices import InfiniDeviceEnum -# hardware_info.py """ Shared hardware platform information for the InfiniCore testing framework """ @@ -61,6 +60,9 @@ def get_args(): # Run with debug mode on multiple devices python test_operator.py --cpu --nvidia --debug + # Run with verbose mode to stop on first error with full traceback + python test_operator.py --cpu --nvidia --verbose + # Run performance profiling with custom iterations python test_operator.py --nvidia --bench --num_prerun 50 --num_iterations 5000 @@ -90,11 +92,17 @@ def get_args(): action="store_true", help="Enable debug mode for detailed tensor comparison", ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable verbose mode to stop on first error with full traceback", + ) # Device options using shared hardware info hardware_group = get_hardware_args_group(parser) + args, unknown = parser.parse_known_args() - return parser.parse_args() + return args def get_test_devices(args): diff --git a/test/infinicore/framework/runner.py b/test/infinicore/framework/runner.py index 336686824..c0de4a7f9 100644 --- a/test/infinicore/framework/runner.py +++ b/test/infinicore/framework/runner.py @@ -21,16 +21,23 @@ def run(self): """Execute the complete test suite Returns: - bool: True if all tests passed or were skipped/partial, False if any tests failed + tuple: (success, test_runner) where: + success: bool indicating if all tests passed or were skipped/partial + test_runner: TestRunner instance with test results """ config = TestConfig( debug=self.args.debug, bench=self.args.bench, num_prerun=self.args.num_prerun, num_iterations=self.args.num_iterations, + verbose=self.args.verbose, # Pass verbose flag to TestConfig ) runner = TestRunner(self.operator_test.test_cases, config) + + # Pass the test runner instance to config for benchmark timing accumulation + config._test_runner = runner + devices = get_test_devices(self.args) # Run unified tests - returns True if no tests failed @@ -46,7 +53,7 @@ def run(self): # Both conditions must be True for overall success # - has_no_failures: no test failures during execution # - summary_passed: summary confirms no failures - return has_no_failures and summary_passed + return (has_no_failures and summary_passed), runner def run_and_exit(self): """Run tests and exit with appropriate status code @@ -55,5 +62,5 @@ def run_and_exit(self): 0: All tests passed or were skipped/partial (no failures) 1: One or more tests failed """ - success = self.run() + success, runner = self.run() sys.exit(0 if success else 1) diff --git a/test/infinicore/framework/utils.py b/test/infinicore/framework/utils.py index 2448e3857..051a30321 100644 --- a/test/infinicore/framework/utils.py +++ b/test/infinicore/framework/utils.py @@ -22,10 +22,12 @@ def timed_op(func, num_iterations, device): for _ in range(num_iterations): func() synchronize_device(device) - return (time.time() - start) / num_iterations + return time.time() - start -def profile_operation(desc, func, torch_device, num_prerun, num_iterations): +def profile_operation( + desc, func, torch_device, num_prerun, num_iterations, total=False +): """ Performance profiling workflow """ @@ -35,7 +37,11 @@ def profile_operation(desc, func, torch_device, num_prerun, num_iterations): # Timed execution elapsed = timed_op(lambda: func(), num_iterations, torch_device) - print(f" {desc} time: {elapsed * 1000 :6f} ms") + print(f" {desc} time: {elapsed / num_iterations * 1000 :6f} ms") + if total: + return elapsed + else: + return elapsed / num_iterations def debug(actual, desired, atol=0, rtol=1e-2, equal_nan=False, verbose=True): diff --git a/test/infinicore/ops/elu.py b/test/infinicore/ops/elu.py index 48cd846c0..92d2072d3 100644 --- a/test/infinicore/ops/elu.py +++ b/test/infinicore/ops/elu.py @@ -133,9 +133,9 @@ def torch_operator(self, *args, **kwargs): """PyTorch ELU implementation""" return torch.nn.functional.elu(*args, **kwargs) - def infinicore_operator(self, x, alpha=1.0, out=None, **kwargs): - """InfiniCore ELU implementation""" - return None + # def infinicore_operator(self, x, alpha=1.0, out=None, **kwargs): + # """InfiniCore ELU implementation""" + # return None def main(): diff --git a/test/infinicore/ops/multi_margin_loss.py b/test/infinicore/ops/multi_margin_loss.py index d4620f109..cc8f0da5c 100644 --- a/test/infinicore/ops/multi_margin_loss.py +++ b/test/infinicore/ops/multi_margin_loss.py @@ -103,7 +103,7 @@ def parse_test_cases(): return test_cases -class MultiMarginLossOpTest(BaseOperatorTest): +class OpTest(BaseOperatorTest): """MultiMarginLoss operator test with device handling""" def __init__(self): @@ -116,9 +116,9 @@ def torch_operator(self, *args, **kwargs): """PyTorch multi_margin_loss implementation with device handling""" return F.multi_margin_loss(*args, **kwargs) - def infinicore_operator(self, *args, **kwargs): - """InfiniCore multi_margin_loss implementation""" - return None + # def infinicore_operator(self, *args, **kwargs): + # """InfiniCore multi_margin_loss implementation""" + # return None def main(): diff --git a/test/infinicore/run.py b/test/infinicore/run.py index 32d52bfc6..ff642b8db 100644 --- a/test/infinicore/run.py +++ b/test/infinicore/run.py @@ -1,9 +1,10 @@ import os import sys -import subprocess import argparse from pathlib import Path -from typing import Dict, Tuple, List +import importlib.util + +from framework import get_hardware_args_group def find_ops_directory(location=None): @@ -58,9 +59,59 @@ def get_available_operators(ops_dir): return sorted(operators) -def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): +def import_operator_test(test_file_path): + """ + Import an operator test module and return the test class instance. + + Args: + test_file_path: Path to the test file + + Returns: + tuple: (success, test_instance_or_error) + """ + try: + # Create a unique module name + module_name = f"op_test_{test_file_path.stem}" + + # Load the module from file + spec = importlib.util.spec_from_file_location(module_name, test_file_path) + if spec is None or spec.loader is None: + return False, f"Could not load module from {test_file_path}" + + module = importlib.util.module_from_spec(spec) + + # Add the module to sys.modules + sys.modules[module_name] = module + + # Execute the module + spec.loader.exec_module(module) + + # Find the test class (usually named OpTest) + test_class = None + for attr_name in dir(module): + attr = getattr(module, attr_name) + if ( + isinstance(attr, type) + and hasattr(attr, "__bases__") + and any("BaseOperatorTest" in str(base) for base in attr.__bases__) + ): + test_class = attr + break + + if test_class is None: + return False, f"No test class found in {test_file_path}" + + # Create an instance + test_instance = test_class() + return True, test_instance + + except Exception as e: + return False, f"Error importing {test_file_path}: {str(e)}" + + +def run_all_op_tests(ops_dir=None, specific_ops=None, bench=False, verbose=False): """ - Run all operator test scripts in the ops directory. + Run all operator test scripts in the ops directory using direct import. Args: ops_dir (str, optional): Path to the ops directory. If None, uses auto-detection. @@ -68,7 +119,7 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): extra_args (list, optional): Extra command line arguments to pass to test scripts. Returns: - dict: Results dictionary with test names as keys and (success, return_code, stdout, stderr) as values. + dict: Results dictionary with test names as keys and (success, test_runner, stdout, stderr) as values. """ if ops_dir is None: ops_dir = find_ops_directory() @@ -122,92 +173,184 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): results = {} + cumulative_timing = { + "total_torch_time": 0.0, + "total_infinicore_time": 0.0, + "operators_tested": 0, + } + for test_file in operator_test_files: test_name = test_file.stem try: - # Run the test script - use the absolute path and run from current directory - cmd = [sys.executable, str(test_file.absolute())] - - # Add extra arguments if provided - if extra_args: - cmd.extend(extra_args) - - result = subprocess.run( - cmd, - capture_output=True, # Capture output to analyze - text=True, - ) - - # Analyze output to determine test status - stdout_lower = result.stdout.lower() - stderr_lower = result.stderr.lower() - - # Check for operator not implemented patterns - if ( - "all tests passed!" in stdout_lower - and "success rate: 100.0%" in stdout_lower - ): - success = True - returncode = 0 - elif "both operators not implemented" in stdout_lower: - # Both operators not implemented - skipped test - success = False # Not a failure, but skipped - returncode = -2 # Special code for skipped - elif "one operator not implemented" in stdout_lower: - # One operator not implemented - partial test - success = False # Not fully successful - returncode = -3 # Special code for partial - else: - success = False - returncode = -1 - - results[test_name] = ( - success, - returncode, - result.stdout, - result.stderr, - ) - - # Print the output from the test script - print(f"\n{'='*60}") - print(f"TEST: {test_name}") - print(f"{'='*60}") - - if result.stdout: - print(result.stdout.rstrip()) - - if result.stderr: - print("\nSTDERR:") - print(result.stderr.rstrip()) - - # Enhanced status display - if returncode == -2: - status_icon = "⏭️" - status_text = "SKIPPED" - elif returncode == -3: - status_icon = "⚠️" - status_text = "PARTIAL" - elif success: - status_icon = "✅" - status_text = "PASSED" - else: - status_icon = "❌" - status_text = "FAILED" - - print( - f"{status_icon} {test_name}: {status_text} (return code: {returncode})" - ) + # Import and run the test directly + success, test_instance_or_error = import_operator_test(test_file) + + if not success: + print(f"💥 {test_name}: ERROR - {test_instance_or_error}") + results[test_name] = { + "success": False, + "return_code": -1, + "torch_time": 0.0, + "infini_time": 0.0, + "error_message": test_instance_or_error, + "test_runner": None, + "stdout": "", + "stderr": test_instance_or_error, + } + continue + + # Get the test runner class from the module + test_module = sys.modules[f"op_test_{test_file.stem}"] + if not hasattr(test_module, "GenericTestRunner"): + print(f"💥 {test_name}: ERROR - No GenericTestRunner found") + results[test_name] = { + "success": False, + "return_code": -1, + "torch_time": 0.0, + "infini_time": 0.0, + "error_message": "No GenericTestRunner found", + "test_runner": None, + "stdout": "", + "stderr": "No GenericTestRunner found", + } + continue + + # Create and run the test runner + test_runner_class = test_module.GenericTestRunner + runner_instance = test_runner_class(test_instance_or_error.__class__) + + # Temporarily redirect stdout to capture output + from io import StringIO + + stdout_capture = StringIO() + stderr_capture = StringIO() + + old_stdout = sys.stdout + old_stderr = sys.stderr + sys.stdout = stdout_capture + sys.stderr = stderr_capture + + try: + # Run the test + test_success, test_runner = runner_instance.run() + + # Get captured output + stdout_output = stdout_capture.getvalue() + stderr_output = stderr_capture.getvalue() + + # Restore stdout/stderr + sys.stdout = old_stdout + sys.stderr = old_stderr + + # Print the captured output + if stdout_output: + print(stdout_output.rstrip()) + if stderr_output: + print("\nSTDERR:") + print(stderr_output.rstrip()) + + # Analyze test results + test_results = test_runner.get_test_results() if test_runner else [] + + # Determine overall test status + if test_success: + return_code = 0 + status_icon = "✅" + status_text = "PASSED" + else: + # Check if there are any failed tests + has_failures = any( + result.return_code == -1 for result in test_results + ) + has_partial = any( + result.return_code == -3 for result in test_results + ) + has_skipped = any( + result.return_code == -2 for result in test_results + ) + + if has_failures: + return_code = -1 + status_icon = "❌" + status_text = "FAILED" + elif has_partial: + return_code = -3 + status_icon = "⚠️" + status_text = "PARTIAL" + elif has_skipped: + return_code = -2 + status_icon = "⏭️" + status_text = "SKIPPED" + else: + return_code = -1 + status_icon = "❌" + status_text = "FAILED" + + # Calculate timing + torch_time = sum(result.torch_time for result in test_results) + infini_time = sum(result.infini_time for result in test_results) + + results[test_name] = { + "success": test_success, + "return_code": return_code, + "torch_time": torch_time, + "infini_time": infini_time, + "error_message": "", + "test_runner": test_runner, + "stdout": stdout_output, + "stderr": stderr_output, + } + + print( + f"{status_icon} {test_name}: {status_text} (return code: {return_code})" + ) + + # Extract benchmark timing if in bench mode + if bench and test_success and return_code == 0: + cumulative_timing["total_torch_time"] += torch_time + cumulative_timing["total_infinicore_time"] += infini_time + cumulative_timing["operators_tested"] += 1 + + except Exception as e: + # Restore stdout/stderr in case of exception + sys.stdout = old_stdout + sys.stderr = old_stderr + raise e + + # In verbose mode, stop execution on first failure + if verbose and not test_success and return_code != 0: + break except Exception as e: print(f"💥 {test_name}: ERROR - {str(e)}") - results[test_name] = (False, -1, "", str(e)) - - return results - - -def print_summary(results): - """Print a comprehensive summary of test results.""" + results[test_name] = { + "success": False, + "return_code": -1, + "torch_time": 0.0, + "infini_time": 0.0, + "error_message": str(e), + "test_runner": None, + "stdout": "", + "stderr": str(e), + } + + # In verbose mode, stop execution on any exception + if verbose: + print(f"\n{'!'*60}") + print( + f"VERBOSE MODE: Stopping execution due to exception in {test_name}" + ) + print(f"{'!'*60}") + break + + return results, cumulative_timing + + +def print_summary( + results, verbose=False, total_expected_tests=0, cumulative_timing=None +): + """Print a comprehensive summary of test results including benchmark data.""" print(f"\n{'='*80}") print("CUMULATIVE TEST SUMMARY") print(f"{'='*80}") @@ -226,14 +369,15 @@ def print_summary(results): skipped_operators = [] # Store skipped operator names partial_operators = [] # Store partial operator names - for test_name, (success, returncode, stdout, stderr) in results.items(): - if success: + for test_name, result_data in results.items(): + return_code = result_data["return_code"] + if return_code == 0: passed += 1 passed_operators.append(test_name) - elif returncode == -2: # Special code for skipped tests + elif return_code == -2: # Special code for skipped tests skipped += 1 skipped_operators.append(test_name) - elif returncode == -3: # Special code for partial tests + elif return_code == -3: # Special code for partial tests partial += 1 partial_operators.append(test_name) else: @@ -242,7 +386,11 @@ def print_summary(results): total = len(results) - print(f"Total tests: {total}") + print(f"Total tests run: {total}") + if total_expected_tests > 0 and total < total_expected_tests: + print(f"Total tests expected: {total_expected_tests}") + print(f"Tests not executed: {total_expected_tests - total}") + print(f"Passed: {passed}") print(f"Failed: {failed}") @@ -252,6 +400,19 @@ def print_summary(results): if partial > 0: print(f"Partial: {partial}") + # Print benchmark summary if cumulative_timing data is available + if cumulative_timing and cumulative_timing["operators_tested"] > 0: + print(f"{'-'*40}") + print("BENCHMARK SUMMARY:") + print(f" Operators Tested: {cumulative_timing['operators_tested']}") + print( + f" PyTorch Total Time: {cumulative_timing['total_torch_time'] * 1000:12.3f} ms" + ) + print( + f" InfiniCore Total Time: {cumulative_timing['total_infinicore_time'] * 1000:12.3f} ms" + ) + print(f"{'-'*40}") + # Display passed operators if passed_operators: print(f"\n✅ PASSED OPERATORS ({len(passed_operators)}):") @@ -284,12 +445,16 @@ def print_summary(results): print(" " + ", ".join(line_ops)) if total > 0: - # Calculate success rate based on executed tests only + # Calculate success rate based on actual executed tests executed_tests = passed + failed + partial if executed_tests > 0: success_rate = passed / executed_tests * 100 print(f"\nSuccess rate: {success_rate:.1f}%") + if verbose and total < total_expected_tests: + print(f"\n💡 Verbose mode: Execution stopped after first failure") + print(f" {total_expected_tests - total} tests were not executed") + if failed == 0: if skipped > 0 or partial > 0: print(f"\n⚠️ Tests completed with some operators not implemented") @@ -358,6 +523,14 @@ def generate_help_epilog(ops_dir): epilog_parts.append(" # Run with debug mode on multiple devices") epilog_parts.append(" python run.py --cpu --nvidia --debug") epilog_parts.append("") + epilog_parts.append( + " # Run with verbose mode to stop on first error with full traceback" + ) + epilog_parts.append(" python run.py --cpu --nvidia --verbose") + epilog_parts.append("") + epilog_parts.append(" # Run with benchmarking to get cumulative timing") + epilog_parts.append(" python run.py --cpu --bench") + epilog_parts.append("") epilog_parts.append(" # List available tests without running") epilog_parts.append(" python run.py --list") epilog_parts.append("") @@ -384,7 +557,13 @@ def generate_help_epilog(ops_dir): " - Operators are automatically discovered from the ops directory" ) epilog_parts.append( - " - --bench option is disabled in batch mode (run individual tests for benchmarking)" + " - --bench mode now shows cumulative timing across all operators" + ) + epilog_parts.append( + " - --verbose mode stops execution on first error and shows full traceback" + ) + epilog_parts.append( + " - In verbose mode, subsequent tests are skipped after first failure" ) return "\n".join(epilog_parts) @@ -413,15 +592,21 @@ def main(): action="store_true", help="List all available test files without running them", ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable verbose mode to stop on first error with full traceback", + ) + parser.add_argument( + "--bench", + action="store_true", + help="Enable bench mode to show performance data", + ) - from framework import get_hardware_args_group - - if "-h" in sys.argv or "--help" in sys.argv: - get_hardware_args_group(parser) + get_hardware_args_group(parser) # Parse known args first, leave the rest for the test scripts args, unknown_args = parser.parse_known_args() - get_hardware_args_group(parser) # Handle list command if args.list: @@ -453,6 +638,9 @@ def main(): print(f"Operating directory: {ops_dir}") print(f"Available operators: {len(available_operators)}") + if args.verbose: + print(f"Verbose mode: ENABLED (will stop on first error with full traceback)") + if args.ops: # Validate requested operators valid_ops = [] @@ -469,32 +657,50 @@ def main(): if valid_ops: print(f"Testing operators: {', '.join(valid_ops)}") + total_expected_tests = len(valid_ops) else: print("No valid operators specified. Running all available tests.") + total_expected_tests = len(available_operators) else: print("Testing all available operators") + total_expected_tests = len(available_operators) print() # Run all tests - results = run_all_op_tests( + results, cumulative_timing = run_all_op_tests( ops_dir=ops_dir, specific_ops=args.ops, - extra_args=unknown_args, + bench=args.bench, + verbose=args.verbose, ) # Print summary and exit with appropriate code - all_passed = print_summary(results) + all_passed = print_summary( + results, args.verbose, total_expected_tests, cumulative_timing + ) # Check if there were any tests with missing implementations has_missing_implementations = any( - returncode in [-2, -3] for _, (_, returncode, _, _) in results.items() + result_data["return_code"] in [-2, -3] for result_data in results.values() ) if all_passed and has_missing_implementations: print(f"\n⚠️ Note: Some operators are not fully implemented") print(f" Run individual tests for details on missing implementations") + if args.verbose and not all_passed: + print( + f"\n💡 Verbose mode tip: Use individual test commands for detailed debugging:" + ) + failed_ops = [ + name + for name, result_data in results.items() + if result_data["return_code"] == -1 + ] + for op in failed_ops[:3]: # Show first 3 failed operators + print(f" python {ops_dir / (op + '.py')} --verbose") + sys.exit(0 if all_passed else 1)