205 changes: 202 additions & 3 deletions graph_net/analysis_util.py
@@ -1,5 +1,6 @@
import os
import json
import re
import numpy as np
from scipy.stats import gmean
from collections import OrderedDict, defaultdict
@@ -84,6 +85,182 @@ def load_json_file(filepath: str) -> dict:
        return {}


def parse_logs_to_data(log_file: str) -> list:
    """
    Parse a structured log file generated by the benchmark script and
    return a list of data dictionaries (one per model-compiler run).

    This function parses log files directly, without generating intermediate
    JSON files. It handles both Paddle samples (with subgraphs) and PyTorch
    samples (without subgraphs).

    Args:
        log_file: Path to the benchmark log file.

    Returns:
        List of data dictionaries, each containing configuration, correctness,
        performance, and result information for a single model-compiler run.
    """
    try:
        with open(log_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except FileNotFoundError:
        print(f"Error: Log file not found at '{log_file}'")
        return []
    except Exception as e:
        print(f"Error reading log file: {e}")
        return []

    # Dictionary to hold parsed data for all runs
    all_runs_data = {}
    current_run_key = None

    # Define regex patterns for each type of log line
    patterns = {
        "processing": re.compile(r"\[Processing\] (.+)"),
        "config": re.compile(r"\[Config\] (\S+): (.+)"),
        "performance": re.compile(r"\[Performance\]\[(\w+)\]: (.+)"),
        "datatype": re.compile(r"\[Datatype\]\[(\w+)\]: (.+)"),
        "correctness": re.compile(r"\[Correctness\](\[.+\]): (.+)"),
        "result_status": re.compile(r"\[Result\] status: (.+)"),
        "failure": re.compile(r"\[Fail due to (.+)\.\]"),
        "speedup": re.compile(r"\[Speedup\]\[(\w+)\]: (.+)"),
    }
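    # Example log lines these patterns are written to match (illustrative only;
    # the run name, keys, and values are hypothetical):
    #   [Processing] torch/resnet50
    #   [Config] compiler: default
    #   [Performance][eager]: {"e2e": [0.0123, 0.0121]}
    #   [Datatype][eager]: float32 float32
    #   [Correctness][output_0]: 1 1 0.999
    #   [Speedup][e2e]: 1.23
    #   [Result] status: success
    # A failed run instead ends with a status line followed by a reason, e.g.:
    #   [Result] status: failed
    #   [Fail due to compiled run error.]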

    for i, line in enumerate(lines):
        # Check for the start of a new model run
        processing_match = patterns["processing"].search(line)
        if processing_match:
            current_run_key = processing_match.group(1).strip()
            # Initialize a nested dictionary structure for this new run
            all_runs_data[current_run_key] = {
                "configuration": {},
                "correctness": {},
                "performance": {
                    "eager": {},
                    "compiled": {},
                    "datatype": {},
                    "speedup": {},
                },
                "result": {
                    "status": "unknown",
                },
            }
            continue

        # If we haven't identified a run yet, skip the line
        if not current_run_key:
            continue

        # Get the data dictionary for the current run
        data = all_runs_data[current_run_key]

        # Try to match other patterns
        config_match = patterns["config"].search(line)
        if config_match:
            key, value = config_match.groups()
            data["configuration"][key.strip()] = value.strip()
            continue

        performance_match = patterns["performance"].search(line)
        if performance_match:
            key, value_str = performance_match.groups()
            # The performance value is a JSON string, so we load it
            data["performance"][key.strip()] = json.loads(value_str)
            continue

        datatype_match = patterns["datatype"].search(line)
        if datatype_match:
            key, value_str = datatype_match.groups()
            # The datatype value is a space-separated string
            data["performance"]["datatype"][key.strip()] = value_str.strip().split()
            continue

        correctness_match = patterns["correctness"].search(line)
        if correctness_match:
            key, value_str = correctness_match.groups()
            values = []
            for v in value_str.strip().split():
                try:
                    # Try to convert to int if it's a whole number, else float
                    values.append(int(v) if "." not in v else float(v))
                except ValueError:
                    # Handle non-numeric values like 'nan'
                    values.append(float(v))
            data["correctness"][key.strip()] = values
            continue

        # Look for the status, and if it's "failed", look ahead to the next line.
        result_status_match = patterns["result_status"].search(line)
        if result_status_match:
            status = result_status_match.group(1).strip()
            data["result"]["status"] = status
            if status == "failed" and (i + 1) < len(lines):
                error_reason_match = patterns["failure"].search(lines[i + 1])
                if error_reason_match:
                    reason = error_reason_match.group(1).lower()
                    if "eager" in reason:
                        data["performance"]["failure"] = "eager"
                        data["result"]["status"] = "eager_fail"
                    elif "compiled" in reason:
                        data["performance"]["failure"] = "compiled"
                        data["result"]["status"] = "compile_fail"
                    else:
                        data["performance"]["failure"] = "other"
                        data["result"]["status"] = "runtime_fail"
            continue

        speedup_match = patterns["speedup"].search(line)
        if speedup_match:
            key, value_str = speedup_match.groups()
            data["performance"]["speedup"][key.strip()] = float(value_str)
            continue

    # After parsing all lines, process the results
    if not all_runs_data:
        print("No processable log entries found in the file.")
        return []

    samples = []
    for run_key, data in all_runs_data.items():
        try:
            # Build result field with status and speedup (for compatibility with log2json output format)
            if data["result"]["status"] == "success":
                speedup_data = {}
                if "e2e" in data["performance"]["speedup"]:
                    e2e_value = data["performance"]["speedup"]["e2e"]
                    speedup_data["e2e"] = {"mean": e2e_value}
                if "gpu" in data["performance"]["speedup"]:
                    gpu_value = data["performance"]["speedup"]["gpu"]
                    speedup_data["gpu"] = {"mean": gpu_value}
                if speedup_data:
                    data["result"]["speedup"] = speedup_data

            # Ensure performance.speedup.e2e is a direct value (not a nested dict).
            # This is required by calculate_s_scores, which uses
            # performance_data.get("speedup", {}).get("e2e").
            if "speedup" in data["performance"]:
                speedup_dict = data["performance"]["speedup"]
                if "e2e" in speedup_dict:
                    e2e_val = speedup_dict["e2e"]
                    if isinstance(e2e_val, dict) and "mean" in e2e_val:
                        speedup_dict["e2e"] = e2e_val["mean"]
                if "gpu" in speedup_dict:
                    gpu_val = speedup_dict["gpu"]
                    if isinstance(gpu_val, dict) and "mean" in gpu_val:
                        speedup_dict["gpu"] = gpu_val["mean"]

            samples.append(data)

        except KeyError as e:
            print(f"Warning: Could not process run '{run_key}' due to missing key: {e}")
        except Exception as e:
            print(
                f"Warning: An unexpected error occurred while processing run '{run_key}': {e}"
            )

    print(f"Successfully parsed {len(samples)} samples from log file: {log_file}")
    return samples


def load_one_folder(folder_path: str) -> list:
"""
Traverse all .json files in a *single* folder and load all raw data.
@@ -107,13 +284,35 @@ def load_one_folder(folder_path: str) -> list:

def scan_all_folders(benchmark_path: str) -> dict:
"""
Unified entry point:
- If there are .json files directly under benchmark_path → treat them as a single curve (curve name is the directory name).
Unified entry point that supports both log files and JSON directories:
- If benchmark_path is a log file → parse it directly and return data as a single curve.
- If benchmark_path is a directory with .json files directly under it → treat them as a single curve.
- Otherwise, fallback to the old logic where subdirectories represent curves.
Returns dict[folder_name] -> list_of_samples
"""
# Check if the path is a log file
if os.path.isfile(benchmark_path):
print(f"Detected log file: '{benchmark_path}'")
samples = parse_logs_to_data(benchmark_path)
if samples:
# Use the log file name (without extension) as the curve name
folder_name = (
os.path.splitext(os.path.basename(benchmark_path))[0] or "benchmark"
)
print(
f" - Parsed log file → 1 curve '{folder_name}' "
f"with {len(samples)} samples."
)
return {folder_name: samples}
else:
print(f" - No valid data found in log file.")
return {}

# Check if it's a directory
if not os.path.isdir(benchmark_path):
print(f"Error: Provided path '{benchmark_path}' is not a valid directory.")
print(
f"Error: Provided path '{benchmark_path}' is neither a valid file nor directory."
)
return {}

print(f"Scanning '{benchmark_path}' ...")
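A minimal usage sketch of the updated entry point (assuming graph_net is importable as a package; the log-file path and the plotting command in the trailing comment are hypothetical, given only for illustration):

from graph_net.analysis_util import scan_all_folders

# Either a benchmark log file or a directory of per-run JSON files works:
curves = scan_all_folders("results/benchmark_run.log")
# curves = scan_all_folders("results/json_outputs/")
for curve_name, samples in curves.items():
    print(f"{curve_name}: {len(samples)} samples")

# The plotting scripts below accept the same path via --benchmark-path, e.g.
# python graph_net/plot_ESt.py --benchmark-path results/benchmark_run.log --output-dir plots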
2 changes: 1 addition & 1 deletion graph_net/plot_ESt.py
@@ -110,7 +110,7 @@ def main():
"--benchmark-path",
type=str,
required=True,
help="Path to the directory containing benchmark JSON files or sub-folders.",
help="Path to the benchmark log file or directory containing benchmark JSON files or sub-folders.",
)
parser.add_argument(
"--output-dir",
2 changes: 1 addition & 1 deletion graph_net/plot_St.py
@@ -73,7 +73,7 @@ def main():
"--benchmark-path",
type=str,
required=True,
help="Path to the directory containing benchmark JSON files or sub-folders.",
help="Path to the benchmark log file or directory containing benchmark JSON files or sub-folders.",
)
parser.add_argument(
"--output-dir",