diff --git a/.github/workflows/metrics_analysis.yml b/.github/workflows/metrics_analysis.yml new file mode 100644 index 0000000000..1dd6e946ac --- /dev/null +++ b/.github/workflows/metrics_analysis.yml @@ -0,0 +1,147 @@ +name: Metrics Diff Analysis + +on: + pull_request_target: + paths: + - '**/*.metrics.yaml' + +jobs: + metrics-analysis: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Need full history for git diff + + - name: Setup Base Environment + uses: ./actions/setup-base-env + + - name: Run metrics diff analysis + id: metrics-diff + run: | + # Run the analysis. Compare against the base hash of this PR + ./gradlew :yaml-tests:analyzeMetrics \ + -PmetricsAnalysis.baseRef="${{ github.sha }}" \ + -PmetricsAnalysis.headRef="${{ github.event.pull_request.head.sha }}" \ + -PmetricsAnalysis.urlBase="${{ github.server_url }}/${{ github.repository }}/blob" \ + -PmetricsAnalysis.repositoryRoot="${{ github.workspace }}" \ + -PmetricsAnalysis.output="${{ github.workspace }}/metrics-analysis-output.txt" \ + -PmetricsAnalysis.outlierQueries="${{ github.workspace }}/outlier-queries.txt" + + - name: Add Report To Summary + run: cat metrics-analysis-output.txt > $GITHUB_STEP_SUMMARY + + - name: Check for outliers + id: check-changes + run: | + if [[ -f outlier-queries.txt ]] ; then + echo "SIGNIFICANT_CHANGES=true" >> $GITHUB_OUTPUT + else + echo "SIGNIFICANT_CHANGES=false" >> $GITHUB_OUTPUT + fi + + - name: Comment on PR + uses: actions/github-script@v7 + continue-on-error: true + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const report = fs.readFileSync("${{ github.workspace }}/metrics-analysis-output.txt", { encoding: 'utf8', flag: 'r'}); + + // Find existing comment + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + + const existingComment = comments.find(comment => + comment.user.type === 'Bot' && comment.body.includes('📊 Metrics Diff Analysis') + ); + + if (existingComment) { + // Delete previous comment + await github.rest.issues.deleteComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existingComment.id, + }); + } + + // Create new comment + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: report + }); + + - name: Add inline comments for outliers + uses: actions/github-script@v7 + if: steps.check-changes.outputs.SIGNIFICANT_CHANGES == 'true' + continue-on-error: true + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + // First, delete any existing comments from previous runs + const { data: reviewComments } = await github.rest.pulls.listReviewComments({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number, + }); + + const metricsComments = reviewComments.filter(comment => + comment.user.type === 'Bot' && comment.body.includes('**Significant Metrics Change**') + ); + + for (const comment of metricsComments) { + try { + await github.rest.pulls.deleteReviewComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: comment.id, + }); + console.log(`Deleted previous metrics comment ${comment.id}`); + } catch (error) { + console.log(`Could not delete comment ${comment.id}: ${error.message}`); + } + } + + // Parse the outliers report analysis output to find files with notable queries + const fs = require('fs'); + const output = fs.readFileSync("${{ github.workspace }}/outlier-queries.txt", { encoding: 'utf8', flag: 'r' }); + + // Each query appears separated by a blank line + const queries = output.split('\n\n'); + + for (const query of queries) { + var newl = query.indexOf('\n'); + if (newl < 0) { + continue; + } + const info = query.substring(0, newl); + const match = info.match(/^(.+\.metrics\.yaml):(\d+): (.+)$/); + if (match) { + const [, filePath, lineNumber, query] = match; + const data = query.substring(newl, query.length); + try { + await github.rest.pulls.createReviewComment({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number, + body: "**Significant Metrics Change**\n\nThis query's metrics have changed significantly.\n\n" + data + "\n", + path: filePath, + line: lineNumber, + side: 'RIGHT' + }); + } catch (error) { + console.log(`Could not add comment to ${filePath}: ${error.message}`); + } + } + } diff --git a/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/GitMetricsFileFinder.java b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/GitMetricsFileFinder.java new file mode 100644 index 0000000000..d1c1db12a7 --- /dev/null +++ b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/GitMetricsFileFinder.java @@ -0,0 +1,183 @@ +/* + * GitMetricsFileFinder.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2015-2025 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.relational.yamltests; + +import com.apple.foundationdb.record.logging.KeyValueLogMessage; +import com.apple.foundationdb.relational.api.exceptions.ErrorCode; +import com.apple.foundationdb.relational.api.exceptions.RelationalException; +import com.google.common.collect.ImmutableSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +/** + * Utility class for finding changed metrics files using git integration. + * This class provides methods to identify metrics files that have changed between different git references. + */ +public final class GitMetricsFileFinder { + private static final Logger logger = LoggerFactory.getLogger(GitMetricsFileFinder.class); + + private GitMetricsFileFinder() { + // Utility class + } + + /** + * Finds all metrics YAML files that have changed between two git references. + * + * @param baseRef the base git reference (e.g., "main", "HEAD~1", commit SHA) + * @param headRef the target git reference (e.g., "main", "HEAD~1", commit SHA) + * @param repositoryRoot the root directory of the git repository + * @return set of paths to changed metrics YAML files + * @throws RelationalException if git command fails or repository is not valid + */ + @Nonnull + public static Set findChangedMetricsYamlFiles(@Nonnull final String baseRef, + @Nonnull final String headRef, + @Nonnull final Path repositoryRoot) throws RelationalException { + final List changedFiles = getChangedFiles(baseRef, headRef, repositoryRoot); + final ImmutableSet.Builder metricsFiles = ImmutableSet.builder(); + + for (final var filePath : changedFiles) { + if (isMetricsYamlFile(filePath)) { + metricsFiles.add(repositoryRoot.resolve(filePath)); + } + } + + return metricsFiles.build(); + } + + /** + * Gets a list of all files changed between two git references. + * + * @param baseRef the base reference + * @param headRef the target reference + * @param repositoryRoot the repository root directory + * @return list of relative file paths that have changed + * @throws RelationalException if git command fails + */ + @Nonnull + private static List getChangedFiles(@Nonnull final String baseRef, + @Nonnull final String headRef, + @Nonnull final Path repositoryRoot) throws RelationalException { + final var command = List.of("git", "diff", "--name-only", baseRef + "..." + headRef); + try { + if (logger.isDebugEnabled()) { + logger.debug(KeyValueLogMessage.of("Executing git command", + "root", repositoryRoot, + "command", String.join(" ", command))); + } + final var processBuilder = new ProcessBuilder(command) + .directory(repositoryRoot.toFile()) + .redirectErrorStream(true); + + final var process = processBuilder.start(); + final List result = new ArrayList<>(); + + try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) { + for (String line = reader.readLine(); line != null; line = reader.readLine()) { + final String stripped = line.strip(); + if (!stripped.isEmpty()) { + result.add(stripped); + } + } + } + + final var exitCode = process.waitFor(); + if (exitCode != 0) { + throw new RelationalException( + "Git command failed with exit code " + exitCode + ": " + String.join(" ", command), + ErrorCode.INTERNAL_ERROR); + } + + return result; + } catch (final IOException | InterruptedException e) { + throw new RelationalException("Failed to execute git command: " + String.join(" ", command), ErrorCode.INTERNAL_ERROR, e); + } + } + + /** + * Checks if a file path represents a metrics YAML file. + * + * @param filePath the file path to check + * @return true if the file is a metrics file + */ + private static boolean isMetricsYamlFile(@Nonnull final String filePath) { + return filePath.endsWith(".metrics.yaml"); + } + + /** + * Gets the file contents for a specific git reference (commit). + * This method checks out the file content at the specified reference and + * saves it into a temporary file located at the returned {@link Path}. + * If the file does not exist at the given reference, it will return a + * {@code null} path. + * + * @param filePath the relative file path + * @param gitRef the git reference + * @param repositoryRoot the repository root + * @return path to a temporary file containing the content at the specified reference + * or {@code null} if it didn't exist at the given ref + * @throws RelationalException if git command fails + */ + @Nullable + public static Path getFileAtReference(@Nonnull final String filePath, + @Nonnull final String gitRef, + @Nonnull final Path repositoryRoot) throws RelationalException { + try { + final var tempFile = Files.createTempFile("metrics-diff-", gitRef + ".metrics.yaml"); + final var command = List.of("git", "show", gitRef + ":" + filePath); + final var processBuilder = new ProcessBuilder(command) + .directory(repositoryRoot.toFile()) + .redirectOutput(tempFile.toFile()) + .redirectErrorStream(false); + + final var process = processBuilder.start(); + final var exitCode = process.waitFor(); + + if (exitCode != 0) { + // File could not be loaded. It was probably not present at that version, so return null + if (logger.isDebugEnabled()) { + logger.debug(KeyValueLogMessage.of("Unable to get file contents at reference", + "path", filePath, + "ref", gitRef)); + } + Files.deleteIfExists(tempFile); + return null; + } + + return tempFile; + } catch (final IOException | InterruptedException e) { + throw new RelationalException( + "Failed to get file content at reference " + gitRef + ":" + filePath, ErrorCode.INTERNAL_ERROR, e); + } + } +} diff --git a/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/MetricsDiffAnalyzer.java b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/MetricsDiffAnalyzer.java new file mode 100644 index 0000000000..9463475df4 --- /dev/null +++ b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/MetricsDiffAnalyzer.java @@ -0,0 +1,805 @@ +/* + * MetricsDiffAnalyzer.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2015-2025 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.relational.yamltests; + +import com.apple.foundationdb.record.logging.KeyValueLogMessage; +import com.apple.foundationdb.relational.api.exceptions.ErrorCode; +import com.apple.foundationdb.relational.api.exceptions.RelationalException; +import com.apple.foundationdb.relational.util.Assert; +import com.apple.foundationdb.relational.yamltests.generated.stats.PlannerMetricsProto; +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import com.beust.jcommander.ParameterException; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.protobuf.Descriptors; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +/** + * Main analyzer for comparing metrics between different git references. + * This tool identifies queries that have been added, removed, or changed, and provides + * statistical analysis of metrics differences. + */ +public final class MetricsDiffAnalyzer { + private static final Logger logger = LogManager.getLogger(MetricsDiffAnalyzer.class); + + @Nonnull + private final String baseRef; + @Nonnull + private final String headRef; + @Nonnull + private final Path repositoryRoot; + @Nullable + private final String urlBase; + + /** + * Command line arguments for the metrics diff analyzer. + */ + public static class Arguments { + @Parameter(names = {"--base-ref", "-b"}, description = "Git reference for baseline (e.g., 'main', commit SHA)", order = 0) + public String baseRef = "main"; + + @Parameter(names = {"--head-ref"}, description = "Git reference for new code (e.g., 'HEAD', commit SHA)", order = 0) + public String headRef = "HEAD"; + + @Parameter(names = {"--repository-root", "-r"}, description = "Path to git repository root", order = 1) + public String repositoryRoot = "."; + + @Parameter(names = {"--output", "-o"}, description = "Output file path (writes to stdout if not specified)", order = 2) + public String outputPath; + + @Parameter(names = {"--outlier-queries"}, description = "Output file path to specifically list data for outlier queries", order = 5) + public String outlierQueryPath; + + @Parameter(names = {"--url-base"}, description = "Base of the URL to use for linking to queries", order = 6) + public String urlBase; + + @Parameter(names = {"--help", "-h"}, description = "Show help message", help = true, order = 4) + public boolean help; + } + + public MetricsDiffAnalyzer(@Nonnull final String baseRef, + @Nonnull final String headRef, + @Nonnull final Path repositoryRoot, + @Nullable final String urlBase) { + this.baseRef = baseRef; + this.headRef = headRef; + this.repositoryRoot = repositoryRoot; + this.urlBase = urlBase; + } + + /** + * Main entry point for command-line usage. + */ + @SuppressWarnings("PMD.SystemPrintln") + public static void main(String[] args) { + final Arguments arguments = new Arguments(); + final JCommander commander = JCommander.newBuilder() + .addObject(arguments) + .programName("MetricsDiffAnalyzer") + .build(); + + try { + commander.parse(args); + } catch (ParameterException e) { + System.err.println("Error: " + e.getMessage()); + commander.usage(); + System.exit(1); + } + + if (arguments.help) { + commander.usage(); + return; + } + + + try { + final Path repositoryRoot = arguments.repositoryRoot == null ? Paths.get(".") : Paths.get(arguments.repositoryRoot); + final Path outputPath = arguments.outputPath != null ? Paths.get(arguments.outputPath) : null; + + final var analyzer = new MetricsDiffAnalyzer(arguments.baseRef, arguments.headRef, repositoryRoot, arguments.urlBase); + final var analysis = analyzer.analyze(); + final var report = analysis.generateReport(); + + if (outputPath != null) { + Files.writeString(outputPath, report); + if (logger.isInfoEnabled()) { + logger.info(KeyValueLogMessage.of( + "Wrote metrics diff report", + "output", outputPath)); + } + } else { + System.out.println(report); + } + + if (arguments.outlierQueryPath != null) { + String outlierReport = analysis.generateOutlierQueryReport(); + if (!outlierReport.isEmpty()) { + final Path outlierQueryPath = Paths.get(arguments.outlierQueryPath); + Files.writeString(outlierQueryPath, outlierReport); + if (logger.isInfoEnabled()) { + logger.info(KeyValueLogMessage.of("Wrote outlier report", + "output", outlierQueryPath)); + } + } + } + } catch (final Exception e) { + logger.error("Failed to analyze metrics diff", e); + System.err.println("Error: " + e.getMessage()); + System.exit(1); + } + } + + /** + * Performs the metrics diff analysis. + * + * @return analysis results + * @throws RelationalException if analysis fails + */ + @Nonnull + public MetricsAnalysisResult analyze() throws RelationalException { + if (logger.isInfoEnabled()) { + logger.info(KeyValueLogMessage.of("Starting metrics diff analysis", + "base", baseRef)); + } + + // Find all changed metrics files + final var changedFiles = GitMetricsFileFinder.findChangedMetricsYamlFiles(baseRef, headRef, repositoryRoot); + if (logger.isInfoEnabled()) { + logger.info(KeyValueLogMessage.of("Found changed metrics files", + "base", baseRef, + "head", headRef, + "file_count", changedFiles.size())); + } + + final var analysisBuilder = newAnalysisBuilder(); + + for (final var filePath : changedFiles) { + analyzeYamlFile(filePath, analysisBuilder); + } + + return analysisBuilder.build(); + } + + /** + * Analyzes a single metrics file for changes. + */ + private void analyzeYamlFile(@Nonnull final Path yamlPath, + @Nonnull final MetricsAnalysisResult.Builder analysisBuilder) throws RelationalException { + if (logger.isDebugEnabled()) { + logger.debug(KeyValueLogMessage.of("Analyzing YAML metrics file", + "path", yamlPath)); + } + + // Make sure this is a YAML metrics file + if (!yamlPath.toString().endsWith(".metrics.yaml")) { + throw new RelationalException("Unexpected file type for file (expected .metrics.yaml): " + yamlPath, ErrorCode.INTERNAL_ERROR); + } + + // Load base (old) metrics + final Map baseMetrics = loadMetricsAtRef(yamlPath, baseRef); + + // Load head (new) metrics + final Map headMetrics = loadMetricsAtRef(yamlPath, headRef); + + // Compare the metrics + compareMetrics(baseMetrics, headMetrics, yamlPath, analysisBuilder); + } + + @Nonnull + private Map loadMetricsAtRef(@Nonnull Path yamlPath, @Nonnull String ref) throws RelationalException { + try { + final var yamlFile = GitMetricsFileFinder.getFileAtReference( + repositoryRoot.relativize(yamlPath).toString(), ref, repositoryRoot); + if (yamlFile == null) { + return ImmutableMap.of(); + } + try { + return YamlExecutionContext.loadMetricsFromYamlFile(yamlFile); + } finally { + Files.deleteIfExists(yamlFile); // Clean up temp file + } + } catch (IOException e) { + throw new RelationalException("unable to delete temporary file", ErrorCode.INTERNAL_ERROR, e); + } + } + + /** + * Compares metrics between base and head versions. + */ + @VisibleForTesting + public void compareMetrics(@Nonnull final Map baseMetrics, + @Nonnull final Map headMetrics, + @Nonnull final Path filePath, + @Nonnull final MetricsAnalysisResult.Builder analysisBuilder) { + + // Find new and updated queries + int newCount = 0; + int changedCount = 0; + for (final Map.Entry entry : headMetrics.entrySet()) { + var identifier = entry.getKey(); + final MetricsInfo headMetricInfo = entry.getValue(); + final MetricsInfo baseMetricInfo = baseMetrics.get(identifier); + if (baseMetricInfo == null) { + // Query only in new metrics. Mark as new + newCount++; + analysisBuilder.addNewQuery(filePath, identifier, headMetricInfo); + } else { + // Query in both old and new. Mark as changed + changedCount++; + final MetricsInfo baseInfo = baseMetrics.get(identifier); + final MetricsInfo headInfo = headMetrics.get(identifier); + + final var planChanged = !baseInfo.getExplain().equals(headInfo.getExplain()); + final var metricsChanged = MetricsInfo.areMetricsDifferent(baseInfo, headInfo); + + if (planChanged && metricsChanged) { + changedCount++; + analysisBuilder.addPlanAndMetricsChanged(filePath, identifier, baseInfo, headInfo); + } else if (!planChanged && metricsChanged) { + changedCount++; + analysisBuilder.addMetricsOnlyChanged(filePath, identifier, baseInfo, headInfo); + } + // If plan changed but metrics didn't change, we don't report it. This could happen if + // there's some cosmetic change to the plan, like one operator's serialization is changed. + // It could also be something more interesting, but we're generally more concerned + // with changed metrics anyway + } + } + + // Find dropped queries (in base but not in head) + int droppedCount = 0; + for (final Map.Entry entry : baseMetrics.entrySet()) { + var identifier = entry.getKey(); + final MetricsInfo baseMetricsInfo = entry.getValue(); + if (!headMetrics.containsKey(identifier)) { + droppedCount++; + analysisBuilder.addDroppedQuery(filePath, identifier, baseMetricsInfo); + } + } + + if (logger.isDebugEnabled()) { + logger.debug(KeyValueLogMessage.of("Analyzed metrics file", + "path", filePath, + "new", newCount, + "dropped", droppedCount, + "changed", changedCount)); + } + } + + @Nonnull + public MetricsAnalysisResult.Builder newAnalysisBuilder() { + return new MetricsAnalysisResult.Builder(baseRef, headRef, repositoryRoot, urlBase); + } + + /** + * Results of metrics analysis containing all detected changes. + */ + public static class MetricsAnalysisResult { + private final List newQueries; + private final List droppedQueries; + private final List planAndMetricsChanged; + private final List metricsOnlyChanged; + @Nonnull + private final String baseRef; + @Nonnull + private final String headRef; + @Nonnull + private final Path repositoryRoot; + @Nullable + private final String urlBase; + + private MetricsAnalysisResult(@Nonnull final List newQueries, + @Nonnull final List droppedQueries, + @Nonnull final List planAndMetricsChanged, + @Nonnull final List metricsOnlyChanged, + @Nonnull final String baseRef, + @Nonnull final String headRef, + @Nonnull final Path repositoryRoot, + @Nullable final String urlBase) { + this.newQueries = newQueries; + this.droppedQueries = droppedQueries; + this.planAndMetricsChanged = planAndMetricsChanged; + this.metricsOnlyChanged = metricsOnlyChanged; + this.baseRef = baseRef; + this.headRef = headRef; + this.repositoryRoot = repositoryRoot; + this.urlBase = urlBase; + } + + public List getNewQueries() { + return newQueries; + } + + public List getDroppedQueries() { + return droppedQueries; + } + + public List getPlanAndMetricsChanged() { + return planAndMetricsChanged; + } + + public List getMetricsOnlyChanged() { + return metricsOnlyChanged; + } + + @Nonnull + private Path relativePath(@Nonnull Path path) { + return repositoryRoot.relativize(path); + } + + @Nonnull + private String formatQueryDisplay(@Nonnull final QueryChange change) { + return formatQueryDisplay(change, true); + } + + /** + * Helper method to format a query change for display with relative path and line number. + */ + @Nonnull + private String formatQueryDisplay(@Nonnull final QueryChange change, boolean asLink) { + int lineNumber = -1; + String ref = null; + if (change.newInfo != null) { + lineNumber = change.newInfo.getLineNumber(); + ref = headRef; + } else if (change.oldInfo != null) { + lineNumber = change.oldInfo.getLineNumber(); + ref = baseRef; + } + final String lineInfo = lineNumber > 0 ? (":" + lineNumber) : ""; + String locationText = relativePath(change.filePath) + lineInfo; + if (asLink && urlBase != null) { + locationText = "[" + locationText + "](" + urlBase + "/" + ref + "/" + relativePath(change.filePath); + if (lineNumber > 0) { + locationText += "#L" + lineNumber; + } + locationText += ")"; + } + return locationText + ": `" + change.identifier.getQuery() + "`"; + } + + /** + * Generate a markdown formatted report summarizing metrics differences. It contains a few + * different sections, including information on the set of queries that have been added and + * dropped, as well as modifications. This is intended to be presented as a single document + * for a reviewer to use to aid in assessing the scope of planner metrics changes. + * + * @return the contents of a report comparing query metrics + */ + @Nonnull + public String generateReport() { + final var report = new StringBuilder(); + report.append("# 📊 Metrics Diff Analysis Report\n\n"); + + report.append("## Summary\n\n") + .append("- New queries: ").append(newQueries.size()).append("\n") + .append("- Dropped queries: ").append(droppedQueries.size()).append("\n") + .append("- Plan changed + metrics changed: ").append(planAndMetricsChanged.size()).append("\n") + .append("- Plan unchanged + metrics changed: ").append(metricsOnlyChanged.size()).append("\n") + .append("\n"); + + report.append("
\n\n") + .append("â„šī¸ About this analysis\n\n") + .append("This automated analysis compares query planner metrics between the base branch and this PR. It categorizes changes into:\n\n") + .append(" - **New queries**: Queries added in this PR\n") + .append(" - **Dropped queries**: Queries removed in this PR. These should be reviewed to ensure we are not losing coverage.\n") + .append(" - **Plan changed + metrics changed**: The query plan has changed along with planner metrics.\n") + .append(" - **Metrics only changed**: Same plan but different metrics\n\n") + .append("The last category in particular may indicate planner regressions that should be investigated.\n\n") + .append("
\n\n"); + + + if (!newQueries.isEmpty()) { + // Only list a count of new queries by file type. Having more test cases is generally a good thing, + // so we only want to get a rough overview + report.append("## New Queries\n\nCount of new queries by file:\n\n"); + Map countByFile = new TreeMap<>(); // tree map so that we sort by file name + for (final var change : newQueries) { + countByFile.compute(change.filePath, (ignore, oldValue) -> oldValue == null ? 1 : oldValue + 1); + } + for (Map.Entry countEntry : countByFile.entrySet()) { + report.append("- ") + .append(relativePath(countEntry.getKey())) + .append(": ") + .append(countEntry.getValue()) + .append("\n"); + } + report.append("\n"); + } + + if (!droppedQueries.isEmpty()) { + // List out each individual dropped query. These represent a potential lack of coverage, and + // so we want these listed out more explicitly + report.append("## Dropped Queries\n\nThe following queries with metrics were removed:\n\n"); + final int maxQueries = 20; + final List changes = droppedQueries.size() < maxQueries ? droppedQueries : droppedQueries.subList(0, maxQueries); + for (final var change : changes) { + report.append("- ").append(formatQueryDisplay(change)).append("\n"); + } + report.append("\n"); + + if (droppedQueries.size() > maxQueries) { + int remaining = droppedQueries.size() - maxQueries; + report.append("This list has been truncated. There ") + .append(remaining == 1 ? "is " : "are ") + .append(remaining) + .append(" remaining dropped queries.\n\n"); + } + + report.append("The reviewer should double check that these queries were removed intentionally to avoid a loss of coverage.\n\n"); + } + + appendChangesList(report, planAndMetricsChanged, "Plan and Metrics Changed", + "These queries experienced both plan and metrics changes. This generally indicates that there was some planner change\n" + + "that means the planning for this query may be substantially different. Some amount of query plan metrics change is expected,\n" + + "but the reviewer should still validate that these changes are not excessive.\n"); + appendChangesList(report, metricsOnlyChanged, "Only Metrics Changed", + "These queries experienced *only* metrics changes without any plan changes. If these metrics have substantially changed,\n" + + "then a planner change has been made which affects planner performance but does not correlate with any new outcomes,\n" + + "which could indicate a regression.\n"); + + return report.toString(); + } + + /** + * Print out a simple list of outlier queries. This format is kept simple so that + * it can be processed by tools to generate in-line comments. Each query is separated + * by a blank line. The first line contains the file and line number as well as the query + * text, and then the metrics differences follow. This is intended to be used to generate + * a list of in-line comments to highlight each of these queries. + * + * @return a report of outlier queries + */ + @Nonnull + public String generateOutlierQueryReport() { + final List outliers = findAllOutliers(); + if (outliers.isEmpty()) { + return ""; + } + + final StringBuilder report = new StringBuilder(); + for (QueryChange queryChange : outliers) { + // One query per line. Do not use format as a link + report.append(formatQueryDisplay(queryChange, false)).append("\n"); + appendMetricsDiff(report, queryChange); + report.append("\n"); + } + return report.toString(); + } + + @Nonnull + private String sign(double d) { + // For adding an explicit plus to positive values. Rely on usual toString of negative values + // to add the minus sign + return d > 0 ? "+" : ""; + } + + @Nonnull + private String sign(long l) { + // See: sign(double) + return l > 0 ? "+" : ""; + } + + private void appendStatisticalSummary(@Nonnull final StringBuilder report, @Nonnull final MetricsStatistics stats) { + for (final var fieldName : YamlExecutionContext.TRACKED_METRIC_FIELDS) { + final var fieldStats = stats.getFieldStatistics(fieldName); + final var regressionFieldStats = stats.getRegressionStatistics(fieldName); + if (fieldStats.hasChanges() || regressionFieldStats.hasChanges()) { + report.append(String.format("**`%s`**:%n", fieldName)); + report.append(String.format(" - Average change: %s%.1f%n", sign(fieldStats.getMean()), fieldStats.getMean())); + if (regressionFieldStats.hasChanges()) { + report.append(String.format(" - Average regression: %s%.1f%n", sign(regressionFieldStats.getMean()), regressionFieldStats.getMean())); + } + report.append(String.format(" - Median change: %s%d%n", sign(fieldStats.getMedian()), fieldStats.getMedian())); + if (regressionFieldStats.hasChanges()) { + report.append(String.format(" - Median regression: %s%d%n", sign(regressionFieldStats.getMedian()), regressionFieldStats.getMedian())); + } + report.append(String.format(" - Standard deviation: %.1f%n", fieldStats.getStandardDeviation())); + if (regressionFieldStats.hasChanges()) { + report.append(String.format(" - Standard deviation of regressions: %.1f%n", regressionFieldStats.getStandardDeviation())); + } + report.append(String.format(" - Range: %s%d to %s%d%n", sign(fieldStats.getMin()), fieldStats.getMin(), sign(fieldStats.getMax()), fieldStats.getMax())); + if (regressionFieldStats.hasChanges()) { + report.append(String.format(" - Range of regressions: %s%d to %s%d%n", sign(regressionFieldStats.getMin()), regressionFieldStats.getMin(), sign(regressionFieldStats.getMax()), regressionFieldStats.getMax())); + } + report.append(String.format(" - Queries changed: %d%n", fieldStats.getChangedCount())); + if (regressionFieldStats.hasChanges()) { + report.append(String.format(" - Queries regressed: %d%n", regressionFieldStats.getChangedCount())); + } else { + report.append(" - No regressions! 🎉\n"); + } + report.append("\n"); // End with blank line + } + } + } + + private void appendChangesList(@Nonnull final StringBuilder report, @Nonnull List changes, @Nonnull String title, @Nonnull String explanation) { + if (changes.isEmpty()) { + return; + } + report.append("## ").append(title).append("\n\n"); + report.append(explanation).append("\n"); + report.append("Total: ").append(changes.size()).append(" quer").append(changes.size() == 1 ? "y" : "ies").append("\n\n"); + + // Statistical analysis + final var summary = calculateMetricsStatistics(changes); + report.append("### Statistical Summary (").append(title).append(")\n\n"); + appendStatisticalSummary(report, summary); + + // Show outliers for metrics-only changes (these are more concerning) + final var metricsOutliers = findOutliers(changes, summary); + if (metricsOutliers.isEmpty()) { + report.append("There were no queries with significant regressions detected.\n\n"); + } else { + report.append("### Significant Regressions (").append(title).append(")\n\n"); + report.append("There ") + .append(metricsOutliers.size() == 1 ? "was " : "were ") + .append(metricsOutliers.size()) + .append(" outlier").append(metricsOutliers.size() == 1 ? "" : "s") + .append(" detected. Outlier queries have a significant regression in at least one field. Statistically, " + + "this represents either an increase of more than two standard deviations above " + + "the mean or a large absolute increase (e.g., 100).\n\n"); + + for (final var change : metricsOutliers) { + report.append(String.format("- %s", formatQueryDisplay(change))).append("\n"); + if (change.oldInfo != null && change.newInfo != null) { + final String oldExplain = change.oldInfo.getExplain(); + final String newExplain = change.newInfo.getExplain(); + if (oldExplain.equals(newExplain)) { + report.append(" - explain: `").append(oldExplain).append("`\n"); + } else { + report.append(" - old explain: `").append(oldExplain).append("`\n"); + report.append(" - new explain: `").append(newExplain).append("`\n"); + } + appendMetricsDiff(report, change); + } + } + report.append("\n"); + } + + if (changes.size() > metricsOutliers.size()) { + int minorChanges = changes.size() - metricsOutliers.size(); + report.append("### Minor Changes (").append(title).append(")\n\nIn addition, there ") + .append(minorChanges == 1 ? "was " : "were ") + .append(minorChanges) + .append(" quer").append(minorChanges == 1 ? "y" : "ies") + .append(" with minor changes.\n\n"); + } + } + + private MetricsStatistics calculateMetricsStatistics(@Nonnull final List changes) { + final var statisticsBuilder = new MetricsStatistics.Builder(); + + for (final var change : changes) { + if (change.oldInfo != null && change.newInfo != null) { + final var oldMetrics = change.oldInfo.getCountersAndTimers(); + final var newMetrics = change.newInfo.getCountersAndTimers(); + final var descriptor = oldMetrics.getDescriptorForType(); + + for (final var fieldName : YamlExecutionContext.TRACKED_METRIC_FIELDS) { + final var field = descriptor.findFieldByName(fieldName); + final var oldValue = (long)oldMetrics.getField(field); + final var newValue = (long)newMetrics.getField(field); + + if (oldValue != newValue) { + statisticsBuilder.addDifference(fieldName, oldValue, newValue); + } + } + } + } + + return statisticsBuilder.build(); + } + + @Nonnull + public List findAllOutliers() { + return ImmutableList.builder() + .addAll(findOutliers(planAndMetricsChanged)) + .addAll(findOutliers(metricsOnlyChanged)) + .build(); + } + + @Nonnull + private List findOutliers(@Nonnull final List changes) { + final MetricsStatistics summary = calculateMetricsStatistics(changes); + return findOutliers(changes, summary); + } + + @Nonnull + private List findOutliers(@Nonnull final List changes, @Nonnull final MetricsStatistics stats) { + if (changes.size() < 3) { + // Not enough data for meaningful outlier detection + return changes; + } + + final ImmutableList.Builder outliers = ImmutableList.builder(); + + for (final var change : changes) { + if (isOutlier(change, stats)) { + outliers.add(change); + } + } + + return outliers.build(); + } + + private boolean isOutlier(@Nonnull QueryChange change, @Nonnull MetricsStatistics stats) { + if (change.oldInfo == null || change.newInfo == null) { + return false; + } + final var oldMetrics = change.oldInfo.getCountersAndTimers(); + final var newMetrics = change.newInfo.getCountersAndTimers(); + final var descriptor = oldMetrics.getDescriptorForType(); + + for (final var fieldName : YamlExecutionContext.TRACKED_METRIC_FIELDS) { + final var field = descriptor.findFieldByName(fieldName); + final var oldValue = (long)oldMetrics.getField(field); + final var newValue = (long)newMetrics.getField(field); + + if (oldValue != newValue) { + final var difference = newValue - oldValue; + if (difference > 0 && isOutlierValue(stats.getRegressionStatistics(fieldName), difference)) { + return true; + } + } + } + + return false; + } + + private boolean isOutlierValue(MetricsStatistics.FieldStatistics fieldStats, long difference) { + // Consider it an outlier if it's more than 2 standard deviations from the mean + // or if it's a very large absolute change + Assert.thatUnchecked(fieldStats.hasChanges(), "Field stats should have at least one difference"); + final var zScore = Math.abs((difference - fieldStats.getMean()) / fieldStats.getStandardDeviation()); + final var isLargeAbsoluteChange = Math.abs(difference) > Math.max(100, Math.abs(fieldStats.getMean()) * 2); + return zScore > 2.0 || isLargeAbsoluteChange; + } + + private void appendMetricsDiff(@Nonnull final StringBuilder report, + @Nonnull final QueryChange queryChange) { + Assert.thatUnchecked(queryChange.oldInfo != null, "old info must be set to display metrics diff"); + Assert.thatUnchecked(queryChange.newInfo != null, "new info must be set to display metrics diff"); + + final PlannerMetricsProto.CountersAndTimers oldMetrics = queryChange.oldInfo.getCountersAndTimers(); + final PlannerMetricsProto.CountersAndTimers newMetrics = queryChange.newInfo.getCountersAndTimers(); + + final var descriptor = oldMetrics.getDescriptorForType(); + + for (final var fieldName : YamlExecutionContext.TRACKED_METRIC_FIELDS) { + final Descriptors.FieldDescriptor field = descriptor.findFieldByName(fieldName); + final long oldValue = (long)oldMetrics.getField(field); + final long newValue = (long)newMetrics.getField(field); + if (oldValue != newValue) { + final long change = newValue - oldValue; + report.append(" - `").append(fieldName).append("`: ") + .append(oldValue).append(" -> ").append(newValue) + .append(" (").append(sign(change)).append(change).append(")\n"); + } + } + } + + public static class Builder { + private final ImmutableList.Builder newQueries = ImmutableList.builder(); + private final ImmutableList.Builder droppedQueries = ImmutableList.builder(); + private final ImmutableList.Builder planAndMetricsChanged = ImmutableList.builder(); + private final ImmutableList.Builder metricsOnlyChanged = ImmutableList.builder(); + @Nonnull + private final String baseRef; + @Nonnull + private final String headRef; + @Nonnull + private final Path repositoryRoot; + @Nullable + private final String urlBase; + + public Builder(@Nonnull String baseRef, @Nonnull String headRef, @Nonnull final Path repositoryRoot, @Nullable final String urlBase) { + this.baseRef = baseRef; + this.headRef = headRef; + this.repositoryRoot = repositoryRoot; + this.urlBase = urlBase; + } + + @Nonnull + public Builder addNewQuery(@Nonnull final Path filePath, + @Nonnull final PlannerMetricsProto.Identifier identifier, + @Nonnull final MetricsInfo info) { + newQueries.add(new QueryChange(filePath, identifier, null, info)); + return this; + } + + @Nonnull + public Builder addDroppedQuery(@Nonnull final Path filePath, + @Nonnull final PlannerMetricsProto.Identifier identifier, + @Nonnull final MetricsInfo info) { + droppedQueries.add(new QueryChange(filePath, identifier, info, null)); + return this; + } + + @Nonnull + public Builder addPlanAndMetricsChanged(@Nonnull final Path filePath, + @Nonnull final PlannerMetricsProto.Identifier identifier, + @Nonnull final MetricsInfo oldInfo, + @Nonnull final MetricsInfo newInfo) { + planAndMetricsChanged.add(new QueryChange(filePath, identifier, oldInfo, newInfo)); + return this; + } + + @Nonnull + public Builder addMetricsOnlyChanged(@Nonnull final Path filePath, + @Nonnull final PlannerMetricsProto.Identifier identifier, + @Nonnull final MetricsInfo oldInfo, + @Nonnull final MetricsInfo newInfo) { + metricsOnlyChanged.add(new QueryChange(filePath, identifier, oldInfo, newInfo)); + return this; + } + + @Nonnull + public MetricsAnalysisResult build() { + return new MetricsAnalysisResult( + newQueries.build(), + droppedQueries.build(), + planAndMetricsChanged.build(), + metricsOnlyChanged.build(), + baseRef, + headRef, + repositoryRoot, + urlBase + ); + } + } + } + + /** + * Represents a change to a query's metrics. + */ + public static class QueryChange { + public final Path filePath; + public final PlannerMetricsProto.Identifier identifier; + @Nullable + public final MetricsInfo oldInfo; + @Nullable + public final MetricsInfo newInfo; + + public QueryChange(@Nonnull final Path filePath, + @Nonnull final PlannerMetricsProto.Identifier identifier, + @Nullable final MetricsInfo oldInfo, + @Nullable final MetricsInfo newInfo) { + this.filePath = filePath; + this.identifier = identifier; + this.oldInfo = oldInfo; + this.newInfo = newInfo; + } + } +} diff --git a/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/MetricsInfo.java b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/MetricsInfo.java new file mode 100644 index 0000000000..e430b59918 --- /dev/null +++ b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/MetricsInfo.java @@ -0,0 +1,120 @@ +/* + * MetricsInfo.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2015-2025 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.relational.yamltests; + +import com.apple.foundationdb.relational.yamltests.generated.stats.PlannerMetricsProto; +import com.google.protobuf.Descriptors; + +import javax.annotation.Nonnull; +import java.nio.file.Path; +import java.util.Objects; + +public final class MetricsInfo { + @Nonnull + private final PlannerMetricsProto.Info underlying; + @Nonnull + private final Path filePath; + private final int lineNumber; + + MetricsInfo(@Nonnull PlannerMetricsProto.Info metricsInfo, + @Nonnull Path filePath, + int lineNumber) { + this.underlying = metricsInfo; + this.filePath = filePath; + this.lineNumber = lineNumber; + } + + + @Nonnull + public PlannerMetricsProto.Info getUnderlying() { + return underlying; + } + + @Nonnull + public Path getFilePath() { + return filePath; + } + + public int getLineNumber() { + return lineNumber; + } + + @Nonnull + public String getExplain() { + return underlying.getExplain(); + } + + @Nonnull + public PlannerMetricsProto.CountersAndTimers getCountersAndTimers() { + return underlying.getCountersAndTimers(); + } + + @Override + public boolean equals(final Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + final MetricsInfo that = (MetricsInfo)object; + return lineNumber == that.lineNumber && Objects.equals(underlying, that.underlying) && Objects.equals(filePath, that.filePath); + } + + @Override + public int hashCode() { + return Objects.hash(underlying, filePath, lineNumber); + } + + /** + * Compares two CountersAndTimers and determines if any of the tracked metrics are different. + * This method checks the core metrics that are used for planner comparison but excludes timing + * information as those can vary between runs. + * + * @param expected the expected metrics values + * @param actual the actual metrics values + * @return true if any of the tracked metrics differ + */ + public static boolean areMetricsDifferent(@Nonnull final MetricsInfo expected, + @Nonnull final MetricsInfo actual) { + final var metricsDescriptor = PlannerMetricsProto.CountersAndTimers.getDescriptor(); + + return YamlExecutionContext.TRACKED_METRIC_FIELDS.stream() + .map(metricsDescriptor::findFieldByName) + .anyMatch(field -> isMetricDifferent(expected, actual, field)); + } + + /** + * Compares a specific metric field between expected and actual values. + * + * @param expected the expected metrics + * @param actual the actual metrics + * @param fieldDescriptor the field to compare + * @return true if the metric values differ + */ + private static boolean isMetricDifferent(@Nonnull final MetricsInfo expected, + @Nonnull final MetricsInfo actual, + @Nonnull final Descriptors.FieldDescriptor fieldDescriptor) { + final long expectedMetric = (long) expected.getUnderlying().getCountersAndTimers().getField(fieldDescriptor); + final long actualMetric = (long) actual.getUnderlying().getCountersAndTimers().getField(fieldDescriptor); + return expectedMetric != actualMetric; + } +} diff --git a/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/MetricsStatistics.java b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/MetricsStatistics.java new file mode 100644 index 0000000000..947569b114 --- /dev/null +++ b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/MetricsStatistics.java @@ -0,0 +1,179 @@ +/* + * MetricsStatistics.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2015-2025 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.relational.yamltests; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +import javax.annotation.Nonnull; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Statistical analysis of metrics differences for a set of queries. + * Provides mean, median, standard deviation, and range calculations for each metric field. + */ +public final class MetricsStatistics { + private final Map fieldStatistics; + private final Map regressionFieldStatistics; + + private MetricsStatistics(@Nonnull final Map fieldStatistics, + @Nonnull final Map regressionFieldStatistics) { + this.fieldStatistics = fieldStatistics; + this.regressionFieldStatistics = regressionFieldStatistics; + } + + @Nonnull + public FieldStatistics getFieldStatistics(@Nonnull final String fieldName) { + return fieldStatistics.getOrDefault(fieldName, FieldStatistics.EMPTY); + } + + @Nonnull + public FieldStatistics getRegressionStatistics(@Nonnull final String fieldName) { + return regressionFieldStatistics.getOrDefault(fieldName, FieldStatistics.EMPTY); + } + + /** + * Statistics for a single metrics field across all queries. + */ + public static class FieldStatistics { + @Nonnull + public static final FieldStatistics EMPTY = new FieldStatistics(ImmutableList.of(), 0.0, 0.0); + + @Nonnull + public final List sortedValues; + public final double mean; + public final double standardDeviation; + + private FieldStatistics(@Nonnull final List sortedValues, + final double mean, + final double standardDeviation) { + this.sortedValues = ImmutableList.copyOf(sortedValues); + this.mean = mean; + this.standardDeviation = standardDeviation; + } + + public boolean hasChanges() { + return !sortedValues.isEmpty(); + } + + public int getChangedCount() { + return sortedValues.size(); + } + + public double getMean() { + return mean; + } + + public long getMin() { + return hasChanges() ? sortedValues.get(0) : 0L; + } + + public long getMax() { + return hasChanges() ? sortedValues.get(sortedValues.size() - 1) : 0L; + } + + public double getStandardDeviation() { + return standardDeviation; + } + + /** + * Returns the value for which {@code quantile} proportion of elements + * are smaller. This is a generalization of the concept of "percentiles" + * to real numbers. + * + * @param quantile the quantile to calculate + * @return a value that separates the smallest {@code quantile} proportion of elements + */ + public long getQuantile(double quantile) { + if (sortedValues.isEmpty()) { + return 0L; + } else { + int index = (int)(getChangedCount() * quantile); + return sortedValues.get(index); + } + } + + public long getMedian() { + return getQuantile(0.5); + } + } + + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Builder for collecting metric differences and calculating statistics. + */ + public static class Builder { + @Nonnull + private final Map> differences = new HashMap<>(); + @Nonnull + private final Map> regressions = new HashMap<>(); + + @Nonnull + public Builder addDifference(@Nonnull final String fieldName, final long baseValue, final long headValue) { + long difference = headValue - baseValue; + differences.computeIfAbsent(fieldName, k -> new ArrayList<>()).add(difference); + if (difference > 0) { + regressions.computeIfAbsent(fieldName, k -> new ArrayList<>()).add(difference); + } + return this; + } + + @Nonnull + private Map buildStats(@Nonnull final Map> baseMap) { + final ImmutableMap.Builder builder = ImmutableMap.builder(); + + for (final var entry : baseMap.entrySet()) { + final String fieldName = entry.getKey(); + final List values = entry.getValue(); + + if (values.isEmpty()) { + continue; + } + + // Sort values for quantile calculations + Collections.sort(values); + + // Calculate statistics for values + final var mean = values.stream().mapToLong(Long::longValue).average().orElse(0.0); + final var variance = values.stream() + .mapToDouble(v -> Math.pow(v - mean, 2)) + .average().orElse(0.0); + final var standardDeviation = Math.sqrt(variance); + + builder.put(fieldName, new FieldStatistics(values, mean, standardDeviation)); + } + + return builder.build(); + } + + @Nonnull + public MetricsStatistics build() { + return new MetricsStatistics(buildStats(differences), buildStats(regressions)); + } + } +} diff --git a/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/YamlExecutionContext.java b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/YamlExecutionContext.java index 857065c57a..3d63d9e65b 100644 --- a/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/YamlExecutionContext.java +++ b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/YamlExecutionContext.java @@ -20,6 +20,7 @@ package com.apple.foundationdb.relational.yamltests; +import com.apple.foundationdb.record.logging.KeyValueLogMessage; import com.apple.foundationdb.relational.api.Options; import com.apple.foundationdb.relational.api.exceptions.ErrorCode; import com.apple.foundationdb.relational.api.exceptions.RelationalException; @@ -34,17 +35,21 @@ import org.junit.jupiter.api.Assertions; import org.opentest4j.TestAbortedException; import org.yaml.snakeyaml.DumperOptions; +import org.yaml.snakeyaml.LoaderOptions; import org.yaml.snakeyaml.Yaml; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.io.BufferedInputStream; import java.io.BufferedReader; +import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.URI; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.Comparator; @@ -63,6 +68,19 @@ public final class YamlExecutionContext { private static final Logger logger = LogManager.getLogger(YamlRunner.class); + /** + * List of metrics field names that are tracked for planner comparison. + * These are the core metrics that should be consistent between runs, excluding timing + * information which can vary. + */ + public static final List TRACKED_METRIC_FIELDS = List.of( + "task_count", + "transform_count", + "transform_yield_count", + "insert_new_count", + "insert_reused_count" + ); + public static final ContextOption OPTION_FORCE_CONTINUATIONS = new ContextOption<>("optionForceContinuation"); public static final ContextOption OPTION_CORRECT_EXPLAIN = new ContextOption<>("optionCorrectExplain"); public static final ContextOption OPTION_CORRECT_METRICS = new ContextOption<>("optionCorrectMetrics"); @@ -494,6 +512,145 @@ private static ImmutableMap loadMetricsFromYamlFile(@Nonnull final Path filePath) throws RelationalException { + final ImmutableMap.Builder resultMapBuilder = ImmutableMap.builder(); + final Map seen = new HashMap<>(); + if (!Files.exists(filePath)) { + return resultMapBuilder.build(); + } + + try { + final LoaderOptions loaderOptions = new LoaderOptions(); + loaderOptions.setAllowDuplicateKeys(true); + final var yaml = new Yaml(new CustomYamlConstructor(loaderOptions)); + final var document = yaml.load(new BufferedInputStream(new FileInputStream(filePath.toFile()))); + + if (!(document instanceof Map)) { + return resultMapBuilder.build(); + } + + final var data = (Map>>) document; + + // Parse each block in the YAML file + for (final var blockEntry : data.entrySet()) { + final var blockName = blockEntry.getKey(); + final var queries = blockEntry.getValue(); + + if (queries == null) { + continue; + } + + // Process each query in the block + for (final var queryMap : queries) { + processQueryAtLine(queryMap, blockName, seen, resultMapBuilder, filePath); + } + } + + return resultMapBuilder.build(); + } catch (final IOException e) { + throw new RelationalException(ErrorCode.INTERNAL_ERROR, e); + } + } + + /** + * Processes a single query with its line number information. + */ + @SuppressWarnings("unchecked") + private static void processQueryAtLine(@Nullable Map queryMap, + String blockName, + Map seen, + ImmutableMap.Builder resultMapBuilder, + Path filePath) { + if (queryMap == null) { + return; + } + + String query = null; + String explain = null; + int lineNumber = 1; // Default line number + for (Map.Entry entry : queryMap.entrySet()) { + final Object key = entry.getKey(); + if (key instanceof CustomYamlConstructor.LinedObject) { + CustomYamlConstructor.LinedObject linedObject = (CustomYamlConstructor.LinedObject) key; + final String keyString = (String) linedObject.getObject(); + if ("query".equals(keyString)) { + query = (String) entry.getValue(); + lineNumber = ((CustomYamlConstructor.LinedObject) key).getLineNumber(); + } else if ("explain".equals(keyString)) { + explain = (String) entry.getValue(); + } + } + } + if (query == null) { + // Query not found + return; + } + + // Extract the query string, handling LinedObject if present + final var setup = (List) queryMap.get("setup"); + + // Build identifier + final var identifierBuilder = PlannerMetricsProto.Identifier.newBuilder() + .setBlockName(blockName) + .setQuery(query); + if (setup != null) { + identifierBuilder.addAllSetups(setup); + } + final var identifier = identifierBuilder.build(); + + // Build counters and timers + final var countersAndTimers = PlannerMetricsProto.CountersAndTimers.newBuilder() + .setTaskCount(getLongValue(queryMap, "task_count")) + .setTaskTotalTimeNs(TimeUnit.MILLISECONDS.toNanos(getLongValue(queryMap, "task_total_time_ms"))) + .setTransformCount(getLongValue(queryMap, "transform_count")) + .setTransformTimeNs(TimeUnit.MILLISECONDS.toNanos(getLongValue(queryMap, "transform_time_ms"))) + .setTransformYieldCount(getLongValue(queryMap, "transform_yield_count")) + .setInsertTimeNs(TimeUnit.MILLISECONDS.toNanos(getLongValue(queryMap, "insert_time_ms"))) + .setInsertNewCount(getLongValue(queryMap, "insert_new_count")) + .setInsertReusedCount(getLongValue(queryMap, "insert_reused_count")) + .build(); + + // Build info + final var info = PlannerMetricsProto.Info.newBuilder() + .setExplain(explain == null ? "" : explain) + .setCountersAndTimers(countersAndTimers) + .build(); + + // Check for duplicates + final var oldInfo = seen.get(identifier); + if (oldInfo == null) { + seen.put(identifier, info); + resultMapBuilder.put(identifier, new MetricsInfo(info, filePath, lineNumber)); + } else if (!info.equals(oldInfo)) { + logger.warn(KeyValueLogMessage.of("Metrics file contains multiple copies of the same query", + "file", filePath, + "block", identifier.getBlockName(), + "query", identifier.getQuery(), + "line", lineNumber)); + } + } + + /** + * Helper method to safely extract long values from YAML data. + */ + private static long getLongValue(Map map, String key) { + final var value = map.get(key); + if (value instanceof Number) { + return ((Number) value).longValue(); + } + throw new IllegalArgumentException("Expected numeric value for key: " + key + ", got: " + value); + } + @Nonnull private static String metricsBinaryProtoFileName(@Nonnull final String resourcePath) { return baseName(resourcePath) + ".metrics.binpb"; diff --git a/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/command/queryconfigs/CheckExplainConfig.java b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/command/queryconfigs/CheckExplainConfig.java index 2a0e58db2f..1ae90a1dad 100644 --- a/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/command/queryconfigs/CheckExplainConfig.java +++ b/yaml-tests/src/main/java/com/apple/foundationdb/relational/yamltests/command/queryconfigs/CheckExplainConfig.java @@ -206,26 +206,14 @@ private void checkMetrics(final @Nonnull String currentQuery, private boolean areMetricsDifferent(final PlannerMetricsProto.CountersAndTimers expectedCountersAndTimers, final PlannerMetricsProto.CountersAndTimers actualCountersAndTimers, final Descriptors.Descriptor metricsDescriptor) { - return isMetricDifferent(expectedCountersAndTimers, - actualCountersAndTimers, - metricsDescriptor.findFieldByName("task_count"), - lineNumber) | - isMetricDifferent(expectedCountersAndTimers, - actualCountersAndTimers, - metricsDescriptor.findFieldByName("transform_count"), - lineNumber) | - isMetricDifferent(expectedCountersAndTimers, - actualCountersAndTimers, - metricsDescriptor.findFieldByName("transform_yield_count"), - lineNumber) | - isMetricDifferent(expectedCountersAndTimers, - actualCountersAndTimers, - metricsDescriptor.findFieldByName("insert_new_count"), - lineNumber) | - isMetricDifferent(expectedCountersAndTimers, - actualCountersAndTimers, - metricsDescriptor.findFieldByName("insert_reused_count"), - lineNumber); + boolean different = false; + for (String fieldName : YamlExecutionContext.TRACKED_METRIC_FIELDS) { + // Check each metric. Do NOT short-circuit because we want to log any metrics + // that have changed (a side effect of isMetricDifferent) + different |= isMetricDifferent(expectedCountersAndTimers, actualCountersAndTimers, + metricsDescriptor.findFieldByName(fieldName), lineNumber); + } + return different; } private static boolean isMetricDifferent(@Nonnull final PlannerMetricsProto.CountersAndTimers expected, diff --git a/yaml-tests/src/test/java/com/apple/foundationdb/relational/yamltests/MetricsDiffAnalyzerTest.java b/yaml-tests/src/test/java/com/apple/foundationdb/relational/yamltests/MetricsDiffAnalyzerTest.java new file mode 100644 index 0000000000..1024b76d27 --- /dev/null +++ b/yaml-tests/src/test/java/com/apple/foundationdb/relational/yamltests/MetricsDiffAnalyzerTest.java @@ -0,0 +1,365 @@ +/* + * MetricsDiffAnalyzerTest.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2015-2025 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.relational.yamltests; + +import com.apple.foundationdb.relational.yamltests.generated.stats.PlannerMetricsProto; +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +import javax.annotation.Nonnull; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.fail; + +class MetricsDiffAnalyzerTest { + + @Nonnull + private static final PlannerMetricsProto.CountersAndTimers BASE_DUMMY_COUNTERS = PlannerMetricsProto.CountersAndTimers.newBuilder() + .setTaskCount(10) + .setTaskTotalTimeNs(1000000L) + .setTransformCount(5) + .setTransformTimeNs(500000L) + .setTransformYieldCount(1) + .setInsertTimeNs(100000L) + .setInsertNewCount(2) + .setInsertReusedCount(3) + .build(); + + @Test + void testLoadMetricsFromYamlFile() throws Exception { + // Load test metrics file + final var testFile = getTestResourcePath("test-base.metrics.yaml"); + final var metrics = YamlExecutionContext.loadMetricsFromYamlFile(testFile); + + assertThat(metrics).hasSize(4); // 3 basic + 1 complex query + + // Verify one of the loaded metrics + final var basicIdentifier = PlannerMetricsProto.Identifier.newBuilder() + .setBlockName("basic_queries") + .setQuery("SELECT id FROM users WHERE id = ?") + .build(); + + assertThat(metrics).containsKey(basicIdentifier); + final var metricsInfo = metrics.get(basicIdentifier); + assertThat(metricsInfo.getExplain()).isEqualTo("Index(users_by_id [[?, ?]])"); + assertThat(metricsInfo.getCountersAndTimers().getTaskCount()).isEqualTo(10); + assertThat(metricsInfo.getCountersAndTimers().getTransformCount()).isEqualTo(5); + } + + @Nonnull + private MetricsDiffAnalyzer.MetricsAnalysisResult analyze(@Nonnull Map baseMetrics, + @Nonnull Map headMetrics) { + final var analyzer = new MetricsDiffAnalyzer("base", "head", Paths.get("."), null); + final var analysisBuilder = analyzer.newAnalysisBuilder(); + final var filePath = Paths.get("test.metrics.yaml"); + + analyzer.compareMetrics(baseMetrics, headMetrics, filePath, analysisBuilder); + return analysisBuilder.build(); + } + + @Nonnull + private Path getTestResourcePath(String filename) { + final var classLoader = Thread.currentThread().getContextClassLoader(); + final var resource = classLoader.getResource("metrics-diff/" + filename); + assertNotNull(resource, "Test resource not found: metrics-diff/" + filename); + return Paths.get(resource.getPath()); + } + + + @Test + void testCompareMetricsDetectsChanges() throws Exception { + // Load base and head metrics + final var baseMetrics = YamlExecutionContext.loadMetricsFromYamlFile( + getTestResourcePath("test-base.metrics.yaml")); + final var headMetrics = YamlExecutionContext.loadMetricsFromYamlFile( + getTestResourcePath("test-head.metrics.yaml")); + final MetricsDiffAnalyzer.MetricsAnalysisResult analysis = analyze(baseMetrics, headMetrics); + + // Verify we detect changes + assertThat(analysis.getNewQueries()).hasSize(1); // "SELECT email FROM users WHERE email = ?" + assertThat(analysis.getDroppedQueries()).isEmpty(); + assertThat(analysis.getPlanAndMetricsChanged()).hasSize(2); // two queries have plan change + assertThat(analysis.getMetricsOnlyChanged()).hasSize(1); // users query has only metrics changes + + // Verify specific changes + final var newQuery = analysis.getNewQueries().get(0); + assertThat(newQuery.identifier.getQuery()).contains("SELECT email FROM users WHERE email = ?"); + + for (MetricsDiffAnalyzer.QueryChange planChanged : analysis.getPlanAndMetricsChanged()) { + assertThat(planChanged.newInfo).isNotNull(); + assertThat(planChanged.oldInfo).isNotNull(); + final String query = planChanged.identifier.getQuery(); + final String newExplain = planChanged.newInfo.getExplain(); + final String oldExplain = planChanged.oldInfo.getExplain(); + + if (query.contains("SELECT * FROM orders WHERE customer_id = ?")) { + assertThat(newExplain).contains("Covering_Index"); + assertThat(oldExplain).contains("Index(orders_by_customer"); + } else if (query.contains("SELECT name FROM users WHERE name LIKE 'John%'")) { + assertThat(newExplain).contains("Index(users_by_name [[John, John~]]) [IMPROVED]"); + assertThat(oldExplain).contains("Index(users_by_name [[John, John~]])"); + } else { + fail("Unexpected query: " + query); + } + } + + final var metricsChanged = analysis.getMetricsOnlyChanged().get(0); + assertThat(metricsChanged.newInfo).isNotNull(); + assertThat(metricsChanged.oldInfo).isNotNull(); + assertThat(metricsChanged.newInfo.getExplain()) + .isEqualTo(metricsChanged.oldInfo.getExplain()) + .contains("GroupBy(Join(Index(users), Index(orders_by_customer)))"); + } + + @Test + void testStatisticalAnalysis() { + // Create test data with known statistical properties + final var baseMetrics = createTestMetricsWithStatistics(); + final var headMetrics = createModifiedTestMetrics(); + final MetricsDiffAnalyzer.MetricsAnalysisResult result = analyze(baseMetrics, headMetrics); + + // Generate report and verify statistical analysis + final String report = result.generateReport(); + + assertThat(report) + .contains("Statistical Summary") + .contains("Average change:") + .contains("Median change:") + .contains("Standard deviation:") + .contains("Range:") + .contains("Queries changed:"); + } + + @Test + void testReportGeneration() throws Exception { + // Load test metrics + final var baseMetrics = YamlExecutionContext.loadMetricsFromYamlFile( + getTestResourcePath("test-base.metrics.yaml")); + final var headMetrics = YamlExecutionContext.loadMetricsFromYamlFile( + getTestResourcePath("test-head.metrics.yaml")); + final MetricsDiffAnalyzer.MetricsAnalysisResult analysis = analyze(baseMetrics, headMetrics); + + final String report = analysis.generateReport(); + + // Verify report structure + assertThat(report) + .contains("# 📊 Metrics Diff Analysis Report") + .contains("## Summary") + .contains("- New queries: 1") + .contains("- Dropped queries: 0") + .contains("- Plan changed + metrics changed: 2") + .contains("- Plan unchanged + metrics changed: 1") + .contains("## New Queries") + .contains("## Plan and Metrics Changed") + .contains("## Only Metrics Changed"); + } + + @Test + void testOutlierDetection() { + // Create metrics with one clear outlier + final var baseMetrics = createMetricsWithOutliers(); + final var headMetrics = createMetricsWithOutlierChanges(); + final MetricsDiffAnalyzer.MetricsAnalysisResult analysis = analyze(baseMetrics, headMetrics); + final List outliers = analysis.findAllOutliers(); + assertThat(outliers) + .as("should have found a single outlier") + .hasSize(1); + final MetricsDiffAnalyzer.QueryChange expected = outliers.get(0); + assertThat(expected.newInfo) + .isNotNull(); + + final String report = analysis.generateReport(); + + // Should report the outlier in the larger report + assertThat(report) + .contains("- Plan unchanged + metrics changed: 11") + .contains("Significant Regressions (Only Metrics Changed)") + .contains("outlier_query"); + + // Should also be able to see the query in the outlier query report + final String outlierReport = analysis.generateOutlierQueryReport(); + assertThat(outlierReport).isNotEmpty(); + String[] outlierText = outlierReport.split("\n\n"); + assertThat(outlierText) + .hasSize(1); + assertThat(outlierText[0]) + .contains(expected.identifier.getQuery()) + .contains("" + expected.newInfo.getLineNumber()); + } + + @Nonnull + private Map createTestMetricsWithStatistics() { + // Create metrics with predictable statistical properties + final var builder = ImmutableMap.builder(); + + // Add several queries with varying metrics for statistics + for (int i = 1; i <= 5; i++) { + final var identifier = PlannerMetricsProto.Identifier.newBuilder() + .setBlockName("test_block") + .setQuery("SELECT * FROM table" + i + " WHERE id = ?") + .build(); + + final var countersAndTimers = PlannerMetricsProto.CountersAndTimers.newBuilder() + .setTaskCount(10 * i) + .setTaskTotalTimeNs(1000000L * i) + .setTransformCount(5 * i) + .setTransformTimeNs(500000L * i) + .setTransformYieldCount(i) + .setInsertTimeNs(100000L * i) + .setInsertNewCount(2 * i) + .setInsertReusedCount(3 * i) + .build(); + + final var info = PlannerMetricsProto.Info.newBuilder() + .setExplain("Index(table" + i + "_by_id [[?, ?]])") + .setCountersAndTimers(countersAndTimers) + .build(); + + builder.put(identifier, new MetricsInfo(info, Paths.get("test.yaml"), i)); + } + + return builder.build(); + } + + @Nonnull + private Map createModifiedTestMetrics() { + // Create modified versions with predictable changes + final var builder = ImmutableMap.builder(); + + for (int i = 1; i <= 5; i++) { + final var identifier = PlannerMetricsProto.Identifier.newBuilder() + .setBlockName("test_block") + .setQuery("SELECT * FROM table" + i + " WHERE id = ?") + .build(); + + // Add consistent changes: +2 to task_count, +1 to transform_count + final var countersAndTimers = PlannerMetricsProto.CountersAndTimers.newBuilder() + .setTaskCount(10 * i + 2) + .setTaskTotalTimeNs(1000000L * i) + .setTransformCount(5 * i + 1) + .setTransformTimeNs(500000L * i) + .setTransformYieldCount(i) + .setInsertTimeNs(100000L * i) + .setInsertNewCount(2 * i) + .setInsertReusedCount(3 * i) + .build(); + + final var info = PlannerMetricsProto.Info.newBuilder() + .setExplain("Index(table" + i + "_by_id [[?, ?]])") + .setCountersAndTimers(countersAndTimers) + .build(); + + builder.put(identifier, new MetricsInfo(info, Paths.get("test.yaml"), i)); + } + + return builder.build(); + } + + @Nonnull + private Map createMetricsWithOutliers() { + final var builder = ImmutableMap.builder(); + + // Add normal queries + for (int i = 1; i <= 10; i++) { + final var identifier = PlannerMetricsProto.Identifier.newBuilder() + .setBlockName("test_block") + .setQuery("normal_query_" + i) + .build(); + + final var info = PlannerMetricsProto.Info.newBuilder() + .setExplain("Index(normal_index)") + .setCountersAndTimers(BASE_DUMMY_COUNTERS) + .build(); + + builder.put(identifier, new MetricsInfo(info, Paths.get("test.yaml"), i)); + } + + // Add outlier query with much different metrics + final var outlierIdentifier = PlannerMetricsProto.Identifier.newBuilder() + .setBlockName("test_block") + .setQuery("outlier_query") + .build(); + + final var outlierCounters = BASE_DUMMY_COUNTERS.toBuilder() + .setTaskCount(BASE_DUMMY_COUNTERS.getTaskCount() * 10) // Much higher than normal + .setTransformCount(BASE_DUMMY_COUNTERS.getTransformCount() + 100) // Much higher than normal + .build(); + + final var outlierInfo = PlannerMetricsProto.Info.newBuilder() + .setExplain("Index(outlier_index)") + .setCountersAndTimers(outlierCounters) + .build(); + + builder.put(outlierIdentifier, new MetricsInfo(outlierInfo, Paths.get("test.yaml"), 4)); + + return builder.build(); + } + + @Nonnull + private Map createMetricsWithOutlierChanges() { + final var builder = ImmutableMap.builder(); + + // Normal queries with small changes + final PlannerMetricsProto.CountersAndTimers normalIncreaseCounters = BASE_DUMMY_COUNTERS.toBuilder() + .setTaskCount(BASE_DUMMY_COUNTERS.getTaskCount() + 1) + .setTransformCount(BASE_DUMMY_COUNTERS.getTransformCount() + 1) + .build(); + + for (int i = 1; i <= 10; i++) { + final var identifier = PlannerMetricsProto.Identifier.newBuilder() + .setBlockName("test_block") + .setQuery("normal_query_" + i) + .build(); + + final var info = PlannerMetricsProto.Info.newBuilder() + .setExplain("Index(normal_index)") + .setCountersAndTimers(normalIncreaseCounters) + .build(); + + builder.put(identifier, new MetricsInfo(info, Paths.get("test.yaml"), i)); + } + + // Outlier query with huge changes + final var outlierIdentifier = PlannerMetricsProto.Identifier.newBuilder() + .setBlockName("test_block") + .setQuery("outlier_query") + .build(); + + final var outlierCounters = BASE_DUMMY_COUNTERS.toBuilder() + .setTaskCount(BASE_DUMMY_COUNTERS.getTaskCount() * 10 + 100) // +100 change (much larger than others) + .setTransformCount(BASE_DUMMY_COUNTERS.getTransformCount() + 50) // -50 change (much larger than others) + .build(); + + final var outlierInfo = PlannerMetricsProto.Info.newBuilder() + .setExplain("Index(outlier_index)") + .setCountersAndTimers(outlierCounters) + .build(); + + builder.put(outlierIdentifier, new MetricsInfo(outlierInfo, Paths.get("test.yaml"), 4)); + + return builder.build(); + } +} diff --git a/yaml-tests/src/test/java/com/apple/foundationdb/relational/yamltests/MetricsDiffIntegrationTest.java b/yaml-tests/src/test/java/com/apple/foundationdb/relational/yamltests/MetricsDiffIntegrationTest.java new file mode 100644 index 0000000000..9341340fbc --- /dev/null +++ b/yaml-tests/src/test/java/com/apple/foundationdb/relational/yamltests/MetricsDiffIntegrationTest.java @@ -0,0 +1,184 @@ +/* + * MetricsDiffIntegrationTest.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2015-2025 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.relational.yamltests; + +import com.apple.foundationdb.relational.api.exceptions.RelationalException; +import org.junit.jupiter.api.Test; + +import javax.annotation.Nonnull; +import java.nio.file.Path; +import java.nio.file.Paths; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +/** + * Integration tests for the complete metrics diff analysis workflow. + */ +class MetricsDiffIntegrationTest { + + @Test + void testCompleteWorkflowWithStatistics() throws Exception { + final MetricsDiffAnalyzer.MetricsAnalysisResult analysis = analyze( + "metrics-diff-test-base.metrics.yaml", + "metrics-diff-test-head.metrics.yaml"); + + // Verify the analysis detected expected changes + assertThat(analysis.getNewQueries()).hasSize(1); // table4 is new + assertThat(analysis.getDroppedQueries()).isEmpty(); + assertThat(analysis.getPlanAndMetricsChanged()).isEmpty(); // no plan changes + assertThat(analysis.getMetricsOnlyChanged()).hasSize(3); // table1, table2, table3 have metrics changes + + // Generate and verify report + final var report = analysis.generateReport(); + assertThat(report) + .contains("# 📊 Metrics Diff Analysis Report") + .contains("## Summary") + .contains("- New queries: 1") + .contains("test.metrics.yaml: 1") + .contains("- Dropped queries: 0") + .contains("- Plan changed + metrics changed: 0") + .contains("- Plan unchanged + metrics changed: 3") + // Verify statistical analysis + .contains("## Only Metrics Changed") + .contains("### Statistical Summary (Only Metrics Changed)") + .contains("**`task_count`**:") + .contains("Average change: +2.0") // All queries have +2 task_count + .contains("**`transform_count`**:") + .contains("Average change: +1.0"); // All queries have +1 transform_count + } + + @Test + void testCompleteWorkflowBackwards() throws Exception { + final MetricsDiffAnalyzer.MetricsAnalysisResult analysis = analyze( + "metrics-diff-test-head.metrics.yaml", + "metrics-diff-test-base.metrics.yaml"); + + // Verify the analysis detected expected changes + assertThat(analysis.getNewQueries()).isEmpty(); + assertThat(analysis.getDroppedQueries()).hasSize(1); + assertThat(analysis.getPlanAndMetricsChanged()).isEmpty(); // no plan changes + assertThat(analysis.getMetricsOnlyChanged()).hasSize(3); // table1, table2, table3 have metrics changes + + // Generate and verify report + final var report = analysis.generateReport(); + assertThat(report) + .contains("# 📊 Metrics Diff Analysis Report") + .contains("## Summary") + .contains("- New queries: 0") + .contains("- Dropped queries: 1") + .contains("[test.metrics.yaml:35](https://example.com/repo/blob/base/test.metrics.yaml#L35): `SELECT * FROM table4`") + .contains("- Plan changed + metrics changed: 0") + .contains("- Plan unchanged + metrics changed: 3") + // Verify statistical analysis + .contains("## Only Metrics Changed") + .contains("### Statistical Summary (Only Metrics Changed)") + .contains("**`task_count`**:") + .contains("Average change: -2.0") // All queries have -2 task_count + .contains("**`transform_count`**:") + .contains("Average change: -1.0"); // All queries have -1 transform_count + } + + @Test + void testOutlierDetection() throws Exception { + // Load test files with outlier case + final MetricsDiffAnalyzer.MetricsAnalysisResult analysis = analyze( + "metrics-outlier-test-base.metrics.yaml", + "metrics-outlier-test-head.metrics.yaml"); + + final var report = analysis.generateReport(); + + assertThat(report) + // Should detect the outlier query + .contains("### Significant Regressions (Only Metrics Changed)") + .contains("outlier_query") + .contains("[test.metrics.yaml:145](https://example.com/repo/blob/head/test.metrics.yaml#L145): `outlier_query`") + // Normal queries should have small changes (+1), outlier should have large changes (+100, +50) + .contains("`task_count`: 100 -> 200 (+100)") + .contains("`transform_count`: 50 -> 100 (+50)"); + } + + @Test + void testReportFormatting() throws Exception { + final MetricsDiffAnalyzer.MetricsAnalysisResult analysis = analyze( + "test-base.metrics.yaml", + "test-head.metrics.yaml"); + + final var report = analysis.generateReport(); + + assertThat(report) + // Verify markdown formatting + .contains("# 📊 Metrics Diff Analysis Report") + .contains("## Summary") + .contains("## New Queries") + .contains("## Plan and Metrics Changed") + .contains("## Only Metrics Changed") + // Verify statistical formatting patterns + .containsPattern("\\*\\*`\\w+`\\*\\*:") + .containsPattern("- Average change: [+-]?\\d+\\.\\d+") + .containsPattern("- Average regression: \\+?\\d+\\.\\d+") + .containsPattern("- Median change: [+-]?\\d+") + .containsPattern("- Median regression: \\+?\\d+") + .containsPattern("- Standard deviation: \\d+\\.\\d+") + .containsPattern("- Standard deviation of regressions: \\d+\\.\\d+") + .containsPattern("- Range: [+-]?\\d+ to [+-]?\\d+") + .containsPattern("- Range of regressions: \\+?\\d+ to \\+?\\d+") + .containsPattern("- Queries changed: \\d+") + .containsPattern("- Queries regressed: \\d+"); + } + + @Test + void testEmptyResultsReporting() throws Exception { + // Test with identical files (no changes) + final MetricsDiffAnalyzer.MetricsAnalysisResult analysis = analyze("test-base.metrics.yaml", "test-base.metrics.yaml"); + final var report = analysis.generateReport(); + + // Should show no changes + assertThat(report) + .contains("- New queries: 0") + .contains("- Dropped queries: 0") + .contains("- Plan changed + metrics changed: 0") + .contains("- Plan unchanged + metrics changed: 0"); + } + + @Nonnull + private static MetricsDiffAnalyzer.MetricsAnalysisResult analyze(@Nonnull String baseResource, @Nonnull String headResource) throws RelationalException { + // Load test resources + final var baseMetrics = YamlExecutionContext.loadMetricsFromYamlFile( + getTestResourcePath(baseResource)); + final var headMetrics = YamlExecutionContext.loadMetricsFromYamlFile( + getTestResourcePath(headResource)); + final Path basePath = Paths.get("."); + + final MetricsDiffAnalyzer analyzer = new MetricsDiffAnalyzer("base", "head", basePath, "https://example.com/repo/blob"); + final MetricsDiffAnalyzer.MetricsAnalysisResult.Builder analysisBuilder = analyzer.newAnalysisBuilder(); + analyzer.compareMetrics(baseMetrics, headMetrics, Paths.get("test.metrics.yaml"), analysisBuilder); + return analysisBuilder.build(); + } + + @Nonnull + private static Path getTestResourcePath(String filename) { + final var classLoader = Thread.currentThread().getContextClassLoader(); + final var resource = classLoader.getResource("metrics-diff/" + filename); + assertNotNull(resource, "Test resource not found: metrics-diff/" + filename); + return Paths.get(resource.getPath()); + } +} diff --git a/yaml-tests/src/test/java/com/apple/foundationdb/relational/yamltests/MetricsStatisticsTest.java b/yaml-tests/src/test/java/com/apple/foundationdb/relational/yamltests/MetricsStatisticsTest.java new file mode 100644 index 0000000000..ea78eb1660 --- /dev/null +++ b/yaml-tests/src/test/java/com/apple/foundationdb/relational/yamltests/MetricsStatisticsTest.java @@ -0,0 +1,230 @@ +/* + * MetricsStatisticsTest.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2015-2025 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.relational.yamltests; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.data.Offset.offset; + +class MetricsStatisticsTest { + + @Test + void testEmptyStatistics() { + final var stats = MetricsStatistics.newBuilder().build(); + final var fieldStats = stats.getFieldStatistics("task_count"); + + assertThat(fieldStats).isEqualTo(MetricsStatistics.FieldStatistics.EMPTY); + assertThat(fieldStats.hasChanges()).isFalse(); + assertThat(fieldStats.getChangedCount()).isZero(); + assertThat(fieldStats.getMean()).isZero(); + assertThat(fieldStats.getMedian()).isZero(); + assertThat(fieldStats.getQuantile(0.1)).isZero(); + assertThat(fieldStats.getStandardDeviation()).isZero(); + assertThat(fieldStats.getMin()).isZero(); + assertThat(fieldStats.getMax()).isZero(); + } + + @Test + void testSingleValueStatistics() { + final var stats = MetricsStatistics.newBuilder() + .addDifference("task_count", 10L, 20L) + .build(); + + final var fieldStats = stats.getFieldStatistics("task_count"); + + assertThat(fieldStats.hasChanges()).isTrue(); + assertThat(fieldStats.getChangedCount()).isOne(); + assertThat(fieldStats.getMean()).isEqualTo(10.0); + assertThat(fieldStats.getMedian()).isEqualTo(10); + assertThat(fieldStats.getStandardDeviation()).isZero(); // Single value has no deviation + assertThat(fieldStats.getQuantile(0.1)).isEqualTo(10L); + assertThat(fieldStats.getMin()).isEqualTo(10L); + assertThat(fieldStats.getMax()).isEqualTo(10L); + } + + @Test + void testMultipleValuesStatistics() { + // Add values: 2, 4, 6, 8, 10 + // Mean = 6.0, Median = 6.0, StdDev = sqrt(8) ≈ 2.83 + final var stats = MetricsStatistics.newBuilder() + .addDifference("task_count", 2L, 4L) + .addDifference("task_count", 4L, 8L) + .addDifference("task_count", 6L, 12L) + .addDifference("task_count", 8L, 16L) + .addDifference("task_count", 10L, 20L) + .build(); + + final var fieldStats = stats.getFieldStatistics("task_count"); + + assertThat(fieldStats.hasChanges()).isTrue(); + assertThat(fieldStats.getChangedCount()).isEqualTo(5); + assertThat(fieldStats.getMean()).isEqualTo(6.0); + assertThat(fieldStats.getMedian()).isEqualTo(6L); + assertThat(fieldStats.getStandardDeviation()).isCloseTo(2.83, offset(0.01)); + assertThat(fieldStats.getQuantile(0.2)).isEqualTo(4L); + assertThat(fieldStats.getMin()).isEqualTo(2L); + assertThat(fieldStats.getMax()).isEqualTo(10L); + } + + @Test + void testEvenNumberOfValuesMedian() { + // Add values: 1, 3, 5, 7 + // Median should be (3 + 5) / 2 = 4.0 + final var stats = MetricsStatistics.newBuilder() + .addDifference("task_count", 1L, 2L) + .addDifference("task_count", 3L, 6L) + .addDifference("task_count", 5L, 10L) + .addDifference("task_count", 7L, 14L) + .build(); + + final var fieldStats = stats.getFieldStatistics("task_count"); + + assertThat(fieldStats.getMedian()).isEqualTo(5L); + assertThat(fieldStats.getMean()).isEqualTo(4.0); + } + + @Test + void testNegativeValues() { + // Add both positive and negative values: -10, -5, 0, 5, 10 + // Mean = 0.0, Median = 0.0 + final var stats = MetricsStatistics.newBuilder() + .addDifference("task_count", 50L, 40L) + .addDifference("task_count", 40L, 35L) + .addDifference("task_count", 30L, 30L) + .addDifference("task_count", 20L, 25L) + .addDifference("task_count", 10L, 20L) + .build(); + + // For field statistics, the sign matters, and the positive and negative + // cancel out + final var fieldStats = stats.getFieldStatistics("task_count"); + assertThat(fieldStats.getChangedCount()).isEqualTo(5); + assertThat(fieldStats.getMean()).isZero(); + assertThat(fieldStats.getMedian()).isZero(); + assertThat(fieldStats.getMin()).isEqualTo(-10L); + assertThat(fieldStats.getMax()).isEqualTo(10L); + + // For regressions, we look only at positive changes, ignoring any negative + // or zero values + final var regressionStats = stats.getRegressionStatistics("task_count"); + assertThat(regressionStats.getChangedCount()).isEqualTo(2); + assertThat(regressionStats.getMean()).isCloseTo(7.5, offset(0.01)); + assertThat(regressionStats.getMedian()).isEqualTo(10L); + assertThat(regressionStats.getMin()).isEqualTo(5L); + assertThat(regressionStats.getMax()).isEqualTo(10L); + } + + @Test + void testMultipleFields() { + final var builder = new MetricsStatistics.Builder(); + + // Add different values for different fields + builder.addDifference("task_count", 10L, 20L); + builder.addDifference("task_count", 10L, 30L); + + builder.addDifference("transform_count", 5L, 10L); + builder.addDifference("transform_count", 5L, 20L); + builder.addDifference("transform_count", 5L, 30L); + + final var stats = builder.build(); + + // Check task_count statistics + final var taskStats = stats.getFieldStatistics("task_count"); + assertThat(taskStats.getChangedCount()).isEqualTo(2); + assertThat(taskStats.getMean()).isCloseTo(15.0, offset(0.01)); + assertThat(taskStats.getMedian()).isEqualTo(20L); + + // Check transform_count statistics + final var transformStats = stats.getFieldStatistics("transform_count"); + assertThat(transformStats.getChangedCount()).isEqualTo(3); + assertThat(transformStats.getMean()).isCloseTo(15.0, offset(0.01)); + assertThat(transformStats.getMedian()).isEqualTo(15L); + + // Check non-existent field + final var emptyStats = stats.getFieldStatistics("non_existent"); + assertThat(emptyStats).isEqualTo(MetricsStatistics.FieldStatistics.EMPTY); + } + + @Test + void testLargeDataset() { + final var builder = new MetricsStatistics.Builder(); + + // Add 1000 values from 1 to 1000 + for (long i = 1; i <= 1000; i++) { + builder.addDifference("task_count", 10L, 10L + i); + } + + final var stats = builder.build(); + final var fieldStats = stats.getFieldStatistics("task_count"); + + assertThat(fieldStats.getChangedCount()).isEqualTo(1000); + assertThat(fieldStats.getMean()).isCloseTo(500.5, offset(0.1)); + assertThat(fieldStats.getMedian()).isEqualTo(501L); + assertThat(fieldStats.getMin()).isOne(); + assertThat(fieldStats.getMax()).isEqualTo(1000); + + // Standard deviation for 1 to 1000 should be approximately 288.7 + assertThat(fieldStats.getStandardDeviation()).isCloseTo(288.7, offset(1.0)); + } + + @Test + void testDuplicateValues() { + final var builder = new MetricsStatistics.Builder(); + + // Add same value multiple times + builder.addDifference("task_count", 10L, 15L); + builder.addDifference("task_count", 20L, 25L); + builder.addDifference("task_count", 30L, 35L); + builder.addDifference("task_count", 40L, 50L); + builder.addDifference("task_count", 50L, 60L); + + final var stats = builder.build(); + final var fieldStats = stats.getFieldStatistics("task_count"); + + assertThat(fieldStats.getChangedCount()).isEqualTo(5); + assertThat(fieldStats.getMean()).isCloseTo(7.0, offset(0.01)); // (5+5+5+10+10)/5 = 7 + assertThat(fieldStats.getMedian()).isEqualTo(5L); // Middle value when sorted: [5,5,5,10,10] + assertThat(fieldStats.getQuantile(0.55)).isEqualTo(5L); + assertThat(fieldStats.getQuantile(0.65)).isEqualTo(10L); + assertThat(fieldStats.getMin()).isEqualTo(5L); + assertThat(fieldStats.getMax()).isEqualTo(10L); + } + + @Test + void testZeroValues() { + final var builder = new MetricsStatistics.Builder(); + + // Add zeros and non-zeros + builder.addDifference("task_count", 0L, 0L); + builder.addDifference("task_count", 0L, 1L); + builder.addDifference("task_count", 1L, 2L); + + final var stats = builder.build(); + final var fieldStats = stats.getFieldStatistics("task_count"); + + assertThat(fieldStats.getChangedCount()).isEqualTo(3); + assertThat(fieldStats.getMean()).isCloseTo(0.67, offset(0.01)); + assertThat(fieldStats.getMedian()).isOne(); + assertThat(fieldStats.getMin()).isZero(); + assertThat(fieldStats.getMax()).isOne(); + } +} diff --git a/yaml-tests/src/test/resources/metrics-diff/metrics-diff-test-base.metrics.yaml b/yaml-tests/src/test/resources/metrics-diff/metrics-diff-test-base.metrics.yaml new file mode 100644 index 0000000000..d548fd5dc3 --- /dev/null +++ b/yaml-tests/src/test/resources/metrics-diff/metrics-diff-test-base.metrics.yaml @@ -0,0 +1,34 @@ +statistics_test: + - query: "SELECT * FROM table1" + setup: [] + explain: "Index(table1_idx)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 0 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "SELECT * FROM table2" + setup: [] + explain: "Index(table2_idx)" + task_count: 20 + task_total_time_ms: 10 + transform_count: 10 + transform_time_ms: 5 + transform_yield_count: 0 + insert_time_ms: 2 + insert_new_count: 4 + insert_reused_count: 6 + - query: "SELECT * FROM table3" + setup: [] + explain: "Index(table3_idx)" + task_count: 30 + task_total_time_ms: 15 + transform_count: 15 + transform_time_ms: 7 + transform_yield_count: 0 + insert_time_ms: 3 + insert_new_count: 6 + insert_reused_count: 9 diff --git a/yaml-tests/src/test/resources/metrics-diff/metrics-diff-test-head.metrics.yaml b/yaml-tests/src/test/resources/metrics-diff/metrics-diff-test-head.metrics.yaml new file mode 100644 index 0000000000..aa5ed60cc4 --- /dev/null +++ b/yaml-tests/src/test/resources/metrics-diff/metrics-diff-test-head.metrics.yaml @@ -0,0 +1,45 @@ +statistics_test: + - query: "SELECT * FROM table1" + setup: [] + explain: "Index(table1_idx)" + task_count: 12 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 0 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "SELECT * FROM table2" + setup: [] + explain: "Index(table2_idx)" + task_count: 22 + task_total_time_ms: 10 + transform_count: 11 + transform_time_ms: 5 + transform_yield_count: 0 + insert_time_ms: 2 + insert_new_count: 4 + insert_reused_count: 6 + - query: "SELECT * FROM table3" + setup: [] + explain: "Index(table3_idx)" + task_count: 32 + task_total_time_ms: 15 + transform_count: 16 + transform_time_ms: 7 + transform_yield_count: 0 + insert_time_ms: 3 + insert_new_count: 6 + insert_reused_count: 9 + - query: "SELECT * FROM table4" + setup: [] + explain: "Index(table4_idx)" + task_count: 40 + task_total_time_ms: 20 + transform_count: 20 + transform_time_ms: 10 + transform_yield_count: 0 + insert_time_ms: 4 + insert_new_count: 8 + insert_reused_count: 12 diff --git a/yaml-tests/src/test/resources/metrics-diff/metrics-outlier-test-base.metrics.yaml b/yaml-tests/src/test/resources/metrics-diff/metrics-outlier-test-base.metrics.yaml new file mode 100644 index 0000000000..076f4a4bcb --- /dev/null +++ b/yaml-tests/src/test/resources/metrics-diff/metrics-outlier-test-base.metrics.yaml @@ -0,0 +1,155 @@ +outlier_test: + - query: "normal_query_1" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_2" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_3" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_4" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_5" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_6" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_7" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_8" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_9" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_10" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_11" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_12" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_13" + setup: [] + explain: "Index(normal_index)" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "outlier_query" + setup: [] + explain: "Index(outlier_index)" + task_count: 100 + task_total_time_ms: 50 + transform_count: 50 + transform_time_ms: 25 + transform_yield_count: 1 + insert_time_ms: 10 + insert_new_count: 2 + insert_reused_count: 3 diff --git a/yaml-tests/src/test/resources/metrics-diff/metrics-outlier-test-head.metrics.yaml b/yaml-tests/src/test/resources/metrics-diff/metrics-outlier-test-head.metrics.yaml new file mode 100644 index 0000000000..f7bfd5aa8c --- /dev/null +++ b/yaml-tests/src/test/resources/metrics-diff/metrics-outlier-test-head.metrics.yaml @@ -0,0 +1,155 @@ +outlier_test: + - query: "normal_query_1" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_2" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_3" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_4" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_5" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_6" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_7" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_8" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_9" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_10" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_11" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_12" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "normal_query_13" + setup: [] + explain: "Index(normal_index)" + task_count: 11 + task_total_time_ms: 5 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 1 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "outlier_query" + setup: [] + explain: "Index(outlier_index)" + task_count: 200 + task_total_time_ms: 100 + transform_count: 100 + transform_time_ms: 50 + transform_yield_count: 1 + insert_time_ms: 20 + insert_new_count: 2 + insert_reused_count: 3 diff --git a/yaml-tests/src/test/resources/metrics-diff/test-base.metrics.yaml b/yaml-tests/src/test/resources/metrics-diff/test-base.metrics.yaml new file mode 100644 index 0000000000..8393f2e965 --- /dev/null +++ b/yaml-tests/src/test/resources/metrics-diff/test-base.metrics.yaml @@ -0,0 +1,47 @@ +basic_queries: + - query: "SELECT id FROM users WHERE id = ?" + setup: [] + explain: "Index(users_by_id [[?, ?]])" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 0 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "SELECT name FROM users WHERE name LIKE 'John%'" + setup: [] + explain: "Index(users_by_name [[John, John~]])" + task_count: 20 + task_total_time_ms: 10 + transform_count: 15 + transform_time_ms: 8 + transform_yield_count: 2 + insert_time_ms: 3 + insert_new_count: 8 + insert_reused_count: 7 + - query: "SELECT * FROM orders WHERE customer_id = ?" + setup: [] + explain: "Index(orders_by_customer [[?, ?]])" + task_count: 15 + task_total_time_ms: 7 + transform_count: 12 + transform_time_ms: 5 + transform_yield_count: 1 + insert_time_ms: 2 + insert_new_count: 5 + insert_reused_count: 7 + +complex_queries: + - query: "SELECT u.name, COUNT(*) FROM users u JOIN orders o ON u.id = o.customer_id GROUP BY u.name" + setup: [] + explain: "GroupBy(Join(Index(users), Index(orders_by_customer)))" + task_count: 50 + task_total_time_ms: 25 + transform_count: 30 + transform_time_ms: 15 + transform_yield_count: 5 + insert_time_ms: 8 + insert_new_count: 15 + insert_reused_count: 20 diff --git a/yaml-tests/src/test/resources/metrics-diff/test-head.metrics.yaml b/yaml-tests/src/test/resources/metrics-diff/test-head.metrics.yaml new file mode 100644 index 0000000000..3474837fae --- /dev/null +++ b/yaml-tests/src/test/resources/metrics-diff/test-head.metrics.yaml @@ -0,0 +1,58 @@ +basic_queries: + - query: "SELECT id FROM users WHERE id = ?" + setup: [] + explain: "Index(users_by_id [[?, ?]])" + task_count: 10 + task_total_time_ms: 5 + transform_count: 5 + transform_time_ms: 2 + transform_yield_count: 0 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 3 + - query: "SELECT name FROM users WHERE name LIKE 'John%'" + setup: [] + explain: "Index(users_by_name [[John, John~]]) [IMPROVED]" + task_count: 18 + task_total_time_ms: 9 + transform_count: 12 + transform_time_ms: 6 + transform_yield_count: 1 + insert_time_ms: 2 + insert_new_count: 6 + insert_reused_count: 6 + - query: "SELECT * FROM orders WHERE customer_id = ?" + setup: [] + explain: "Covering_Index(orders_by_customer_with_details [[?, ?]])" + task_count: 12 + task_total_time_ms: 6 + transform_count: 8 + transform_time_ms: 4 + transform_yield_count: 0 + insert_time_ms: 1 + insert_new_count: 3 + insert_reused_count: 5 + - query: "SELECT email FROM users WHERE email = ?" + setup: [] + explain: "Index(users_by_email [[?, ?]])" + task_count: 8 + task_total_time_ms: 4 + transform_count: 6 + transform_time_ms: 2 + transform_yield_count: 0 + insert_time_ms: 1 + insert_new_count: 2 + insert_reused_count: 4 + +complex_queries: + - query: "SELECT u.name, COUNT(*) FROM users u JOIN orders o ON u.id = o.customer_id GROUP BY u.name" + setup: [] + explain: "GroupBy(Join(Index(users), Index(orders_by_customer)))" + task_count: 52 + task_total_time_ms: 26 + transform_count: 32 + transform_time_ms: 16 + transform_yield_count: 6 + insert_time_ms: 9 + insert_new_count: 17 + insert_reused_count: 22 diff --git a/yaml-tests/src/test/resources/metrics-diff/test-new-queries.metrics.yaml b/yaml-tests/src/test/resources/metrics-diff/test-new-queries.metrics.yaml new file mode 100644 index 0000000000..99236bb350 --- /dev/null +++ b/yaml-tests/src/test/resources/metrics-diff/test-new-queries.metrics.yaml @@ -0,0 +1,12 @@ +simple_queries: + - query: "SELECT * FROM products WHERE price > 100" + setup: [] + explain: "Index(products_by_price [[100, ∞]])" + task_count: 25 + task_total_time_ms: 12 + transform_count: 20 + transform_time_ms: 10 + transform_yield_count: 3 + insert_time_ms: 4 + insert_new_count: 10 + insert_reused_count: 15 diff --git a/yaml-tests/yaml-tests.gradle b/yaml-tests/yaml-tests.gradle index 66b1abb4bf..8ed4e50c92 100644 --- a/yaml-tests/yaml-tests.gradle +++ b/yaml-tests/yaml-tests.gradle @@ -59,6 +59,7 @@ dependencies { implementation(libs.protobuf) implementation(libs.protobuf.util) implementation(libs.snakeyaml) + implementation(libs.jcommander) testImplementation(libs.bundles.test.impl) testRuntimeOnly(libs.bundles.test.runtime) @@ -234,6 +235,44 @@ tasks.register('runDebug', JavaExec) { systemProperty("tests.runQuick", "true") } +tasks.register('analyzeMetrics', JavaExec) { + description = 'Analyze metrics differences between git references' + group = 'verification' + + mainClass = 'com.apple.foundationdb.relational.yamltests.MetricsDiffAnalyzer' + classpath = sourceSets.main.runtimeClasspath + + // Pass along arguments to metrics analysis + args = [] + if (project.hasProperty('metricsAnalysis.baseRef')) { + args += ['--base-ref', project.getProperty('metricsAnalysis.baseRef')] + } + if (project.hasProperty('metricsAnalysis.headRef')) { + args += ['--head-ref', project.getProperty('metricsAnalysis.headRef')] + } + if (project.hasProperty('metricsAnalysis.urlBase')) { + args += ['--url-base', project.getProperty('metricsAnalysis.urlBase')] + } + if (project.hasProperty('metricsAnalysis.output')) { + args += ['--output', project.getProperty('metricsAnalysis.output')] + } + if (project.hasProperty('metricsAnalysis.outlierQueries')) { + args += ['--outlier-queries', project.getProperty('metricsAnalysis.outlierQueries')] + } + args += ['--repository-root'] + if (project.hasProperty('metricsAnalysis.repositoryRoot')) { + args += [project.getProperty('metricsAnalysis.repositoryRoot')] + } else { + args += [rootProject.projectDir.toString()] + } + + // Make sure classes are built first + dependsOn classes + doFirst { + logger.info("Running metrics analysis with args: ${args}") + } +} + publishing { publications { library(MavenPublication) {