Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,14 @@ Commands:
### `generate-hashes` command

```terminal
Usage: bazel-diff generate-hashes [-hkvV] -b=<bazelPath> [-s=<seedFilepaths>]
-w=<workspacePath>
Usage: bazel-diff generate-hashes [-hkvV] [--[no-]useCquery] [-b=<bazelPath>]
[--contentHashPath=<contentHashPath>]
[-s=<seedFilepaths>] -w=<workspacePath>
[-co=<bazelCommandOptions>]...
[--cqueryCommandOptions=<cqueryCommandOptions>
]...
[--fineGrainedHashExternalRepos=<fineGrainedHa
shExternalRepos>]...
[-so=<bazelStartupOptions>]... <outputPath>
Writes to a file the SHA256 hashes for each Bazel Target in the provided
workspace.
Expand All @@ -95,12 +100,16 @@ workspace.
binary available in PATH will be used.
-co, --bazelCommandOptions=<bazelCommandOptions>
Additional space separated Bazel command options used
when invoking Bazel
when invoking `bazel query`
--contentHashPath=<contentHashPath>
Path to content hash json file. It's a map which maps
relative file path from workspace path to its
content hash. Files in this map will skip content
hashing and use provided value
--cqueryCommandOptions=<cqueryCommandOptions>
Additional space separated Bazel command options used
when invoking `bazel cquery`. This flag has no
effect if `--useCquery` is false.
--fineGrainedHashExternalRepos=<fineGrainedHashExternalRepos>
Comma separated list of external repos in which
fine-grained hashes are computed for the targets.
Expand All @@ -124,12 +133,24 @@ workspace.
-so, --bazelStartupOptions=<bazelStartupOptions>
Additional space separated Bazel client startup
options used when invoking Bazel
--[no-]useCquery If true, use cquery instead of query when generating
dependency graphs. Using cquery would yield more
accurate build graph at the cost of slower query
execution. When this is set, one usually also wants
to set `--cqueryCommandOptions` to specify a
targeting platform. Note that this flag only works
with Bazel 6.2.0 or above because lower versions
do not support the `--query_file` flag.
-v, --verbose Display query string, missing files and elapsed time
-V, --version Print version information and exit.
-w, --workspacePath=<workspacePath>
Path to Bazel workspace directory.
```

**Note**: The `--useCquery` flag may not work with very large repos due to a limitation
of Bazel. You may want to fall back to normal query mode in that case.
See https://github.com/bazelbuild/bazel/issues/17743 for more details.

### What does the SHA256 value of `generate-hashes` represent?

`generate-hashes` is a canonical SHA256 value representing all attributes and inputs into a target. These inputs
Expand Down
20 changes: 2 additions & 18 deletions cli/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -25,34 +25,18 @@ kt_jvm_library(
name = "cli-lib",
srcs = glob(["src/main/kotlin/**/*.kt"]),
deps = [
":build_java_proto",
"@bazel_diff_maven//:com_google_code_gson_gson",
"@bazel_diff_maven//:com_google_guava_guava",
"@bazel_diff_maven//:info_picocli_picocli",
"@bazel_diff_maven//:io_insert_koin_koin_core_jvm",
"@bazel_diff_maven//:org_apache_commons_commons_pool2",
"@bazel_diff_maven//:org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm",
"@bazel_tools//src/main/protobuf:analysis_v2_java_proto",
"@bazel_tools//src/main/protobuf:build_java_proto",
"@com_google_protobuf//:protobuf_java",
],
)

java_proto_library(
name = "build_java_proto",
deps = [":build_proto"],
)

proto_library(
name = "build_proto",
srcs = [":build_proto_gen"],
)

genrule(
name = "build_proto_gen",
srcs = ["@bazel_tools//src/main/protobuf:build.proto"],
outs = ["build.proto"],
cmd = "cp $< $@",
)

kt_jvm_test(
name = "BuildGraphHasherTest",
test_class = "com.bazel_diff.hash.BuildGraphHasherTest",
Expand Down
24 changes: 21 additions & 3 deletions cli/src/main/kotlin/com/bazel_diff/bazel/BazelClient.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,33 @@ import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import java.util.*

class BazelClient(private val fineGrainedHashExternalRepos: Set<String>) : KoinComponent {
class BazelClient(private val useCquery: Boolean, private val fineGrainedHashExternalRepos: Set<String>) : KoinComponent {
private val logger: Logger by inject()
private val queryService: BazelQueryService by inject()

suspend fun queryAllTargets(): List<BazelTarget> {
val queryEpoch = Calendar.getInstance().getTimeInMillis()

val query = listOf("//external:all-targets", "//...:all-targets") + fineGrainedHashExternalRepos.map { "@$it//...:all-targets" }
val targets = queryService.query(query.joinToString(" + ") { "'$it'" })
val repoTargetsQuery = listOf("//external:all-targets")
val targets = if (useCquery) {
// Explicitly listing external repos here sometimes causes issues mentioned at
// https://bazel.build/query/cquery#recursive-target-patterns. Hence, we query all dependencies with `deps`
// instead. However, we still need to append all "//external:*" targets because fine-grained hash
// computation depends on hashing of source files in external repos as well, which is limited to repos
// explicitly mentioned in `fineGrainedHashExternalRepos` flag. Therefore, for any repos not mentioned there
// we are still relying on the repo-generation target under `//external` to compute the hash.
//
// In addition, we must include all source dependencies in this query in order for them to show up in
// `configuredRuleInput`. Hence, one must not filter them out with `kind(rule, deps(..))`. However, these
// source targets are omitted inside BazelQueryService with the custom starlark function used to print
// labels.
(queryService.query("deps(//...:all-targets)", useCquery = true) +
queryService.query(repoTargetsQuery.joinToString(" + ") { "'$it'" }))
.distinctBy { it.rule.name }
} else {
val buildTargetsQuery = listOf("//...:all-targets") + fineGrainedHashExternalRepos.map { "@$it//...:all-targets" }
Copy link
Contributor

@honnix honnix Jan 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What was the reason removing //external:all-targets from query branch? Without those repo generation targets, how would "coarse grained" hashes get computed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#263 is a related change where I removed rule input transformation for query.

queryService.query((repoTargetsQuery + buildTargetsQuery).joinToString(" + ") { "'$it'" })
}
val queryDuration = Calendar.getInstance().getTimeInMillis() - queryEpoch
logger.i { "All targets queried in $queryDuration" }
return targets.mapNotNull { target: Build.Target ->
Expand Down
120 changes: 93 additions & 27 deletions cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ package com.bazel_diff.bazel
import com.bazel_diff.log.Logger
import com.bazel_diff.process.Redirect
import com.bazel_diff.process.process
import com.google.devtools.build.lib.analysis.AnalysisProtosV2
import com.google.devtools.build.lib.query2.proto.proto2api.Build
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.runBlocking
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import java.nio.charset.StandardCharsets
import java.io.File
import java.nio.file.Files
import java.nio.file.Path

Expand All @@ -17,16 +18,53 @@ class BazelQueryService(
private val bazelPath: Path,
private val startupOptions: List<String>,
private val commandOptions: List<String>,
private val keepGoing: Boolean?,
private val cqueryOptions: List<String>,
private val keepGoing: Boolean,
private val noBazelrc: Boolean,
) : KoinComponent {
private val logger: Logger by inject()

/**
 * Runs [query] through Bazel and returns the targets parsed from the proto output.
 *
 * When [useCquery] is true, an additional cquery pass is executed first to collect the
 * labels of compatible targets; the parsed cquery results are then restricted to targets
 * whose rule name appears in that set. When it is false, the output is read as a sequence
 * of length-delimited [Build.Target] messages.
 *
 * @param query the query expression handed to Bazel
 * @param useCquery whether to run `cquery` instead of `query`
 * @return the targets read from the query output
 */
suspend fun query(query: String, useCquery: Boolean = false): List<Build.Target> {
    // The proto output by itself still gives no direct way to tell whether a target is
    // compatible. As a workaround, an extra cquery is issued first, using the trick from
    // https://bazel.build/extending/platforms#cquery-incompatible-target-detection, to
    // find all compatible targets.
    val compatibleLabels: Set<String> = if (useCquery) {
        runQuery(query, useCquery = true, outputCompatibleTargets = true)
            .useLines { lines -> lines.filter { line -> line.isNotBlank() }.toSet() }
    } else {
        emptySet()
    }
    val protoFile = runQuery(query, useCquery)

    return protoFile.inputStream().buffered().use { stream ->
        if (useCquery) {
            AnalysisProtosV2.CqueryResult.parseFrom(stream)
                .resultsList
                .map { configured -> configured.target }
                .filter { target -> target.rule.name in compatibleLabels }
        } else {
            // parseDelimitedFrom yields null at EOF, which ends the sequence.
            generateSequence { Build.Target.parseDelimitedFrom(stream) }.toList()
        }
    }
}

@OptIn(ExperimentalCoroutinesApi::class)
suspend fun query(query: String): List<Build.Target> {
val tempFile = Files.createTempFile(null, ".txt")
val outputFile = Files.createTempFile(null, ".bin")
Files.write(tempFile, query.toByteArray(StandardCharsets.UTF_8))
private suspend fun runQuery(query: String, useCquery: Boolean, outputCompatibleTargets: Boolean = false): File {
val queryFile = Files.createTempFile(null, ".txt").toFile()
queryFile.deleteOnExit()
val outputFile = Files.createTempFile(null, ".bin").toFile()
outputFile.deleteOnExit()

queryFile.writeText(query)
logger.i { "Executing Query: $query" }

val cmd: MutableList<String> = ArrayList<String>().apply {
Expand All @@ -35,41 +73,69 @@ class BazelQueryService(
add("--bazelrc=/dev/null")
}
addAll(startupOptions)
add("query")
if (useCquery) {
add("cquery")
add("--transitions=lite")
} else {
add("query")
}
add("--output")
add("streamed_proto")
add("--order_output=no")
if (keepGoing != null && keepGoing) {
if (useCquery) {
if (outputCompatibleTargets) {
add("starlark")
add("--starlark:file")
val cqueryOutputFile = Files.createTempFile(null, ".cquery").toFile()
cqueryOutputFile.deleteOnExit()
cqueryOutputFile.writeText("""
def format(target):
if providers(target) == None:
# skip printing non-target results. That is, source files and generated files won't be
# printed
return ""
if "IncompatiblePlatformProvider" not in providers(target):
label = str(target.label)
if label.startswith("@//"):
# normalize label to be consistent with content inside proto
return label[1:]
else:
return label
return ""
""".trimIndent())
add(cqueryOutputFile.toString())
} else {
// Unfortunately, cquery does not support streamed_proto yet.
// See https://github.com/bazelbuild/bazel/issues/17743. This poses an issue for large monorepos.
add("proto")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bummer here but seems unavoidable for now

Copy link
Contributor Author

@tgeng tgeng Jun 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah :(. BTW, I realized there is a bug with the first commit so I have pushed another update with one more test covering the basic source code change case.

It seems that the implementation can be cleaner if source code hashing and rule target hashing are done uniformly with a deps query to include everything (rather than separate queries on source and rule targets). But that change would be pretty big so I didn't attempt it.

}
} else {
add("streamed_proto")
}
if (!useCquery) {
add("--order_output=no")
}
if (keepGoing) {
add("--keep_going")
}
addAll(commandOptions)
if (useCquery) {
addAll(cqueryOptions)
} else {
addAll(commandOptions)
}
add("--query_file")
add(tempFile.toString())
add(queryFile.toString())
}

val result = runBlocking {
process(
*cmd.toTypedArray(),
stdout = Redirect.ToFile(outputFile.toFile()),
stdout = Redirect.ToFile(outputFile),
workingDirectory = workingDirectory.toFile(),
stderr = Redirect.PRINT,
destroyForcibly = true,
)
}

if(result.resultCode != 0) throw RuntimeException("Bazel query failed, exit code ${result.resultCode}")

val targets = mutableListOf<Build.Target>()
outputFile.toFile().inputStream().buffered().use {stream ->
while (true) {
val target = Build.Target.parseDelimitedFrom(stream) ?: break
// EOF
targets.add(target)
}
}

Files.delete(tempFile)
Files.delete(outputFile)
return targets
if (result.resultCode != 0) throw RuntimeException("Bazel query failed, exit code ${result.resultCode}")
return outputFile
}
}
12 changes: 10 additions & 2 deletions cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,16 @@ class BazelRule(private val rule: Build.Rule) {
}
}

fun ruleInputList(fineGrainedHashExternalRepos: Set<String>): List<String> {
return rule.ruleInputList.map { ruleInput: String -> transformRuleInput(fineGrainedHashExternalRepos, ruleInput) }
fun ruleInputList(useCquery: Boolean, fineGrainedHashExternalRepos: Set<String>): List<String> {
return if (useCquery) {
rule.configuredRuleInputList.map { it.label } +
rule.ruleInputList.map { ruleInput: String -> transformRuleInput(fineGrainedHashExternalRepos, ruleInput) }
// Only keep the non-fine-grained ones because the others are already covered by configuredRuleInputList
.filter { it.startsWith("//external:") }
.distinct()
} else {
rule.ruleInputList.map { ruleInput: String -> transformRuleInput(fineGrainedHashExternalRepos, ruleInput) }
}
}

val name: String = rule.name
Expand Down
20 changes: 19 additions & 1 deletion cli/src/main/kotlin/com/bazel_diff/cli/GenerateHashesCommand.kt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class GenerateHashesCommand : Callable<Int> {

@CommandLine.Option(
names = ["-co", "--bazelCommandOptions"],
description = ["Additional space separated Bazel command options used when invoking Bazel"],
description = ["Additional space separated Bazel command options used when invoking `bazel query`"],
scope = CommandLine.ScopeType.INHERIT,
converter = [OptionsConverter::class],
)
Expand All @@ -73,6 +73,22 @@ class GenerateHashesCommand : Callable<Int> {
)
var fineGrainedHashExternalRepos: Set<String> = emptySet()

/**
 * Whether dependency graphs are generated with `bazel cquery` instead of `bazel query`.
 * The option is negatable on the command line and is off by default.
 */
@CommandLine.Option(
    names = ["--useCquery"],
    negatable = true,
    description = ["If true, use cquery instead of query when generating dependency graphs. Using cquery would yield more accurate build graph at the cost of slower query execution. When this is set, one usually also wants to set `--cqueryCommandOptions` to specify a targeting platform. Note that this flag only works with Bazel 6.2.0 or above because lower versions do not support the `--query_file` flag."],
    scope = CommandLine.ScopeType.INHERIT
)
var useCquery = false

/**
 * Extra command options for `bazel cquery` invocations, converted from the raw
 * command-line value by [OptionsConverter]. Empty by default.
 */
@CommandLine.Option(
    names = ["--cqueryCommandOptions"],
    description = ["Additional space separated Bazel command options used when invoking `bazel cquery`. This flag has no effect if `--useCquery` is false."],
    scope = CommandLine.ScopeType.INHERIT,
    converter = [OptionsConverter::class],
)
var cqueryCommandOptions: List<String> = emptyList()

@CommandLine.Option(
names = ["-k", "--keep_going"],
negatable = true,
Expand Down Expand Up @@ -108,6 +124,8 @@ class GenerateHashesCommand : Callable<Int> {
contentHashPath,
bazelStartupOptions,
bazelCommandOptions,
cqueryCommandOptions,
useCquery,
keepGoing,
fineGrainedHashExternalRepos,
),
Expand Down
9 changes: 6 additions & 3 deletions cli/src/main/kotlin/com/bazel_diff/di/Modules.kt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ fun hasherModule(
contentHashPath: File?,
startupOptions: List<String>,
commandOptions: List<String>,
keepGoing: Boolean?,
cqueryOptions: List<String>,
useCquery: Boolean,
keepGoing: Boolean,
fineGrainedHashExternalRepos: Set<String>,
): Module = module {
val debug = System.getProperty("DEBUG", "false").equals("true")
Expand All @@ -38,14 +40,15 @@ fun hasherModule(
bazelPath,
startupOptions,
commandOptions,
cqueryOptions,
keepGoing,
debug
)
}
single { BazelClient(fineGrainedHashExternalRepos) }
single { BazelClient(useCquery, fineGrainedHashExternalRepos) }
single { BuildGraphHasher(get()) }
single { TargetHasher() }
single { RuleHasher(fineGrainedHashExternalRepos) }
single { RuleHasher(useCquery, fineGrainedHashExternalRepos) }
single { SourceFileHasher(fineGrainedHashExternalRepos) }
single(named("working-directory")) { workingDirectory }
single(named("output-base")) {
Expand Down
Loading