diff --git a/Cargo.lock b/Cargo.lock index ee056747..9c56b98a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1110,11 +1110,11 @@ dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http 0.62.5", + "aws-smithy-http 0.62.6", "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.3.4", + "aws-smithy-types 1.3.5", "aws-types", "bytes", "fastrand", @@ -1791,6 +1791,10 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "build-info" +version = "0.1.0" + [[package]] name = "bumpalo" version = "3.19.0" @@ -3302,6 +3306,7 @@ dependencies = [ "api-snowflake-rest-sessions", "axum 0.8.7", "base64 0.22.1", + "build-info", "catalog-metastore", "cfg-if", "executor", @@ -3327,6 +3332,7 @@ dependencies = [ "api-snowflake-rest", "api-snowflake-rest-sessions", "axum 0.8.7", + "build-info", "catalog-metastore", "clap", "console-subscriber", diff --git a/Cargo.toml b/Cargo.toml index 29251391..8fc66f07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ members = [ "crates/error-stack-trace", "crates/embucket-lambda", "crates/state-store", + "crates/build-info", ] resolver = "2" package.license-file = "LICENSE" diff --git a/crates/build-info/Cargo.toml b/crates/build-info/Cargo.toml new file mode 100644 index 00000000..53a53ae3 --- /dev/null +++ b/crates/build-info/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "build-info" +version = "0.1.0" +edition = "2021" +license-file.workspace = true + +[dependencies] +# No runtime dependencies - all info comes from env!() at compile time + +[lints] +workspace = true diff --git a/crates/build-info/build.rs b/crates/build-info/build.rs new file mode 100644 index 00000000..130736bd --- /dev/null +++ b/crates/build-info/build.rs @@ -0,0 +1,143 @@ +use std::process::Command; + +fn main() { + // Capture git commit SHA (full) + let git_sha = run_git_command(&["rev-parse", "HEAD"]).unwrap_or_else(|| "unknown".to_string()); + + // Capture git commit SHA (short, 8 chars) 
+ let git_sha_short = run_git_command(&["rev-parse", "--short=8", "HEAD"]) + .unwrap_or_else(|| "unknown".to_string()); + + // Capture git branch name + let git_branch = run_git_command(&["rev-parse", "--abbrev-ref", "HEAD"]) + .unwrap_or_else(|| "unknown".to_string()); + + // Capture git describe for semantic versioning + // Format: v0.1.0-5-g7b92aa23 (tag-commits_since_tag-short_sha) + // or v0.1.0 if on a tag, or v0.1.0-dirty if dirty + let git_describe = run_git_command(&["describe", "--tags", "--always", "--dirty"]) + .or_else(|| { + // Fallback to CARGO_PKG_VERSION when the git command fails or prints nothing (with --always, a repository without tags still yields the abbreviated commit hash, so missing tags alone do not reach this branch) + std::env::var("CARGO_PKG_VERSION").ok() + }) + .unwrap_or_else(|| "unknown".to_string()); + + // Check if repository has uncommitted changes + let git_dirty = is_git_dirty(); + + // Capture the build date in ISO 8601 format (YYYY-MM-DD); SOURCE_DATE_EPOCH (seconds since the Unix epoch), when set and parseable, takes precedence over the current time. NOTE(review): the type argument of parse::() below is empty in this copy of the patch - Duration::from_secs takes a u64, so parse::<u64>() is presumably intended; confirm against the real file + let build_timestamp = std::env::var("SOURCE_DATE_EPOCH") + .ok() + .and_then(|epoch| { + use std::time::UNIX_EPOCH; + let secs = epoch.parse::().ok()?; + let time = UNIX_EPOCH + std::time::Duration::from_secs(secs); + Some(format_timestamp(time)) + }) + .unwrap_or_else(|| format_timestamp(std::time::SystemTime::now())); + + // Export the captured values to the compilation of this crate, where lib.rs reads them with env!() + println!("cargo:rustc-env=GIT_SHA={git_sha}"); + println!("cargo:rustc-env=GIT_SHA_SHORT={git_sha_short}"); + println!("cargo:rustc-env=GIT_BRANCH={git_branch}"); + println!("cargo:rustc-env=GIT_DESCRIBE={git_describe}"); + println!("cargo:rustc-env=GIT_DIRTY={git_dirty}"); + println!("cargo:rustc-env=BUILD_TIMESTAMP={build_timestamp}"); + + // Rerun build script if git HEAD changes. NOTE(review): Cargo appears to resolve relative rerun-if-changed paths against the package directory (crates/build-info), not the repository root - confirm that .git/HEAD and the ref path below actually exist from there + println!("cargo:rerun-if-changed=.git/HEAD"); + // Also rerun if the current branch ref changes + if let Some(branch_ref) = run_git_command(&["symbolic-ref", "HEAD"]) { + let ref_path = format!(".git/{branch_ref}"); + println!("cargo:rerun-if-changed={ref_path}"); + } +} + +/// Runs a git command and returns the output as a trimmed string, or None if the command fails or prints nothing. 
/// Runs `git` with the given arguments and returns its trimmed standard output.
///
/// Returns `None` when the process cannot be started, exits with a non-zero
/// status, prints bytes that are not valid UTF-8, or prints nothing but
/// whitespace.
fn run_git_command(args: &[&str]) -> Option<String> {
    let output = Command::new("git").args(args).output().ok()?;
    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8(output.stdout).ok()?;
    let trimmed = stdout.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_owned())
}

/// Checks whether the git work tree has uncommitted changes (modified, staged,
/// or untracked files).
///
/// Returns `"true"` or `"false"` as a string so the value can be passed straight
/// to `cargo:rustc-env`. When git is missing or the directory is not a
/// repository the answer is `"false"`: nothing can be said about the tree, so it
/// is not reported as dirty.
fn is_git_dirty() -> String {
    // `git diff-index --quiet HEAD --` exits with status 1 when tracked files
    // differ from HEAD. Any other non-zero status (e.g. 128 outside a
    // repository) is an error, not a sign of local changes, so it must not be
    // counted as "dirty".
    let tracked_changed = Command::new("git")
        .args(["diff-index", "--quiet", "HEAD", "--"])
        .status()
        .is_ok_and(|status| status.code() == Some(1));

    // `run_git_command` maps empty output to `None`, so `Some(_)` means that at
    // least one untracked, non-ignored file was listed.
    let dirty = tracked_changed
        || run_git_command(&["ls-files", "--others", "--exclude-standard"]).is_some();

    dirty.to_string()
}
/// Formats a `SystemTime` as an ISO 8601 calendar date (`YYYY-MM-DD`).
///
/// Only whole days elapsed since the Unix epoch are considered; the time of day
/// is discarded and leap seconds are ignored. Times before the epoch are
/// clamped to `1970-01-01`.
fn format_timestamp(time: std::time::SystemTime) -> String {
    use std::time::UNIX_EPOCH;

    // Length in days of one full 400-year Gregorian cycle (97 leap years).
    const DAYS_PER_400_YEARS: u64 = 146_097;

    let days_since_epoch = time
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs() / 86_400);

    // The leap-year pattern repeats every 400 years, so whole cycles are skipped
    // arithmetically. This bounds the loop below to at most 400 iterations no
    // matter how far in the future `time` lies.
    let mut year = 1970 + 400 * (days_since_epoch / DAYS_PER_400_YEARS);
    let mut day_of_year = days_since_epoch % DAYS_PER_400_YEARS;

    loop {
        let year_len = if is_leap_year(year) { 366 } else { 365 };
        if day_of_year < year_len {
            break;
        }
        day_of_year -= year_len;
        year += 1;
    }

    let february = if is_leap_year(year) { 29 } else { 28 };
    let month_lengths: [u64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];

    // `day_of_year` is zero-based; calendar days start at 1.
    let mut month = 1;
    let mut day = day_of_year + 1;
    for len in month_lengths {
        if day <= len {
            break;
        }
        day -= len;
        month += 1;
    }

    format!("{year:04}-{month:02}-{day:02}")
}

/// Returns `true` when `year` is a leap year in the Gregorian calendar.
const fn is_leap_year(year: u64) -> bool {
    (year.is_multiple_of(4) && !year.is_multiple_of(100)) || year.is_multiple_of(400)
}

// ---------------------------------------------------------------------------
// Remainder of this physical line of the patch, kept (line breaks restored) so
// that the start of the next file is not lost. These are patch lines for
// crates/build-info/src/lib.rs, not part of build.rs:
//
//   diff --git a/crates/build-info/src/lib.rs b/crates/build-info/src/lib.rs
//   new file mode 100644
//   index 00000000..8ad276ac
//   --- /dev/null
//   +++ b/crates/build-info/src/lib.rs
//   @@ -0,0 +1,91 @@
//   +//! Build-time information for Embucket binaries.
//   +//!
//   +//! This crate provides access to version and git metadata captured at build time.
//   +//! All information is embedded at compile time via environment variables set by build.rs.
//   +
//   +/// Build information for Embucket binaries.
// ---------------------------------------------------------------------------
+pub struct BuildInfo; + +impl BuildInfo { + /// Version of the build-info crate itself, read from this crate's own Cargo.toml via CARGO_PKG_VERSION (e.g., "0.1.0"); it is not the version of the binary that depends on this crate + pub const VERSION: &'static str = env!("CARGO_PKG_VERSION"); + + /// Full git commit hash (e.g., "7b92aa2347..."), or "unknown" if git could not be queried at build time + pub const GIT_SHA: &'static str = env!("GIT_SHA"); + + /// Short git commit hash (e.g., "7b92aa23"), or "unknown" if git could not be queried at build time + pub const GIT_SHA_SHORT: &'static str = env!("GIT_SHA_SHORT"); + + /// Git branch name (e.g., "main"), or "unknown" if git could not be queried at build time + pub const GIT_BRANCH: &'static str = env!("GIT_BRANCH"); + + /// Git describe output - semantic version from tags + /// Format examples: + /// - "v0.1.0" - on a tag + /// - "v0.1.0-5-g7b92aa23" - 5 commits after tag v0.1.0 + /// - "v0.1.0-dirty" - on a tag with uncommitted changes + /// - "7b92aa23" - no tags exist, just the commit hash + pub const GIT_DESCRIBE: &'static str = env!("GIT_DESCRIBE"); + + /// Whether the repository had uncommitted changes ("true" or "false") + pub const GIT_DIRTY: &'static str = env!("GIT_DIRTY"); + + /// Build date in ISO 8601 format (YYYY-MM-DD, e.g., "2025-12-13"); date only, no time of day + pub const BUILD_TIMESTAMP: &'static str = env!("BUILD_TIMESTAMP"); + + /// Returns a formatted version string with git metadata. + /// + /// Format: "0.1.0 (7b92aa23) on main built 2025-12-13" + /// If dirty: "0.1.0 (7b92aa23-dirty) on main built 2025-12-13" + #[must_use] + pub fn full_version() -> String { + let dirty_suffix = if Self::GIT_DIRTY == "true" { + "-dirty" + } else { + "" + }; + format!( + "{} ({}{}) on {} built {}", + Self::VERSION, + Self::GIT_SHA_SHORT, + dirty_suffix, + Self::GIT_BRANCH, + Self::BUILD_TIMESTAMP + ) + } + + /// Returns true if the repository had uncommitted changes at build time. 
+ #[must_use] + pub fn is_dirty() -> bool { + Self::GIT_DIRTY == "true" + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_build_info_constants() { + // These should all be non-empty (either real values or "unknown") + assert!(!BuildInfo::VERSION.is_empty()); + assert!(!BuildInfo::GIT_SHA.is_empty()); + assert!(!BuildInfo::GIT_SHA_SHORT.is_empty()); + assert!(!BuildInfo::GIT_BRANCH.is_empty()); + assert!(!BuildInfo::GIT_DIRTY.is_empty()); + assert!(!BuildInfo::BUILD_TIMESTAMP.is_empty()); + } + + #[test] + fn test_full_version() { + let version = BuildInfo::full_version(); + // Should contain at least the version number + assert!(version.contains(BuildInfo::VERSION)); + } + + #[test] + fn test_is_dirty() { + // Should return a boolean without panicking + let _ = BuildInfo::is_dirty(); + } +} diff --git a/crates/embucket-lambda/Cargo.toml b/crates/embucket-lambda/Cargo.toml index 8f54e70c..e03e8192 100644 --- a/crates/embucket-lambda/Cargo.toml +++ b/crates/embucket-lambda/Cargo.toml @@ -9,6 +9,7 @@ api-snowflake-rest = { path = "../api-snowflake-rest" } api-snowflake-rest-sessions = { path = "../api-snowflake-rest-sessions" } catalog-metastore = { path = "../catalog-metastore" } executor = { path = "../executor" } +build-info = { path = "../build-info" } lambda_http = "0.17" tokio = { workspace = true } tracing = { workspace = true } diff --git a/crates/embucket-lambda/src/config.rs b/crates/embucket-lambda/src/config.rs index 37ec5316..800eb54b 100644 --- a/crates/embucket-lambda/src/config.rs +++ b/crates/embucket-lambda/src/config.rs @@ -1,3 +1,4 @@ +use build_info::BuildInfo; use executor::utils::{Config as ExecutionConfig, MemPoolType}; use std::{env, path::PathBuf}; @@ -40,7 +41,7 @@ impl EnvConfig { mem_pool_size_mb: parse_env("MEM_POOL_SIZE_MB"), mem_enable_track_consumers_pool: parse_env("MEM_ENABLE_TRACK_CONSUMERS_POOL"), disk_pool_size_mb: parse_env("DISK_POOL_SIZE_MB"), - embucket_version: env_or_default("EMBUCKET_VERSION", 
"0.1.0"), + embucket_version: env_or_default("EMBUCKET_VERSION", BuildInfo::VERSION), metastore_config: env::var("METASTORE_CONFIG").ok().map(PathBuf::from), jwt_secret: env::var("JWT_SECRET").ok(), max_concurrent_table_fetches: parse_env("MAX_CONCURRENT_TABLE_FETCHES").unwrap_or(5), diff --git a/crates/embucket-lambda/src/main.rs b/crates/embucket-lambda/src/main.rs index a7c49571..9bcf5210 100644 --- a/crates/embucket-lambda/src/main.rs +++ b/crates/embucket-lambda/src/main.rs @@ -10,6 +10,7 @@ use api_snowflake_rest_sessions::session::SESSION_EXPIRATION_SECONDS; use axum::Router; use axum::body::Body as AxumBody; use axum::extract::connect_info::ConnectInfo; +use build_info::BuildInfo; use catalog_metastore::metastore_settings_config::MetastoreSettingsConfig; use http::HeaderMap; use http_body_util::BodyExt; @@ -33,6 +34,15 @@ type InitResult = Result>; async fn main() -> Result<(), LambdaError> { init_tracing(); + // Log version and build information on startup + info!( + version = %BuildInfo::GIT_DESCRIBE, + git_sha = %BuildInfo::GIT_SHA_SHORT, + git_branch = %BuildInfo::GIT_BRANCH, + build_timestamp = %BuildInfo::BUILD_TIMESTAMP, + "embucket-lambda started" + ); + let env_config = EnvConfig::from_env(); info!( data_format = %env_config.data_format, @@ -69,7 +79,11 @@ struct LambdaApp { impl LambdaApp { #[tracing::instrument(name = "lambda_app_initialize", skip_all, fields( data_format = %config.data_format, - max_concurrency = config.max_concurrency_level + max_concurrency = config.max_concurrency_level, + version = %BuildInfo::GIT_DESCRIBE, + git_sha = %BuildInfo::GIT_SHA_SHORT, + git_branch = %BuildInfo::GIT_BRANCH, + build_timestamp = %BuildInfo::BUILD_TIMESTAMP, ))] async fn initialize(config: EnvConfig) -> InitResult { let snowflake_cfg = SnowflakeServerConfig::new( @@ -113,7 +127,11 @@ impl LambdaApp { http.method = %request.method(), http.uri = %request.uri(), http.request_id = tracing::field::Empty, - http.status_code = tracing::field::Empty + 
http.status_code = tracing::field::Empty, + version = %BuildInfo::GIT_DESCRIBE, + git_sha = %BuildInfo::GIT_SHA_SHORT, + git_branch = %BuildInfo::GIT_BRANCH, + build_timestamp = %BuildInfo::BUILD_TIMESTAMP, ))] async fn handle_event(&self, request: Request) -> Result, LambdaError> { let (parts, body) = request.into_parts(); diff --git a/crates/embucketd/Cargo.toml b/crates/embucketd/Cargo.toml index e8d35fc1..a4510782 100644 --- a/crates/embucketd/Cargo.toml +++ b/crates/embucketd/Cargo.toml @@ -10,6 +10,7 @@ executor = { path = "../executor" } catalog-metastore = { path = "../catalog-metastore" } api-snowflake-rest = { path = "../api-snowflake-rest" } api-snowflake-rest-sessions = { path = "../api-snowflake-rest-sessions" } +build-info = { path = "../build-info" } axum = { workspace = true } clap = { workspace = true } console-subscriber = { version = "0.4.1" } diff --git a/crates/embucketd/src/main.rs b/crates/embucketd/src/main.rs index d758572b..e14bbeda 100644 --- a/crates/embucketd/src/main.rs +++ b/crates/embucketd/src/main.rs @@ -16,6 +16,7 @@ use axum::{ Json, Router, routing::{get, post}, }; +use build_info::BuildInfo; use catalog_metastore::metastore_settings_config::MetastoreSettingsConfig; use clap::Parser; use dotenv::dotenv; @@ -100,6 +101,15 @@ async fn async_main( opts: cli::CliOpts, tracing_provider: SdkTracerProvider, ) -> Result<(), Box> { + // Log version and build information on startup + tracing::info!( + version = %BuildInfo::GIT_DESCRIBE, + git_sha = %BuildInfo::GIT_SHA_SHORT, + git_branch = %BuildInfo::GIT_BRANCH, + build_timestamp = %BuildInfo::BUILD_TIMESTAMP, + "embucketd started" + ); + let data_format = opts .data_format .clone() @@ -112,7 +122,7 @@ async fn async_main( ); let execution_cfg = ExecutionConfig { - embucket_version: "0.1.0".to_string(), + embucket_version: BuildInfo::VERSION.to_string(), sql_parser_dialect: opts.sql_parser_dialect.clone(), query_timeout_secs: opts.query_timeout_secs, max_concurrency_level: 
opts.max_concurrency_level,