diff --git a/.agents/skills/debug-openshell-cluster/SKILL.md b/.agents/skills/debug-openshell-cluster/SKILL.md index b65bf26d2..4c605d023 100644 --- a/.agents/skills/debug-openshell-cluster/SKILL.md +++ b/.agents/skills/debug-openshell-cluster/SKILL.md @@ -172,6 +172,24 @@ If the gateway exits with `failed to read sandbox JWT signing key from `sandbox-jwt` secret at `/etc/openshell-jwt`. The sandbox JWT mount is required even when local Helm values disable TLS. +If `server.providerTokenGrants.spiffe.enabled=true`, the gateway should still +render `[openshell.gateway.gateway_jwt]` and mount the `sandbox-jwt` Secret. +SPIRE is used only by sandbox pods for dynamic provider token grants. Verify +that SPIRE is installed, the CSI driver is available, and the Kubernetes driver +config includes `provider_spiffe_workload_api_socket_path`: + +```bash +helm -n openshell get values openshell | grep -E 'providerTokenGrants|workloadApiSocketPath' +kubectl get pods -A | grep -E 'spire|spiffe' +kubectl -n openshell get configmap openshell-config -o yaml | grep provider_spiffe_workload_api_socket_path +``` + +Sandbox pods using provider token grants should have an +`openshell.io/sandbox-id` annotation, an `openshell.ai/managed-by=openshell` +label, supervisor env vars `OPENSHELL_K8S_SA_TOKEN_FILE` and +`OPENSHELL_PROVIDER_SPIFFE_WORKLOAD_API_SOCKET`, plus both the projected +`openshell-sa-token` volume and the `spiffe-workload-api` CSI volume. + Check the image references currently used by the gateway deployment: ```bash diff --git a/.agents/skills/helm-dev-environment/SKILL.md b/.agents/skills/helm-dev-environment/SKILL.md index 58efbfef8..12ea5eb2a 100644 --- a/.agents/skills/helm-dev-environment/SKILL.md +++ b/.agents/skills/helm-dev-environment/SKILL.md @@ -177,6 +177,23 @@ To remove Keycloak: mise run keycloak:k8s:teardown ``` +### SPIRE / SPIFFE Provider Token Grants + +Skaffold can install SPIRE with the SPIFFE hardened Helm charts. 
To activate +SPIFFE JWT-SVIDs for dynamic provider token grants: + +1. Uncomment the `spire-crds` and `spire` releases in `deploy/helm/openshell/skaffold.yaml` +2. Uncomment `#- ci/values-spire.yaml` in the OpenShell release values files +3. Redeploy: `mise run helm:skaffold:run` + +`ci/values-spire-stack.yaml` configures the local SPIRE trust domain as +`openshell.local` and adds a `ClusterSPIFFEID` that maps sandbox pod +annotations to `spiffe://openshell.local/openshell/sandbox/<sandbox-id>`. +OpenShell mounts the SPIFFE CSI Workload API socket at +`/spiffe-workload-api/spire-agent.sock` into sandbox pods for provider token +grants. Supervisor-to-gateway authentication remains on the Kubernetes +ServiceAccount bootstrap and gateway-minted sandbox JWT path. + --- ## Cluster Lifecycle (suspend/resume) @@ -205,6 +222,8 @@ mise run helm:k3s:status | `deploy/helm/openshell/ci/values-gateway.yaml` | Envoy Gateway GRPCRoute + Gateway overlay | | `deploy/helm/openshell/ci/values-high-availability.yaml` | HA test overlay (`replicaCount: 2` with bundled PostgreSQL) | | `deploy/helm/openshell/ci/values-keycloak.yaml` | Keycloak OIDC overlay | +| `deploy/helm/openshell/ci/values-spire.yaml` | SPIFFE/SPIRE provider token grant overlay | +| `deploy/helm/openshell/ci/values-spire-stack.yaml` | SPIRE hardened chart values for local dev | | `deploy/helm/openshell/ci/values-tls-disabled.yaml` | Lint-only: TLS + auth disabled (reverse-proxy edge termination) | | `deploy/kube/manifests/envoy-gateway-openshell.yaml` | GatewayClass for Envoy Gateway (`mise run helm:gateway:apply`) | | `tasks/scripts/helm-k3s-local.sh` | k3d cluster create/delete/start/stop/status | diff --git a/Cargo.lock b/Cargo.lock index 4bc657be3..03cecae82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -156,6 +156,15 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum =
"6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + [[package]] name = "argon2" version = "0.5.3" @@ -217,28 +226,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "async-stream" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" -dependencies = [ - "async-stream-impl", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-stream-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - [[package]] name = "async-trait" version = "0.1.89" @@ -303,40 +290,13 @@ dependencies = [ "fs_extra", ] -[[package]] -name = "axum" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" -dependencies = [ - "async-trait", - "axum-core 0.4.5", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "itoa", - "matchit 0.7.3", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower 0.5.3", - "tower-layer", - "tower-service", -] - [[package]] name = "axum" version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ - "axum-core 0.5.6", + "axum-core", "base64 0.22.1", "bytes", "form_urlencoded", @@ -347,7 +307,7 @@ dependencies = [ "hyper", "hyper-util", "itoa", - "matchit 0.8.4", + "matchit", "memchr", "mime", "percent-encoding", @@ -366,26 +326,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "axum-core" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", -] - [[package]] name = "axum-core" version = "0.5.6" @@ -1855,19 +1795,13 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.14.0", + "indexmap", "slab", "tokio", "tokio-util", "tracing", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.5" @@ -2149,7 +2083,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.3", + "socket2", "tokio", "tower-service", "tracing", @@ -2328,16 +2262,6 @@ dependencies = [ "quote", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "2.14.0" @@ -2957,12 +2881,6 @@ dependencies = [ "regex-automata", ] -[[package]] -name = "matchit" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" - [[package]] name = "matchit" version = "0.8.4" @@ -3011,7 +2929,7 @@ dependencies = [ "http-body-util", "hyper", "hyper-util", - "indexmap 2.14.0", + "indexmap", "ipnet", "metrics", "metrics-util", @@ -3469,7 +3387,8 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tonic", - "tonic-build", + "tonic-prost", + "tonic-prost-build", "url", ] @@ -3664,6 +3583,7 @@ dependencies = [ "rand_core 0.6.4", "rcgen", "regorus", + "reqwest 0.12.28", "russh", "rustix 1.1.4", "rustls", @@ -3674,6 
+3594,7 @@ dependencies = [ "serde_yml", "sha1 0.10.6", "sha2 0.10.9", + "spiffe", "temp-env", "tempfile", "thiserror 2.0.18", @@ -3695,7 +3616,7 @@ version = "0.0.0" dependencies = [ "anyhow", "async-trait", - "axum 0.8.9", + "axum", "bytes", "clap", "futures", @@ -4033,12 +3954,13 @@ dependencies = [ [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", - "indexmap 2.14.0", + "hashbrown 0.15.5", + "indexmap", ] [[package]] @@ -4276,9 +4198,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -4286,19 +4208,20 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", "itertools 0.14.0", "log", "multimap", - "once_cell", "petgraph", "prettyplease", "prost", "prost-types", + "pulldown-cmark", + "pulldown-cmark-to-cmark", "regex", "syn 2.0.117", "tempfile", @@ -4306,9 +4229,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools 
0.14.0", @@ -4319,9 +4242,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] @@ -4335,6 +4258,26 @@ dependencies = [ "autotools", ] +[[package]] +name = "pulldown-cmark" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad" +dependencies = [ + "bitflags", + "memchr", + "unicase", +] + +[[package]] +name = "pulldown-cmark-to-cmark" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50793def1b900256624a709439404384204a5dc3a6ec580281bfaac35e882e90" +dependencies = [ + "pulldown-cmark", +] + [[package]] name = "quanta" version = "0.12.6" @@ -4363,7 +4306,7 @@ dependencies = [ "quinn-udp", "rustc-hash 2.1.2", "rustls", - "socket2 0.6.3", + "socket2", "thiserror 2.0.18", "tokio", "tracing", @@ -4401,7 +4344,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.3", + "socket2", "tracing", "windows-sys 0.60.2", ] @@ -5257,7 +5200,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.14.0", + "indexmap", "itoa", "ryu", "serde", @@ -5270,7 +5213,7 @@ version = "0.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd" dependencies = [ - "indexmap 2.14.0", + "indexmap", "itoa", "libyml", "memchr", @@ -5453,22 +5396,40 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.10" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] -name = "socket2" -version = "0.6.3" +name = "spiffe" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "6d3f9e45e9e53f03cb452fe0f050101a9280ff4f4214e326037bc8275284d906" dependencies = [ - "libc", - "windows-sys 0.61.2", + "arc-swap", + "base64ct", + "fastrand", + "futures", + "hyper-util", + "log", + "prost", + "prost-types", + "serde", + "serde_json", + "thiserror 2.0.18", + "time", + "tokio", + "tokio-util", + "tonic", + "tonic-prost", + "tower 0.5.3", + "tracing", + "url", + "zeroize", ] [[package]] @@ -5531,7 +5492,7 @@ dependencies = [ "futures-util", "hashbrown 0.15.5", "hashlink", - "indexmap 2.14.0", + "indexmap", "log", "memchr", "once_cell", @@ -6097,7 +6058,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.3", + "socket2", "tokio-macros", "windows-sys 0.61.2", ] @@ -6203,7 +6164,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.14.0", + "indexmap", "serde", "serde_spanned", "toml_datetime", @@ -6219,13 +6180,12 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" [[package]] name = "tonic" -version = "0.12.3" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +checksum = "ac2a5518c70fa84342385732db33fb3f44bc4cc748936eb5833d2df34d6445ef" dependencies = [ - "async-stream", "async-trait", - "axum 0.7.9", + "axum", "base64 0.22.1", "bytes", "h2", @@ -6237,14 
+6197,13 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost", "rustls-native-certs", - "rustls-pemfile", - "socket2 0.5.10", + "socket2", + "sync_wrapper", "tokio", "tokio-rustls", "tokio-stream", - "tower 0.4.13", + "tower 0.5.3", "tower-layer", "tower-service", "tracing", @@ -6252,9 +6211,32 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.12.3" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c68f61875ac5293cf72e6c8cf0158086428c82c37229e98c840878f1706b0322" +dependencies = [ + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tonic-prost" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50849f68853be452acf590cde0b146665b8d507b3b8af17261df47e02c209ea0" +dependencies = [ + "bytes", + "prost", + "tonic", +] + +[[package]] +name = "tonic-prost-build" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" +checksum = "654e5643eff75d7f8c99197ce1440ed19a3474eada74c12bbac488b2cafdae27" dependencies = [ "prettyplease", "proc-macro2", @@ -6262,6 +6244,8 @@ dependencies = [ "prost-types", "quote", "syn 2.0.117", + "tempfile", + "tonic-build", ] [[package]] @@ -6272,11 +6256,8 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", - "indexmap 1.9.3", "pin-project", "pin-project-lite", - "rand 0.8.6", - "slab", "tokio", "tokio-util", "tower-layer", @@ -6292,9 +6273,12 @@ checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", + "indexmap", "pin-project-lite", + "slab", "sync_wrapper", "tokio", + "tokio-util", "tower-layer", "tower-service", "tracing", @@ -6781,7 +6765,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum 
= "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "indexmap 2.14.0", + "indexmap", "wasm-encoder", "wasmparser", ] @@ -6807,7 +6791,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags", "hashbrown 0.15.5", - "indexmap 2.14.0", + "indexmap", "semver", ] @@ -7372,7 +7356,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck", - "indexmap 2.14.0", + "indexmap", "prettyplease", "syn 2.0.117", "wasm-metadata", @@ -7403,7 +7387,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags", - "indexmap 2.14.0", + "indexmap", "log", "serde", "serde_derive", @@ -7422,7 +7406,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", - "indexmap 2.14.0", + "indexmap", "log", "semver", "serde", @@ -7636,7 +7620,7 @@ dependencies = [ "flate2", "getrandom 0.4.2", "hmac", - "indexmap 2.14.0", + "indexmap", "lzma-rust2", "memchr", "pbkdf2", diff --git a/Cargo.toml b/Cargo.toml index 079e1e172..86025646a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,10 +17,11 @@ repository = "https://github.com/NVIDIA/OpenShell" tokio = { version = "1.43", features = ["full"] } # gRPC/Protobuf -tonic = "0.12" -tonic-build = "0.12" -prost = "0.13" -prost-types = "0.13" +tonic = "0.14" +tonic-prost = "0.14" +tonic-prost-build = "0.14" +prost = "0.14" +prost-types = "0.14" # HTTP server axum = { version = "0.8", features = ["ws"] } @@ -87,6 +88,7 @@ sha2 = "0.10" rand = "0.9" jsonwebtoken = "9" getrandom = "0.3" +spiffe = { version = "0.15", default-features = false, features = ["workload-api-jwt", "tracing"] } # Filesystem embedding include_dir = "0.7" diff --git a/architecture/gateway.md b/architecture/gateway.md index 35e2d6659..c8bb695ea 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ 
-47,9 +47,13 @@ Supported auth modes: | Cloudflare JWT | Edge-authenticated deployments where Cloudflare Access supplies identity. | | OIDC | Bearer-token auth for users, with browser PKCE or client credentials login. | -Sandbox supervisor RPCs authenticate with gateway-minted sandbox JWTs when that -authenticator is configured; mTLS does not grant sandbox identity. User-facing -mutations are authorized by role policy when OIDC or edge identity is enabled. +Sandbox supervisor RPCs authenticate with explicit sandbox credentials; mTLS +does not grant sandbox identity. Kubernetes deployments use the +gateway-minted JWT bootstrap path: the supervisor starts with a projected +ServiceAccount token, exchanges it for a gateway-minted sandbox JWT, and uses +that JWT on subsequent gateway RPCs. +User-facing mutations are authorized by role policy when OIDC or edge identity +is enabled. Sandbox secrets are gateway-signed JWTs bound to a single sandbox ID. Docker, Podman, and VM drivers deliver the initial token through supervisor-only diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 4bc6803eb..370838dde 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -70,10 +70,20 @@ agent process and SSH child processes. Driver-controlled environment variables override template values so sandbox images cannot spoof identity, callback, or relay settings. +Supervisor bootstrap identity is not inherited by agent child processes. When +provider token grants mount a SPIFFE Workload API socket, children also enter a +private mount namespace where the socket directory is hidden before privilege +drop. + Credential placeholders in proxied HTTP requests can be resolved by the proxy when policy allows the target endpoint. Secrets must not be logged in OCSF or plain tracing output. +Provider profiles can also declare dynamic token grants. 
For matching HTTP +endpoints, the supervisor obtains a SPIFFE JWT-SVID from the local Workload API, +exchanges it for an OAuth2 access token, caches the token, and injects it as an +`Authorization: Bearer` header before forwarding the request. + ## Connect and Logs The supervisor runs an SSH server on a Unix socket inside the sandbox. The diff --git a/crates/openshell-cli/Cargo.toml b/crates/openshell-cli/Cargo.toml index b69a9629b..0ecd4339c 100644 --- a/crates/openshell-cli/Cargo.toml +++ b/crates/openshell-cli/Cargo.toml @@ -30,7 +30,7 @@ prost-types = { workspace = true } tokio = { workspace = true } # gRPC client -tonic = { workspace = true, features = ["tls", "tls-native-roots"] } +tonic = { workspace = true, features = ["tls-native-roots"] } # CLI chrono = "0.4" diff --git a/crates/openshell-core/Cargo.toml b/crates/openshell-core/Cargo.toml index 78c87d54c..4432bd56d 100644 --- a/crates/openshell-core/Cargo.toml +++ b/crates/openshell-core/Cargo.toml @@ -14,6 +14,7 @@ repository.workspace = true prost = { workspace = true } prost-types = { workspace = true } tonic = { workspace = true } +tonic-prost = { workspace = true } thiserror = { workspace = true } miette = { workspace = true } serde = { workspace = true } @@ -30,7 +31,7 @@ reqwest = { workspace = true, features = ["blocking", "rustls-tls-webpki-roots"] dev-settings = [] [build-dependencies] -tonic-build = { workspace = true } +tonic-prost-build = { workspace = true } protobuf-src = { workspace = true } [dev-dependencies] diff --git a/crates/openshell-core/build.rs b/crates/openshell-core/build.rs index 12e79a1dc..7955772a6 100644 --- a/crates/openshell-core/build.rs +++ b/crates/openshell-core/build.rs @@ -43,14 +43,14 @@ fn main() -> Result<(), Box> { let out_dir = PathBuf::from(env::var("OUT_DIR")?); let descriptor_path = out_dir.join("openshell_descriptor.bin"); - // Configure tonic-build - tonic_build::configure() + // Configure tonic/prost protobuf code generation. 
+ tonic_prost_build::configure() .build_server(true) .build_client(true) // Emit a binary FileDescriptorSet so the server can enumerate every // RPC at runtime (used by the per-handler auth exhaustiveness test). .file_descriptor_set_path(&descriptor_path) - .compile_protos(&proto_files, &[proto_root.as_path()])?; + .compile_protos(&proto_files, &[proto_root])?; println!( "cargo:rustc-env=OPENSHELL_DESCRIPTOR_PATH={}", diff --git a/crates/openshell-core/src/sandbox_env.rs b/crates/openshell-core/src/sandbox_env.rs index 9ffbf79f0..f0e27de59 100644 --- a/crates/openshell-core/src/sandbox_env.rs +++ b/crates/openshell-core/src/sandbox_env.rs @@ -56,3 +56,11 @@ pub const SANDBOX_TOKEN_FILE: &str = "OPENSHELL_SANDBOX_TOKEN_FILE"; /// writes and rotates this file; the supervisor exchanges its contents /// for a gateway JWT at startup and on refresh. pub const K8S_SA_TOKEN_FILE: &str = "OPENSHELL_K8S_SA_TOKEN_FILE"; + +/// Filesystem path to the SPIFFE Workload API UNIX socket used for provider +/// token grants. +/// +/// When set, the supervisor can fetch JWT-SVIDs for upstream provider token +/// exchanges without using SPIFFE for gateway authentication. +pub const PROVIDER_SPIFFE_WORKLOAD_API_SOCKET: &str = + "OPENSHELL_PROVIDER_SPIFFE_WORKLOAD_API_SOCKET"; diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index 48004fa4b..fe4db8b46 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -190,6 +190,10 @@ pub struct KubernetesComputeConfig { /// this token within a few seconds of pod start, so any value at /// the floor is sufficient. Default 3600. pub sa_token_ttl_secs: i64, + /// SPIFFE Workload API socket path mounted into sandbox pods for dynamic + /// provider token grants. Empty disables provider token-grant SPIFFE + /// material. 
+ pub provider_spiffe_workload_api_socket_path: String, } /// Lower bound enforced by kubelet for projected SA tokens. @@ -224,6 +228,7 @@ impl Default for KubernetesComputeConfig { workspace_default_storage_size: DEFAULT_WORKSPACE_STORAGE_SIZE.to_string(), default_runtime_class_name: String::new(), sa_token_ttl_secs: 3600, + provider_spiffe_workload_api_socket_path: String::new(), } } } @@ -241,6 +246,14 @@ impl KubernetesComputeConfig { .clamp(MIN_SA_TOKEN_TTL_SECS, MAX_SA_TOKEN_TTL_SECS) } } + + #[must_use] + pub fn provider_spiffe_enabled(&self) -> bool { + !self + .provider_spiffe_workload_api_socket_path + .trim() + .is_empty() + } } #[cfg(test)] diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 449cee58d..9fc4fe878 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -79,6 +79,7 @@ pub const SANDBOX_KIND: &str = "Sandbox"; const GPU_RESOURCE_NAME: &str = "nvidia.com/gpu"; const GPU_RESOURCE_QUANTITY: &str = "1"; +const SPIFFE_WORKLOAD_API_VOLUME_NAME: &str = "spiffe-workload-api"; // This POC treats the selected Struct as a driver-local typed schema. Once the // Kubernetes shape stabilizes, these serde structs may move to driver-local @@ -374,6 +375,10 @@ impl KubernetesComputeDriver { workspace_default_storage_size: &self.config.workspace_default_storage_size, default_runtime_class_name: &self.config.default_runtime_class_name, sa_token_ttl_secs: self.config.effective_sa_token_ttl_secs(), + provider_spiffe_enabled: self.config.provider_spiffe_enabled(), + provider_spiffe_workload_api_socket_path: &self + .config + .provider_spiffe_workload_api_socket_path, }; obj.data = sandbox_to_k8s_spec(sandbox.spec.as_ref(), &params); let api = self.api(); @@ -1090,6 +1095,8 @@ struct SandboxPodParams<'a> { /// Lifetime (seconds) of the projected `ServiceAccount` token used /// for the bootstrap `IssueSandboxToken` exchange.
sa_token_ttl_secs: i64, + provider_spiffe_enabled: bool, + provider_spiffe_workload_api_socket_path: &'a str, } impl Default for SandboxPodParams<'_> { @@ -1113,6 +1120,8 @@ impl Default for SandboxPodParams<'_> { workspace_default_storage_size: DEFAULT_WORKSPACE_STORAGE_SIZE, default_runtime_class_name: "", sa_token_ttl_secs: 3600, + provider_spiffe_enabled: false, + provider_spiffe_workload_api_socket_path: "", } } } @@ -1220,8 +1229,25 @@ fn sandbox_template_to_k8s( let driver_config = kubernetes_driver_config(template); let mut metadata = serde_json::Map::new(); - if !template.labels.is_empty() { - metadata.insert("labels".to_string(), serde_json::json!(template.labels)); + let mut pod_labels = template + .labels + .iter() + .map(|(key, value)| (key.clone(), serde_json::Value::String(value.clone()))) + .collect::>(); + if params.provider_spiffe_enabled { + pod_labels.insert( + LABEL_MANAGED_BY.to_string(), + serde_json::Value::String(LABEL_MANAGED_BY_VALUE.to_string()), + ); + if !params.sandbox_id.is_empty() { + pod_labels.insert( + LABEL_SANDBOX_ID.to_string(), + serde_json::Value::String(params.sandbox_id.to_string()), + ); + } + } + if !pod_labels.is_empty() { + metadata.insert("labels".to_string(), serde_json::Value::Object(pod_labels)); } // Carry the sandbox UUID as a pod annotation so the gateway can resolve // a projected SA token claim (pod name + uid) back to a sandbox identity @@ -1336,6 +1362,7 @@ fn sandbox_template_to_k8s( params.grpc_endpoint, params.ssh_socket_path, !params.client_tls_secret_name.is_empty(), + provider_spiffe_socket_path(params), ); container.insert("env".to_string(), serde_json::Value::Array(env)); @@ -1358,9 +1385,9 @@ fn sandbox_template_to_k8s( } container.insert("securityContext".to_string(), security_context); - // Mount client TLS secret for mTLS to the server, plus the projected - // ServiceAccount token used to bootstrap the sandbox's gateway JWT - // via `IssueSandboxToken`. 
+ // Mount client TLS secret for mTLS to the server. Gateway identity uses + // the projected ServiceAccount bootstrap token. Provider token grants may + // additionally mount the SPIFFE Workload API socket. let mut volume_mounts: Vec = Vec::new(); if !params.client_tls_secret_name.is_empty() { volume_mounts.push(serde_json::json!({ @@ -1369,6 +1396,13 @@ fn sandbox_template_to_k8s( "readOnly": true })); } + if params.provider_spiffe_enabled { + volume_mounts.push(serde_json::json!({ + "name": SPIFFE_WORKLOAD_API_VOLUME_NAME, + "mountPath": spiffe_socket_mount_path(params.provider_spiffe_workload_api_socket_path), + "readOnly": true, + })); + } volume_mounts.push(serde_json::json!({ "name": "openshell-sa-token", "mountPath": "/var/run/secrets/openshell", @@ -1397,6 +1431,15 @@ fn sandbox_template_to_k8s( "secret": { "secretName": params.client_tls_secret_name, "defaultMode": 256 } })); } + if params.provider_spiffe_enabled { + volumes.push(serde_json::json!({ + "name": SPIFFE_WORKLOAD_API_VOLUME_NAME, + "csi": { + "driver": "csi.spiffe.io", + "readOnly": true + } + })); + } // Projected ServiceAccountToken volume — kubelet writes a short-lived // audience-bound JWT into /var/run/secrets/openshell/token and rotates // it automatically. 
The supervisor exchanges this for a gateway-minted @@ -1621,6 +1664,7 @@ fn build_env_list( grpc_endpoint: &str, ssh_socket_path: &str, tls_enabled: bool, + provider_spiffe_socket_path: Option<&str>, ) -> Vec<serde_json::Value> { let mut env = existing_env.cloned().unwrap_or_default(); apply_env_map(&mut env, template_environment); @@ -1632,6 +1676,7 @@ fn build_env_list( grpc_endpoint, ssh_socket_path, tls_enabled, + provider_spiffe_socket_path, ); env } @@ -1654,6 +1699,7 @@ fn apply_required_env( grpc_endpoint: &str, ssh_socket_path: &str, tls_enabled: bool, + provider_spiffe_socket_path: Option<&str>, ) { upsert_env(env, openshell_core::sandbox_env::SANDBOX_ID, sandbox_id); upsert_env(env, openshell_core::sandbox_env::SANDBOX, sandbox_name); @@ -1702,6 +1748,28 @@ fn apply_required_env( openshell_core::sandbox_env::K8S_SA_TOKEN_FILE, "/var/run/secrets/openshell/token", ); + if let Some(socket_path) = provider_spiffe_socket_path { + upsert_env( + env, + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET, + socket_path, + ); + } +} + +fn provider_spiffe_socket_path<'a>(params: &'a SandboxPodParams<'a>) -> Option<&'a str> { + params + .provider_spiffe_enabled + .then_some(params.provider_spiffe_workload_api_socket_path) +} + +fn spiffe_socket_mount_path(socket_path: &str) -> String { + std::path::Path::new(socket_path) + .parent() + .and_then(std::path::Path::to_str) + .filter(|path| !path.is_empty()) + .unwrap_or("/spiffe-workload-api") + .to_string() } fn upsert_env(env: &mut Vec<serde_json::Value>, name: &str, value: &str) { @@ -2185,6 +2253,7 @@ mod tests { "https://endpoint:8080", "0.0.0.0:2222", true, // tls_enabled + None, ); // Extract the TLS-related env vars @@ -3070,6 +3139,51 @@ mod tests { ); } + #[test] + fn provider_spiffe_mounts_csi_socket_and_keeps_sa_token_bootstrap() { + let params = SandboxPodParams { + sandbox_id: "sandbox-123", + sandbox_name: "sandbox", + provider_spiffe_enabled: true, + provider_spiffe_workload_api_socket_path:
"/spiffe-workload-api/spire-agent.sock", + ..SandboxPodParams::default() + }; + let pod_template = sandbox_template_to_k8s( + &SandboxTemplate::default(), + false, + &std::collections::HashMap::new(), + true, + &params, + ); + + let env = pod_template["spec"]["containers"][0]["env"] + .as_array() + .expect("env"); + assert!(env.iter().any(|e| { + e["name"] == openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + && e["value"] == "/spiffe-workload-api/spire-agent.sock" + })); + assert!(env.iter().any(|e| { + e["name"] == openshell_core::sandbox_env::K8S_SA_TOKEN_FILE + && e["value"] == "/var/run/secrets/openshell/token" + })); + + let volumes = pod_template["spec"]["volumes"].as_array().expect("volumes"); + assert!(volumes.iter().any(|volume| { + volume["name"] == SPIFFE_WORKLOAD_API_VOLUME_NAME + && volume["csi"]["driver"] == "csi.spiffe.io" + })); + assert!(volumes.iter().any(|volume| { + volume["name"] == "openshell-sa-token" + && volume["projected"]["sources"][0]["serviceAccountToken"]["path"] == "token" + })); + + assert_eq!( + pod_template["metadata"]["labels"][LABEL_MANAGED_BY], + serde_json::json!(LABEL_MANAGED_BY_VALUE) + ); + } + #[test] fn platform_config_bool_extracts_value() { let template = SandboxTemplate { diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index a2b0e2790..f7eeeba42 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -92,6 +92,9 @@ struct Args { /// gateway clamps values outside `[600, 86400]`. Default 3600.
#[arg(long, env = "OPENSHELL_K8S_SA_TOKEN_TTL_SECS", default_value_t = 3600)] sa_token_ttl_secs: i64, + + #[arg(long, env = "OPENSHELL_PROVIDER_SPIFFE_WORKLOAD_API_SOCKET")] + provider_spiffe_workload_api_socket_path: Option, } #[tokio::main] @@ -129,6 +132,9 @@ async fn main() -> Result<()> { default_runtime_class_name: std::env::var("OPENSHELL_K8S_DEFAULT_RUNTIME_CLASS_NAME") .unwrap_or_default(), sa_token_ttl_secs: args.sa_token_ttl_secs, + provider_spiffe_workload_api_socket_path: args + .provider_spiffe_workload_api_socket_path + .unwrap_or_default(), }) .await .into_diagnostic()?; diff --git a/crates/openshell-providers/src/discovery.rs b/crates/openshell-providers/src/discovery.rs index 79d6fb091..a0edd2bf1 100644 --- a/crates/openshell-providers/src/discovery.rs +++ b/crates/openshell-providers/src/discovery.rs @@ -96,6 +96,7 @@ mod tests { header_name: String::new(), query_param: String::new(), refresh: None, + token_grant: None, }, CredentialProfile { name: "secondary".to_string(), @@ -106,6 +107,7 @@ mod tests { header_name: String::new(), query_param: String::new(), refresh: None, + token_grant: None, }, ], endpoints: Vec::new(), diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs index 63a6b2eb3..53712f9ff 100644 --- a/crates/openshell-providers/src/profiles.rs +++ b/crates/openshell-providers/src/profiles.rs @@ -86,6 +86,36 @@ pub struct CredentialProfile { pub query_param: String, #[serde(default, skip_serializing_if = "Option::is_none")] pub refresh: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub token_grant: Option, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct TokenGrantProfile { + pub token_endpoint: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub audience: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub jwt_svid_audience: String, + #[serde(default, skip_serializing_if = 
"Vec::is_empty")] + pub scopes: Vec, + #[serde(default, skip_serializing_if = "is_zero_i64")] + pub cache_ttl_seconds: i64, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub audience_overrides: Vec, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct TokenGrantAudienceOverrideProfile { + #[serde(default, skip_serializing_if = "String::is_empty")] + pub host: String, + #[serde(default, skip_serializing_if = "is_zero")] + pub port: u32, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub path: String, + pub audience: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub scopes: Vec, } #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] @@ -285,6 +315,7 @@ impl ProviderTypeProfile { .refresh .as_ref() .map(credential_refresh_from_proto), + token_grant: credential.token_grant.as_ref().map(token_grant_from_proto), }) .collect(), endpoints: profile.endpoints.iter().map(endpoint_from_proto).collect(), @@ -349,6 +380,7 @@ impl ProviderTypeProfile { header_name: credential.header_name.clone(), query_param: credential.query_param.clone(), refresh: credential.refresh.as_ref().map(credential_refresh_to_proto), + token_grant: credential.token_grant.as_ref().map(token_grant_to_proto), }) .collect(), endpoints: self.endpoints.iter().map(endpoint_to_proto).collect(), @@ -596,6 +628,64 @@ fn credential_refresh_to_proto(refresh: &CredentialRefreshProfile) -> ProviderCr } } +fn token_grant_from_proto( + token_grant: &openshell_core::proto::ProviderCredentialTokenGrant, +) -> TokenGrantProfile { + TokenGrantProfile { + token_endpoint: token_grant.token_endpoint.clone(), + audience: token_grant.audience.clone(), + jwt_svid_audience: token_grant.jwt_svid_audience.clone(), + scopes: token_grant.scopes.clone(), + cache_ttl_seconds: token_grant.cache_ttl_seconds, + audience_overrides: token_grant + .audience_overrides + .iter() + .map(token_grant_audience_override_from_proto) + .collect(), + } +} + 
+fn token_grant_to_proto( + token_grant: &TokenGrantProfile, +) -> openshell_core::proto::ProviderCredentialTokenGrant { + openshell_core::proto::ProviderCredentialTokenGrant { + token_endpoint: token_grant.token_endpoint.clone(), + audience: token_grant.audience.clone(), + jwt_svid_audience: token_grant.jwt_svid_audience.clone(), + scopes: token_grant.scopes.clone(), + cache_ttl_seconds: token_grant.cache_ttl_seconds, + audience_overrides: token_grant + .audience_overrides + .iter() + .map(token_grant_audience_override_to_proto) + .collect(), + } +} + +fn token_grant_audience_override_from_proto( + override_config: &openshell_core::proto::ProviderCredentialTokenGrantAudienceOverride, +) -> TokenGrantAudienceOverrideProfile { + TokenGrantAudienceOverrideProfile { + host: override_config.host.clone(), + port: override_config.port, + path: override_config.path.clone(), + audience: override_config.audience.clone(), + scopes: override_config.scopes.clone(), + } +} + +fn token_grant_audience_override_to_proto( + override_config: &TokenGrantAudienceOverrideProfile, +) -> openshell_core::proto::ProviderCredentialTokenGrantAudienceOverride { + openshell_core::proto::ProviderCredentialTokenGrantAudienceOverride { + host: override_config.host.clone(), + port: override_config.port, + path: override_config.path.clone(), + audience: override_config.audience.clone(), + scopes: override_config.scopes.clone(), + } +} + fn discovery_from_proto(discovery: &ProviderProfileDiscovery) -> DiscoveryProfile { DiscoveryProfile { credentials: discovery.credentials.clone(), @@ -1351,6 +1441,62 @@ credentials: assert!(exported.contains("client_secret")); } + #[test] + fn token_grant_audience_overrides_round_trip_through_proto() { + let profile = parse_profile_yaml( + r" +id: keycloak-example +display_name: Keycloak Example +credentials: + - name: access_token + auth_style: bearer + header_name: Authorization + token_grant: + token_endpoint: 
http://keycloak/realms/openshell/protocol/openid-connect/token + jwt_svid_audience: http://keycloak/realms/openshell + audience: api://default + scopes: [openid] + cache_ttl_seconds: 300 + audience_overrides: + - host: alpha.default.svc.cluster.local + port: 80 + audience: api://alpha + - host: beta.default.svc.cluster.local + port: 80 + path: /v1/** + audience: api://beta + scopes: [beta.read] +", + ) + .expect("profile should parse"); + + let token_grant = profile.credentials[0] + .token_grant + .as_ref() + .expect("token grant should parse"); + assert_eq!( + token_grant.jwt_svid_audience, + "http://keycloak/realms/openshell" + ); + assert_eq!(token_grant.audience_overrides.len(), 2); + assert_eq!(token_grant.audience_overrides[1].path, "/v1/**"); + assert_eq!(token_grant.audience_overrides[1].scopes, vec!["beta.read"]); + + let reparsed = ProviderTypeProfile::from_proto(&profile.to_proto()); + let reparsed_token_grant = reparsed.credentials[0] + .token_grant + .as_ref() + .expect("token grant should round trip"); + assert_eq!( + reparsed_token_grant.jwt_svid_audience, + token_grant.jwt_svid_audience + ); + assert_eq!( + reparsed_token_grant.audience_overrides, + token_grant.audience_overrides + ); + } + #[test] fn profile_json_round_trip_preserves_compact_dto_shape() { let profile = get_default_profile("github").expect("github profile"); diff --git a/crates/openshell-sandbox/Cargo.toml b/crates/openshell-sandbox/Cargo.toml index 6d527bc53..4a3f0a054 100644 --- a/crates/openshell-sandbox/Cargo.toml +++ b/crates/openshell-sandbox/Cargo.toml @@ -24,8 +24,9 @@ openshell-router = { path = "../openshell-router" } tokio = { workspace = true } # gRPC -tonic = { workspace = true, features = ["channel", "tls"] } +tonic = { workspace = true, features = ["channel", "tls-native-roots"] } tokio-stream = { workspace = true } +spiffe = { workspace = true } # CLI clap = { workspace = true } @@ -52,6 +53,7 @@ webpki-roots = { workspace = true } # HTTP bytes = { workspace = true } 
+reqwest = { workspace = true } # UUID uuid = { workspace = true } diff --git a/crates/openshell-sandbox/src/grpc_client.rs b/crates/openshell-sandbox/src/grpc_client.rs index ffa22f951..e35d2701d 100644 --- a/crates/openshell-sandbox/src/grpc_client.rs +++ b/crates/openshell-sandbox/src/grpc_client.rs @@ -4,8 +4,8 @@ //! gRPC client for fetching sandbox policy, provider environment, and inference //! route bundles from `OpenShell` server. //! -//! Every request carries a gateway-minted JWT in the `Authorization` header. -//! The token is resolved at startup from one of three sources: +//! Every request carries a sandbox bearer credential in the `Authorization` +//! header. The token is resolved at startup from one of three sources: //! //! 1. `OPENSHELL_SANDBOX_TOKEN` — raw JWT in the env (test harness path). //! 2. `OPENSHELL_SANDBOX_TOKEN_FILE` — file containing the JWT (Docker / @@ -15,7 +15,7 @@ //! supervisor exchanges it for a gateway JWT via `IssueSandboxToken` //! once at startup. //! -//! The resolved gateway JWT is held in process memory thereafter and +//! The resolved bearer credential is held in process memory thereafter and //! injected on every outbound call by [`AuthInterceptor`]. use std::collections::HashMap; @@ -54,18 +54,12 @@ enum TokenSource { K8sServiceAccount, } -#[derive(Debug)] -struct AcquiredToken { - token: String, - source: TokenSource, -} - /// Process-wide token slot. Initialized by the first [`connect_channel`] /// call and shared with every subsequent client and the renewal loop. static TOKEN_SLOT: OnceLock = OnceLock::new(); -/// Source used to acquire the process-wide token slot. -static TOKEN_SOURCE: OnceLock = OnceLock::new(); +/// Refresh strategy used by the process-wide token slot. +static TOKEN_REFRESH_MODE: OnceLock = OnceLock::new(); /// Serializes the first token acquisition. 
Several supervisor subsystems /// connect during startup; without this guard they can all observe an empty @@ -75,6 +69,17 @@ static TOKEN_INIT_LOCK: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(( /// One-shot guard so the renewal loop spawns at most once per process. static REFRESH_SPAWNED: OnceLock<()> = OnceLock::new(); +#[derive(Clone, Debug)] +enum RefreshMode { + GatewayJwt(TokenSource), +} + +#[derive(Debug)] +struct AcquiredToken { + token: String, + refresh_mode: RefreshMode, +} + fn install_token_slot(token: &str) -> Result { let bearer = AsciiMetadataValue::try_from(format!("Bearer {token}")) .into_diagnostic() @@ -189,10 +194,11 @@ async fn build_plain_channel(endpoint: &str) -> Result { /// spawned once per process via [`REFRESH_SPAWNED`]. async fn connect_channel(endpoint: &str) -> Result { let channel = build_plain_channel(endpoint).await?; - let (slot, source) = token_slot(endpoint, &channel).await?; + let (slot, refresh_mode) = token_slot(endpoint, &channel).await?; let plain_channel = channel.clone(); let intercepted = InterceptedService::new(channel, AuthInterceptor::new(slot.clone())); if REFRESH_SPAWNED.set(()).is_ok() { + let RefreshMode::GatewayJwt(source) = refresh_mode; let refresh_channel = intercepted.clone(); let endpoint = endpoint.to_string(); tokio::spawn(async move { @@ -202,23 +208,29 @@ async fn connect_channel(endpoint: &str) -> Result { Ok(intercepted) } -async fn token_slot(endpoint: &str, plain_channel: &Channel) -> Result<(TokenSlot, TokenSource)> { +async fn token_slot(endpoint: &str, plain_channel: &Channel) -> Result<(TokenSlot, RefreshMode)> { if let Some(existing) = TOKEN_SLOT.get() { - let source = TOKEN_SOURCE.get().copied().unwrap_or(TokenSource::Env); - return Ok((existing.clone(), source)); + let refresh_mode = TOKEN_REFRESH_MODE + .get() + .cloned() + .unwrap_or(RefreshMode::GatewayJwt(TokenSource::Env)); + return Ok((existing.clone(), refresh_mode)); } let _guard = TOKEN_INIT_LOCK.lock().await; if let 
Some(existing) = TOKEN_SLOT.get() { - let source = TOKEN_SOURCE.get().copied().unwrap_or(TokenSource::Env); - return Ok((existing.clone(), source)); + let refresh_mode = TOKEN_REFRESH_MODE + .get() + .cloned() + .unwrap_or(RefreshMode::GatewayJwt(TokenSource::Env)); + return Ok((existing.clone(), refresh_mode)); } let acquired = acquire_sandbox_token(endpoint, plain_channel).await?; let slot = install_token_slot(&acquired.token)?; - let _ = TOKEN_SOURCE.set(acquired.source); - Ok((slot, acquired.source)) + let _ = TOKEN_REFRESH_MODE.set(acquired.refresh_mode.clone()); + Ok((slot, acquired.refresh_mode)) } /// Resolve the sandbox JWT used to authenticate every outbound RPC. @@ -234,7 +246,7 @@ async fn acquire_sandbox_token(endpoint: &str, plain_channel: &Channel) -> Resul debug!(source = "env", "loaded sandbox token"); return Ok(AcquiredToken { token: t, - source: TokenSource::Env, + refresh_mode: RefreshMode::GatewayJwt(TokenSource::Env), }); } @@ -247,7 +259,7 @@ async fn acquire_sandbox_token(endpoint: &str, plain_channel: &Channel) -> Resul debug!(source = "file", path = %path, "loaded sandbox token"); return Ok(AcquiredToken { token: contents.trim().to_string(), - source: TokenSource::File, + refresh_mode: RefreshMode::GatewayJwt(TokenSource::File), }); } @@ -256,7 +268,7 @@ async fn acquire_sandbox_token(endpoint: &str, plain_channel: &Channel) -> Resul { return Ok(AcquiredToken { token: acquire_k8s_sandbox_token(endpoint, plain_channel, &sa_path).await?, - source: TokenSource::K8sServiceAccount, + refresh_mode: RefreshMode::GatewayJwt(TokenSource::K8sServiceAccount), }); } @@ -674,6 +686,7 @@ pub async fn fetch_provider_environment( environment: inner.environment, provider_env_revision: inner.provider_env_revision, credential_expires_at_ms: inner.credential_expires_at_ms, + dynamic_credentials: inner.dynamic_credentials, }) } @@ -704,6 +717,7 @@ pub struct ProviderEnvironmentResult { pub environment: HashMap, pub provider_env_revision: u64, pub 
credential_expires_at_ms: HashMap, + pub dynamic_credentials: HashMap, } impl CachedOpenShellClient { diff --git a/crates/openshell-sandbox/src/l7/graphql.rs b/crates/openshell-sandbox/src/l7/graphql.rs index 2ff502d1c..82c35720e 100644 --- a/crates/openshell-sandbox/src/l7/graphql.rs +++ b/crates/openshell-sandbox/src/l7/graphql.rs @@ -802,6 +802,8 @@ network_policies: cmdline_paths: Vec::new(), secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let request_info = crate::l7::L7RequestInfo { action: req.action, diff --git a/crates/openshell-sandbox/src/l7/mod.rs b/crates/openshell-sandbox/src/l7/mod.rs index 703aafae4..365bb379a 100644 --- a/crates/openshell-sandbox/src/l7/mod.rs +++ b/crates/openshell-sandbox/src/l7/mod.rs @@ -15,6 +15,7 @@ pub mod provider; pub mod relay; pub mod rest; pub mod tls; +pub(crate) mod token_grant_injection; pub(crate) mod websocket; /// Application-layer protocol for L7 inspection. diff --git a/crates/openshell-sandbox/src/l7/relay.rs b/crates/openshell-sandbox/src/l7/relay.rs index 9efa7ca9f..ff12f6229 100644 --- a/crates/openshell-sandbox/src/l7/relay.rs +++ b/crates/openshell-sandbox/src/l7/relay.rs @@ -40,6 +40,17 @@ pub struct L7EvalContext { pub(crate) secret_resolver: Option>, /// Anonymous activity counter channel. pub(crate) activity_tx: Option, + /// Dynamic credentials (token grants) keyed by endpoint-bound provider metadata. + pub(crate) dynamic_credentials: Option< + Arc< + std::sync::RwLock< + std::collections::HashMap, + >, + >, + >, + /// Dynamic token grant resolver for endpoint-bound credentials. 
+ pub(crate) token_grant_resolver: + Option>, } #[derive(Default)] @@ -769,9 +780,12 @@ where let _ = &eval_target; if allowed || config.enforcement == EnforcementMode::Audit { + let req_with_auth = + crate::l7::token_grant_injection::inject_if_needed(req, ctx).await?; + // Forward request to upstream and relay response let outcome = crate::l7::rest::relay_http_request_with_options_guarded( - &req, + &req_with_auth, client, upstream, crate::l7::rest::RelayRequestOptions { @@ -802,7 +816,7 @@ where ctx, websocket_request, &redacted_target, - &req.query_params, + &req_with_auth.query_params, Some(engine), ); options.websocket.permessage_deflate = websocket_permessage_deflate; @@ -1258,11 +1272,29 @@ where ocsf_emit!(event); } + let req_with_auth = match crate::l7::token_grant_injection::inject_if_needed(req, ctx).await + { + Ok(req) => req, + Err(e) => { + warn!( + host = %ctx.host, + port = ctx.port, + error = %e, + "Token grant failed in passthrough relay" + ); + let response = + b"HTTP/1.1 502 Bad Gateway\r\nContent-Length: 0\r\nConnection: close\r\n\r\n"; + client.write_all(response).await.into_diagnostic()?; + client.flush().await.into_diagnostic()?; + return Ok(()); + } + }; + // Forward request with credential rewriting and relay the response. // relay_http_request_with_resolver handles both directions: it sends // the request upstream and reads the response back to the client. 
let outcome = crate::l7::rest::relay_http_request_with_options_guarded( - &req, + &req_with_auth, client, upstream, crate::l7::rest::RelayRequestOptions { @@ -1309,6 +1341,128 @@ mod tests { const TEST_POLICY: &str = include_str!("../../data/sandbox-policy.rego"); + fn rest_token_grant_relay_context( + resolver_response: std::result::Result<&str, &str>, + ) -> ( + L7EndpointConfig, + TunnelPolicyEngine, + L7EvalContext, + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture, + ) { + let data = r#" +network_policies: + rest_api: + name: rest_api + endpoints: + - host: api.example.test + port: 8080 + protocol: rest + enforcement: enforce + rules: + - allow: + method: GET + path: "/v1/**" + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "api.example.test".into(), + port: 8080, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let (endpoint_config, generation) = engine + .query_endpoint_config_with_generation(&input) + .unwrap(); + let config = crate::l7::parse_l7_config(&endpoint_config.unwrap()).unwrap(); + let tunnel_engine = engine.clone_engine_for_tunnel(generation).unwrap(); + let provider_key = "api.example.test\t8080\t/v1/**\tprovider:access_token"; + let fixture = match resolver_response { + Ok(token) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::success( + provider_key, + token, + ) + } + Err(error) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::failure( + provider_key, + error, + ) + } + }; + let ctx = L7EvalContext { + host: "api.example.test".into(), + port: 8080, + policy_name: "rest_api".into(), + binary_path: "/usr/bin/curl".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: Some(fixture.dynamic_credentials()), + 
token_grant_resolver: Some(fixture.resolver()), + }; + + (config, tunnel_engine, ctx, fixture) + } + + fn passthrough_token_grant_relay_context( + resolver_response: std::result::Result<&str, &str>, + ) -> ( + PolicyGenerationGuard, + L7EvalContext, + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture, + ) { + let policy_data = "network_policies: {}\n"; + let engine = OpaEngine::from_strings(TEST_POLICY, policy_data).unwrap(); + let generation_guard = engine + .generation_guard(engine.current_generation()) + .unwrap(); + let provider_key = "api.example.test\t8080\t/v1/**\tprovider:access_token"; + let fixture = match resolver_response { + Ok(token) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::success( + provider_key, + token, + ) + } + Err(error) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::failure( + provider_key, + error, + ) + } + }; + let ctx = L7EvalContext { + host: "api.example.test".into(), + port: 8080, + policy_name: "rest_api".into(), + binary_path: "/usr/bin/curl".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: Some(fixture.dynamic_credentials()), + token_grant_resolver: Some(fixture.resolver()), + }; + + (generation_guard, ctx, fixture) + } + + fn authorization_header_count(headers: &str) -> usize { + headers + .lines() + .filter(|line| { + line.split_once(':') + .is_some_and(|(name, _)| name.eq_ignore_ascii_case("authorization")) + }) + .count() + } + #[test] fn parse_rejection_detail_adds_l7_hint_for_encoded_slash() { let detail = parse_rejection_detail( @@ -1342,6 +1496,226 @@ mod tests { ); } + #[tokio::test] + async fn l7_rest_relay_injects_token_grant_authorization_header() { + let (config, tunnel_engine, ctx, fixture) = + rest_token_grant_relay_context(Ok("grant-token")); + let (mut app, mut relay_client) = tokio::io::duplex(8192); + let (mut relay_upstream, mut upstream) = 
tokio::io::duplex(8192); + let relay = tokio::spawn(async move { + relay_with_inspection( + &config, + tunnel_engine, + &mut relay_client, + &mut relay_upstream, + &ctx, + ) + .await + }); + + app.write_all( + b"GET /v1/projects HTTP/1.1\r\nHost: api.example.test\r\nAuthorization: Bearer stale-token\r\nConnection: close\r\n\r\n", + ) + .await + .unwrap(); + + let mut upstream_request = [0u8; 1024]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + upstream.read(&mut upstream_request), + ) + .await + .expect("request should reach upstream") + .unwrap(); + let upstream_request = String::from_utf8_lossy(&upstream_request[..n]); + + assert!(upstream_request.starts_with("GET /v1/projects HTTP/1.1\r\n")); + assert!(upstream_request.contains("Authorization: Bearer grant-token\r\n")); + assert!(!upstream_request.contains("stale-token")); + assert_eq!(authorization_header_count(&upstream_request), 1); + + upstream + .write_all(b"HTTP/1.1 204 No Content\r\nContent-Length: 0\r\nConnection: close\r\n\r\n") + .await + .unwrap(); + + let mut client_response = [0u8; 512]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + app.read(&mut client_response), + ) + .await + .expect("response should reach client") + .unwrap(); + assert!(String::from_utf8_lossy(&client_response[..n]).contains("204 No Content")); + drop(app); + + tokio::time::timeout(std::time::Duration::from_secs(1), relay) + .await + .expect("relay should finish") + .unwrap() + .unwrap(); + + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + + #[tokio::test] + async fn l7_rest_relay_token_grant_failure_does_not_forward_request() { + let (config, tunnel_engine, ctx, fixture) = + rest_token_grant_relay_context(Err("oauth unavailable")); + let (mut app, mut relay_client) = tokio::io::duplex(8192); + let (mut relay_upstream, mut upstream) = tokio::io::duplex(8192); + let relay = tokio::spawn(async move { + relay_with_inspection( + &config, + 
tunnel_engine, + &mut relay_client, + &mut relay_upstream, + &ctx, + ) + .await + }); + + app.write_all( + b"GET /v1/projects HTTP/1.1\r\nHost: api.example.test\r\nConnection: close\r\n\r\n", + ) + .await + .unwrap(); + + let err = tokio::time::timeout(std::time::Duration::from_secs(1), relay) + .await + .expect("relay should fail promptly") + .unwrap() + .expect_err("token grant failure should fail the L7 relay"); + assert!(err.to_string().contains("Token grant failed")); + assert!(err.to_string().contains("oauth unavailable")); + + let mut upstream_request = [0u8; 128]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + upstream.read(&mut upstream_request), + ) + .await + .expect("upstream should close without forwarded data") + .unwrap(); + assert_eq!(n, 0, "unauthenticated request must not reach upstream"); + + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + + #[tokio::test] + async fn passthrough_relay_injects_token_grant_authorization_header() { + let (generation_guard, ctx, fixture) = + passthrough_token_grant_relay_context(Ok("grant-token")); + let (mut app, mut relay_client) = tokio::io::duplex(8192); + let (mut relay_upstream, mut upstream) = tokio::io::duplex(8192); + let relay = tokio::spawn(async move { + relay_passthrough_with_credentials( + &mut relay_client, + &mut relay_upstream, + &ctx, + &generation_guard, + ) + .await + }); + + app.write_all( + b"GET /v1/projects HTTP/1.1\r\nHost: api.example.test\r\nAuthorization: Bearer stale-token\r\nConnection: close\r\n\r\n", + ) + .await + .unwrap(); + + let mut upstream_request = [0u8; 1024]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + upstream.read(&mut upstream_request), + ) + .await + .expect("request should reach upstream") + .unwrap(); + let upstream_request = String::from_utf8_lossy(&upstream_request[..n]); + + assert!(upstream_request.starts_with("GET /v1/projects HTTP/1.1\r\n")); + 
assert!(upstream_request.contains("Authorization: Bearer grant-token\r\n")); + assert!(!upstream_request.contains("stale-token")); + assert_eq!(authorization_header_count(&upstream_request), 1); + + upstream + .write_all(b"HTTP/1.1 204 No Content\r\nContent-Length: 0\r\nConnection: close\r\n\r\n") + .await + .unwrap(); + + let mut client_response = [0u8; 512]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + app.read(&mut client_response), + ) + .await + .expect("response should reach client") + .unwrap(); + assert!(String::from_utf8_lossy(&client_response[..n]).contains("204 No Content")); + drop(app); + + tokio::time::timeout(std::time::Duration::from_secs(1), relay) + .await + .expect("relay should finish") + .unwrap() + .unwrap(); + + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + + #[tokio::test] + async fn passthrough_relay_token_grant_failure_returns_bad_gateway_without_forwarding() { + let (generation_guard, ctx, fixture) = + passthrough_token_grant_relay_context(Err("oauth unavailable")); + let (mut app, mut relay_client) = tokio::io::duplex(8192); + let (mut relay_upstream, mut upstream) = tokio::io::duplex(8192); + let relay = tokio::spawn(async move { + relay_passthrough_with_credentials( + &mut relay_client, + &mut relay_upstream, + &ctx, + &generation_guard, + ) + .await + }); + + app.write_all( + b"GET /v1/projects HTTP/1.1\r\nHost: api.example.test\r\nConnection: close\r\n\r\n", + ) + .await + .unwrap(); + + tokio::time::timeout(std::time::Duration::from_secs(1), relay) + .await + .expect("relay should finish") + .unwrap() + .unwrap(); + + let mut client_response = [0u8; 512]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + app.read(&mut client_response), + ) + .await + .expect("bad gateway response should reach client") + .unwrap(); + assert!(String::from_utf8_lossy(&client_response[..n]).contains("502 Bad Gateway")); + + let mut upstream_request = [0u8; 128]; 
+ let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + upstream.read(&mut upstream_request), + ) + .await + .expect("upstream should close without forwarded data") + .unwrap(); + assert_eq!(n, 0, "unauthenticated request must not reach upstream"); + + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + #[test] fn websocket_text_policy_requires_explicit_message_rule() { let data = r#" @@ -1383,6 +1757,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let request = L7RequestInfo { action: "WEBSOCKET_TEXT".into(), @@ -1439,6 +1815,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut app, mut relay_client) = tokio::io::duplex(8192); @@ -1544,6 +1922,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: resolver.map(Arc::new), activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut app, mut relay_client) = tokio::io::duplex(8192); @@ -1662,6 +2042,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: resolver.map(Arc::new), activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut app, mut relay_client) = tokio::io::duplex(8192); @@ -1833,6 +2215,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut app, mut relay_client) = tokio::io::duplex(8192); @@ -1921,6 +2305,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut app, mut relay_client) = tokio::io::duplex(8192); diff --git a/crates/openshell-sandbox/src/l7/token_grant_injection.rs b/crates/openshell-sandbox/src/l7/token_grant_injection.rs new file mode 100644 index 
000000000..33ea2ae38 --- /dev/null +++ b/crates/openshell-sandbox/src/l7/token_grant_injection.rs @@ -0,0 +1,579 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Endpoint-bound dynamic token grant injection for HTTP relay paths. + +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; + +use miette::{Result, miette}; +use openshell_core::proto::ProviderCredentialTokenGrant; +use openshell_ocsf::{ + ActionId, ActivityId, DispositionId, Endpoint, NetworkActivityBuilder, SeverityId, StatusId, + ocsf_emit, +}; +use tracing::warn; + +use crate::l7::provider::L7Request; +use crate::l7::relay::L7EvalContext; + +pub struct TokenGrantRequest<'a> { + pub provider_key: &'a str, + pub token_endpoint: &'a str, + pub jwt_svid_audience: &'a str, + pub audience: &'a str, + pub scopes: &'a [String], + pub cache_ttl_seconds: i64, +} + +pub trait TokenGrantResolver: Send + Sync { + fn obtain<'a>( + &'a self, + request: TokenGrantRequest<'a>, + ) -> Pin> + Send + 'a>>; +} + +#[derive(Default)] +pub struct SpiffeTokenGrantResolver; + +impl TokenGrantResolver for SpiffeTokenGrantResolver { + fn obtain<'a>( + &'a self, + request: TokenGrantRequest<'a>, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + crate::token_grant::obtain_provider_token( + request.provider_key, + request.token_endpoint, + request.jwt_svid_audience, + request.audience, + request.scopes, + request.cache_ttl_seconds, + ) + .await + }) + } +} + +pub fn default_resolver() -> Arc { + Arc::new(SpiffeTokenGrantResolver) +} + +/// Checks for endpoint-bound token grant credentials and injects an +/// Authorization header before forwarding the request upstream. 
+pub async fn inject_if_needed(req: L7Request, ctx: &L7EvalContext) -> Result { + let request_path = req.target.split('?').next().unwrap_or(req.target.as_str()); + let token_grant_credential = ctx.dynamic_credentials.as_ref().and_then(|dyn_creds| { + dyn_creds.read().map_or(None, |creds_guard| { + creds_guard + .iter() + .filter_map(|(key, cred)| { + let score = + dynamic_credential_key_match_score(key, &ctx.host, ctx.port, request_path)?; + cred.token_grant + .is_some() + .then(|| (score, key.clone(), cred.clone())) + }) + .max_by_key(|(score, key, _)| (*score, key.clone())) + .map(|(_, key, cred)| (key, cred)) + }) + }); + + if let Some((provider_key, cred)) = token_grant_credential + && let Some(ref token_grant) = cred.token_grant + { + let resolver = ctx + .token_grant_resolver + .as_ref() + .ok_or_else(|| miette!("token grant resolver unavailable"))?; + let request = token_grant_request(&provider_key, token_grant); + + match resolver.obtain(request).await { + Ok(access_token) => { + ocsf_emit!( + NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .action(ActionId::Allowed) + .disposition(DispositionId::Allowed) + .severity(SeverityId::Informational) + .dst_endpoint(Endpoint::from_domain(&ctx.host, ctx.port)) + .message(format!( + "Token grant successful for {} to {}:{}", + provider_key, ctx.host, ctx.port + )) + .build() + ); + let modified_raw_header = + inject_authorization_header(&req.raw_header, &access_token)?; + return Ok(L7Request { + action: req.action, + target: req.target, + query_params: req.query_params, + raw_header: modified_raw_header, + body_length: req.body_length, + }); + } + Err(e) => { + warn!( + host = %ctx.host, + port = ctx.port, + provider = %provider_key, + error = %e, + "Token grant failed" + ); + ocsf_emit!( + NetworkActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::High) + 
.status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&ctx.host, ctx.port)) + .message(format!( + "Token grant failed for {} to {}:{}: {}", + provider_key, ctx.host, ctx.port, e + )) + .build() + ); + return Err(miette!("Token grant failed: {}", e)); + } + } + } + + Ok(req) +} + +fn token_grant_request<'a>( + provider_key: &'a str, + token_grant: &'a ProviderCredentialTokenGrant, +) -> TokenGrantRequest<'a> { + TokenGrantRequest { + provider_key, + token_endpoint: &token_grant.token_endpoint, + jwt_svid_audience: &token_grant.jwt_svid_audience, + audience: &token_grant.audience, + scopes: &token_grant.scopes, + cache_ttl_seconds: token_grant.cache_ttl_seconds, + } +} + +#[cfg(test)] +fn dynamic_credential_key_matches(key: &str, host: &str, port: u16, request_path: &str) -> bool { + dynamic_credential_key_match_score(key, host, port, request_path).is_some() +} + +fn dynamic_credential_key_match_score( + key: &str, + host: &str, + port: u16, + request_path: &str, +) -> Option { + let mut parts = key.splitn(4, '\t'); + let endpoint_host = parts.next()?; + let endpoint_port = parts.next()?; + let endpoint_path = parts.next()?; + let _provider_key = parts.next()?; + + if endpoint_port.parse::().ok() != Some(port) { + return None; + } + + let host_lc = host.to_ascii_lowercase(); + let endpoint_host_lc = endpoint_host.to_ascii_lowercase(); + if !host_pattern_matches(&endpoint_host_lc, &host_lc) + || !crate::l7::endpoint_path_matches(endpoint_path, request_path) + { + return None; + } + + Some(host_pattern_specificity(&endpoint_host_lc) + endpoint_path_specificity(endpoint_path)) +} + +fn host_pattern_matches(pattern: &str, host: &str) -> bool { + if pattern == host { + return true; + } + if !pattern.contains('*') { + return false; + } + + let pattern_labels: Vec<&str> = pattern.split('.').collect(); + let host_labels: Vec<&str> = host.split('.').collect(); + host_pattern_labels_match(&pattern_labels, &host_labels) +} + +fn host_pattern_labels_match(pattern: 
&[&str], host: &[&str]) -> bool { + match pattern.split_first() { + None => host.is_empty(), + Some((label, rest)) if *label == "**" => { + host_pattern_labels_match(rest, host) + || (!host.is_empty() && host_pattern_labels_match(pattern, &host[1..])) + } + Some((label, rest)) if *label == "*" => { + !host.is_empty() && host_pattern_labels_match(rest, &host[1..]) + } + Some((literal, rest)) => { + host.first().is_some_and(|label| label == literal) + && host_pattern_labels_match(rest, &host[1..]) + } + } +} + +fn host_pattern_specificity(pattern: &str) -> u32 { + let wildcard_penalty = count_as_u32(pattern.matches('*').count()); + let label_count = count_as_u32(pattern.split('.').filter(|label| !label.is_empty()).count()); + let literal_chars = count_as_u32(pattern.chars().filter(|ch| *ch != '*').count()); + 100_000u32 + .saturating_sub(wildcard_penalty.saturating_mul(10_000)) + .saturating_add(label_count.saturating_mul(100)) + .saturating_add(literal_chars) +} + +fn endpoint_path_specificity(path: &str) -> u32 { + if path.is_empty() || path == "**" { + return 0; + } + 1_000_000u32.saturating_add(count_as_u32(path.chars().filter(|ch| *ch != '*').count())) +} + +fn count_as_u32(count: usize) -> u32 { + u32::try_from(count).unwrap_or(u32::MAX) +} + +fn inject_authorization_header(raw_header: &[u8], access_token: &str) -> Result> { + let header_end = raw_header + .windows(4) + .position(|w| w == b"\r\n\r\n") + .ok_or_else(|| miette!("HTTP headers missing final CRLF CRLF"))?; + + let header_block = std::str::from_utf8(&raw_header[..header_end]) + .map_err(|_| miette!("HTTP headers contain invalid UTF-8"))?; + let mut lines = header_block.split("\r\n"); + let request_line = lines + .next() + .ok_or_else(|| miette!("HTTP headers missing request line"))?; + + let auth_header = format!("Authorization: Bearer {access_token}"); + let mut new_raw_header = Vec::with_capacity(raw_header.len() + auth_header.len() + 2); + new_raw_header.extend_from_slice(request_line.as_bytes()); 
+ new_raw_header.extend_from_slice(b"\r\n"); + + for line in lines { + if line.is_empty() { + break; + } + if line + .split_once(':') + .is_some_and(|(name, _)| name.eq_ignore_ascii_case("authorization")) + { + continue; + } + new_raw_header.extend_from_slice(line.as_bytes()); + new_raw_header.extend_from_slice(b"\r\n"); + } + + new_raw_header.extend_from_slice(auth_header.as_bytes()); + new_raw_header.extend_from_slice(&raw_header[header_end..]); + + Ok(new_raw_header) +} + +#[cfg(test)] +pub mod test_support { + use super::*; + use openshell_core::proto::{ProviderCredentialTokenGrant, ProviderProfileCredential}; + use std::collections::HashMap; + use std::sync::{Arc, Mutex}; + + struct FakeTokenGrantResolver { + requests: Arc>>, + response: std::result::Result, + } + + #[derive(Debug, Clone, PartialEq, Eq)] + struct OwnedTokenGrantRequest { + provider_key: String, + token_endpoint: String, + jwt_svid_audience: String, + audience: String, + scopes: Vec, + cache_ttl_seconds: i64, + } + + pub struct TokenGrantTestFixture { + dynamic_credentials: Arc>>, + resolver: Arc, + requests: Arc>>, + } + + impl TokenGrantTestFixture { + pub fn success(key: &str, token: &str) -> Self { + Self::new(key, Ok(token)) + } + + pub fn failure(key: &str, error: &str) -> Self { + Self::new(key, Err(error)) + } + + fn new(key: &str, response: std::result::Result<&str, &str>) -> Self { + let requests = Arc::new(Mutex::new(Vec::new())); + let resolver = Arc::new(FakeTokenGrantResolver { + requests: requests.clone(), + response: response.map(str::to_string).map_err(str::to_string), + }); + + let mut dynamic_credentials = HashMap::new(); + dynamic_credentials.insert( + key.to_string(), + ProviderProfileCredential { + name: "access_token".to_string(), + auth_style: "bearer".to_string(), + header_name: "Authorization".to_string(), + token_grant: Some(token_grant()), + ..Default::default() + }, + ); + + Self { + dynamic_credentials: Arc::new(std::sync::RwLock::new(dynamic_credentials)), + 
resolver, + requests, + } + } + + pub fn dynamic_credentials( + &self, + ) -> Arc>> { + self.dynamic_credentials.clone() + } + + pub fn resolver(&self) -> Arc { + self.resolver.clone() + } + + pub fn assert_one_request(&self, expected_provider_key: &str) { + let requests = self + .requests + .lock() + .expect("fake token grant requests lock poisoned"); + assert_eq!(requests.len(), 1); + + let request = &requests[0]; + assert_eq!(request.provider_key, expected_provider_key); + assert_eq!(request.token_endpoint, "https://auth.example.com/token"); + assert_eq!(request.jwt_svid_audience, "https://auth.example.com"); + assert_eq!(request.audience, "api://example"); + assert_eq!(request.scopes, ["read"]); + assert_eq!(request.cache_ttl_seconds, 300); + } + } + + fn token_grant() -> ProviderCredentialTokenGrant { + ProviderCredentialTokenGrant { + token_endpoint: "https://auth.example.com/token".to_string(), + audience: "api://example".to_string(), + jwt_svid_audience: "https://auth.example.com".to_string(), + scopes: vec!["read".to_string()], + cache_ttl_seconds: 300, + audience_overrides: Vec::new(), + } + } + + impl TokenGrantResolver for FakeTokenGrantResolver { + fn obtain<'a>( + &'a self, + request: TokenGrantRequest<'a>, + ) -> Pin> + Send + 'a>> { + let owned = OwnedTokenGrantRequest { + provider_key: request.provider_key.to_string(), + token_endpoint: request.token_endpoint.to_string(), + jwt_svid_audience: request.jwt_svid_audience.to_string(), + audience: request.audience.to_string(), + scopes: request.scopes.to_vec(), + cache_ttl_seconds: request.cache_ttl_seconds, + }; + Box::pin(async move { + self.requests + .lock() + .expect("fake token grant requests lock poisoned") + .push(owned); + self.response.clone().map_err(|err| miette!("{err}")) + }) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::l7::provider::{BodyLength, L7Request}; + use crate::l7::token_grant_injection::test_support::TokenGrantTestFixture; + + #[test] + fn 
dynamic_credential_key_matches_endpoint_host_port_and_path() { + let key = "api.example.com\t443\t/repos/**\tgithub:access_token"; + + assert!(dynamic_credential_key_matches( + key, + "api.example.com", + 443, + "/repos/owner/repo" + )); + assert!(!dynamic_credential_key_matches( + key, + "uploads.example.com", + 443, + "/repos/owner/repo" + )); + assert!(!dynamic_credential_key_matches( + key, + "api.example.com", + 80, + "/repos/owner/repo" + )); + assert!(!dynamic_credential_key_matches( + key, + "api.example.com", + 443, + "/orgs/owner" + )); + } + + #[test] + fn dynamic_credential_key_matches_wildcard_hosts_and_empty_path() { + let key = "*.example.com\t443\t\tprovider:access_token"; + + assert!(dynamic_credential_key_matches( + key, + "api.example.com", + 443, + "/anything" + )); + assert!(!dynamic_credential_key_matches( + key, + "api.other.com", + 443, + "/anything" + )); + assert!(!dynamic_credential_key_matches( + key, + "nested.api.example.com", + 443, + "/anything" + )); + } + + #[test] + fn dynamic_credential_key_matches_double_wildcard_hosts() { + let key = "**.example.com\t443\t\tprovider:access_token"; + + assert!(dynamic_credential_key_matches( + key, + "api.example.com", + 443, + "/anything" + )); + assert!(dynamic_credential_key_matches( + key, + "nested.api.example.com", + 443, + "/anything" + )); + } + + #[test] + fn dynamic_credential_match_score_prefers_path_specific_key() { + let default_key = "alpha.default.svc.cluster.local\t80\t\tprovider:access_token"; + let path_key = "alpha.default.svc.cluster.local\t80\t/admin/**\tprovider:access_token"; + let request_path = "/admin/users"; + + let default_score = dynamic_credential_key_match_score( + default_key, + "alpha.default.svc.cluster.local", + 80, + request_path, + ) + .expect("default key should match"); + let path_score = dynamic_credential_key_match_score( + path_key, + "alpha.default.svc.cluster.local", + 80, + request_path, + ) + .expect("path key should match"); + + assert!(path_score > 
default_score); + } + + #[test] + fn inject_authorization_header_replaces_existing_authorization() { + let raw = b"GET /v1 HTTP/1.1\r\nHost: api.example.com\r\nauthorization: Bearer stale-token\r\nAccept: application/json\r\n\r\n"; + + let rewritten = + inject_authorization_header(raw, "grant-token").expect("header should rewrite"); + let rewritten = String::from_utf8(rewritten).expect("rewritten header should be UTF-8"); + + assert!(rewritten.contains("Authorization: Bearer grant-token\r\n")); + assert!(!rewritten.contains("stale-token")); + assert_eq!( + rewritten + .lines() + .filter(|line| line + .split_once(':') + .is_some_and(|(name, _)| name.eq_ignore_ascii_case("authorization"))) + .count(), + 1 + ); + } + + #[test] + fn inject_authorization_header_preserves_header_terminator_before_body() { + let raw = b"POST /v1 HTTP/1.1\r\nHost: api.example.com\r\nContent-Length: 2\r\n\r\nOK"; + + let rewritten = + inject_authorization_header(raw, "grant-token").expect("header should rewrite"); + + assert_eq!( + rewritten, + b"POST /v1 HTTP/1.1\r\nHost: api.example.com\r\nContent-Length: 2\r\nAuthorization: Bearer grant-token\r\n\r\nOK" + ); + } + + #[tokio::test] + async fn inject_if_needed_uses_configured_resolver() { + let fixture = TokenGrantTestFixture::success( + "api.example.com\t443\t/v1/**\tprovider:access_token", + "grant-token", + ); + + let ctx = L7EvalContext { + host: "api.example.com".into(), + port: 443, + policy_name: "api".into(), + binary_path: "/usr/bin/curl".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: Some(fixture.dynamic_credentials()), + token_grant_resolver: Some(fixture.resolver()), + }; + let req = L7Request { + action: "GET".to_string(), + target: "/v1/projects".to_string(), + query_params: std::collections::HashMap::new(), + raw_header: b"GET /v1/projects HTTP/1.1\r\nHost: api.example.com\r\n\r\n".to_vec(), + body_length: BodyLength::None, + }; + + let rewritten = 
inject_if_needed(req, &ctx) + .await + .expect("fake token grant should inject"); + let rewritten = + String::from_utf8(rewritten.raw_header).expect("rewritten request should be UTF-8"); + + assert!(rewritten.contains("Authorization: Bearer grant-token\r\n")); + fixture.assert_one_request("api.example.com\t443\t/v1/**\tprovider:access_token"); + } +} diff --git a/crates/openshell-sandbox/src/l7/websocket.rs b/crates/openshell-sandbox/src/l7/websocket.rs index 89a6e6c51..79c820e26 100644 --- a/crates/openshell-sandbox/src/l7/websocket.rs +++ b/crates/openshell-sandbox/src/l7/websocket.rs @@ -1271,6 +1271,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut client_write, mut relay_read) = tokio::io::duplex(MAX_TEXT_MESSAGE_BYTES + 1024); let (mut relay_write, mut upstream_read) = tokio::io::duplex(MAX_TEXT_MESSAGE_BYTES + 1024); diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index e9d8921b6..276298ffc 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -25,8 +25,10 @@ pub mod proxy; mod sandbox; mod secrets; mod skills; +mod spiffe_endpoint; mod ssh; mod supervisor_session; +mod token_grant; use miette::{IntoDiagnostic, Result}; #[cfg(target_os = "linux")] @@ -370,57 +372,65 @@ pub async fn run_sandbox( // Fetch provider environment variables from the server. // This is done after loading the policy so the sandbox can still start // even if provider env fetch fails (graceful degradation). 
- let (provider_env_revision, provider_env, provider_credential_expires_at_ms) = - if let (Some(id), Some(endpoint)) = (&sandbox_id, &openshell_endpoint) { - match grpc_client::fetch_provider_environment(endpoint, id).await { - Ok(result) => { - ocsf_emit!( - ConfigStateChangeBuilder::new(ocsf_ctx()) - .severity(SeverityId::Informational) - .status(StatusId::Success) - .state(StateId::Enabled, "loaded") - .message(format!( - "Fetched provider environment [env_count:{}]", - result.environment.len() - )) - .build() - ); - ( - result.provider_env_revision, - result.environment, - result.credential_expires_at_ms, - ) - } - Err(e) => { - ocsf_emit!( - ConfigStateChangeBuilder::new(ocsf_ctx()) - .severity(SeverityId::Medium) - .status(StatusId::Failure) - .state(StateId::Other, "degraded") - .message(format!( - "Failed to fetch provider environment, continuing without: {e}" - )) - .build() - ); - ( - 0, - std::collections::HashMap::new(), - std::collections::HashMap::new(), - ) - } + let ( + provider_env_revision, + provider_env, + provider_credential_expires_at_ms, + dynamic_credentials, + ) = if let (Some(id), Some(endpoint)) = (&sandbox_id, &openshell_endpoint) { + match grpc_client::fetch_provider_environment(endpoint, id).await { + Ok(result) => { + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "loaded") + .message(format!( + "Fetched provider environment [env_count:{}]", + result.environment.len() + )) + .build() + ); + ( + result.provider_env_revision, + result.environment, + result.credential_expires_at_ms, + result.dynamic_credentials, + ) } - } else { - ( - 0, - std::collections::HashMap::new(), - std::collections::HashMap::new(), - ) - }; + Err(e) => { + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Other, "degraded") + .message(format!( + "Failed to fetch provider 
environment, continuing without: {e}" + )) + .build() + ); + ( + 0, + std::collections::HashMap::new(), + std::collections::HashMap::new(), + std::collections::HashMap::new(), + ) + } + } + } else { + ( + 0, + std::collections::HashMap::new(), + std::collections::HashMap::new(), + std::collections::HashMap::new(), + ) + }; let provider_credentials = provider_credentials::ProviderCredentialState::from_environment( provider_env_revision, provider_env, provider_credential_expires_at_ms, + dynamic_credentials, ); let provider_env = provider_credentials.snapshot().child_env.clone(); @@ -591,6 +601,13 @@ pub async fn run_sandbox( #[allow(clippy::no_effect_underscore_binding)] let _netns: Option<()> = None; + // Prepare the child-only mount namespace before the supervisor seccomp + // prelude blocks mount operations. Children enter this namespace with + // `setns` in pre_exec so supervisor identity sockets stay hidden from + // untrusted code while remaining available to the supervisor for refresh. + #[cfg(target_os = "linux")] + process::prepare_supervisor_identity_mount_namespace_from_env()?; + // Install the supervisor seccomp prelude after privileged startup helpers // (network namespace setup, nftables probes) complete, but before the SSH // listener and workload process are exposed. 
@@ -2623,6 +2640,7 @@ async fn run_policy_poll_loop(ctx: PolicyPollLoopContext) -> Result<()> { env_result.provider_env_revision, env_result.environment, env_result.credential_expires_at_ms, + env_result.dynamic_credentials, ); current_provider_env_revision = env_result.provider_env_revision; ocsf_emit!( diff --git a/crates/openshell-sandbox/src/process.rs b/crates/openshell-sandbox/src/process.rs index 76786a84d..43285d38e 100644 --- a/crates/openshell-sandbox/src/process.rs +++ b/crates/openshell-sandbox/src/process.rs @@ -16,14 +16,40 @@ use nix::unistd::{Group, Pid, User}; use std::collections::HashMap; use std::ffi::CString; #[cfg(target_os = "linux")] -use std::os::unix::io::RawFd; +use std::os::fd::{AsRawFd, OwnedFd, RawFd}; +#[cfg(target_os = "linux")] +use std::os::unix::ffi::OsStrExt; +#[cfg(any(test, target_os = "linux"))] +use std::path::Path; use std::path::PathBuf; use std::process::Stdio; +#[cfg(target_os = "linux")] +use std::sync::OnceLock; use tokio::process::{Child, Command}; use tracing::debug; +const SUPERVISOR_ONLY_ENV_VARS: &[&str] = &[ + openshell_core::sandbox_env::SANDBOX_TOKEN, + openshell_core::sandbox_env::SANDBOX_TOKEN_FILE, + openshell_core::sandbox_env::K8S_SA_TOKEN_FILE, + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET, +]; + +pub fn is_supervisor_only_env_var(key: &str) -> bool { + SUPERVISOR_ONLY_ENV_VARS.contains(&key) +} + +fn strip_supervisor_only_env(cmd: &mut Command) { + for key in SUPERVISOR_ONLY_ENV_VARS { + cmd.env_remove(key); + } +} + fn inject_provider_env(cmd: &mut Command, provider_env: &HashMap) { for (key, value) in provider_env { + if is_supervisor_only_env_var(key) { + continue; + } cmd.env(key, value); } } @@ -129,6 +155,222 @@ fn parse_pids_max(contents: &str) -> RuntimePidLimitStatus { } } +// Pins the pre-seccomp child mount namespace where supervisor identity sockets +// are shadowed. Children enter it with setns before dropping privileges. 
+#[cfg(target_os = "linux")] +static SUPERVISOR_IDENTITY_MOUNT_NS: OnceLock> = + OnceLock::new(); + +#[cfg(target_os = "linux")] +pub struct SupervisorIdentityMountNamespace { + fd: OwnedFd, +} + +#[cfg(target_os = "linux")] +type SupervisorIdentityNsRef = &'static SupervisorIdentityMountNamespace; + +#[cfg(target_os = "linux")] +impl SupervisorIdentityMountNamespace { + fn from_socket_path(socket_path: &str) -> Result> { + let Some(target) = supervisor_identity_mount_target(socket_path)? else { + return Ok(None); + }; + Ok(Some(Self { + fd: create_supervisor_identity_mount_namespace(&target)?, + })) + } + + pub fn enter_for_child(&self) -> std::io::Result<()> { + set_mount_namespace(self.fd.as_raw_fd()) + } +} + +#[cfg(target_os = "linux")] +pub fn prepare_supervisor_identity_mount_namespace_from_env() -> Result<()> { + if SUPERVISOR_IDENTITY_MOUNT_NS.get().is_some() { + return Ok(()); + } + + let Some((_env_name, socket_path)) = supervisor_identity_socket_path_from_env() else { + let _ = SUPERVISOR_IDENTITY_MOUNT_NS.set(None); + return Ok(()); + }; + let namespace = SupervisorIdentityMountNamespace::from_socket_path(&socket_path)?; + let _ = SUPERVISOR_IDENTITY_MOUNT_NS.set(namespace); + Ok(()) +} + +#[cfg(target_os = "linux")] +pub fn supervisor_identity_mount_from_env() -> Result> { + let Some(namespace) = SUPERVISOR_IDENTITY_MOUNT_NS.get() else { + if supervisor_identity_socket_path_from_env().is_some() { + return Err(miette::miette!( + "supervisor identity mount namespace was not prepared before startup hardening" + )); + } + return Ok(None); + }; + Ok(namespace.as_ref()) +} + +#[cfg(target_os = "linux")] +fn supervisor_identity_socket_path_from_env() -> Option<(&'static str, String)> { + std::env::var(openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET) + .ok() + .filter(|socket_path| !socket_path.trim().is_empty()) + .map(|socket_path| { + ( + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET, + socket_path, + ) + }) +} + 
+#[cfg(any(test, target_os = "linux"))] +fn supervisor_identity_mount_target(socket_path: &str) -> Result> { + let trimmed = socket_path.trim(); + if trimmed.is_empty() { + return Ok(None); + } + if trimmed.starts_with("tcp:") { + return Err(miette::miette!( + "{} must be a UNIX socket path so sandbox child processes can hide it", + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + )); + } + let path = trimmed.strip_prefix("unix:").unwrap_or(trimmed); + let path = Path::new(path); + if !path.is_absolute() { + return Err(miette::miette!( + "{} must be an absolute UNIX socket path", + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + )); + } + let Some(parent) = path.parent() else { + return Err(miette::miette!( + "{} has no parent directory", + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + )); + }; + if parent == Path::new("/") { + return Err(miette::miette!( + "{} must live below a dedicated directory, not directly under /", + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + )); + } + Ok(Some(parent.to_path_buf())) +} + +#[cfg(target_os = "linux")] +fn cstring_path(path: &Path) -> Result { + CString::new(path.as_os_str().as_bytes()) + .map_err(|_| miette::miette!("path contains an interior NUL byte: {}", path.display())) +} + +#[cfg(target_os = "linux")] +fn create_supervisor_identity_mount_namespace(target: &Path) -> Result { + let original_ns = open_current_mount_namespace() + .map_err(|err| miette::miette!("failed to open original mount namespace: {err}"))?; + + private_mount_namespace() + .map_err(|err| miette::miette!("failed to create supervisor identity namespace: {err}"))?; + + let target = cstring_path(target)?; + let result = (|| -> Result { + mount_empty_tmpfs(&target).map_err(|err| { + miette::miette!("failed to hide supervisor identity mount from child namespace: {err}") + })?; + open_current_mount_namespace() + .map_err(|err| miette::miette!("failed to open sanitized mount 
namespace: {err}")) + })(); + + set_mount_namespace(original_ns.as_raw_fd()).map_err(|restore_err| { + let result_msg = result.as_ref().err().map_or_else( + || "sanitized namespace was created".to_string(), + ToString::to_string, + ); + miette::miette!( + "failed to restore original mount namespace after supervisor identity isolation setup: \ + {restore_err}; setup result: {result_msg}" + ) + })?; + + result +} + +#[cfg(target_os = "linux")] +fn open_current_mount_namespace() -> std::io::Result { + let file = std::fs::File::open("/proc/self/ns/mnt")?; + Ok(file.into()) +} + +#[cfg(target_os = "linux")] +fn private_mount_namespace() -> std::io::Result<()> { + #[allow(unsafe_code)] + let rc = unsafe { libc::unshare(libc::CLONE_NEWNS) }; + if rc != 0 { + return Err(std::io::Error::other(format!( + "failed to create private mount namespace: {}", + std::io::Error::last_os_error() + ))); + } + + #[allow(unsafe_code)] + let rc = unsafe { + let flags: libc::c_ulong = libc::MS_REC | libc::MS_PRIVATE; + libc::mount( + std::ptr::null(), + c"/".as_ptr(), + std::ptr::null(), + flags, + std::ptr::null(), + ) + }; + if rc != 0 { + return Err(std::io::Error::other(format!( + "failed to mark mount namespace private: {}", + std::io::Error::last_os_error() + ))); + } + Ok(()) +} + +#[cfg(target_os = "linux")] +fn set_mount_namespace(fd: RawFd) -> std::io::Result<()> { + #[allow(unsafe_code)] + let rc = unsafe { libc::setns(fd, libc::CLONE_NEWNS) }; + if rc != 0 { + return Err(std::io::Error::other(format!( + "failed to enter mount namespace: {}", + std::io::Error::last_os_error() + ))); + } + Ok(()) +} + +#[cfg(target_os = "linux")] +fn mount_empty_tmpfs(target: &CString) -> std::io::Result<()> { + #[allow(unsafe_code)] + let rc = unsafe { + let flags: libc::c_ulong = + libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY; + libc::mount( + c"tmpfs".as_ptr(), + target.as_ptr(), + c"tmpfs".as_ptr(), + flags, + c"mode=0555,size=4k".as_ptr().cast(), + ) + }; + if rc != 0 
{ + return Err(std::io::Error::other(format!( + "failed to hide supervisor identity mount from child process: {}", + std::io::Error::last_os_error() + ))); + } + Ok(()) +} + /// Handle to a running process. pub struct ProcessHandle { child: Child, @@ -211,14 +453,11 @@ impl ProcessHandle { .kill_on_drop(true) .env(openshell_core::sandbox_env::SANDBOX, "1"); - // Strip supervisor-only credentials from the entrypoint's inherited - // environment. The entrypoint drops to the sandbox user before - // `exec`; without this strip, anything running as the sandbox user - // (e.g. an SSH-spawned shell) could read /proc//environ - // and recover the gateway-minted JWT. Issue #1354. - cmd.env_remove(openshell_core::sandbox_env::SANDBOX_TOKEN) - .env_remove(openshell_core::sandbox_env::SANDBOX_TOKEN_FILE) - .env_remove(openshell_core::sandbox_env::K8S_SA_TOKEN_FILE); + // Strip supervisor-only identity material from the entrypoint's + // inherited environment. The entrypoint drops to the sandbox user + // before `exec`; without this strip, sandbox code could recover + // supervisor credentials from its inherited environment. + strip_supervisor_only_env(&mut cmd); inject_provider_env(&mut cmd, provider_env); @@ -269,6 +508,10 @@ impl ProcessHandle { #[cfg(target_os = "linux")] let prepared_sandbox = sandbox::linux::prepare(policy, workdir) .map_err(|err| miette::miette!("Failed to prepare sandbox: {err}"))?; + #[cfg(target_os = "linux")] + let supervisor_identity_mount = supervisor_identity_mount_from_env().map_err(|err| { + miette::miette!("Failed to prepare supervisor identity isolation: {err}") + })?; // Set up process group for signal handling (non-interactive mode only). // In interactive mode, we inherit the parent's process group to maintain @@ -297,6 +540,11 @@ impl ProcessHandle { } } + #[cfg(target_os = "linux")] + if let Some(mount) = supervisor_identity_mount { + mount.enter_for_child()?; + } + // Drop privileges. 
initgroups/setgid/setuid need access to // /etc/group and /etc/passwd which would be blocked if // Landlock were already enforced. @@ -346,14 +594,9 @@ impl ProcessHandle { .kill_on_drop(true) .env(openshell_core::sandbox_env::SANDBOX, "1"); - // Strip supervisor-only credentials from the entrypoint's inherited - // environment. The entrypoint drops to the sandbox user before - // `exec`; without this strip, anything running as the sandbox user - // (e.g. an SSH-spawned shell) could read /proc//environ - // and recover the gateway-minted JWT. Issue #1354. - cmd.env_remove(openshell_core::sandbox_env::SANDBOX_TOKEN) - .env_remove(openshell_core::sandbox_env::SANDBOX_TOKEN_FILE) - .env_remove(openshell_core::sandbox_env::K8S_SA_TOKEN_FILE); + // Strip supervisor-only identity material from the entrypoint's + // inherited environment. + strip_supervisor_only_env(&mut cmd); inject_provider_env(&mut cmd, provider_env); @@ -935,4 +1178,95 @@ mod tests { let stdout = String::from_utf8(output.stdout).expect("utf8"); assert!(stdout.contains("ANTHROPIC_API_KEY=openshell:resolve:env:ANTHROPIC_API_KEY")); } + + #[tokio::test] + async fn inject_provider_env_skips_supervisor_identity_material() { + let mut cmd = Command::new("/usr/bin/env"); + cmd.env_clear() + .stdin(StdStdio::null()) + .stdout(StdStdio::piped()) + .stderr(StdStdio::null()); + + let provider_env = HashMap::from([ + ( + "ANTHROPIC_API_KEY".to_string(), + "openshell:resolve:env:ANTHROPIC_API_KEY".to_string(), + ), + ( + openshell_core::sandbox_env::SANDBOX_TOKEN.to_string(), + "provider-token".to_string(), + ), + ( + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET.to_string(), + "/spiffe-workload-api/spire-agent.sock".to_string(), + ), + ]); + + inject_provider_env(&mut cmd, &provider_env); + + let output = cmd.output().await.expect("spawn env"); + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).expect("utf8"); + 
assert!(stdout.contains("ANTHROPIC_API_KEY=openshell:resolve:env:ANTHROPIC_API_KEY")); + assert!(!stdout.contains(openshell_core::sandbox_env::SANDBOX_TOKEN)); + assert!(!stdout.contains(openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET)); + } + + #[tokio::test] + async fn strip_supervisor_only_env_removes_identity_material() { + let mut cmd = Command::new("/usr/bin/env"); + cmd.stdin(StdStdio::null()) + .stdout(StdStdio::piped()) + .stderr(StdStdio::null()) + .env("OPENSHELL_ENDPOINT", "https://gateway.example.test"); + + for key in SUPERVISOR_ONLY_ENV_VARS { + cmd.env(key, format!("{key}-secret")); + } + + strip_supervisor_only_env(&mut cmd); + + let output = cmd.output().await.expect("spawn env"); + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).expect("utf8"); + + for key in SUPERVISOR_ONLY_ENV_VARS { + assert!( + !stdout + .lines() + .any(|line| line.starts_with(&format!("{key}="))), + "{key} must not be inherited by sandbox child processes" + ); + } + assert!(stdout.contains("OPENSHELL_ENDPOINT=https://gateway.example.test")); + } + + #[test] + fn supervisor_identity_mount_target_uses_socket_parent() { + assert_eq!( + supervisor_identity_mount_target("/spiffe-workload-api/spire-agent.sock") + .expect("plain path should parse"), + Some(PathBuf::from("/spiffe-workload-api")) + ); + assert_eq!( + supervisor_identity_mount_target("unix:/spiffe-workload-api/spire-agent.sock") + .expect("unix path should parse"), + Some(PathBuf::from("/spiffe-workload-api")) + ); + } + + #[test] + fn supervisor_identity_mount_target_ignores_empty_socket_path() { + assert_eq!( + supervisor_identity_mount_target(" ").expect("empty path should be ignored"), + None + ); + } + + #[test] + fn supervisor_identity_mount_target_rejects_unhideable_endpoints() { + assert!(supervisor_identity_mount_target("tcp:127.0.0.1:8081").is_err()); + assert!(supervisor_identity_mount_target("spiffe-workload-api/spire-agent.sock").is_err()); + 
assert!(supervisor_identity_mount_target("/spire-agent.sock").is_err()); + } } diff --git a/crates/openshell-sandbox/src/provider_credentials.rs b/crates/openshell-sandbox/src/provider_credentials.rs index ae91e8d6e..acd116a83 100644 --- a/crates/openshell-sandbox/src/provider_credentials.rs +++ b/crates/openshell-sandbox/src/provider_credentials.rs @@ -13,6 +13,7 @@ const MAX_RETAINED_CREDENTIAL_GENERATIONS: usize = 8; pub struct ProviderCredentialSnapshot { pub revision: u64, pub child_env: HashMap, + pub dynamic_credentials: HashMap, } #[derive(Debug)] @@ -33,6 +34,7 @@ impl ProviderCredentialState { revision: u64, env: HashMap, credential_expires_at_ms: HashMap, + dynamic_credentials: HashMap, ) -> Self { let (child_env, generation_resolver, current_resolver) = SecretResolver::from_provider_env_for_current_revision( @@ -43,6 +45,7 @@ impl ProviderCredentialState { let snapshot = Arc::new(ProviderCredentialSnapshot { revision, child_env, + dynamic_credentials, }); let generations: VecDeque<_> = generation_resolver.map(Arc::new).into_iter().collect(); let current_resolver = current_resolver.map(Arc::new); @@ -79,6 +82,7 @@ impl ProviderCredentialState { revision: u64, env: HashMap, credential_expires_at_ms: HashMap, + dynamic_credentials: HashMap, ) -> usize { let (child_env, generation_resolver, current_resolver) = SecretResolver::from_provider_env_for_current_revision( @@ -94,6 +98,7 @@ impl ProviderCredentialState { inner.current = Arc::new(ProviderCredentialSnapshot { revision, child_env, + dynamic_credentials, }); inner.current_resolver = current_resolver.map(Arc::new); @@ -132,6 +137,7 @@ mod tests { 10, HashMap::from([("GITHUB_TOKEN".to_string(), "old".to_string())]), HashMap::new(), + HashMap::new(), ); let first = state.snapshot(); assert_eq!( @@ -143,6 +149,7 @@ mod tests { 11, HashMap::from([("GITHUB_TOKEN".to_string(), "new".to_string())]), HashMap::new(), + HashMap::new(), ); let second = state.snapshot(); assert_eq!( @@ -175,9 +182,10 @@ mod tests { 
10, HashMap::from([("GITHUB_TOKEN".to_string(), "old".to_string())]), HashMap::new(), + HashMap::new(), ); - state.install_environment(11, HashMap::new(), HashMap::new()); + state.install_environment(11, HashMap::new(), HashMap::new(), HashMap::new()); assert!(state.snapshot().child_env.is_empty()); let resolver = state.resolver().expect("old resolver retained"); @@ -208,12 +216,14 @@ mod tests { 10, HashMap::from([("GITHUB_TOKEN".to_string(), "old".to_string())]), HashMap::from([("GITHUB_TOKEN".to_string(), now_ms - 1_000)]), + HashMap::new(), ); state.install_environment( 11, HashMap::from([("GITHUB_TOKEN".to_string(), "new".to_string())]), HashMap::from([("GITHUB_TOKEN".to_string(), now_ms + 60_000)]), + HashMap::new(), ); let resolver = state.resolver().expect("resolver"); diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index ae100d734..227a9e61e 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -242,6 +242,11 @@ impl ProxyHandle { let resolver = provider_credentials .as_ref() .and_then(ProviderCredentialState::resolver); + let dynamic_credentials = provider_credentials.as_ref().map(|state| { + Arc::new(std::sync::RwLock::new( + state.snapshot().dynamic_credentials.clone(), + )) + }); let dtx = denial_tx.clone(); let atx = activity_tx.clone(); tokio::spawn(async move { @@ -255,6 +260,7 @@ impl ProxyHandle { policy_local, gw, resolver, + dynamic_credentials, dtx, atx, ) @@ -411,6 +417,13 @@ async fn handle_tcp_connection( policy_local_ctx: Option>, trusted_host_gateway: Arc>, secret_resolver: Option>, + dynamic_credentials: Option< + Arc< + std::sync::RwLock< + std::collections::HashMap, + >, + >, + >, denial_tx: Option>, activity_tx: Option, ) -> Result<()> { @@ -458,6 +471,7 @@ async fn handle_tcp_connection( policy_local_ctx, trusted_host_gateway, secret_resolver, + dynamic_credentials, denial_tx.as_ref(), activity_tx.as_ref(), ) @@ -953,6 +967,10 @@ async fn 
handle_tcp_connection( .collect(), secret_resolver: secret_resolver.clone(), activity_tx: activity_tx.clone(), + dynamic_credentials: dynamic_credentials.clone(), + token_grant_resolver: dynamic_credentials + .as_ref() + .map(|_| crate::l7::token_grant_injection::default_resolver()), }; if effective_tls_skip { @@ -2870,6 +2888,33 @@ where .await } +async fn inject_token_grant_for_forward_request( + method: &str, + upstream_target: &str, + forward_request_bytes: Vec, + l7_ctx: &crate::l7::relay::L7EvalContext, +) -> Result> { + let header_end = forward_request_bytes + .windows(4) + .position(|w| w == b"\r\n\r\n") + .map_or(forward_request_bytes.len(), |p| p + 4); + let header_str = std::str::from_utf8(&forward_request_bytes[..header_end]) + .into_diagnostic() + .map_err(|_| miette::miette!("Forward HTTP headers contain invalid UTF-8"))?; + let body_length = crate::l7::rest::parse_body_length(header_str)?; + let forward_request_for_token_grant = crate::l7::provider::L7Request { + action: method.to_string(), + target: upstream_target.to_string(), + query_params: std::collections::HashMap::new(), + raw_header: forward_request_bytes, + body_length, + }; + + crate::l7::token_grant_injection::inject_if_needed(forward_request_for_token_grant, l7_ctx) + .await + .map(|req| req.raw_header) +} + /// Handle a plain HTTP forward proxy request (non-CONNECT). /// /// Public IPs are allowed through when the endpoint passes OPA evaluation. 
@@ -2891,6 +2936,13 @@ async fn handle_forward_proxy( policy_local_ctx: Option>, trusted_host_gateway: Arc>, secret_resolver: Option>, + dynamic_credentials: Option< + Arc< + std::sync::RwLock< + std::collections::HashMap, + >, + >, + >, denial_tx: Option<&mpsc::UnboundedSender>, activity_tx: Option<&ActivitySender>, ) -> Result<()> { @@ -3120,6 +3172,10 @@ async fn handle_forward_proxy( .collect(), secret_resolver: secret_resolver.clone(), activity_tx: activity_tx.cloned(), + dynamic_credentials: dynamic_credentials.clone(), + token_grant_resolver: dynamic_credentials + .as_ref() + .map(|_| crate::l7::token_grant_injection::default_resolver()), }; let mut l7_activity_pending = false; @@ -3790,6 +3846,36 @@ async fn handle_forward_proxy( } emit_forward_success_activity(activity_tx, l7_activity_pending); + forward_request_bytes = match inject_token_grant_for_forward_request( + method, + &upstream_target, + forward_request_bytes, + &l7_ctx, + ) + .await + { + Ok(bytes) => bytes, + Err(e) => { + warn!( + dst_host = %host_lc, + dst_port = port, + error = %e, + "token grant failed in forward proxy" + ); + respond( + client, + &build_json_error_response( + 502, + "Bad Gateway", + "token_grant_failed", + "dynamic token grant failed", + ), + ) + .await?; + return Ok(()); + } + }; + // 9. 
Rewrite request and forward to upstream let rewritten = match rewrite_forward_request( &forward_request_bytes, @@ -4136,6 +4222,53 @@ mod tests { .map_err(|e| miette::miette!("upstream task failed: {e}")) } + fn forward_token_grant_context( + resolver_response: std::result::Result<&str, &str>, + ) -> ( + crate::l7::relay::L7EvalContext, + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture, + ) { + let provider_key = "api.example.test\t8080\t/v1/**\tprovider:access_token"; + let fixture = match resolver_response { + Ok(token) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::success( + provider_key, + token, + ) + } + Err(error) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::failure( + provider_key, + error, + ) + } + }; + let ctx = crate::l7::relay::L7EvalContext { + host: "api.example.test".into(), + port: 8080, + policy_name: "rest_api".into(), + binary_path: "/usr/bin/curl".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: Some(fixture.dynamic_credentials()), + token_grant_resolver: Some(fixture.resolver()), + }; + + (ctx, fixture) + } + + fn authorization_header_count(headers: &str) -> usize { + headers + .lines() + .filter(|line| { + line.split_once(':') + .is_some_and(|(name, _)| name.eq_ignore_ascii_case("authorization")) + }) + .count() + } + fn forward_websocket_policy_parts( data: &str, host: &str, @@ -4177,6 +4310,8 @@ mod tests { cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; (config, tunnel_engine, ctx) } @@ -4343,6 +4478,8 @@ mod tests { cmdline_paths: vec![], secret_resolver: resolver, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let query_params = std::collections::HashMap::new(); @@ -4384,6 +4521,8 @@ mod tests { cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + 
dynamic_credentials: None, + token_grant_resolver: None, }; let query_params = std::collections::HashMap::new(); let config = websocket_l7_config(crate::l7::L7Protocol::Rest, false); @@ -6124,6 +6263,40 @@ network_policies: // --- rewrite_forward_request tests --- + #[tokio::test] + async fn forward_proxy_injects_token_grant_before_rewriting_request() { + let (ctx, fixture) = forward_token_grant_context(Ok("grant-token")); + let raw = b"GET http://api.example.test:8080/v1/projects HTTP/1.1\r\nHost: api.example.test:8080\r\nAuthorization: Bearer stale-token\r\nConnection: close\r\n\r\n".to_vec(); + + let with_token = inject_token_grant_for_forward_request("GET", "/v1/projects", raw, &ctx) + .await + .expect("forward token grant should inject"); + let rewritten = + rewrite_forward_request(&with_token, with_token.len(), "/v1/projects", None, false) + .expect("forward request should rewrite"); + let rewritten = String::from_utf8_lossy(&rewritten); + + assert!(rewritten.starts_with("GET /v1/projects HTTP/1.1\r\n")); + assert!(rewritten.contains("Authorization: Bearer grant-token\r\n")); + assert!(!rewritten.contains("stale-token")); + assert_eq!(authorization_header_count(&rewritten), 1); + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + + #[tokio::test] + async fn forward_proxy_token_grant_failure_returns_error_before_rewrite() { + let (ctx, fixture) = forward_token_grant_context(Err("oauth unavailable")); + let raw = b"GET http://api.example.test:8080/v1/projects HTTP/1.1\r\nHost: api.example.test:8080\r\nConnection: close\r\n\r\n".to_vec(); + + let err = inject_token_grant_for_forward_request("GET", "/v1/projects", raw, &ctx) + .await + .expect_err("forward token grant failure should stop request rewriting"); + + assert!(err.to_string().contains("Token grant failed")); + assert!(err.to_string().contains("oauth unavailable")); + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + #[test] 
fn test_rewrite_get_request() { let raw = diff --git a/crates/openshell-sandbox/src/spiffe_endpoint.rs b/crates/openshell-sandbox/src/spiffe_endpoint.rs new file mode 100644 index 000000000..449462627 --- /dev/null +++ b/crates/openshell-sandbox/src/spiffe_endpoint.rs @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::path::Path; + +/// Convert a path to a SPIFFE Workload API endpoint URL. +/// +/// If the path already has a scheme (`unix:` or `tcp:`), use it as-is. +/// Otherwise, assume it is a Unix socket path and prepend `unix:`. +pub fn workload_api_endpoint(path: &Path) -> String { + let path = path.to_string_lossy(); + if path.starts_with("unix:") || path.starts_with("tcp:") { + path.into_owned() + } else { + format!("unix:{path}") + } +} diff --git a/crates/openshell-sandbox/src/ssh.rs b/crates/openshell-sandbox/src/ssh.rs index 67fbc7e57..9578b8ae6 100644 --- a/crates/openshell-sandbox/src/ssh.rs +++ b/crates/openshell-sandbox/src/ssh.rs @@ -5,7 +5,7 @@ use crate::child_env; use crate::policy::SandboxPolicy; -use crate::process::drop_privileges; +use crate::process::{drop_privileges, is_supervisor_only_env_var}; use crate::provider_credentials::ProviderCredentialState; use crate::sandbox; #[cfg(target_os = "linux")] @@ -700,6 +700,9 @@ fn apply_child_env( } for (key, value) in provider_env { + if is_supervisor_only_env_var(key) { + continue; + } cmd.env(key, value); } } @@ -790,7 +793,7 @@ fn spawn_pty_shell( netns_fd, #[cfg(target_os = "linux")] prepared_sandbox, - ); + )?; } let mut child = cmd.spawn()?; @@ -936,7 +939,7 @@ fn spawn_pipe_exec( netns_fd, #[cfg(target_os = "linux")] prepared_sandbox, - ); + )?; } let mut child = cmd.spawn()?; @@ -1059,6 +1062,13 @@ mod unsafe_pty { } #[allow(unsafe_code)] + #[cfg_attr( + not(target_os = "linux"), + allow( + clippy::unnecessary_wraps, + reason = "Linux pre_exec setup can fail while 
non-Linux setup cannot." + ) + )] pub fn install_pre_exec( cmd: &mut Command, policy: SandboxPolicy, @@ -1066,11 +1076,16 @@ mod unsafe_pty { slave_fd: RawFd, netns_fd: Option, #[cfg(target_os = "linux")] prepared: crate::sandbox::linux::PreparedSandbox, - ) { + ) -> anyhow::Result<()> { // Wrap in Option so we can .take() it out of the FnMut closure. // pre_exec is only called once (after fork, before exec). #[cfg(target_os = "linux")] let mut prepared = Some(prepared); + #[cfg(target_os = "linux")] + let supervisor_identity_mount = crate::process::supervisor_identity_mount_from_env() + .map_err(|err| { + anyhow::anyhow!("failed to prepare supervisor identity isolation: {err}") + })?; unsafe { cmd.pre_exec(move || { setsid().map_err(|err| std::io::Error::other(err.to_string()))?; @@ -1080,40 +1095,61 @@ mod unsafe_pty { netns_fd, &policy, #[cfg(target_os = "linux")] + supervisor_identity_mount, + #[cfg(target_os = "linux")] prepared.take(), ) }); } + Ok(()) } /// Pre-exec hook for pipe-based (non-PTY) exec. /// /// Skips `setsid` and `TIOCSCTTY` since there is no controlling terminal. #[allow(unsafe_code)] + #[cfg_attr( + not(target_os = "linux"), + allow( + clippy::unnecessary_wraps, + reason = "Linux pre_exec setup can fail while non-Linux setup cannot." 
+ ) + )] pub fn install_pre_exec_no_pty( cmd: &mut Command, policy: SandboxPolicy, _workdir: Option, netns_fd: Option, #[cfg(target_os = "linux")] prepared: crate::sandbox::linux::PreparedSandbox, - ) { + ) -> anyhow::Result<()> { #[cfg(target_os = "linux")] let mut prepared = Some(prepared); + #[cfg(target_os = "linux")] + let supervisor_identity_mount = crate::process::supervisor_identity_mount_from_env() + .map_err(|err| { + anyhow::anyhow!("failed to prepare supervisor identity isolation: {err}") + })?; unsafe { cmd.pre_exec(move || { enter_netns_and_sandbox( netns_fd, &policy, #[cfg(target_os = "linux")] + supervisor_identity_mount, + #[cfg(target_os = "linux")] prepared.take(), ) }); } + Ok(()) } fn enter_netns_and_sandbox( netns_fd: Option, policy: &SandboxPolicy, + #[cfg(target_os = "linux")] supervisor_identity_mount: Option< + &crate::process::SupervisorIdentityMountNamespace, + >, #[cfg(target_os = "linux")] prepared: Option, ) -> std::io::Result<()> { // Enter network namespace before dropping privileges. @@ -1132,6 +1168,11 @@ mod unsafe_pty { #[cfg(not(target_os = "linux"))] let _ = netns_fd; + #[cfg(target_os = "linux")] + if let Some(mount) = supervisor_identity_mount { + mount.enter_for_child()?; + } + // Drop privileges. initgroups/setgid/setuid need /etc/group and // /etc/passwd which would be blocked if Landlock were already enforced. drop_privileges(policy).map_err(|err| std::io::Error::other(err.to_string()))?; @@ -1517,7 +1558,8 @@ mod tests { None, ) .expect("prepare should succeed in test environment"), - ); + ) + .expect("install pre_exec should succeed"); let output = cmd .spawn() diff --git a/crates/openshell-sandbox/src/token_grant.rs b/crates/openshell-sandbox/src/token_grant.rs new file mode 100644 index 000000000..e6dc7677c --- /dev/null +++ b/crates/openshell-sandbox/src/token_grant.rs @@ -0,0 +1,1037 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +//! `OAuth2` JWT client assertion token grant using SPIFFE JWT-SVID. +//! +//! When a provider profile includes a `token_grant` configuration, the +//! supervisor obtains `OAuth2` access tokens on-demand by authenticating to the +//! token service using the sandbox's SPIFFE JWT-SVID as the client assertion. +//! +//! ## Flow +//! +//! 1. HTTP proxy intercepts outbound request to provider endpoint +//! 2. Check token cache for unexpired access token +//! 3. On cache miss or expiry: +//! a. Fetch JWT-SVID from SPIRE agent (via Workload API) +//! b. POST to token service with JWT client assertion grant +//! c. Cache the returned access token with TTL +//! 4. Inject `Authorization: Bearer ` header +//! +//! ## Configuration +//! +//! Token grant parameters come from the provider profile `token_grant` field: +//! - `token_endpoint` — `OAuth2` token service URL +//! - `jwt_svid_audience` — SPIRE JWT-SVID audience override (optional) +//! - `audience` — Resource audience to request from the token service +//! - `scopes` — `OAuth2` scopes to request (optional) +//! - `cache_ttl_seconds` — Cache override (0 = use `expires_in` from response) +//! +//! ## Environment +//! +//! Requires `OPENSHELL_PROVIDER_SPIFFE_WORKLOAD_API_SOCKET` to be set (path to +//! the SPIFFE Workload API socket). + +use std::collections::HashMap; +use std::future::Future; +use std::sync::{Arc, LazyLock, RwLock}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use miette::{IntoDiagnostic, Result, WrapErr}; +use openshell_core::sandbox_env; +use serde::Deserialize; +use spiffe::WorkloadApiClient; + +/// Token cache shared across all provider token grants. +static TOKEN_CACHE: LazyLock = LazyLock::new(TokenCache::new); +const MAX_OAUTH_ERROR_FIELD_LEN: usize = 256; + +/// `OAuth2` token response from the authorization server. 
+#[derive(Debug, Clone, Deserialize)] +struct TokenResponse { + access_token: String, + #[serde(default)] + #[allow(dead_code)] + token_type: String, + #[serde(default)] + expires_in: i64, + #[serde(default)] + #[allow(dead_code)] + scope: String, +} + +#[derive(Debug, Deserialize)] +struct OAuthErrorResponse { + error: Option, + error_description: Option, +} + +/// Cached access token with expiration metadata. +#[derive(Debug, Clone)] +struct CachedToken { + access_token: String, + expires_at_ms: i64, +} + +/// Thread-safe token cache keyed by provider name. +struct TokenCache { + tokens: Arc>>, +} + +impl TokenCache { + fn new() -> Self { + Self { + tokens: Arc::new(RwLock::new(HashMap::new())), + } + } + + /// Get a cached token if it exists and is not expired. + fn get(&self, provider_name: &str) -> Option { + let now_ms = current_time_ms(); + let tokens = self.tokens.read().ok()?; + let cached = tokens.get(provider_name)?; + if cached.expires_at_ms > now_ms { + Some(cached.access_token.clone()) + } else { + None + } + } + + /// Store a token with expiration time. + fn set(&self, provider_name: String, access_token: String, expires_at_ms: i64) { + if let Ok(mut tokens) = self.tokens.write() { + tokens.insert( + provider_name, + CachedToken { + access_token, + expires_at_ms, + }, + ); + } + } +} + +/// Obtain an `OAuth2` access token for a provider using JWT client assertion grant. +/// +/// This function fetches the sandbox's SPIFFE JWT-SVID from the local SPIRE +/// agent, then exchanges it for an access token with a POST request to the provider's +/// token endpoint with the JWT client assertion grant flow (RFC 7523). +/// +/// Tokens are cached per provider name with TTL. Subsequent calls return the +/// cached token if it has not expired. 
+/// +/// # Arguments +/// +/// * `provider_name` — Unique provider identifier (used as cache key) +/// * `token_endpoint` — `OAuth2` token service URL +/// * `jwt_svid_audience` — Optional audience to request when fetching the JWT-SVID +/// * `audience` — Resource audience to request in the token request +/// * `scopes` — `OAuth2` scopes to request (may be empty) +/// * `cache_ttl_override` — Cache TTL in seconds (0 = use `expires_in` from response) +/// +/// # Errors +/// +/// Returns an error if: +/// - SPIFFE Workload API socket is not configured +/// - SPIRE agent is unreachable +/// - JWT-SVID fetch fails +/// - Token service request fails +/// - Token response is invalid +pub async fn obtain_provider_token( + provider_name: &str, + token_endpoint: &str, + jwt_svid_audience: &str, + audience: &str, + scopes: &[String], + cache_ttl_override: i64, +) -> Result { + obtain_provider_token_with_grant( + ObtainProviderTokenInput { + cache: &TOKEN_CACHE, + provider_name, + token_endpoint, + jwt_svid_audience, + audience, + scopes, + cache_ttl_override, + }, + |jwt_audience| async move { + // Fetch JWT-SVID with authorization server as audience + // For RFC 7523, the JWT assertion's aud claim identifies the issuer/realm + let jwt_svid = fetch_jwt_svid_for_token_grant(&jwt_audience).await?; + + // Perform OAuth2 JWT client assertion grant + // The audience parameter in the token request specifies the resource server + perform_token_grant(token_endpoint, &jwt_svid, audience, scopes).await + }, + ) + .await +} + +struct ObtainProviderTokenInput<'a> { + cache: &'a TokenCache, + provider_name: &'a str, + token_endpoint: &'a str, + jwt_svid_audience: &'a str, + audience: &'a str, + scopes: &'a [String], + cache_ttl_override: i64, +} + +async fn obtain_provider_token_with_grant( + input: ObtainProviderTokenInput<'_>, + grant: F, +) -> Result +where + F: FnOnce(String) -> Fut, + Fut: Future>, +{ + // Derive authorization server audience from token endpoint + // For Keycloak: 
http://keycloak/realms/openshell/protocol/openid-connect/token + // -> http://keycloak/realms/openshell + let jwt_audience = effective_jwt_svid_audience(input.token_endpoint, input.jwt_svid_audience); + let cache_key = token_cache_key( + input.provider_name, + input.token_endpoint, + &jwt_audience, + input.audience, + input.scopes, + ); + + // Check cache first + if let Some(cached) = input.cache.get(&cache_key) { + return Ok(cached); + } + + let token_response = grant(jwt_audience).await?; + + // Calculate expiration time + let expires_at_ms = if input.cache_ttl_override > 0 { + current_time_ms() + (input.cache_ttl_override * 1000) + } else if token_response.expires_in > 0 { + current_time_ms() + (token_response.expires_in * 1000) + } else { + // Default to 5 minutes if no expiry provided + current_time_ms() + (300 * 1000) + }; + + // Cache the token + input.cache.set( + cache_key, + token_response.access_token.clone(), + expires_at_ms, + ); + + Ok(token_response.access_token) +} + +/// Fetch JWT-SVID from SPIRE agent for token grant authentication. +/// +/// This function connects to the local SPIRE agent via the Workload API and +/// requests a JWT-SVID with the specified audience. The JWT-SVID is used as +/// the client assertion in the `OAuth2` grant request. 
+async fn fetch_jwt_svid_for_token_grant(audience: &str) -> Result { + let socket_path = provider_spiffe_workload_api_socket_from_env()?; + + let endpoint = + crate::spiffe_endpoint::workload_api_endpoint(std::path::Path::new(&socket_path)); + + // Connect to SPIRE agent + let client = WorkloadApiClient::connect_to(&endpoint) + .await + .into_diagnostic() + .wrap_err_with(|| { + format!("failed to connect to SPIFFE Workload API endpoint {endpoint}") + })?; + + // Fetch JWT-SVID with token service audience + // None = use the sandbox's default SPIFFE ID + client + .fetch_jwt_token([audience], None) + .await + .into_diagnostic() + .wrap_err("failed to fetch JWT-SVID for token grant") +} + +fn provider_spiffe_workload_api_socket_from_env() -> Result { + std::env::var(sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET) + .ok() + .filter(|value| !value.trim().is_empty()) + .ok_or_else(|| { + miette::miette!( + "{} not set — SPIFFE authentication unavailable for token grant", + sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + ) + }) +} + +/// Perform `OAuth2` JWT client assertion grant. +/// +/// POSTs to the token endpoint with: +/// - `grant_type=client_credentials` +/// - `client_assertion_type=urn:ietf:params:oauth:client-assertion-type:jwt-spiffe` +/// - `client_assertion=` (client identity is in the JWT's `sub` claim) +/// - `audience=` (if provided) +/// - `scope=` (if provided) +/// +/// Note: `client_id` is NOT included - the client is identified by the `sub` claim +/// in the JWT-SVID itself. 
+async fn perform_token_grant( + token_endpoint: &str, + jwt_svid: &str, + audience: &str, + scopes: &[String], +) -> Result { + let mut form_params = vec![ + ("grant_type", "client_credentials"), + ( + "client_assertion_type", + "urn:ietf:params:oauth:client-assertion-type:jwt-spiffe", + ), + ("client_assertion", jwt_svid), + ]; + + // Add audience if provided + let audience_param; + if !audience.is_empty() { + audience_param = audience.to_string(); + form_params.push(("audience", &audience_param)); + } + + // Add scopes if provided + let scope_param; + if !scopes.is_empty() { + scope_param = scopes.join(" "); + form_params.push(("scope", &scope_param)); + } + + // POST to token endpoint + let client = reqwest::Client::new(); + let response = client + .post(token_endpoint) + .form(&form_params) + .send() + .await + .into_diagnostic() + .wrap_err_with(|| format!("failed to POST to token endpoint {token_endpoint}"))?; + + // Check response status + if !response.status().is_success() { + let status = response.status(); + let body = response + .text() + .await + .unwrap_or_else(|_| "".to_string()); + return Err(miette::miette!( + "{}", + token_grant_failure_message(status, &body) + )); + } + + // Parse token response + response + .json::() + .await + .into_diagnostic() + .wrap_err("failed to parse token response as JSON") +} + +/// Derive the issuer/realm URL from a token endpoint URL. +/// +/// For Keycloak token endpoints like: +/// `http://keycloak/realms/openshell/protocol/openid-connect/token` +/// Returns: +/// `http://keycloak/realms/openshell` +/// +/// This is used as the JWT-SVID audience claim when authenticating to the +/// authorization server via JWT client assertion (RFC 7523). 
+fn derive_issuer_from_token_endpoint(token_endpoint: &str) -> String { + // For Keycloak, strip everything after /realms/{realm-name} + if let Some(realms_idx) = token_endpoint.find("/realms/") { + // Find the next path segment after the realm name + let after_realms = &token_endpoint[realms_idx + "/realms/".len()..]; + if let Some(slash_idx) = after_realms.find('/') { + // Return everything up to (but not including) the next slash + let realm_end = realms_idx + "/realms/".len() + slash_idx; + return token_endpoint[..realm_end].to_string(); + } + } + + // Fallback: if we can't parse it, use the full token endpoint + // This works for some OAuth2 servers that accept the token endpoint as aud + token_endpoint.to_string() +} + +fn effective_jwt_svid_audience(token_endpoint: &str, jwt_svid_audience: &str) -> String { + if jwt_svid_audience.is_empty() { + derive_issuer_from_token_endpoint(token_endpoint) + } else { + jwt_svid_audience.to_string() + } +} + +fn token_cache_key( + provider_name: &str, + token_endpoint: &str, + jwt_svid_audience: &str, + audience: &str, + scopes: &[String], +) -> String { + format!( + "{}\t{}\t{}\t{}\t{}", + provider_name, + token_endpoint, + jwt_svid_audience, + audience, + scopes.join(" ") + ) +} + +fn token_grant_failure_message(status: reqwest::StatusCode, body: &str) -> String { + let Ok(error_response) = serde_json::from_str::(body) else { + return format!("token grant failed with status {status}"); + }; + + let error = error_response + .error + .as_deref() + .map(sanitize_oauth_error_field) + .filter(|value| !value.is_empty()); + let description = error_response + .error_description + .as_deref() + .map(sanitize_oauth_error_field) + .filter(|value| !value.is_empty()); + + match (error, description) { + (Some(error), Some(description)) => { + format!( + "token grant failed with status {status}: error={error}; error_description={description}" + ) + } + (Some(error), None) => { + format!("token grant failed with status {status}: 
error={error}") + } + (None, Some(description)) => { + format!("token grant failed with status {status}: error_description={description}") + } + (None, None) => format!("token grant failed with status {status}"), + } +} + +fn sanitize_oauth_error_field(value: &str) -> String { + value + .chars() + .map(|ch| if ch.is_control() { ' ' } else { ch }) + .take(MAX_OAUTH_ERROR_FIELD_LEN) + .collect::() + .trim() + .to_string() +} + +/// Get current Unix timestamp in milliseconds. +fn current_time_ms() -> i64 { + let millis = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or(Duration::from_secs(0)) + .as_millis(); + i64::try_from(millis).unwrap_or(i64::MAX) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + use std::sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + }; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + use tokio::net::TcpListener; + + #[derive(Debug)] + struct CapturedTokenRequest { + request_line: String, + headers: HashMap, + form: HashMap, + } + + async fn token_endpoint_once( + status: &str, + body: &str, + ) -> (String, tokio::task::JoinHandle) { + let listener = TcpListener::bind("127.0.0.1:0") + .await + .expect("bind token endpoint"); + let addr = listener.local_addr().expect("token endpoint local addr"); + let status = status.to_string(); + let body = body.to_string(); + let handle = tokio::spawn(async move { + let (mut stream, _) = listener.accept().await.expect("accept token request"); + let mut buf = Vec::new(); + let mut chunk = [0u8; 512]; + let mut expected_len = None; + + loop { + let n = stream.read(&mut chunk).await.expect("read token request"); + assert!(n > 0, "token request stream closed before headers"); + buf.extend_from_slice(&chunk[..n]); + + if expected_len.is_none() + && let Some(header_end) = header_end(&buf) + { + let headers = String::from_utf8_lossy(&buf[..header_end]); + let content_length = headers + .lines() + .find_map(|line| { + let (name, value) = line.split_once(':')?; + 
name.eq_ignore_ascii_case("content-length") + .then(|| value.trim().parse::().ok()) + .flatten() + }) + .unwrap_or(0); + expected_len = Some(header_end + content_length); + } + + if expected_len.is_some_and(|len| buf.len() >= len) { + break; + } + } + + let captured = parse_token_request(&buf); + let response = format!( + "HTTP/1.1 {status}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}", + body.len(), + ); + stream + .write_all(response.as_bytes()) + .await + .expect("write token response"); + captured + }); + + (format!("http://{addr}/token"), handle) + } + + fn header_end(buf: &[u8]) -> Option { + buf.windows(4) + .position(|w| w == b"\r\n\r\n") + .map(|idx| idx + 4) + } + + fn parse_token_request(buf: &[u8]) -> CapturedTokenRequest { + let header_end = header_end(buf).expect("request should contain header terminator"); + let headers = String::from_utf8_lossy(&buf[..header_end]); + let mut lines = headers.lines(); + let request_line = lines.next().expect("request line").to_string(); + let headers = lines + .filter_map(|line| { + let (name, value) = line.split_once(':')?; + Some((name.to_ascii_lowercase(), value.trim().to_string())) + }) + .collect(); + let body = String::from_utf8_lossy(&buf[header_end..]).to_string(); + + CapturedTokenRequest { + request_line, + headers, + form: parse_form_body(&body), + } + } + + fn parse_form_body(body: &str) -> HashMap { + body.split('&') + .filter(|part| !part.is_empty()) + .filter_map(|part| { + let (name, value) = part.split_once('=')?; + Some((decode_form_component(name), decode_form_component(value))) + }) + .collect() + } + + fn decode_form_component(value: &str) -> String { + let bytes = value.as_bytes(); + let mut decoded = Vec::with_capacity(bytes.len()); + let mut idx = 0; + while idx < bytes.len() { + match bytes[idx] { + b'+' => { + decoded.push(b' '); + idx += 1; + } + b'%' if idx + 2 < bytes.len() => { + let hex = &value[idx + 1..idx + 3]; + if let Ok(byte) = 
u8::from_str_radix(hex, 16) { + decoded.push(byte); + idx += 3; + } else { + decoded.push(bytes[idx]); + idx += 1; + } + } + byte => { + decoded.push(byte); + idx += 1; + } + } + } + String::from_utf8(decoded).expect("form body should be UTF-8") + } + + struct CountedTokenGrantInput<'a> { + cache: &'a TokenCache, + provider_name: &'a str, + token_endpoint: &'a str, + jwt_svid_audience: &'a str, + audience: &'a str, + scopes: &'a [String], + cache_ttl_override: i64, + expires_in: i64, + grant_calls: Arc, + } + + async fn obtain_counted_test_token(input: CountedTokenGrantInput<'_>) -> Result { + obtain_provider_token_with_grant( + ObtainProviderTokenInput { + cache: input.cache, + provider_name: input.provider_name, + token_endpoint: input.token_endpoint, + jwt_svid_audience: input.jwt_svid_audience, + audience: input.audience, + scopes: input.scopes, + cache_ttl_override: input.cache_ttl_override, + }, + move |_| { + let grant_calls = input.grant_calls.clone(); + async move { + let call = grant_calls.fetch_add(1, Ordering::SeqCst) + 1; + Ok(TokenResponse { + access_token: format!("token-{call}"), + token_type: "Bearer".to_string(), + expires_in: input.expires_in, + scope: input.scopes.join(" "), + }) + } + }, + ) + .await + } + + async fn obtain_token_without_grant_call( + cache: &TokenCache, + provider_name: &str, + token_endpoint: &str, + jwt_svid_audience: &str, + audience: &str, + scopes: &[String], + cache_ttl_override: i64, + ) -> Result { + obtain_provider_token_with_grant( + ObtainProviderTokenInput { + cache, + provider_name, + token_endpoint, + jwt_svid_audience, + audience, + scopes, + cache_ttl_override, + }, + |_| async { Err(miette::miette!("grant should not be called on cache hit")) }, + ) + .await + } + + #[test] + fn test_derive_issuer_from_keycloak_token_endpoint() { + let token_endpoint = "http://keycloak/realms/openshell/protocol/openid-connect/token"; + let issuer = derive_issuer_from_token_endpoint(token_endpoint); + assert_eq!(issuer, 
"http://keycloak/realms/openshell"); + } + + #[test] + fn test_derive_issuer_from_https_keycloak_endpoint() { + let token_endpoint = + "https://auth.example.com/realms/production/protocol/openid-connect/token"; + let issuer = derive_issuer_from_token_endpoint(token_endpoint); + assert_eq!(issuer, "https://auth.example.com/realms/production"); + } + + #[test] + fn test_derive_issuer_fallback_for_non_keycloak() { + let token_endpoint = "https://oauth.example.com/token"; + let issuer = derive_issuer_from_token_endpoint(token_endpoint); + // Fallback: returns the full token endpoint + assert_eq!(issuer, "https://oauth.example.com/token"); + } + + #[test] + fn effective_jwt_svid_audience_prefers_explicit_override() { + let audience = effective_jwt_svid_audience( + "http://keycloak/realms/openshell/protocol/openid-connect/token", + "spiffe://custom-audience", + ); + + assert_eq!(audience, "spiffe://custom-audience"); + } + + #[test] + fn token_cache_key_varies_by_resource_audience_and_scopes() { + let base = token_cache_key( + "alpha.default.svc.cluster.local\t80\t\tprovider:access_token", + "http://keycloak/realms/openshell/protocol/openid-connect/token", + "http://keycloak/realms/openshell", + "alpha", + &["alpha".to_string()], + ); + let different_audience = token_cache_key( + "alpha.default.svc.cluster.local\t80\t\tprovider:access_token", + "http://keycloak/realms/openshell/protocol/openid-connect/token", + "http://keycloak/realms/openshell", + "delta", + &["alpha".to_string()], + ); + let different_scopes = token_cache_key( + "alpha.default.svc.cluster.local\t80\t\tprovider:access_token", + "http://keycloak/realms/openshell/protocol/openid-connect/token", + "http://keycloak/realms/openshell", + "alpha", + &["delta".to_string()], + ); + + assert_ne!(base, different_audience); + assert_ne!(base, different_scopes); + } + + #[tokio::test] + async fn obtain_provider_token_uses_cache_for_same_key() { + let cache = TokenCache::new(); + let grant_calls = 
Arc::new(AtomicUsize::new(0)); + let scopes = vec!["read".to_string()]; + + let first = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name: "api.example.test\t443\t/v1/**\tprovider:access_token", + token_endpoint: "https://auth.example.com/token", + jwt_svid_audience: "https://auth.example.com", + audience: "api://resource", + scopes: &scopes, + cache_ttl_override: 0, + expires_in: 60, + grant_calls: grant_calls.clone(), + }) + .await + .expect("first call should grant token"); + let second = obtain_token_without_grant_call( + &cache, + "api.example.test\t443\t/v1/**\tprovider:access_token", + "https://auth.example.com/token", + "https://auth.example.com", + "api://resource", + &scopes, + 0, + ) + .await + .expect("second call should use cache"); + + assert_eq!(first, "token-1"); + assert_eq!(second, "token-1"); + assert_eq!(grant_calls.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn obtain_provider_token_separates_cache_by_audience_and_scopes() { + let cache = TokenCache::new(); + let grant_calls = Arc::new(AtomicUsize::new(0)); + let read_scope = vec!["read".to_string()]; + let write_scope = vec!["write".to_string()]; + + let first = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name: "api.example.test\t443\t/v1/**\tprovider:access_token", + token_endpoint: "https://auth.example.com/token", + jwt_svid_audience: "https://auth.example.com", + audience: "api://resource-one", + scopes: &read_scope, + cache_ttl_override: 0, + expires_in: 60, + grant_calls: grant_calls.clone(), + }) + .await + .expect("first audience should grant token"); + let different_audience = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name: "api.example.test\t443\t/v1/**\tprovider:access_token", + token_endpoint: "https://auth.example.com/token", + jwt_svid_audience: "https://auth.example.com", + audience: "api://resource-two", + scopes: &read_scope, + cache_ttl_override: 0, + 
expires_in: 60, + grant_calls: grant_calls.clone(), + }) + .await + .expect("different audience should grant token"); + let different_scope = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name: "api.example.test\t443\t/v1/**\tprovider:access_token", + token_endpoint: "https://auth.example.com/token", + jwt_svid_audience: "https://auth.example.com", + audience: "api://resource-one", + scopes: &write_scope, + cache_ttl_override: 0, + expires_in: 60, + grant_calls: grant_calls.clone(), + }) + .await + .expect("different scope should grant token"); + + assert_eq!(first, "token-1"); + assert_eq!(different_audience, "token-2"); + assert_eq!(different_scope, "token-3"); + assert_eq!(grant_calls.load(Ordering::SeqCst), 3); + } + + #[tokio::test] + async fn obtain_provider_token_regrants_after_expired_cache_entry() { + let cache = TokenCache::new(); + let grant_calls = Arc::new(AtomicUsize::new(0)); + let scopes = vec!["read".to_string()]; + let provider_name = "api.example.test\t443\t/v1/**\tprovider:access_token"; + let token_endpoint = "https://auth.example.com/token"; + let jwt_svid_audience = "https://auth.example.com"; + let audience = "api://resource"; + + let cache_key = token_cache_key( + provider_name, + token_endpoint, + jwt_svid_audience, + audience, + &scopes, + ); + cache.set( + cache_key, + "expired-token".to_string(), + current_time_ms() - 1, + ); + + let token = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name, + token_endpoint, + jwt_svid_audience, + audience, + scopes: &scopes, + cache_ttl_override: 0, + expires_in: 60, + grant_calls: grant_calls.clone(), + }) + .await + .expect("expired cache entry should grant token again"); + + assert_eq!(token, "token-1"); + assert_eq!(grant_calls.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn obtain_provider_token_cache_ttl_override_extends_zero_expires_in() { + let cache = TokenCache::new(); + let grant_calls = 
Arc::new(AtomicUsize::new(0)); + let scopes = vec!["read".to_string()]; + + let first = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name: "api.example.test\t443\t/v1/**\tprovider:access_token", + token_endpoint: "https://auth.example.com/token", + jwt_svid_audience: "https://auth.example.com", + audience: "api://resource", + scopes: &scopes, + cache_ttl_override: 60, + expires_in: 0, + grant_calls: grant_calls.clone(), + }) + .await + .expect("first override call should grant token"); + let second = obtain_token_without_grant_call( + &cache, + "api.example.test\t443\t/v1/**\tprovider:access_token", + "https://auth.example.com/token", + "https://auth.example.com", + "api://resource", + &scopes, + 60, + ) + .await + .expect("override should keep token cached"); + + assert_eq!(first, "token-1"); + assert_eq!(second, "token-1"); + assert_eq!(grant_calls.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn perform_token_grant_posts_jwt_assertion_and_parses_success_response() { + let (endpoint, request) = token_endpoint_once( + "200 OK", + r#"{"access_token":"access-123","token_type":"Bearer","expires_in":42,"scope":"read write"}"#, + ) + .await; + let scopes = vec!["read".to_string(), "write".to_string()]; + + let response = perform_token_grant(&endpoint, "jwt-svid-token", "api://resource", &scopes) + .await + .expect("token grant should succeed"); + let request = request.await.expect("token endpoint task should finish"); + + assert_eq!(response.access_token, "access-123"); + assert_eq!(response.expires_in, 42); + assert_eq!(request.request_line, "POST /token HTTP/1.1"); + assert_eq!( + request.headers.get("content-type").map(String::as_str), + Some("application/x-www-form-urlencoded") + ); + assert_eq!( + request.form.get("grant_type").map(String::as_str), + Some("client_credentials") + ); + assert_eq!( + request + .form + .get("client_assertion_type") + .map(String::as_str), + 
Some("urn:ietf:params:oauth:client-assertion-type:jwt-spiffe") + ); + assert_eq!( + request.form.get("client_assertion").map(String::as_str), + Some("jwt-svid-token") + ); + assert_eq!( + request.form.get("audience").map(String::as_str), + Some("api://resource") + ); + assert_eq!( + request.form.get("scope").map(String::as_str), + Some("read write") + ); + assert!( + !request.form.contains_key("client_id"), + "JWT-SVID subject should identify the client" + ); + } + + #[tokio::test] + async fn perform_token_grant_omits_empty_audience_and_scope() { + let (endpoint, request) = + token_endpoint_once("200 OK", r#"{"access_token":"access-123"}"#).await; + + let response = perform_token_grant(&endpoint, "jwt-svid-token", "", &[]) + .await + .expect("token grant should succeed without audience or scopes"); + let request = request.await.expect("token endpoint task should finish"); + + assert_eq!(response.access_token, "access-123"); + assert_eq!( + request.form.get("client_assertion").map(String::as_str), + Some("jwt-svid-token") + ); + assert!(!request.form.contains_key("audience")); + assert!(!request.form.contains_key("scope")); + } + + #[tokio::test] + async fn perform_token_grant_reports_sanitized_oauth_error_response() { + let (endpoint, request) = token_endpoint_once( + "401 Unauthorized", + r#"{"error":"invalid_client","error_description":"bad jwt assertion"}"#, + ) + .await; + + let err = perform_token_grant(&endpoint, "jwt-svid-token", "api://resource", &[]) + .await + .expect_err("token grant should fail on OAuth error"); + let request = request.await.expect("token endpoint task should finish"); + + assert_eq!( + request.form.get("audience").map(String::as_str), + Some("api://resource") + ); + assert_eq!( + err.to_string(), + "token grant failed with status 401 Unauthorized: error=invalid_client; error_description=bad jwt assertion" + ); + } + + #[tokio::test] + async fn perform_token_grant_does_not_echo_unstructured_error_body() { + let (endpoint, request) = 
token_endpoint_once( + "500 Internal Server Error", + "internal stack trace with implementation details", + ) + .await; + + let err = perform_token_grant(&endpoint, "jwt-svid-token", "", &[]) + .await + .expect_err("token grant should fail on server error"); + let _request = request.await.expect("token endpoint task should finish"); + let message = err.to_string(); + + assert_eq!( + message, + "token grant failed with status 500 Internal Server Error" + ); + assert!(!message.contains("stack trace")); + assert!(!message.contains("implementation details")); + } + + #[tokio::test] + async fn perform_token_grant_reports_malformed_success_json() { + let (endpoint, request) = token_endpoint_once("200 OK", r#"{"access_token":42"#).await; + + let err = perform_token_grant(&endpoint, "jwt-svid-token", "", &[]) + .await + .expect_err("malformed token response should fail"); + let _request = request.await.expect("token endpoint task should finish"); + + assert!( + err.to_string() + .contains("failed to parse token response as JSON") + ); + } + + #[test] + fn token_grant_failure_message_reports_oauth_error_fields() { + let message = token_grant_failure_message( + reqwest::StatusCode::UNAUTHORIZED, + r#"{"error":"invalid_client","error_description":"Invalid client credentials"}"#, + ); + + assert_eq!( + message, + "token grant failed with status 401 Unauthorized: error=invalid_client; error_description=Invalid client credentials" + ); + } + + #[test] + fn token_grant_failure_message_omits_unstructured_response_body() { + let message = token_grant_failure_message( + reqwest::StatusCode::INTERNAL_SERVER_ERROR, + "internal error containing implementation details", + ); + + assert_eq!( + message, + "token grant failed with status 500 Internal Server Error" + ); + } + + #[test] + fn token_grant_failure_message_sanitizes_oauth_error_fields() { + let long_description = "a".repeat(MAX_OAUTH_ERROR_FIELD_LEN + 20); + let body = + 
format!(r#"{{"error":"invalid_client\n","error_description":"{long_description}"}}"#); + let message = token_grant_failure_message(reqwest::StatusCode::UNAUTHORIZED, &body); + + assert!(!message.contains('\n')); + assert!(message.contains("error=invalid_client")); + assert!(message.contains(&"a".repeat(MAX_OAUTH_ERROR_FIELD_LEN))); + assert!(!message.contains(&"a".repeat(MAX_OAUTH_ERROR_FIELD_LEN + 1))); + } +} diff --git a/crates/openshell-server/Cargo.toml b/crates/openshell-server/Cargo.toml index 0b7e3a97e..fe765bcb2 100644 --- a/crates/openshell-server/Cargo.toml +++ b/crates/openshell-server/Cargo.toml @@ -35,7 +35,7 @@ k8s-openapi = { workspace = true } tokio = { workspace = true } # gRPC -tonic = { workspace = true, features = ["channel", "tls"] } +tonic = { workspace = true, features = ["channel", "tls-native-roots"] } prost = { workspace = true } prost-types = { workspace = true } diff --git a/crates/openshell-server/src/auth/principal.rs b/crates/openshell-server/src/auth/principal.rs index a95eb831b..1d4cb7276 100644 --- a/crates/openshell-server/src/auth/principal.rs +++ b/crates/openshell-server/src/auth/principal.rs @@ -6,7 +6,7 @@ //! A `Principal` is the result of running the [`super::authenticator::Authenticator`] //! chain on an inbound request. It generalizes over the kinds of callers the //! gateway recognizes — human users (OIDC), sandbox supervisors (gateway-minted -//! JWT, future SPIFFE), and anonymous callers (truly unauthenticated methods +//! JWT), and anonymous callers (truly unauthenticated methods //! like health probes). //! //! Handlers read the principal from the gRPC `Request` extensions and gate @@ -52,8 +52,8 @@ pub struct SandboxPrincipal { /// How this principal was verified — used for audit logs and method-specific /// authorization checks. pub source: SandboxIdentitySource, - /// SPIFFE trust domain. Populated when the credential is SPIFFE-shaped; - /// reserved for future per-sandbox cert / SPIRE authenticators. 
+ /// Optional namespace component parsed from sandbox identity credentials. + /// Gateway-minted sandbox JWTs currently use an identity-shaped subject. pub trust_domain: Option, } @@ -70,8 +70,6 @@ pub enum SandboxIdentitySource { /// Per-sandbox client certificate. Reserved for channel-bound sandbox /// identity. BootstrapCert { fingerprint: String }, - /// SPIRE-issued SVID. Reserved for SPIFFE/SPIRE sandbox identity. - SpiffeSvid { spiffe_id: String }, /// K8s `ServiceAccount` token used to bootstrap a gateway-minted JWT /// via `IssueSandboxToken`. Populated only on that one RPC path. K8sServiceAccount { pod_name: String, pod_uid: String }, diff --git a/crates/openshell-server/src/grpc/policy.rs b/crates/openshell-server/src/grpc/policy.rs index 380671f10..4f0aea12b 100644 --- a/crates/openshell-server/src/grpc/policy.rs +++ b/crates/openshell-server/src/grpc/policy.rs @@ -1256,6 +1256,7 @@ pub(super) async fn compute_provider_env_revision( Status::internal(format!("decode provider '{provider_name}' failed: {e}")) })?; hasher.update(provider.r#type.as_bytes()); + hash_provider_profile_revision(store, &provider.r#type, &mut hasher).await?; let mut credential_keys: Vec<_> = provider.credentials.keys().collect(); credential_keys.sort(); @@ -1281,6 +1282,41 @@ pub(super) async fn compute_provider_env_revision( )?)) } +async fn hash_provider_profile_revision( + store: &Store, + provider_type: &str, + hasher: &mut Sha256, +) -> Result<(), Status> { + if get_default_profile(provider_type).is_some() { + hasher.update(b"builtin-profile"); + hasher.update(provider_type.as_bytes()); + return Ok(()); + } + + hasher.update(b"custom-profile"); + match store + .get_by_name( + openshell_core::proto::StoredProviderProfile::object_type(), + provider_type, + ) + .await + .map_err(|e| { + Status::internal(format!( + "fetch provider profile '{provider_type}' failed: {e}" + )) + })? 
{ + Some(record) => { + hasher.update(record.id.as_bytes()); + hasher.update(record.updated_at_ms.to_le_bytes()); + hasher.update(record.payload.as_slice()); + } + None => { + hasher.update(b"missing"); + } + } + Ok(()) +} + async fn profile_provider_policy_layers( store: &Store, provider_names: &[String], @@ -1389,6 +1425,7 @@ pub(super) async fn handle_get_sandbox_provider_environment( environment: provider_environment.environment, provider_env_revision, credential_expires_at_ms: provider_environment.credential_expires_at_ms, + dynamic_credentials: provider_environment.dynamic_credentials, })) } @@ -4883,6 +4920,88 @@ mod tests { ); } + #[tokio::test] + async fn provider_env_revision_changes_when_custom_profile_token_grant_changes() { + use openshell_core::proto::{ + ProviderCredentialTokenGrant, ProviderProfile, ProviderProfileCategory, + ProviderProfileCredential, StoredProviderProfile, + }; + use std::time::Duration; + + fn token_grant_profile(token_endpoint: &str) -> StoredProviderProfile { + StoredProviderProfile { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "profile-custom-token".to_string(), + name: "custom-token".to_string(), + created_at_ms: 1_000_000, + labels: HashMap::new(), + resource_version: 0, + }), + profile: Some(ProviderProfile { + id: "custom-token".to_string(), + display_name: "Custom Token".to_string(), + description: String::new(), + category: ProviderProfileCategory::Other as i32, + credentials: vec![ProviderProfileCredential { + name: "access_token".to_string(), + auth_style: "bearer".to_string(), + header_name: "authorization".to_string(), + token_grant: Some(ProviderCredentialTokenGrant { + token_endpoint: token_endpoint.to_string(), + audience: "api://default".to_string(), + ..Default::default() + }), + ..Default::default() + }], + endpoints: vec![NetworkEndpoint { + host: "api.custom.example".to_string(), + port: 443, + ..Default::default() + }], + binaries: Vec::new(), + inference_capable: false, + 
discovery: None, + }), + } + } + + let state = test_server_state().await; + state + .store + .put_message(&test_provider("work-custom-token", "custom-token")) + .await + .unwrap(); + state + .store + .put_message(&token_grant_profile("https://auth.example.com/token")) + .await + .unwrap(); + + let first = + compute_provider_env_revision(state.store.as_ref(), &["work-custom-token".to_string()]) + .await + .unwrap(); + + tokio::time::sleep(Duration::from_millis(2)).await; + state + .store + .put_message(&token_grant_profile( + "https://auth.example.com/rotated-token", + )) + .await + .unwrap(); + + let second = + compute_provider_env_revision(state.store.as_ref(), &["work-custom-token".to_string()]) + .await + .unwrap(); + + assert_ne!( + first, second, + "custom provider profile updates must trigger sandbox dynamic credential refresh" + ); + } + #[tokio::test] async fn sandbox_config_and_provider_env_follow_attached_provider_lifecycle() { use crate::grpc::sandbox::{ diff --git a/crates/openshell-server/src/grpc/provider.rs b/crates/openshell-server/src/grpc/provider.rs index 8743564bf..df46cc72b 100644 --- a/crates/openshell-server/src/grpc/provider.rs +++ b/crates/openshell-server/src/grpc/provider.rs @@ -8,7 +8,10 @@ use crate::persistence::{ ObjectId, ObjectLabels, ObjectName, ObjectType, Store, WriteCondition, generate_name, }; -use openshell_core::proto::{Provider, Sandbox}; +use openshell_core::proto::{ + Provider, ProviderCredentialTokenGrantAudienceOverride, ProviderProfile, + ProviderProfileCredential, Sandbox, +}; use openshell_core::telemetry::{ LifecycleOperation, ProviderProfile as TelemetryProviderProfile, TelemetryOutcome, }; @@ -36,10 +39,11 @@ fn redact_provider_credentials(mut provider: Provider) -> Provider { provider } -#[derive(Debug, Clone, Default, PartialEq, Eq)] +#[derive(Debug, Clone, Default, PartialEq)] pub(super) struct ProviderEnvironment { pub environment: std::collections::HashMap, pub credential_expires_at_ms: 
std::collections::HashMap, + pub dynamic_credentials: std::collections::HashMap, } impl ProviderEnvironment { @@ -535,9 +539,220 @@ pub(super) async fn resolve_provider_environment( Ok(ProviderEnvironment { environment: env, credential_expires_at_ms: expires, + dynamic_credentials: resolve_dynamic_credentials(store, provider_names).await?, }) } +/// Resolve dynamic credentials (token grants) from provider profiles. +/// +/// Returns a map of endpoint-bound keys to credential metadata for credentials +/// that have `token_grant` configuration. Keys are internal supervisor metadata: +/// host, port, endpoint path, and provider credential identity. +pub(super) async fn resolve_dynamic_credentials( + store: &Store, + provider_names: &[String], +) -> Result, Status> { + if provider_names.is_empty() { + return Ok(std::collections::HashMap::new()); + } + + let mut dynamic_creds = std::collections::HashMap::new(); + + for provider_name in provider_names { + let provider = store + .get_message_by_name::(provider_name) + .await + .map_err(|e| { + Status::internal(format!("failed to fetch provider '{provider_name}': {e}")) + })? + .ok_or_else(|| { + Status::failed_precondition(format!("provider '{provider_name}' not found")) + })?; + + let profile_id = + normalize_provider_type(&provider.r#type).unwrap_or(provider.r#type.as_str()); + let Some(profile) = get_provider_type_profile(store, profile_id).await? 
else { + continue; + }; + + insert_dynamic_credentials_for_profile( + &mut dynamic_creds, + &profile.to_proto(), + provider_name, + ); + } + + Ok(dynamic_creds) +} + +fn insert_dynamic_credentials_for_profile( + dynamic_creds: &mut std::collections::HashMap, + profile: &ProviderProfile, + provider_name: &str, +) { + for credential in &profile.credentials { + if credential.token_grant.is_none() { + continue; + } + for endpoint in &profile.endpoints { + for port in endpoint_ports(endpoint.port, &endpoint.ports) { + insert_dynamic_credentials_for_endpoint( + dynamic_creds, + &endpoint.host, + port, + &endpoint.path, + provider_name, + &credential.name, + credential, + ); + } + } + } +} + +fn endpoint_ports(port: u32, ports: &[u32]) -> Vec { + if ports.is_empty() { + if port == 0 { Vec::new() } else { vec![port] } + } else { + ports.iter().copied().filter(|port| *port != 0).collect() + } +} + +fn dynamic_credential_key( + host: &str, + port: u32, + path: &str, + provider_name: &str, + credential_name: &str, +) -> String { + format!( + "{}\t{port}\t{}\t{}:{}", + host.to_ascii_lowercase(), + path, + provider_name, + credential_name + ) +} + +fn insert_dynamic_credentials_for_endpoint( + dynamic_creds: &mut std::collections::HashMap, + endpoint_host: &str, + endpoint_port: u32, + endpoint_path: &str, + provider_name: &str, + credential_name: &str, + credential: &ProviderProfileCredential, +) { + let default_key = dynamic_credential_key( + endpoint_host, + endpoint_port, + endpoint_path, + provider_name, + credential_name, + ); + dynamic_creds.insert(default_key, resolved_dynamic_credential(credential, None)); + + let Some(token_grant) = credential.token_grant.as_ref() else { + return; + }; + + for override_config in &token_grant.audience_overrides { + if !token_grant_override_matches_endpoint(override_config, endpoint_host, endpoint_port) { + continue; + } + + let override_host = if override_config.host.is_empty() { + endpoint_host + } else { + 
override_config.host.as_str() + }; + let override_port = if override_config.port == 0 { + endpoint_port + } else { + override_config.port + }; + let override_path = if override_config.path.is_empty() { + endpoint_path + } else { + override_config.path.as_str() + }; + let override_key = dynamic_credential_key( + override_host, + override_port, + override_path, + provider_name, + credential_name, + ); + dynamic_creds.insert( + override_key, + resolved_dynamic_credential(credential, Some(override_config)), + ); + } +} + +fn resolved_dynamic_credential( + credential: &ProviderProfileCredential, + override_config: Option<&ProviderCredentialTokenGrantAudienceOverride>, +) -> ProviderProfileCredential { + let mut credential = credential.clone(); + if let Some(token_grant) = credential.token_grant.as_mut() { + if let Some(override_config) = override_config { + if !override_config.audience.is_empty() { + token_grant.audience.clone_from(&override_config.audience); + } + if !override_config.scopes.is_empty() { + token_grant.scopes.clone_from(&override_config.scopes); + } + } + token_grant.audience_overrides.clear(); + } + credential +} + +fn token_grant_override_matches_endpoint( + override_config: &ProviderCredentialTokenGrantAudienceOverride, + endpoint_host: &str, + endpoint_port: u32, +) -> bool { + let host_matches = override_config.host.is_empty() + || host_pattern_matches(&override_config.host, endpoint_host) + || host_pattern_matches(endpoint_host, &override_config.host); + let port_matches = override_config.port == 0 || override_config.port == endpoint_port; + host_matches && port_matches +} + +fn host_pattern_matches(pattern: &str, host: &str) -> bool { + let pattern = pattern.to_ascii_lowercase(); + let host = host.to_ascii_lowercase(); + if pattern == host { + return true; + } + if !pattern.contains('*') { + return false; + } + + let pattern_labels: Vec<&str> = pattern.split('.').collect(); + let host_labels: Vec<&str> = host.split('.').collect(); + 
host_pattern_labels_match(&pattern_labels, &host_labels) +} + +fn host_pattern_labels_match(pattern: &[&str], host: &[&str]) -> bool { + match pattern.split_first() { + None => host.is_empty(), + Some((label, rest)) if *label == "**" => { + host_pattern_labels_match(rest, host) + || (!host.is_empty() && host_pattern_labels_match(pattern, &host[1..])) + } + Some((label, rest)) if *label == "*" => { + !host.is_empty() && host_pattern_labels_match(rest, &host[1..]) + } + Some((literal, rest)) => { + host.first().is_some_and(|label| label == literal) + && host_pattern_labels_match(rest, &host[1..]) + } + } +} + pub async fn validate_provider_environment_keys_unique( store: &Store, provider_names: &[String], @@ -702,10 +917,9 @@ use openshell_core::proto::{ GetProviderRequest, ImportProviderProfilesRequest, ImportProviderProfilesResponse, LintProviderProfilesRequest, LintProviderProfilesResponse, ListProviderProfilesRequest, ListProviderProfilesResponse, ListProvidersRequest, ListProvidersResponse, - ProviderCredentialRefreshStrategy, ProviderProfile, ProviderProfileDiagnostic, - ProviderProfileImportItem, ProviderProfileResponse, ProviderResponse, - RotateProviderCredentialRequest, RotateProviderCredentialResponse, StoredProviderProfile, - UpdateProviderRequest, + ProviderCredentialRefreshStrategy, ProviderProfileDiagnostic, ProviderProfileImportItem, + ProviderProfileResponse, ProviderResponse, RotateProviderCredentialRequest, + RotateProviderCredentialResponse, StoredProviderProfile, UpdateProviderRequest, }; use openshell_providers::{ CredentialRefreshProfile, ProfileValidationDiagnostic, ProviderTypeProfile, default_profiles, @@ -1614,6 +1828,7 @@ mod tests { DeleteProviderProfileRequest, GetProviderProfileRequest, ImportProviderProfilesRequest, L7Allow, L7Rule, LintProviderProfilesRequest, ListProviderProfilesRequest, NetworkBinary, NetworkEndpoint, ProviderCredentialRefresh, ProviderCredentialRefreshMaterial, + ProviderCredentialTokenGrant, 
ProviderCredentialTokenGrantAudienceOverride, ProviderProfile, ProviderProfileCategory, ProviderProfileCredential, ProviderProfileImportItem, Sandbox, SandboxSpec, }; @@ -1678,6 +1893,76 @@ mod tests { ); } + #[test] + fn dynamic_credentials_expand_endpoint_audience_overrides() { + let service_audiences = [ + ("alpha.default.svc.cluster.local", "alpha"), + ("beta.default.svc.cluster.local", "beta"), + ("gamma.default.svc.cluster.local", "gamma"), + ("delta.default.svc.cluster.local", "delta"), + ]; + let credential = ProviderProfileCredential { + name: "access_token".to_string(), + description: String::new(), + env_vars: Vec::new(), + required: false, + auth_style: "bearer".to_string(), + header_name: "Authorization".to_string(), + query_param: String::new(), + refresh: None, + token_grant: Some(ProviderCredentialTokenGrant { + token_endpoint: "http://keycloak/realms/openshell/protocol/openid-connect/token" + .to_string(), + audience: "api://default".to_string(), + jwt_svid_audience: "http://keycloak/realms/openshell".to_string(), + scopes: vec!["openid".to_string()], + cache_ttl_seconds: 300, + audience_overrides: service_audiences + .iter() + .map( + |(host, audience)| ProviderCredentialTokenGrantAudienceOverride { + host: (*host).to_string(), + port: 80, + path: String::new(), + audience: (*audience).to_string(), + scopes: vec![(*audience).to_string()], + }, + ) + .collect(), + }), + }; + let profile = ProviderProfile { + id: "keycloak-sso".to_string(), + display_name: "Keycloak SSO".to_string(), + description: String::new(), + category: ProviderProfileCategory::Other as i32, + credentials: vec![credential], + endpoints: service_audiences + .iter() + .map(|(host, _)| NetworkEndpoint { + host: (*host).to_string(), + port: 80, + ..Default::default() + }) + .collect(), + binaries: Vec::new(), + inference_capable: false, + discovery: None, + }; + + let mut dynamic_creds = HashMap::new(); + insert_dynamic_credentials_for_profile(&mut dynamic_creds, &profile, 
"keycloak"); + + assert_eq!(dynamic_creds.len(), 4); + for (host, audience) in service_audiences { + let key = dynamic_credential_key(host, 80, "", "keycloak", "access_token"); + let grant = dynamic_creds[&key].token_grant.as_ref().unwrap(); + assert_eq!(grant.audience, audience); + assert_eq!(grant.scopes, vec![audience.to_string()]); + assert!(grant.audience_overrides.is_empty()); + } + } + fn provider_with_values(name: &str, provider_type: &str) -> Provider { Provider { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { @@ -1758,6 +2043,7 @@ mod tests { }, ], }), + token_grant: None, } } @@ -1794,6 +2080,7 @@ mod tests { header_name: "authorization".to_string(), query_param: String::new(), refresh: None, + token_grant: None, } } @@ -3224,6 +3511,7 @@ mod tests { }, ], }), + token_grant: None, }], endpoints: vec![], binaries: vec![], @@ -3650,6 +3938,24 @@ mod tests { assert!(!result.contains_key("endpoint")); } + #[tokio::test] + async fn resolve_provider_env_allows_static_provider_without_profile() { + let store = test_store().await; + create_provider_record( + &store, + provider_with_values("static-provider", "unprofiled-static-api"), + ) + .await + .unwrap(); + + let result = resolve_provider_environment(&store, &["static-provider".to_string()]) + .await + .unwrap(); + + assert_eq!(result.get("API_TOKEN"), Some(&"token-123".to_string())); + assert!(result.dynamic_credentials.is_empty()); + } + #[tokio::test] async fn resolve_provider_env_skips_expired_credentials_and_returns_expiry_metadata() { let store = test_store().await; diff --git a/crates/openshell-server/src/multiplex.rs b/crates/openshell-server/src/multiplex.rs index e94326f98..2abde7bc4 100644 --- a/crates/openshell-server/src/multiplex.rs +++ b/crates/openshell-server/src/multiplex.rs @@ -283,9 +283,9 @@ where /// for local single-user gateways, or to an unsafe local developer user when /// `auth.allow_unauthenticated_users` is explicitly enabled. 
/// -/// When neither OIDC nor gateway-minted JWTs are configured (a barebones +/// When neither OIDC nor sandbox credentials are configured (a barebones /// dev gateway), the chain is left as `None` so the router short-circuits -/// to pass-through. +/// to pass-through unless mTLS or local unauthenticated users are enabled. fn build_authenticator_chain(state: &ServerState) -> Option { let mut authenticators: Vec> = Vec::new(); if let Some(k8s) = state.k8s_sa_authenticator.clone() { @@ -368,19 +368,13 @@ fn unauthenticated_dev_user_principal() -> Principal { }) } -fn status_response(status: tonic::Status) -> Response { - let response = status.into_http(); - let (parts, body) = response.into_parts(); - let body = tonic::body::BoxBody::new(body); - Response::from_parts(parts, body) +fn status_response(status: tonic::Status) -> Response { + status.into_http() } impl tower::Service> for AuthGrpcRouter where - S: tower::Service, Response = Response> - + Clone - + Send - + 'static, + S: tower::Service, Response = Response> + Clone + Send + 'static, S::Future: Send, S::Error: Send + Into>, B: Send + 'static, @@ -951,7 +945,7 @@ mod tests { } impl Service> for PrincipalRecorder { - type Response = Response; + type Response = Response; type Error = std::convert::Infallible; type Future = Pin> + Send>>; @@ -962,14 +956,7 @@ mod tests { fn call(&mut self, req: Request) -> Self::Future { let principal = req.extensions().get::().cloned(); *self.recorded.lock().unwrap() = principal; - Box::pin(async move { - let body = tonic::body::BoxBody::new( - Full::new(Bytes::new()) - .map_err(|never| match never {}) - .boxed_unsync(), - ); - Ok(Response::new(body)) - }) + Box::pin(async move { Ok(Response::new(tonic::body::Body::empty())) }) } } diff --git a/crates/openshell-tui/Cargo.toml b/crates/openshell-tui/Cargo.toml index b0ac0c7ca..723528cd7 100644 --- a/crates/openshell-tui/Cargo.toml +++ b/crates/openshell-tui/Cargo.toml @@ -21,7 +21,7 @@ ratatui = { workspace = true } crossterm 
= { workspace = true } terminal-colorsaurus = { workspace = true } tokio = { workspace = true } -tonic = { workspace = true, features = ["tls"] } +tonic = { workspace = true, features = ["tls-native-roots"] } miette = { workspace = true } owo-colors = { workspace = true } serde = { workspace = true } diff --git a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md index ab5b6eb45..dae410e78 100644 --- a/deploy/helm/openshell/README.md +++ b/deploy/helm/openshell/README.md @@ -57,6 +57,8 @@ See [`values.yaml`](values.yaml) for source defaults. Selected overlays: - [`ci/values-cert-manager.yaml`](ci/values-cert-manager.yaml) - cert-manager integration - [`ci/values-keycloak.yaml`](ci/values-keycloak.yaml) - Keycloak OIDC integration - [`ci/values-high-availability.yaml`](ci/values-high-availability.yaml) - HA gateway test overlay with bundled PostgreSQL +- [`ci/values-spire.yaml`](ci/values-spire.yaml) - SPIFFE/SPIRE provider token grants +- [`ci/values-spire-stack.yaml`](ci/values-spire-stack.yaml) - SPIRE hardened chart values for local development ### Database backend @@ -123,6 +125,17 @@ sandbox JWT signing Secret. This precedence applies even if external non-cert-manager TLS source manages TLS and you pre-create the sandbox JWT signing Secret. +## SPIFFE/SPIRE provider token grants + +Set `server.providerTokenGrants.spiffe.enabled=true` to let sandbox supervisors +use SPIFFE JWT-SVIDs for dynamic provider token grants. The chart keeps +supervisor-to-gateway authentication on gateway-minted sandbox JWTs and passes +the SPIFFE Workload API socket path to the Kubernetes driver so sandbox pods can +mount the SPIFFE CSI socket. + +For local development, uncomment the SPIRE Helm releases in `skaffold.yaml` and +add `ci/values-spire.yaml` to the OpenShell release values files. + ## Values | Key | Type | Default | Description | @@ -204,6 +217,8 @@ JWT signing Secret. 
| server.oidc.rolesClaim | string | `""` | Dot-separated path to the roles array in the JWT claims. Keycloak: "realm_access.roles", Entra ID: "roles", Okta: "groups". | | server.oidc.scopesClaim | string | `""` | Dot-separated path to the scopes array in the JWT claims. | | server.oidc.userRole | string | `""` | Role name for standard user access. | +| server.providerTokenGrants.spiffe.enabled | bool | `false` | Mount the SPIFFE Workload API socket into sandbox pods for dynamic provider token grants. | +| server.providerTokenGrants.spiffe.workloadApiSocketPath | string | `"/spiffe-workload-api/spire-agent.sock"` | Path to the SPIFFE Workload API socket mounted into sandbox pods. | | server.sandboxImage | string | `"ghcr.io/nvidia/openshell-community/sandboxes/base:latest"` | Default sandbox image used when requests do not specify one. | | server.sandboxImagePullPolicy | string | `""` | Kubernetes imagePullPolicy for sandbox pods. Empty = Kubernetes default (Always for :latest, IfNotPresent otherwise). Set to "Always" for dev clusters so new images are picked up without manual eviction. | | server.sandboxImagePullSecrets | list | `[]` | Image pull secrets attached to sandbox pods. Referenced Secrets must exist in the sandbox namespace. | diff --git a/deploy/helm/openshell/README.md.gotmpl b/deploy/helm/openshell/README.md.gotmpl index 5fc4019e8..b8492dab5 100644 --- a/deploy/helm/openshell/README.md.gotmpl +++ b/deploy/helm/openshell/README.md.gotmpl @@ -57,6 +57,8 @@ See [`values.yaml`](values.yaml) for source defaults. 
Selected overlays: - [`ci/values-cert-manager.yaml`](ci/values-cert-manager.yaml) - cert-manager integration - [`ci/values-keycloak.yaml`](ci/values-keycloak.yaml) - Keycloak OIDC integration - [`ci/values-high-availability.yaml`](ci/values-high-availability.yaml) - HA gateway test overlay with bundled PostgreSQL +- [`ci/values-spire.yaml`](ci/values-spire.yaml) - SPIFFE/SPIRE provider token grants +- [`ci/values-spire-stack.yaml`](ci/values-spire-stack.yaml) - SPIRE hardened chart values for local development ### Database backend @@ -123,5 +125,16 @@ sandbox JWT signing Secret. This precedence applies even if external non-cert-manager TLS source manages TLS and you pre-create the sandbox JWT signing Secret. +## SPIFFE/SPIRE provider token grants + +Set `server.providerTokenGrants.spiffe.enabled=true` to let sandbox supervisors +use SPIFFE JWT-SVIDs for dynamic provider token grants. The chart keeps +supervisor-to-gateway authentication on gateway-minted sandbox JWTs and passes +the SPIFFE Workload API socket path to the Kubernetes driver so sandbox pods can +mount the SPIFFE CSI socket. + +For local development, uncomment the SPIRE Helm releases in `skaffold.yaml` and +add `ci/values-spire.yaml` to the OpenShell release values files. + {{ template "chart.valuesSection" . }} {{ template "helm-docs.versionFooter" . }} diff --git a/deploy/helm/openshell/ci/values-spire-stack.yaml b/deploy/helm/openshell/ci/values-spire-stack.yaml new file mode 100644 index 000000000..b55f7cfc5 --- /dev/null +++ b/deploy/helm/openshell/ci/values-spire-stack.yaml @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# SPIRE hardened chart values for the local Helm dev environment. 
+global: + spire: + clusterName: openshell-dev + jwtIssuer: https://spire-spiffe-oidc-discovery-provider.spire.svc.cluster.local + trustDomain: openshell.local + +spire-server: + defaultJwtSvidTTL: 5m + controllerManager: + identities: + clusterSPIFFEIDs: + openshell-sandboxes: + enabled: true + spiffeIDTemplate: 'spiffe://{{ .TrustDomain }}/openshell/sandbox/{{ index .PodMeta.Annotations "openshell.io/sandbox-id" }}' + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: openshell + podSelector: + matchLabels: + openshell.ai/managed-by: openshell diff --git a/deploy/helm/openshell/ci/values-spire.yaml b/deploy/helm/openshell/ci/values-spire.yaml new file mode 100644 index 000000000..201520e81 --- /dev/null +++ b/deploy/helm/openshell/ci/values-spire.yaml @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# OpenShell overlay for local SPIRE-backed provider token grants. +server: + providerTokenGrants: + spiffe: + enabled: true + workloadApiSocketPath: /spiffe-workload-api/spire-agent.sock diff --git a/deploy/helm/openshell/templates/gateway-config.yaml b/deploy/helm/openshell/templates/gateway-config.yaml index f46547c3f..da75e5949 100644 --- a/deploy/helm/openshell/templates/gateway-config.yaml +++ b/deploy/helm/openshell/templates/gateway-config.yaml @@ -103,6 +103,9 @@ data: service_account_name = {{ include "openshell.sandboxServiceAccountName" . | quote }} supervisor_sideload_method = {{ include "openshell.supervisorSideloadMethod" . 
| quote }} sa_token_ttl_secs = {{ .Values.server.sandboxJwt.k8sSaTokenTtlSecs | default 3600 }} + {{- if .Values.server.providerTokenGrants.spiffe.enabled }} + provider_spiffe_workload_api_socket_path = {{ .Values.server.providerTokenGrants.spiffe.workloadApiSocketPath | quote }} + {{- end }} {{- if .Values.server.sandboxImagePullPolicy }} image_pull_policy = {{ .Values.server.sandboxImagePullPolicy | quote }} {{- end }} diff --git a/deploy/helm/openshell/tests/gateway_config_test.yaml b/deploy/helm/openshell/tests/gateway_config_test.yaml index 6b14fe12a..7b7be28da 100644 --- a/deploy/helm/openshell/tests/gateway_config_test.yaml +++ b/deploy/helm/openshell/tests/gateway_config_test.yaml @@ -290,3 +290,26 @@ tests: secretKeyRef: name: my-external-secret key: uri + - it: renders provider SPIFFE token grants while keeping gateway JWT auth + set: + server.providerTokenGrants.spiffe.enabled: true + template: templates/gateway-config.yaml + asserts: + - matchRegex: + path: data["gateway.toml"] + pattern: '\[openshell\.gateway\.gateway_jwt\]' + - matchRegex: + path: data["gateway.toml"] + pattern: 'provider_spiffe_workload_api_socket_path\s*=\s*"/spiffe-workload-api/spire-agent\.sock"' + - notMatchRegex: + path: data["gateway.toml"] + pattern: '\[openshell\.gateway\.spiffe\]' + + - it: keeps the gateway sandbox JWT secret mounted when provider SPIFFE grants are enabled + set: + server.providerTokenGrants.spiffe.enabled: true + template: templates/statefulset.yaml + asserts: + - matchRegex: + path: spec.template.spec.volumes[1].name + pattern: '^sandbox-jwt$' diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index f0cd43c73..8f0b8ed6e 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -232,6 +232,16 @@ server: # (owner-read only). Override to 0440 or 0444 if the container UID # does not match the volume file owner. secretDefaultMode: "" + # Dynamic provider token grants. 
When SPIFFE is enabled here, sandbox + # supervisors mount the SPIFFE Workload API socket so provider profiles can + # exchange JWT-SVIDs for upstream access tokens. Supervisor-to-gateway + # authentication still uses gateway-minted sandbox JWTs. + providerTokenGrants: + spiffe: + # -- Mount the SPIFFE Workload API socket into sandbox pods for dynamic provider token grants. + enabled: false + # -- Path to the SPIFFE Workload API socket mounted into sandbox pods. + workloadApiSocketPath: /spiffe-workload-api/spire-agent.sock # OIDC (OpenID Connect) configuration for JWT-based authentication. # When issuer is set, the server validates Bearer tokens on gRPC requests. oidc: diff --git a/docs/kubernetes/access-control.mdx b/docs/kubernetes/access-control.mdx index 5c333bb53..8824b6de1 100644 --- a/docs/kubernetes/access-control.mdx +++ b/docs/kubernetes/access-control.mdx @@ -19,6 +19,14 @@ The Helm chart always generates mTLS certificates at install time. The gateway u For how the CLI resolves gateways and stores credentials, refer to [Gateway Authentication](/reference/gateway-auth). +## Sandbox Supervisor Identity + +Kubernetes sandbox supervisors authenticate back to the gateway as sandbox workloads. By default, the gateway mints its own sandbox JWTs and Kubernetes sandboxes bootstrap them with a projected ServiceAccount token. + +Dynamic provider token grants can use SPIFFE without changing supervisor-to-gateway authentication. Set `server.providerTokenGrants.spiffe.enabled=true` to mount the SPIFFE CSI Workload API socket into sandbox pods while keeping the projected ServiceAccount token bootstrap and gateway-minted sandbox JWT path. + +Provider token grants require a SPIFFE implementation such as SPIRE and a `ClusterSPIFFEID` that assigns per-sandbox IDs from the pod's `openshell.io/sandbox-id` annotation. Provider profiles with `token_grant` metadata cause the sandbox supervisor to request JWT-SVIDs and exchange them for upstream OAuth2 access tokens. 
+ ## OIDC User Authentication Set `server.oidc.issuer` to enable OIDC. The gateway validates the `Authorization: Bearer <token>` header on every request against the issuer's JWKS endpoint. diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index c70d8acbd..a0ceace2e 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -184,6 +184,9 @@ workspace_default_storage_size = "10Gi" # default_runtime_class_name = "kata-containers" # Kubelet clamps projected tokens below 600 seconds. The driver caps values at 86400. sa_token_ttl_secs = 3600 +# Optional SPIFFE Workload API socket mounted into sandbox pods for dynamic +# provider token grants. Supervisor-to-gateway auth still uses gateway JWTs. +provider_spiffe_workload_api_socket_path = "/spiffe-workload-api/spire-agent.sock" ``` ### Docker diff --git a/docs/sandboxes/providers-v2.mdx b/docs/sandboxes/providers-v2.mdx index 3a1e0bde7..214ee10b7 100644 --- a/docs/sandboxes/providers-v2.mdx +++ b/docs/sandboxes/providers-v2.mdx @@ -44,7 +44,7 @@ openshell settings delete --global --key providers_v2_enabled ``` -The feature flag controls provider-derived policy layers. It does not change the current credential injection model. OpenShell still injects placeholder environment variables into sandbox processes and resolves those placeholders in outbound HTTP traffic. +The feature flag controls provider-derived policy layers. OpenShell still supports placeholder environment variables for provider credentials, and provider profiles can also declare dynamic token grants that the sandbox proxy resolves on demand for matching HTTP endpoints. ## Available Features @@ -60,6 +60,7 @@ Providers v2 currently includes these user-facing features: - Runtime sandbox provider lifecycle commands under `openshell sandbox provider list|attach|detach`. - Credential refresh configuration with `openshell provider refresh status|configure|rotate|delete`.
- Credential expiry metadata with `openshell provider update --credential-expires-at`; values accept Unix epoch milliseconds or ISO/RFC3339 timestamps. +- Dynamic token grants that use the sandbox's SPIFFE JWT-SVID as an OAuth2 client assertion and inject a short-lived `Authorization: Bearer` token for matching profile endpoints. ## Roadmap @@ -67,8 +68,8 @@ The following Providers v2 design items are not part of the current behavior: | Roadmap item | Current behavior | |---|---| -| Profile-driven explicit credential injection | Profile `auth_style`, `header_name`, and `query_param` fields are stored and validated, but runtime injection still depends on environment placeholders generated from provider credentials. | -| Endpoint and binary scoped credential injection | Provider profile endpoints and binaries affect policy composition. They do not yet restrict which outbound requests can receive credential injection. | +| General profile-driven credential placement | Static `auth_style`, `header_name`, and `query_param` placement metadata is stored and validated, but static credential injection still depends on environment placeholders generated from provider credentials. Dynamic `token_grant` credentials inject bearer tokens for matching HTTP endpoints. | +| Endpoint and binary scoped credential injection | Provider profile endpoints and binaries affect policy composition. Dynamic token grants are endpoint-scoped. Static placeholder injection is not yet restricted by profile endpoint or binary metadata. | | Credential verification on create | `openshell provider create` does not yet probe provider verification endpoints or expose `--no-verify`. | | Automatic credential scope extraction | OpenShell does not yet inspect upstream provider responses to discover credential scopes. | | Inference mounting from attached providers | `inference_capable` is profile metadata. Attaching an inference-capable provider does not yet create `inference.local` routes. 
| @@ -159,8 +160,8 @@ credentials: required: true # Accepted values: basic, bearer, header, query. - # These fields describe the intended credential placement. - # Runtime injection still uses env placeholder resolution today. + # These fields describe static credential placement. + # Static runtime injection still uses env placeholder resolution. auth_style: bearer header_name: authorization query_param: api_key @@ -184,6 +185,23 @@ credentials: required: true secret: true + # Optional dynamic credential. The sandbox supervisor requests a + # SPIFFE JWT-SVID, exchanges it at token_endpoint, caches the returned + # access token, and injects it as Authorization: Bearer for matching + # endpoint traffic. + token_grant: + token_endpoint: https://login.example.com/realms/custom/protocol/openid-connect/token + audience: api://custom-api + jwt_svid_audience: https://login.example.com/realms/custom + scopes: [api.read, api.write] + cache_ttl_seconds: 300 + audience_overrides: + - host: api.example.com + port: 443 + path: /v1/projects/** + audience: api://custom-projects + scopes: [projects.read] + discovery: credentials: [api_token] @@ -239,7 +257,7 @@ binaries: `category` groups profiles in `openshell provider list-profiles`. Use one of the values in the category enum. -`credentials` declares the credential names, environment variables, auth metadata, and optional refresh metadata for the provider type. The current runtime still exposes configured credential keys as placeholder environment variables and resolves placeholders in outbound HTTP requests. +`credentials` declares the credential names, environment variables, auth metadata, optional refresh metadata, and optional dynamic token grant metadata for the provider type. Static credentials are exposed as placeholder environment variables and resolved in outbound HTTP requests. Dynamic token grants are resolved by the sandbox proxy on demand for matching profile endpoints. 
`discovery` controls what `--from-existing` scans when `providers_v2_enabled=true`. Each entry in `discovery.credentials` must name a @@ -279,6 +297,23 @@ Gateway-managed refresh strategies use these material keys: OpenShell keeps token endpoints profile-owned. Refresh material cannot override `token_url` or `token_uri` during refresh configuration. +### Dynamic Token Grants + +`token_grant` belongs to one credential declaration. When a sandbox with the provider attached sends HTTP traffic to a matching profile endpoint, the supervisor requests a SPIFFE JWT-SVID from the local Workload API, exchanges it at `token_endpoint`, caches the returned access token, and injects `Authorization: Bearer <token>` before forwarding the request upstream. + +Token grant fields: + +| Field | Required | Behavior | +|---|---|---| +| `token_endpoint` | Yes | OAuth2 token endpoint that accepts a SPIFFE JWT-SVID client assertion. | +| `audience` | No | Resource audience requested from the token service. | +| `jwt_svid_audience` | No | Audience used when requesting the JWT-SVID. When omitted, OpenShell derives an issuer-style audience from Keycloak token endpoint paths or falls back to the full token endpoint URL. | +| `scopes` | No | OAuth2 scopes sent as a space-separated `scope` parameter. | +| `cache_ttl_seconds` | No | Token cache TTL override. When omitted or `0`, OpenShell uses the token response `expires_in`, or five minutes if the response does not include an expiry. | +| `audience_overrides` | No | Endpoint-specific `audience` and `scopes` overrides selected by host, port, and path. | + +Token grants require the sandbox supervisor to have access to a SPIFFE Workload API socket. They apply to HTTP traffic that the proxy can inspect. Endpoints with `tls: skip` bypass TLS termination and cannot receive dynamic token grant injection for HTTPS traffic. + ## Provider Instances A provider instance stores concrete credentials and config for a profile type.
Built-in profile IDs and imported custom profile IDs are accepted by `--type`. diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 25e440f5b..0284384b1 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -122,7 +122,7 @@ This enables credential injection and L7 inspection without explicit configurati |---|---| | Default | Auto-detect and terminate. OpenShell generates the sandbox CA at startup and injects it into the process trust stores (`NODE_EXTRA_CA_CERTS`, `DENO_CERT`, `SSL_CERT_FILE`, `REQUESTS_CA_BUNDLE`, `CURL_CA_BUNDLE`, `GIT_SSL_CAINFO`). | | What you can change | Set `tls: skip` on an endpoint to disable TLS detection and termination for that endpoint. Use this for client-certificate mTLS to upstream or non-standard binary protocols. | -| Risk if relaxed | `tls: skip` disables credential injection and L7 inspection for that endpoint. The proxy relays encrypted traffic without seeing the contents. | +| Risk if relaxed | `tls: skip` disables placeholder credential rewriting, dynamic token grant injection, and L7 inspection for that endpoint. The proxy relays encrypted traffic without seeing the contents. | | Recommendation | Use auto-detect (the default) for most endpoints. Use `tls: skip` only when the upstream requires the client's own TLS certificate (mTLS) or uses a non-HTTP protocol. | ### SSRF Protection @@ -212,7 +212,7 @@ OpenShell applies seccomp in two phases. A narrow supervisor-startup prelude run | Aspect | Detail | |---|---| -| Startup prelude | After privileged bootstrap helpers complete, the supervisor sets `PR_SET_NO_NEW_PRIVS` and synchronizes a seccomp filter across all runtime threads that blocks `mount`, the new mount API syscalls, `pivot_root`, `umount2`, `bpf`, `perf_event_open`, `userfaultfd`, module-loading syscalls, and kexec. This closes the long-lived privileged remount and kernel-surface window while leaving required setup syscalls such as `setns` available. 
| +| Startup prelude | After privileged bootstrap helpers complete, including network setup and provider-token SPIFFE child mount-namespace preparation, the supervisor sets `PR_SET_NO_NEW_PRIVS` and synchronizes a seccomp filter across all runtime threads that blocks `mount`, the new mount API syscalls, `pivot_root`, `umount2`, `bpf`, `perf_event_open`, `userfaultfd`, module-loading syscalls, and kexec. This closes the long-lived privileged remount and kernel-surface window while leaving required setup syscalls such as `setns` available. | | Socket domains | The filter allows `AF_INET` and `AF_INET6` (for proxy communication) and blocks `AF_PACKET`, `AF_BLUETOOTH`, and `AF_VSOCK` with `EPERM`. `AF_NETLINK` is partially allowed: only `NETLINK_ROUTE` (protocol 0) is permitted so that `getifaddrs(3)` works; all other netlink protocols are blocked. Write operations via `NETLINK_ROUTE` still require `CAP_NET_ADMIN`, which the sandbox does not grant. | | Runtime unconditional syscall blocks | `memfd_create`, `ptrace`, `bpf`, `process_vm_readv`, `process_vm_writev`, `pidfd_open`, `pidfd_getfd`, `pidfd_send_signal`, `io_uring_setup`, `mount`, `fsopen`, `fsconfig`, `fsmount`, `fspick`, `move_mount`, `open_tree`, `setns`, `umount2`, `pivot_root`, `userfaultfd`, `perf_event_open`. | | Conditional syscall blocks | `execveat` with `AT_EMPTY_PATH`, `unshare` and `clone` with `CLONE_NEWUSER`, and `seccomp(SECCOMP_SET_MODE_FILTER)` are denied with `EPERM`. | @@ -225,9 +225,9 @@ OpenShell applies seccomp in two phases. A narrow supervisor-startup prelude run The sandbox supervisor applies enforcement in a specific order during process startup. This ordering is intentional: named network-namespace setup still relies on privileged helpers, and privilege dropping still needs `/etc/group` and `/etc/passwd`, which Landlock subsequently restricts. -1. Privileged supervisor bootstrap helpers, including network-namespace setup and optional `nft` probes. +1. 
Privileged supervisor bootstrap helpers, including network-namespace setup, provider-token SPIFFE child mount-namespace setup, and optional `nft` probes. 2. Supervisor startup prelude seccomp (`PR_SET_NO_NEW_PRIVS` plus the early syscall denylist) synchronized across runtime threads. -3. Network namespace entry (`setns`) in child `pre_exec`. +3. Network and child-only mount namespace entry (`setns`) in child `pre_exec`. 4. Privilege drop (`initgroups` + `setgid` + `setuid`). 5. Core-dump hardening (`RLIMIT_CORE=0`, plus `PR_SET_DUMPABLE=0` on Linux). 6. Landlock filesystem restrictions. @@ -286,7 +286,7 @@ The following patterns weaken security without providing meaningful benefit. | Using `access: full` when finer rules would suffice | `access: full` with `protocol: rest` or `protocol: websocket` enables inspection but allows all methods and paths for that protocol. | Use `access: read-only` or explicit `rules` to restrict what the agent can do at the L7 level. | | Adding endpoints permanently when operator approval would suffice | Adding endpoints to the policy YAML makes them permanently reachable across all instances. | Use operator approval. Approved endpoints persist within the sandbox instance but reset on re-creation. | | Using broad binary globs | A glob like `/**` allows any binary to reach the endpoint, defeating binary-scoped enforcement. | Scope globs to specific directories (for example, `/sandbox/.vscode-server/**`). | -| Skipping TLS termination on HTTPS APIs | Setting `tls: skip` disables credential injection and L7 inspection. | Use the default auto-detect behavior unless the upstream requires client-certificate mTLS. | +| Skipping TLS termination on HTTPS APIs | Setting `tls: skip` disables placeholder credential rewriting, dynamic token grant injection, and L7 inspection. | Use the default auto-detect behavior unless the upstream requires client-certificate mTLS. 
| | Setting `enforcement: enforce` before auditing | Jumping to `enforce` without first running in `audit` mode risks breaking the agent's workflow. | Start with `audit`, review the logs, and switch to `enforce` after you validate the rules. | ## Related Topics diff --git a/examples/spiffe-token-grant-demo/README.md b/examples/spiffe-token-grant-demo/README.md new file mode 100644 index 000000000..173603459 --- /dev/null +++ b/examples/spiffe-token-grant-demo/README.md @@ -0,0 +1,138 @@ +# SPIFFE Token Grant Demo + +This example validates provider dynamic token grants using SPIFFE JWT-SVIDs. +It mirrors the PR 1781 alpha/beta flow without configuring OpenShell gateway +OIDC authentication. + +The demo deploys three in-cluster workloads: + +| Workload | Purpose | +|---|---| +| `token-issuer` | Accepts a SPIFFE JWT-SVID client assertion and returns a short-lived demo access token | +| `alpha` | Requires a bearer token with audience and scope `alpha` | +| `beta` | Requires a bearer token with audience and scope `beta` | + +The OpenShell provider profile in `provider-profile.yaml` configures a dynamic +credential with `token_grant`. When a sandbox curls `alpha` or `beta`, the +sandbox supervisor fetches a JWT-SVID from the SPIFFE Workload API, exchanges it +at `token-issuer`, and injects the returned access token into the outbound HTTP +request. + +## Prerequisites + +- A Kubernetes OpenShell dev cluster. +- SPIRE enabled for provider token grants. +- OpenShell configured with the Kubernetes ServiceAccount supervisor bootstrap + path. Gateway end-user OIDC is not required for this demo. +- `providers_v2_enabled=true` on the target gateway. + +For the Helm dev environment, deploy with the SPIRE releases and +`ci/values-spire.yaml` enabled in `deploy/helm/openshell/skaffold.yaml`. 
+ +## Deploy Workloads + +From the repository root: + +```bash +KUBECONFIG=kubeconfig kubectl apply -k examples/spiffe-token-grant-demo/k8s +KUBECONFIG=kubeconfig kubectl rollout restart deployment/token-issuer deployment/alpha deployment/beta +KUBECONFIG=kubeconfig kubectl rollout status deployment/token-issuer +KUBECONFIG=kubeconfig kubectl rollout status deployment/alpha +KUBECONFIG=kubeconfig kubectl rollout status deployment/beta +``` + +## Register Provider And Test + +Port-forward the local gateway in one terminal: + +```bash +KUBECONFIG=kubeconfig kubectl port-forward -n openshell svc/openshell 8097:8080 +``` + +Then run: + +```bash +export XDG_CONFIG_HOME=/private/tmp/openshell-spiffe-token-demo-config +export GATEWAY=http://127.0.0.1:8097 + +openshell --gateway-endpoint "$GATEWAY" settings set \ + --global --key providers_v2_enabled --value true --yes + +openshell --gateway-endpoint "$GATEWAY" provider profile import \ + -f examples/spiffe-token-grant-demo/provider-profile.yaml + +openshell --gateway-endpoint "$GATEWAY" provider create \ + --name spiffe-token-demo \ + --type spiffe-token-demo \ + --credential access_token=unused + +openshell --gateway-endpoint "$GATEWAY" sandbox create \ + --name spiffe-token-demo \ + --provider spiffe-token-demo \ + --keep \ + --no-tty \ + -- echo "sandbox ready" + +openshell --gateway-endpoint "$GATEWAY" sandbox exec \ + --name spiffe-token-demo \ + --no-tty \ + -- curl -sS http://alpha.default.svc.cluster.local/ + +openshell --gateway-endpoint "$GATEWAY" sandbox exec \ + --name spiffe-token-demo \ + --no-tty \ + -- curl -sS http://beta.default.svc.cluster.local/ +``` + +Expected output includes endpoint-specific token claims: + +```text +alpha called with path /: + aud: alpha, account + scope: alpha profile email + azp: spiffe://openshell.local/openshell/sandbox/ + +beta called with path /: + aud: beta, account + scope: beta profile email + azp: spiffe://openshell.local/openshell/sandbox/ +``` + +The protected services 
also write proof-of-life logs when they accept a call: + +```bash +KUBECONFIG=kubeconfig kubectl logs deployment/alpha --tail=20 +KUBECONFIG=kubeconfig kubectl logs deployment/beta --tail=20 +``` + +Example log lines: + +```text +alpha accepted request path=/ aud="alpha, account" scope="alpha profile email" client_id=spiffe://openshell.local/openshell/sandbox/ +beta accepted request path=/ aud="beta, account" scope="beta profile email" client_id=spiffe://openshell.local/openshell/sandbox/ +``` + +## Automated Demo + +`demo.sh` applies the workloads, registers the provider profile, creates a +sandbox, curls alpha and beta, prints the alpha/beta pod logs, and deletes the +sandbox with `openshell` on exit. It leaves the Kubernetes demo workloads in +place. + +```bash +KUBECONFIG=kubeconfig bash examples/spiffe-token-grant-demo/demo.sh +``` + +## Cleanup + +Delete the sandbox through OpenShell: + +```bash +openshell --gateway-endpoint "$GATEWAY" sandbox delete spiffe-token-demo +``` + +Delete the demo workloads with Kubernetes: + +```bash +KUBECONFIG=kubeconfig kubectl delete -k examples/spiffe-token-grant-demo/k8s +``` diff --git a/examples/spiffe-token-grant-demo/demo.sh b/examples/spiffe-token-grant-demo/demo.sh new file mode 100755 index 000000000..0028b0611 --- /dev/null +++ b/examples/spiffe-token-grant-demo/demo.sh @@ -0,0 +1,135 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROFILE_FILE="${SCRIPT_DIR}/provider-profile.yaml" +K8S_DIR="${SCRIPT_DIR}/k8s" + +SANDBOX_NAME="${SANDBOX_NAME:-spiffe-token-demo}" +PROVIDER_NAME="${PROVIDER_NAME:-spiffe-token-demo}" +PROFILE_ID="${PROFILE_ID:-spiffe-token-demo}" +PORT_FORWARD_PORT="${PORT_FORWARD_PORT:-8097}" +GATEWAY_ENDPOINT="${GATEWAY_ENDPOINT:-http://127.0.0.1:${PORT_FORWARD_PORT}}" +KEEP_SANDBOX="${KEEP_SANDBOX:-0}" + +TEMP_CONFIG_HOME="" +if [[ -z "${XDG_CONFIG_HOME:-}" ]]; then + TEMP_CONFIG_HOME="$(mktemp -d)" + export XDG_CONFIG_HOME="$TEMP_CONFIG_HOME" +fi + +PF_PID="" + +cleanup() { + if [[ "$KEEP_SANDBOX" != "1" ]]; then + openshell --gateway-endpoint "$GATEWAY_ENDPOINT" sandbox delete "$SANDBOX_NAME" >/dev/null 2>&1 || true + fi + if [[ -n "$PF_PID" ]]; then + kill "$PF_PID" >/dev/null 2>&1 || true + fi + if [[ -n "$TEMP_CONFIG_HOME" ]]; then + rm -rf "$TEMP_CONFIG_HOME" + fi +} +trap cleanup EXIT + +run() { + printf "\n$ %s\n" "$*" + "$@" +} + +wait_for_port_forward() { + for _ in $(seq 1 60); do + if nc -z 127.0.0.1 "$PORT_FORWARD_PORT" >/dev/null 2>&1; then + return 0 + fi + sleep 0.25 + done + printf "gateway port-forward did not become ready\n" >&2 + exit 1 +} + +assert_contains() { + local haystack="$1" + local needle="$2" + if [[ "$haystack" != *"$needle"* ]]; then + printf "expected output to contain: %s\n" "$needle" >&2 + printf "actual output:\n%s\n" "$haystack" >&2 + exit 1 + fi +} + +sandbox_curl_until() { + local label="$1" + local url="$2" + local expected="$3" + local output="" + + for attempt in $(seq 1 12); do + printf "\n$ openshell sandbox exec %s curl (attempt %s)\n" "$label" "$attempt" + if output=$("${OS[@]}" sandbox exec --name "$SANDBOX_NAME" --no-tty -- curl -sS --max-time 10 "$url" 2>&1); then + printf "%s\n" "$output" + if [[ "$output" == *"$expected"* ]]; then + SANDBOX_CURL_OUTPUT="$output" + return 0 + fi + else + printf "%s\n" 
"$output" + fi + sleep 2 + done + + printf "timed out waiting for %s to return expected output\n" "$label" >&2 + printf "last output:\n%s\n" "$output" >&2 + exit 1 +} + +OS=(openshell --gateway-endpoint "$GATEWAY_ENDPOINT") + +run kubectl apply -k "$K8S_DIR" +run kubectl rollout restart deployment/token-issuer deployment/alpha deployment/beta +run kubectl rollout status deployment/token-issuer +run kubectl rollout status deployment/alpha +run kubectl rollout status deployment/beta + +kubectl -n openshell port-forward svc/openshell "${PORT_FORWARD_PORT}:8080" >/tmp/openshell-spiffe-token-demo-port-forward.log 2>&1 & +PF_PID=$! +wait_for_port_forward + +"${OS[@]}" sandbox delete "$SANDBOX_NAME" >/dev/null 2>&1 || true +"${OS[@]}" provider delete "$PROVIDER_NAME" >/dev/null 2>&1 || true +"${OS[@]}" provider profile delete "$PROFILE_ID" >/dev/null 2>&1 || true + +run "${OS[@]}" settings set --global --key providers_v2_enabled --value true --yes +run "${OS[@]}" provider profile lint -f "$PROFILE_FILE" +run "${OS[@]}" provider profile import -f "$PROFILE_FILE" +run "${OS[@]}" provider create --name "$PROVIDER_NAME" --type "$PROFILE_ID" --credential access_token=unused +run "${OS[@]}" sandbox create --name "$SANDBOX_NAME" --provider "$PROVIDER_NAME" --keep --no-tty -- echo "sandbox ready" + +sandbox_curl_until "alpha" "http://alpha.default.svc.cluster.local/" "alpha called with path /:" +ALPHA_OUTPUT="$SANDBOX_CURL_OUTPUT" +assert_contains "$ALPHA_OUTPUT" "alpha called with path /:" +assert_contains "$ALPHA_OUTPUT" "aud: alpha, account" +assert_contains "$ALPHA_OUTPUT" "scope: alpha profile email" +assert_contains "$ALPHA_OUTPUT" "azp: spiffe://openshell.local/openshell/sandbox/" + +sandbox_curl_until "beta" "http://beta.default.svc.cluster.local/" "beta called with path /:" +BETA_OUTPUT="$SANDBOX_CURL_OUTPUT" +assert_contains "$BETA_OUTPUT" "beta called with path /:" +assert_contains "$BETA_OUTPUT" "aud: beta, account" +assert_contains "$BETA_OUTPUT" "scope: beta profile 
email" +assert_contains "$BETA_OUTPUT" "azp: spiffe://openshell.local/openshell/sandbox/" + +sleep 1 + +printf "\n$ kubectl logs -l app=alpha --tail=20 --prefix=true\n" +kubectl logs -l app=alpha --tail=20 --prefix=true | sed 's/^/alpha> /' + +printf "\n$ kubectl logs -l app=beta --tail=20 --prefix=true\n" +kubectl logs -l app=beta --tail=20 --prefix=true | sed 's/^/beta> /' + +printf "\nSPIFFE token grant demo succeeded.\n" diff --git a/examples/spiffe-token-grant-demo/k8s/kustomization.yaml b/examples/spiffe-token-grant-demo/k8s/kustomization.yaml new file mode 100644 index 000000000..ccd3a94a3 --- /dev/null +++ b/examples/spiffe-token-grant-demo/k8s/kustomization.yaml @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +generatorOptions: + disableNameSuffixHash: true + +configMapGenerator: + - name: openshell-spiffe-token-demo-scripts + files: + - token-issuer.js + - protected-service.js + +resources: + - workloads.yaml diff --git a/examples/spiffe-token-grant-demo/k8s/protected-service.js b/examples/spiffe-token-grant-demo/k8s/protected-service.js new file mode 100644 index 000000000..071f4902a --- /dev/null +++ b/examples/spiffe-token-grant-demo/k8s/protected-service.js @@ -0,0 +1,110 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
/**
 * Validate an access token presented to this protected service.
 *
 * Checks, in order: the HMAC-SHA256 signature (keyed with ACCESS_TOKEN_SECRET),
 * that the payload is a JSON object, expiry, issuer, audience, and scope.
 *
 * @param {string} jwt Compact JWT taken from the Authorization header.
 * @returns {object} The decoded payload claims when every check passes.
 * @throws {Error} With a human-readable reason when any check fails; the
 *   request handler reports it to the caller as a 403.
 */
function verifyAccessToken(jwt) {
  const parsed = parseJwt(jwt);

  const expected = Buffer.from(
    b64urlEncode(
      crypto.createHmac("sha256", ACCESS_TOKEN_SECRET).update(parsed.signingInput).digest(),
    ),
  );
  const presented = Buffer.from(parsed.signature);
  // timingSafeEqual throws a RangeError on buffers of different byte length,
  // so compare encoded byte lengths (not string lengths) before calling it.
  if (presented.length !== expected.length || !crypto.timingSafeEqual(presented, expected)) {
    throw new Error("access token signature validation failed");
  }

  const claims = parsed.payload;
  if (claims === null || typeof claims !== "object" || Array.isArray(claims)) {
    throw new Error("access token payload must be a JSON object");
  }

  const now = Math.floor(Date.now() / 1000);
  // A missing or non-numeric exp must not be treated as "never expires".
  if (typeof claims.exp !== "number") {
    throw new Error("access token is missing a numeric exp claim");
  }
  if (claims.exp <= now) {
    throw new Error("access token expired");
  }
  if (claims.iss !== ACCESS_TOKEN_ISSUER) {
    throw new Error(`unexpected access token issuer ${claims.iss}`);
  }

  // aud may be a single string or an array of strings.
  const audiences = Array.isArray(claims.aud) ? claims.aud : [claims.aud];
  if (!audiences.includes(EXPECTED_AUDIENCE)) {
    throw new Error(`access token audience did not include ${EXPECTED_AUDIENCE}`);
  }

  // scope is a whitespace-separated list.
  const scopes = String(claims.scope || "").split(/\s+/).filter(Boolean);
  if (!scopes.includes(EXPECTED_SCOPE)) {
    throw new Error(`access token scope did not include ${EXPECTED_SCOPE}`);
  }
  return claims;
}
claims.aud.join(", ") : claims.aud; + console.log( + `${SERVICE_NAME} accepted request path=${req.url} aud="${aud}" scope="${claims.scope}" client_id=${claims.client_id}`, + ); + return text( + res, + 200, + `${SERVICE_NAME} called with path ${req.url}:\n` + + ` sub: ${claims.sub}\n` + + ` aud: ${aud}\n` + + ` iss: ${claims.iss}\n` + + ` scope: ${claims.scope}\n` + + ` azp: ${claims.azp}\n` + + ` client_id: ${claims.client_id}\n`, + ); + } catch (error) { + console.warn(`${SERVICE_NAME} rejected request path=${req.url} reason="${error.message}"`); + return text(res, 403, `${SERVICE_NAME} rejected token: ${error.message}\n`); + } + }) + .listen(PORT, "0.0.0.0", () => { + console.log(`${SERVICE_NAME} listening on ${PORT}`); + }); diff --git a/examples/spiffe-token-grant-demo/k8s/token-issuer.js b/examples/spiffe-token-grant-demo/k8s/token-issuer.js new file mode 100644 index 000000000..d1fdd3bf7 --- /dev/null +++ b/examples/spiffe-token-grant-demo/k8s/token-issuer.js @@ -0,0 +1,202 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
/**
 * Return the JWKS document used to verify JWT-SVID signatures.
 *
 * A successfully fetched document is cached in module state and reused for
 * 60 seconds. The fetch is bounded by a 5 second timeout so an unresponsive
 * JWKS endpoint cannot hang a token request, and the response is validated
 * before it is cached so a malformed document is never reused.
 *
 * @returns {Promise<{keys: object[]}>} The JWKS document.
 * @throws {Error} If the fetch fails, times out, returns a non-2xx status, or
 *   the body does not contain a `keys` array.
 */
async function jwks() {
  const now = Date.now();
  if (cachedJwks && now - cachedJwksAt < 60000) {
    return cachedJwks;
  }

  const response = await fetch(JWKS_URI, { signal: AbortSignal.timeout(5000) });
  if (!response.ok) {
    throw new Error(`JWKS fetch failed with HTTP ${response.status}`);
  }

  const document = await response.json();
  // Callers look keys up with document.keys.find(...); reject anything else
  // here instead of caching it and failing later with a TypeError.
  if (!document || !Array.isArray(document.keys)) {
    throw new Error("JWKS response did not contain a keys array");
  }

  cachedJwks = document;
  cachedJwksAt = now;
  return cachedJwks;
}
/**
 * Read a request body to completion and return it as a UTF-8 string.
 *
 * Tracks the running byte count instead of re-concatenating every chunk
 * received so far, so the size check costs O(1) per chunk.
 *
 * @param {AsyncIterable<Buffer>} req Incoming request (or any async iterable of Buffers).
 * @returns {Promise<string>} The decoded body; "" for an empty body.
 * @throws {Error} "request body too large" once more than 1 MiB has been received.
 */
async function bodyText(req) {
  const maxBytes = 1024 * 1024;
  const chunks = [];
  let received = 0;
  for await (const chunk of req) {
    received += chunk.length;
    if (received > maxBytes) {
      throw new Error("request body too large");
    }
    chunks.push(chunk);
  }
  return Buffer.concat(chunks, received).toString("utf8");
}
# Demo token issuer: runs token-issuer.js from the shared scripts ConfigMap on
# a stock Node image.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: token-issuer
  namespace: default
  labels:
    app: token-issuer
spec:
  replicas: 1
  selector:
    matchLabels:
      app: token-issuer
  template:
    metadata:
      labels:
        app: token-issuer
    spec:
      containers:
        - name: token-issuer
          image: node:22-alpine
          imagePullPolicy: IfNotPresent
          command: ["node", "/demo/token-issuer.js"]
          ports:
            - name: http
              containerPort: 8080
          env:
            # DEMO ONLY: "0" turns off TLS certificate verification for every
            # outbound HTTPS request made by this Node process, including the
            # JWKS fetch from SPIRE_JWKS_URI below. Do not copy this into a
            # real deployment.
            - name: NODE_TLS_REJECT_UNAUTHORIZED
              value: "0"
            # DEMO ONLY: HMAC key used to sign access tokens, stored in
            # plaintext. The alpha and beta Deployments are configured with
            # the same value.
            - name: ACCESS_TOKEN_SECRET
              value: openshell-demo-secret
            # Issuer (`iss`) written into the access tokens this service mints.
            - name: ACCESS_TOKEN_ISSUER
              value: http://token-issuer.default.svc.cluster.local
            # JWKS document used to verify incoming JWT-SVID signatures.
            - name: SPIRE_JWKS_URI
              value: https://spire-spiffe-oidc-discovery-provider.spire.svc.cluster.local/keys
            # Issuer (`iss`) that incoming JWT-SVIDs must carry.
            - name: SPIRE_ISSUER
              value: https://spire-spiffe-oidc-discovery-provider.spire.svc.cluster.local
            # Audience that incoming JWT-SVIDs must include.
            - name: JWT_SVID_AUDIENCE
              value: http://token-issuer.default.svc.cluster.local
            # Incoming JWT-SVID subjects must start with this SPIFFE ID prefix.
            - name: TRUST_DOMAIN_PREFIX
              value: spiffe://openshell.local/openshell/sandbox/
          readinessProbe:
            httpGet:
              path: /healthz
              port: http
          volumeMounts:
            # The demo scripts are mounted read-only from the ConfigMap below.
            - name: scripts
              mountPath: /demo
              readOnly: true
      volumes:
        - name: scripts
          configMap:
            name: openshell-spiffe-token-demo-scripts
/healthz + port: http + volumeMounts: + - name: scripts + mountPath: /demo + readOnly: true + volumes: + - name: scripts + configMap: + name: openshell-spiffe-token-demo-scripts +--- +apiVersion: v1 +kind: Service +metadata: + name: beta + namespace: default +spec: + selector: + app: beta + ports: + - name: http + port: 80 + targetPort: http diff --git a/examples/spiffe-token-grant-demo/provider-profile.yaml b/examples/spiffe-token-grant-demo/provider-profile.yaml new file mode 100644 index 000000000..059479900 --- /dev/null +++ b/examples/spiffe-token-grant-demo/provider-profile.yaml @@ -0,0 +1,48 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +id: spiffe-token-demo +display_name: SPIFFE token grant demo +description: Dynamic token grant for alpha/beta demo services using SPIFFE JWT-SVID authentication +category: other +credentials: + - name: access_token + description: Access token obtained via SPIFFE JWT client assertion grant + required: false + auth_style: bearer + header_name: Authorization + token_grant: + token_endpoint: http://token-issuer.default.svc.cluster.local/token + audience: demo-default + jwt_svid_audience: http://token-issuer.default.svc.cluster.local + scopes: [demo] + cache_ttl_seconds: 60 + audience_overrides: + - host: alpha.default.svc.cluster.local + port: 80 + audience: alpha + scopes: [alpha] + - host: beta.default.svc.cluster.local + port: 80 + audience: beta + scopes: [beta] +endpoints: + - host: alpha.default.svc.cluster.local + port: 80 + protocol: rest + tls: none + access: read-write + enforcement: enforce + allowed_ips: + - 10.43.0.0/16 + - host: beta.default.svc.cluster.local + port: 80 + protocol: rest + tls: none + access: read-write + enforcement: enforce + allowed_ips: + - 10.43.0.0/16 +binaries: + - /usr/bin/curl + - /usr/local/bin/curl diff --git a/mise.lock b/mise.lock index 4413fb67d..9c188e0f3 100644 --- a/mise.lock +++ 
// Provider credential token grant configuration.
// When present, the credential is obtained dynamically via OAuth2 grant when
// needed, rather than being supplied as a static value.
message ProviderCredentialTokenGrant {
  // OAuth2 token endpoint URL
  // (e.g., https://keycloak.example.com/realms/my-realm/protocol/openid-connect/token).
  string token_endpoint = 1;

  // Optional: default resource audience to request from the token service.
  // Entries in audience_overrides replace it for the endpoints they match.
  string audience = 2;

  // Optional: audience to request when fetching the JWT-SVID from SPIRE.
  // If omitted, the sandbox derives this from token_endpoint.
  // Declared here next to `audience` for readability; its field number (6)
  // is intentionally not in declaration order and must not be changed.
  string jwt_svid_audience = 6;

  // Optional: OAuth2 scopes to request. Audience overrides that do not set
  // their own scopes inherit this list.
  repeated string scopes = 3;

  // Optional: override token cache TTL (seconds).
  // If 0 or omitted, use expires_in from token response.
  int64 cache_ttl_seconds = 4;

  // Optional: endpoint-specific resource audience overrides.
  repeated ProviderCredentialTokenGrantAudienceOverride audience_overrides = 5;
}