From 6d49eb8b41b1cc89f9fc888e5e0396f75fd02d0d Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Thu, 14 May 2026 17:12:12 -0700 Subject: [PATCH] refactor(gateway): move driver options into config Signed-off-by: Drew Newberry --- architecture/gateway.md | 15 +- crates/openshell-core/src/config.rs | 165 +-------- crates/openshell-driver-docker/README.md | 4 +- crates/openshell-driver-docker/src/lib.rs | 79 +++-- crates/openshell-driver-docker/src/tests.rs | 37 +- .../openshell-driver-kubernetes/src/config.rs | 9 +- crates/openshell-driver-podman/README.md | 3 +- crates/openshell-driver-podman/src/config.rs | 17 +- .../openshell-driver-podman/src/container.rs | 5 +- crates/openshell-driver-podman/src/driver.rs | 2 +- crates/openshell-driver-podman/src/main.rs | 6 +- crates/openshell-driver-vm/README.md | 44 +-- crates/openshell-server/src/cli.rs | 328 +++--------------- crates/openshell-server/src/compute/vm.rs | 73 ++-- crates/openshell-server/src/config_file.rs | 56 ++- crates/openshell-server/src/grpc/sandbox.rs | 15 +- crates/openshell-server/src/lib.rs | 100 +----- deploy/deb/init-gateway-config.sh | 56 +++ deploy/deb/openshell-gateway.service | 15 +- .../openshell/templates/gateway-config.yaml | 6 - deploy/helm/openshell/values.yaml | 5 - .../kube/manifests/openshell-helmchart.yaml | 2 - deploy/man/openshell-gateway.8.md | 23 +- deploy/man/openshell-gateway.env.5.md | 48 +-- deploy/rpm/CONFIGURATION.md | 79 +++-- deploy/rpm/QUICKSTART.md | 5 +- deploy/rpm/TROUBLESHOOTING.md | 13 +- deploy/rpm/init-gateway-env.sh | 89 +++-- deploy/snap/README.md | 18 +- deploy/snap/bin/openshell-gateway-wrapper | 27 ++ deploy/snap/meta/snap.yaml.in | 11 +- docs/kubernetes/ingress.mdx | 16 +- docs/kubernetes/setup.mdx | 1 - docs/reference/gateway-config.mdx | 16 +- docs/reference/sandbox-compute-drivers.mdx | 49 +-- docs/security/best-practices.mdx | 4 +- python/openshell/release_formula_test.py | 11 +- rfc/0003-gateway-configuration/README.md | 9 +- snapcraft.yaml | 13 +- 
tasks/scripts/gateway-docker.sh | 24 +- tasks/scripts/gateway-vm.sh | 22 +- tasks/scripts/gateway.sh | 56 ++- tasks/scripts/package-deb.sh | 2 + tasks/scripts/package-snap.sh | 2 + tasks/scripts/release.py | 64 ++-- tasks/scripts/vm/smoke-orphan-cleanup.sh | 20 +- 46 files changed, 657 insertions(+), 1007 deletions(-) create mode 100755 deploy/deb/init-gateway-config.sh create mode 100755 deploy/snap/bin/openshell-gateway-wrapper diff --git a/architecture/gateway.md b/architecture/gateway.md index 5840cb511..e9cbe187d 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -199,20 +199,21 @@ The gateway reads its configuration from three sources, merged in this precedence (highest first): ``` -CLI flag > OPENSHELL_* env var > TOML file > built-in default +Gateway CLI flag > gateway OPENSHELL_* env var > TOML file > built-in default ``` The TOML file is opt-in via `--config ` / `OPENSHELL_GATEWAY_CONFIG`. -When unset, the gateway behaves exactly as before — CLI flags and env vars -drive every setting. See `docs/reference/gateway-config.mdx` for worked -per-driver examples and RFC 0003 for the full schema. +Driver implementation settings live in the TOML driver tables. See +`docs/reference/gateway-config.mdx` for worked per-driver examples and RFC +0003 for the full schema. `database_url` is env-only and rejected when present in the file (`OPENSHELL_DB_URL` / `--db-url`). ### Driver inheritance -`[openshell.gateway]` carries a small set of values (`default_image`, +`[openshell.gateway]` carries a small set of values (`sandbox_namespace`, +`default_image`, `supervisor_image`, `guest_tls_ca/cert/key`, `client_tls_secret_name`, `host_gateway_ip`, `enable_user_namespaces`) that are inherited into each driver's `[openshell.drivers.]` table when the driver-specific table @@ -227,8 +228,8 @@ value means the same thing in both, so the key lives only under each driver's own table. Driver-specific values that are not part of the inheritance allowlist -(e.g. 
K8s `namespace`, Podman `socket_path`, VM `vcpus`) only come from -the driver's own table. +(e.g. Podman `socket_path`, VM `vcpus`) only come from the driver's own +table. ## Operational Constraints diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index 0017af1f3..7a4358df8 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -21,7 +21,7 @@ use std::str::FromStr; /// Default SSH port inside sandbox containers. pub const DEFAULT_SSH_PORT: u16 = 2222; -/// Default server / SSH gateway port. +/// Default gateway server port. pub const DEFAULT_SERVER_PORT: u16 = 8080; /// Default container stop timeout in seconds (SIGTERM → SIGKILL). @@ -216,75 +216,10 @@ pub struct Config { #[serde(default)] pub compute_drivers: Vec, - /// Kubernetes namespace for sandboxes. - #[serde(default = "default_sandbox_namespace")] - pub sandbox_namespace: String, - - /// Default container image for sandboxes. - #[serde(default = "default_sandbox_image")] - pub sandbox_image: String, - - /// Kubernetes `imagePullPolicy` for sandbox pods (e.g. `Always`, - /// `IfNotPresent`, `Never`). Defaults to empty, which lets Kubernetes - /// apply its own default (`:latest` → `Always`, anything else → - /// `IfNotPresent`). - #[serde(default)] - pub sandbox_image_pull_policy: String, - - /// gRPC endpoint for sandboxes to connect back to `OpenShell`. - /// Used by sandbox pods to fetch their policy at startup. - #[serde(default)] - pub grpc_endpoint: String, - - /// Public gateway host for SSH proxy connections. - #[serde(default = "default_ssh_gateway_host")] - pub ssh_gateway_host: String, - - /// Public gateway port for SSH proxy connections. - #[serde(default = "default_ssh_gateway_port")] - pub ssh_gateway_port: u16, - - /// SSH listen port inside sandbox containers that expose a TCP endpoint. 
- #[serde(default = "default_sandbox_ssh_port")] - pub sandbox_ssh_port: u16, - - /// Filesystem path where the sandbox supervisor binds its SSH Unix - /// socket. The supervisor is passed this path via - /// `OPENSHELL_SSH_SOCKET_PATH` / `--ssh-socket-path` and connects its - /// relay bridge to the same path. - /// - /// When the gateway orchestrates sandboxes that each live in their own - /// filesystem (K8s pod, libkrun VM, etc.), the default is safe. For - /// local dev where multiple supervisors share `/run`, override this to - /// something unique per sandbox. - #[serde(default = "default_sandbox_ssh_socket_path")] - pub sandbox_ssh_socket_path: String, - /// TTL for SSH session tokens, in seconds. 0 disables expiry. #[serde(default = "default_ssh_session_ttl_secs")] pub ssh_session_ttl_secs: u64, - /// Kubernetes secret name containing client TLS materials for sandbox pods. - /// When set, sandbox pods get this secret mounted so they can connect to - /// the server over mTLS. - #[serde(default)] - pub client_tls_secret_name: String, - - /// Host gateway IP for sandbox pod hostAliases. - /// When set, sandbox pods get hostAliases entries mapping - /// `host.docker.internal` and `host.openshell.internal` to this IP, - /// allowing them to reach services running on the Docker host. - #[serde(default)] - pub host_gateway_ip: String, - - /// Enable Kubernetes user namespace isolation (`hostUsers: false`) for - /// sandbox pods. When enabled, container UID 0 maps to an unprivileged - /// host UID and capabilities become namespaced. Requires Kubernetes 1.33+ - /// with user namespace support available (beta through 1.35, GA in 1.36+), - /// plus a supporting container runtime and Linux 5.12+. - #[serde(default)] - pub enable_user_namespaces: bool, - /// Browser-facing sandbox service routing configuration. 
#[serde(default)] pub service_routing: ServiceRoutingConfig, @@ -401,18 +336,7 @@ impl Config { oidc: None, database_url: String::new(), compute_drivers: vec![], - sandbox_namespace: default_sandbox_namespace(), - sandbox_image: default_sandbox_image(), - sandbox_image_pull_policy: String::new(), - grpc_endpoint: String::new(), - ssh_gateway_host: default_ssh_gateway_host(), - ssh_gateway_port: default_ssh_gateway_port(), - sandbox_ssh_port: default_sandbox_ssh_port(), - sandbox_ssh_socket_path: default_sandbox_ssh_socket_path(), ssh_session_ttl_secs: default_ssh_session_ttl_secs(), - client_tls_secret_name: String::new(), - host_gateway_ip: String::new(), - enable_user_namespaces: false, service_routing: ServiceRoutingConfig::default(), } } @@ -473,55 +397,6 @@ impl Config { self } - /// Create a new configuration with a sandbox namespace. - #[must_use] - pub fn with_sandbox_namespace(mut self, namespace: impl Into) -> Self { - self.sandbox_namespace = namespace.into(); - self - } - - /// Create a new configuration with a default sandbox image. - #[must_use] - pub fn with_sandbox_image(mut self, image: impl Into) -> Self { - self.sandbox_image = image.into(); - self - } - - /// Create a new configuration with a sandbox image pull policy. - #[must_use] - pub fn with_sandbox_image_pull_policy(mut self, policy: impl Into) -> Self { - self.sandbox_image_pull_policy = policy.into(); - self - } - - /// Create a new configuration with a gRPC endpoint for sandbox callback. - #[must_use] - pub fn with_grpc_endpoint(mut self, endpoint: impl Into) -> Self { - self.grpc_endpoint = endpoint.into(); - self - } - - /// Create a new configuration with the SSH gateway host. - #[must_use] - pub fn with_ssh_gateway_host(mut self, host: impl Into) -> Self { - self.ssh_gateway_host = host.into(); - self - } - - /// Create a new configuration with the SSH gateway port. 
- #[must_use] - pub const fn with_ssh_gateway_port(mut self, port: u16) -> Self { - self.ssh_gateway_port = port; - self - } - - /// Create a new configuration with the sandbox SSH port. - #[must_use] - pub const fn with_sandbox_ssh_port(mut self, port: u16) -> Self { - self.sandbox_ssh_port = port; - self - } - /// Create a new configuration with the SSH session TTL. #[must_use] pub const fn with_ssh_session_ttl_secs(mut self, secs: u64) -> Self { @@ -529,20 +404,6 @@ impl Config { self } - /// Set the Kubernetes secret name for sandbox client TLS materials. - #[must_use] - pub fn with_client_tls_secret_name(mut self, name: impl Into) -> Self { - self.client_tls_secret_name = name.into(); - self - } - - /// Set the host gateway IP for sandbox pod hostAliases. - #[must_use] - pub fn with_host_gateway_ip(mut self, ip: impl Into) -> Self { - self.host_gateway_ip = ip.into(); - self - } - /// Set the OIDC configuration for JWT-based authentication. #[must_use] pub fn with_oidc(mut self, oidc: OidcConfig) -> Self { @@ -647,30 +508,6 @@ fn default_log_level() -> String { "info".to_string() } -fn default_sandbox_namespace() -> String { - "default".to_string() -} - -fn default_sandbox_image() -> String { - format!("{}/base:latest", crate::image::DEFAULT_COMMUNITY_REGISTRY) -} - -fn default_ssh_gateway_host() -> String { - "127.0.0.1".to_string() -} - -const fn default_ssh_gateway_port() -> u16 { - DEFAULT_SERVER_PORT -} - -fn default_sandbox_ssh_socket_path() -> String { - "/run/openshell/ssh.sock".to_string() -} - -const fn default_sandbox_ssh_port() -> u16 { - DEFAULT_SSH_PORT -} - const fn default_ssh_session_ttl_secs() -> u64 { 86400 // 24 hours } diff --git a/crates/openshell-driver-docker/README.md b/crates/openshell-driver-docker/README.md index 20cfe6a0f..434e70d13 100644 --- a/crates/openshell-driver-docker/README.md +++ b/crates/openshell-driver-docker/README.md @@ -39,10 +39,10 @@ The agent child process does not retain these supervisor privileges. 
The Docker driver bind-mounts a host-side Linux `openshell-sandbox` binary into each sandbox container. Resolution order is: -1. `--docker-supervisor-bin` / `OPENSHELL_DOCKER_SUPERVISOR_BIN`. +1. `supervisor_bin` in `[openshell.drivers.docker]`. 2. A sibling `openshell-sandbox` next to the running `openshell-gateway` binary. 3. A local Linux cargo target build for the Docker daemon architecture. -4. `--docker-supervisor-image` / `OPENSHELL_DOCKER_SUPERVISOR_IMAGE`, or the +4. `supervisor_image` in `[openshell.drivers.docker]`, or the release-matched default supervisor image, extracting `/openshell-sandbox`. Release and Docker-image gateway builds bake the matching supervisor image tag diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs index 28ca98cbf..30507422b 100644 --- a/crates/openshell-driver-docker/src/lib.rs +++ b/crates/openshell-driver-docker/src/lib.rs @@ -65,7 +65,7 @@ const DOCKER_NETWORK_DRIVER: &str = "bridge"; /// Default image holding the Linux `openshell-sandbox` binary. The gateway /// pulls this image and extracts the binary to a host-side cache when no -/// explicit `--docker-supervisor-bin` override or local build is available. +/// explicit `supervisor_bin` override or local build is available. const DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor"; /// Path to the supervisor binary inside the `openshell/supervisor` image @@ -130,6 +130,18 @@ pub trait SupervisorReadiness: Send + Sync + 'static { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(default, deny_unknown_fields)] pub struct DockerComputeConfig { + /// Default OCI image for sandboxes. + pub default_image: String, + + /// Image pull policy for sandbox images. + pub image_pull_policy: String, + + /// Namespace label applied to Docker sandboxes. + pub sandbox_namespace: String, + + /// Gateway gRPC endpoint the sandbox connects back to. 
+ pub grpc_endpoint: String, + /// Optional override for the Linux `openshell-sandbox` binary mounted into containers. pub supervisor_bin: Option, @@ -150,21 +162,40 @@ pub struct DockerComputeConfig { /// Docker bridge network that sandbox containers join. pub network_name: String, + + /// Host gateway IP used for sandbox host aliases. + pub host_gateway_ip: String, + + /// Unix socket path the in-container supervisor bridges relay traffic to. + pub ssh_socket_path: String, } impl Default for DockerComputeConfig { fn default() -> Self { Self { + default_image: default_sandbox_image(), + image_pull_policy: String::new(), + sandbox_namespace: "default".to_string(), + grpc_endpoint: String::new(), supervisor_bin: None, supervisor_image: None, guest_tls_ca: None, guest_tls_cert: None, guest_tls_key: None, network_name: DEFAULT_DOCKER_NETWORK_NAME.to_string(), + host_gateway_ip: String::new(), + ssh_socket_path: "/run/openshell/ssh.sock".to_string(), } } } +fn default_sandbox_image() -> String { + format!( + "{}/base:latest", + openshell_core::image::DEFAULT_COMMUNITY_REGISTRY + ) +} + #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct DockerGuestTlsPaths { pub(crate) ca: PathBuf, @@ -221,7 +252,7 @@ impl DockerComputeDriver { docker_config: &DockerComputeConfig, supervisor_readiness: Arc, ) -> CoreResult { - if config.grpc_endpoint.trim().is_empty() { + if docker_config.grpc_endpoint.trim().is_empty() { return Err(Error::config( "grpc_endpoint is required when using the docker compute driver", )); @@ -247,28 +278,28 @@ impl DockerComputeDriver { } let network_name = docker_network_name(docker_config); let bridge_gateway_ip = ensure_bridge_network(&docker, &network_name).await?; - let host_gateway_ip = parse_optional_host_gateway_ip(&config.host_gateway_ip)?; + let host_gateway_ip = parse_optional_host_gateway_ip(&docker_config.host_gateway_ip)?; let gateway_route = docker_gateway_route(&info, bridge_gateway_ip, gateway_port, host_gateway_ip); let grpc_endpoint = 
docker_container_openshell_endpoint( - &config.grpc_endpoint, + &docker_config.grpc_endpoint, HOST_OPENSHELL_INTERNAL, gateway_port, ); let daemon_arch = normalize_docker_arch(version.arch.as_deref().unwrap_or_default()); let supervisor_bin = resolve_supervisor_bin(&docker, docker_config, &daemon_arch).await?; - let guest_tls = docker_guest_tls_paths(config, docker_config)?; + let guest_tls = docker_guest_tls_paths(docker_config)?; let driver = Self { docker: Arc::new(docker), config: DockerDriverRuntimeConfig { - default_image: config.sandbox_image.clone(), - image_pull_policy: config.sandbox_image_pull_policy.clone(), - sandbox_namespace: config.sandbox_namespace.clone(), + default_image: docker_config.default_image.clone(), + image_pull_policy: docker_config.image_pull_policy.clone(), + sandbox_namespace: docker_config.sandbox_namespace.clone(), grpc_endpoint, network_name, gateway_route, - ssh_socket_path: config.sandbox_ssh_socket_path.clone(), + ssh_socket_path: docker_config.ssh_socket_path.clone(), stop_timeout_secs: DEFAULT_STOP_TIMEOUT_SECS, log_level: config.log_level.clone(), supervisor_bin, @@ -708,12 +739,12 @@ impl DockerComputeDriver { "never" => match self.docker.inspect_image(image).await { Ok(_) => Ok(()), Err(err) if is_not_found_error(&err) => Err(Status::failed_precondition(format!( - "docker image '{image}' is not present locally and sandbox_image_pull_policy=Never" + "docker image '{image}' is not present locally and image_pull_policy=Never" ))), Err(err) => Err(internal_status("inspect Docker image", err)), }, other => Err(Status::failed_precondition(format!( - "unsupported docker sandbox_image_pull_policy '{other}'; expected Always, IfNotPresent, or Never", + "unsupported docker image_pull_policy '{other}'; expected Always, IfNotPresent, or Never", ))), } } @@ -1093,11 +1124,10 @@ fn parse_optional_host_gateway_ip(value: &str) -> CoreResult> { return Ok(None); } - trimmed.parse().map(Some).map_err(|err| { - Error::config(format!( - 
"invalid OPENSHELL_HOST_GATEWAY_IP value '{trimmed}': {err}" - )) - }) + trimmed + .parse() + .map(Some) + .map_err(|err| Error::config(format!("invalid host_gateway_ip value '{trimmed}': {err}"))) } fn docker_gateway_route( @@ -1636,7 +1666,7 @@ pub(crate) async fn resolve_supervisor_bin( docker_config: &DockerComputeConfig, daemon_arch: &str, ) -> CoreResult { - // Tier 1: explicit --docker-supervisor-bin / OPENSHELL_DOCKER_SUPERVISOR_BIN. + // Tier 1: explicit supervisor_bin in [openshell.drivers.docker]. if let Some(path) = docker_config.supervisor_bin.clone() { let path = canonicalize_existing_file(&path, "docker supervisor binary")?; validate_linux_elf_binary(&path)?; @@ -1980,18 +2010,17 @@ pub(crate) fn validate_linux_elf_binary(path: &Path) -> CoreResult<()> { } pub(crate) fn docker_guest_tls_paths( - config: &Config, docker_config: &DockerComputeConfig, ) -> CoreResult> { let tls_flags_provided = docker_config.guest_tls_ca.is_some() || docker_config.guest_tls_cert.is_some() || docker_config.guest_tls_key.is_some(); - if !config.grpc_endpoint.starts_with("https://") { + if !docker_config.grpc_endpoint.starts_with("https://") { if tls_flags_provided { return Err(Error::config(format!( - "--docker-tls-ca/--docker-tls-cert/--docker-tls-key were provided but OPENSHELL_GRPC_ENDPOINT is '{}'; TLS materials require an https:// endpoint", - config.grpc_endpoint, + "guest_tls_ca/guest_tls_cert/guest_tls_key were provided but grpc_endpoint is '{}'; TLS materials require an https:// endpoint", + docker_config.grpc_endpoint, ))); } return Ok(None); @@ -2004,23 +2033,23 @@ pub(crate) fn docker_guest_tls_paths( ]; if provided.iter().all(Option::is_none) { return Err(Error::config( - "docker compute driver requires --docker-tls-ca, --docker-tls-cert, and --docker-tls-key when OPENSHELL_GRPC_ENDPOINT uses https://", + "docker compute driver requires guest_tls_ca, guest_tls_cert, and guest_tls_key when grpc_endpoint uses https://", )); } let Some(ca) = 
docker_config.guest_tls_ca.clone() else { return Err(Error::config( - "--docker-tls-ca is required when Docker sandbox TLS materials are configured", + "guest_tls_ca is required when Docker sandbox TLS materials are configured", )); }; let Some(cert) = docker_config.guest_tls_cert.clone() else { return Err(Error::config( - "--docker-tls-cert is required when Docker sandbox TLS materials are configured", + "guest_tls_cert is required when Docker sandbox TLS materials are configured", )); }; let Some(key) = docker_config.guest_tls_key.clone() else { return Err(Error::config( - "--docker-tls-key is required when Docker sandbox TLS materials are configured", + "guest_tls_key is required when Docker sandbox TLS materials are configured", )); }; diff --git a/crates/openshell-driver-docker/src/tests.rs b/crates/openshell-driver-docker/src/tests.rs index b3ea3b4a8..62a6b89e4 100644 --- a/crates/openshell-driver-docker/src/tests.rs +++ b/crates/openshell-driver-docker/src/tests.rs @@ -327,7 +327,7 @@ fn parse_optional_host_gateway_ip_rejects_invalid_values() { parse_optional_host_gateway_ip("not-an-ip") .unwrap_err() .to_string() - .contains("OPENSHELL_HOST_GATEWAY_IP") + .contains("host_gateway_ip") ); } @@ -708,20 +708,17 @@ fn validate_linux_elf_binary_rejects_non_elf_files() { #[test] fn docker_guest_tls_paths_require_all_files_for_https() { - let config = Config::new(None).with_grpc_endpoint("https://localhost:8443"); let tempdir = TempDir::new().unwrap(); let ca = tempdir.path().join("ca.crt"); fs::write(&ca, b"ca").unwrap(); - let err = docker_guest_tls_paths( - &config, - &DockerComputeConfig { - guest_tls_ca: Some(ca), - ..Default::default() - }, - ) + let err = docker_guest_tls_paths(&DockerComputeConfig { + grpc_endpoint: "https://localhost:8443".to_string(), + guest_tls_ca: Some(ca), + ..Default::default() + }) .unwrap_err(); - assert!(err.to_string().contains("--docker-tls-cert")); + assert!(err.to_string().contains("guest_tls_cert")); } #[test] @@ -798,26 
+795,26 @@ fn trim_container_name_tail_strips_separators() { #[test] fn docker_guest_tls_paths_rejects_tls_flags_without_https() { - let config = Config::new(None).with_grpc_endpoint("http://localhost:8080"); let tempdir = TempDir::new().unwrap(); let ca = tempdir.path().join("ca.crt"); fs::write(&ca, b"ca").unwrap(); - let err = docker_guest_tls_paths( - &config, - &DockerComputeConfig { - guest_tls_ca: Some(ca), - ..Default::default() - }, - ) + let err = docker_guest_tls_paths(&DockerComputeConfig { + grpc_endpoint: "http://localhost:8080".to_string(), + guest_tls_ca: Some(ca), + ..Default::default() + }) .unwrap_err(); assert!(err.to_string().contains("https://")); } #[test] fn docker_guest_tls_paths_allows_plain_http_without_tls_flags() { - let config = Config::new(None).with_grpc_endpoint("http://localhost:8080"); - let result = docker_guest_tls_paths(&config, &DockerComputeConfig::default()).unwrap(); + let result = docker_guest_tls_paths(&DockerComputeConfig { + grpc_endpoint: "http://localhost:8080".to_string(), + ..Default::default() + }) + .unwrap(); assert!(result.is_none()); } diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index a786098a3..28c04deb3 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -70,7 +70,7 @@ impl Default for KubernetesComputeConfig { fn default() -> Self { Self { namespace: DEFAULT_K8S_NAMESPACE.to_string(), - default_image: String::new(), + default_image: default_sandbox_image(), // Default empty so the gateway omits `imagePullPolicy` from pod // specs and Kubernetes applies its own default (Always for `latest`, // IfNotPresent otherwise). 
`DEFAULT_IMAGE_PULL_POLICY` ("missing") @@ -87,3 +87,10 @@ impl Default for KubernetesComputeConfig { } } } + +fn default_sandbox_image() -> String { + format!( + "{}/base:latest", + openshell_core::image::DEFAULT_COMMUNITY_REGISTRY + ) +} diff --git a/crates/openshell-driver-podman/README.md b/crates/openshell-driver-podman/README.md index 51bf5f4e3..1906bd912 100644 --- a/crates/openshell-driver-podman/README.md +++ b/crates/openshell-driver-podman/README.md @@ -291,8 +291,7 @@ Podman resources after out-of-band container removal or label drift. | `OPENSHELL_GRPC_ENDPOINT` | `--grpc-endpoint` | Auto-detected via `host.containers.internal` | Gateway gRPC endpoint for sandbox callbacks. | | `OPENSHELL_GATEWAY_PORT` | `--gateway-port` | `8080` | Gateway port used for endpoint auto-detection by the standalone binary. | | `OPENSHELL_NETWORK_NAME` | `--network-name` | `openshell` | Podman bridge network name. | -| `OPENSHELL_SANDBOX_SSH_PORT` | `--sandbox-ssh-port` | `2222` | SSH compatibility port inside the container. | -| `OPENSHELL_SANDBOX_SSH_SOCKET_PATH` | `--sandbox-ssh-socket-path` | `/run/openshell/ssh.sock` | Standalone driver only: supervisor Unix socket path in `PodmanComputeConfig`. In-gateway Podman uses server `config.sandbox_ssh_socket_path`. | +| `OPENSHELL_SANDBOX_SSH_SOCKET_PATH` | `--sandbox-ssh-socket-path` | `/run/openshell/ssh.sock` | Supervisor Unix socket path in `PodmanComputeConfig`. | | `OPENSHELL_STOP_TIMEOUT` | `--stop-timeout` | `10` | Container stop timeout in seconds. | | `OPENSHELL_SUPERVISOR_IMAGE` | `--supervisor-image` | `openshell/supervisor:latest` through the gateway, required standalone | OCI image containing the supervisor binary. | | `OPENSHELL_PODMAN_TLS_CA` | `--podman-tls-ca` | unset | Host path to the CA certificate mounted for sandbox mTLS. 
| diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index c64dccc5a..c78c2b12f 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -1,9 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -use openshell_core::config::{ - DEFAULT_SSH_PORT, DEFAULT_STOP_TIMEOUT_SECS, DEFAULT_SUPERVISOR_IMAGE, -}; +use openshell_core::config::{DEFAULT_STOP_TIMEOUT_SECS, DEFAULT_SUPERVISOR_IMAGE}; use std::path::PathBuf; use std::str::FromStr; @@ -91,8 +89,6 @@ pub struct PodmanComputeConfig { /// Name of the Podman bridge network. /// Created automatically if it does not exist. pub network_name: String, - /// SSH port inside the container. - pub ssh_port: u16, /// Container stop timeout in seconds (SIGTERM → SIGKILL). pub stop_timeout_secs: u32, /// OCI image containing the openshell-sandbox supervisor binary. 
@@ -184,13 +180,12 @@ impl Default for PodmanComputeConfig { fn default() -> Self { Self { socket_path: Self::default_socket_path(), - default_image: String::new(), + default_image: default_sandbox_image(), image_pull_policy: ImagePullPolicy::default(), grpc_endpoint: String::new(), gateway_port: openshell_core::config::DEFAULT_SERVER_PORT, sandbox_ssh_socket_path: "/run/openshell/ssh.sock".to_string(), network_name: DEFAULT_NETWORK_NAME.to_string(), - ssh_port: DEFAULT_SSH_PORT, stop_timeout_secs: DEFAULT_STOP_TIMEOUT_SECS, supervisor_image: DEFAULT_SUPERVISOR_IMAGE.to_string(), guest_tls_ca: None, @@ -200,6 +195,13 @@ impl Default for PodmanComputeConfig { } } +fn default_sandbox_image() -> String { + format!( + "{}/base:latest", + openshell_core::image::DEFAULT_COMMUNITY_REGISTRY + ) +} + impl std::fmt::Debug for PodmanComputeConfig { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("PodmanComputeConfig") @@ -210,7 +212,6 @@ impl std::fmt::Debug for PodmanComputeConfig { .field("gateway_port", &self.gateway_port) .field("sandbox_ssh_socket_path", &self.sandbox_ssh_socket_path) .field("network_name", &self.network_name) - .field("ssh_port", &self.ssh_port) .field("stop_timeout_secs", &self.stop_timeout_secs) .field("supervisor_image", &self.supervisor_image) .field("guest_tls_ca", &self.guest_tls_ca) diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs index cd1baf7ec..1cb58e338 100644 --- a/crates/openshell-driver-podman/src/container.rs +++ b/crates/openshell-driver-podman/src/container.rs @@ -490,7 +490,8 @@ pub fn build_container_spec(sandbox: &DriverSandbox, config: &PodmanComputeConfi "CMD-SHELL".into(), format!( "test -e /var/run/openshell-ssh-ready || test -S {} || ss -tlnp | grep -q :{}", - config.sandbox_ssh_socket_path, config.ssh_port + config.sandbox_ssh_socket_path, + openshell_core::config::DEFAULT_SSH_PORT ), ], interval: 3_000_000_000, @@ -569,7 +570,7 @@ 
pub fn build_container_spec(sandbox: &DriverSandbox, config: &PodmanComputeConfi // the host, so we must use the published host port on 127.0.0.1 instead. portmappings: vec![PortMapping { host_port: 0, - container_port: config.ssh_port, + container_port: openshell_core::config::DEFAULT_SSH_PORT, protocol: "tcp".into(), }], }; diff --git a/crates/openshell-driver-podman/src/driver.rs b/crates/openshell-driver-podman/src/driver.rs index 04c360bb7..a2a1e15d6 100644 --- a/crates/openshell-driver-podman/src/driver.rs +++ b/crates/openshell-driver-podman/src/driver.rs @@ -256,7 +256,7 @@ impl PodmanComputeDriver { let image = container::resolve_image(sandbox, &self.config); if image.is_empty() { return Err(ComputeDriverError::Precondition( - "no sandbox image configured: set --sandbox-image on the server \ + "no sandbox image configured: set default_image in [openshell.drivers.podman] \ or provide an image in the sandbox template" .to_string(), )); diff --git a/crates/openshell-driver-podman/src/main.rs b/crates/openshell-driver-podman/src/main.rs index 9670eb9b9..5a0227ef6 100644 --- a/crates/openshell-driver-podman/src/main.rs +++ b/crates/openshell-driver-podman/src/main.rs @@ -9,7 +9,7 @@ use tracing::info; use tracing_subscriber::EnvFilter; use openshell_core::VERSION; -use openshell_core::config::{DEFAULT_SSH_PORT, DEFAULT_STOP_TIMEOUT_SECS}; +use openshell_core::config::DEFAULT_STOP_TIMEOUT_SECS; use openshell_core::proto::compute::v1::compute_driver_server::ComputeDriverServer; use openshell_driver_podman::config::{DEFAULT_NETWORK_NAME, ImagePullPolicy}; use openshell_driver_podman::{ComputeDriverService, PodmanComputeConfig, PodmanComputeDriver}; @@ -67,9 +67,6 @@ struct Args { #[arg(long, env = "OPENSHELL_NETWORK_NAME", default_value = DEFAULT_NETWORK_NAME)] network_name: String, - #[arg(long, env = "OPENSHELL_SANDBOX_SSH_PORT", default_value_t = DEFAULT_SSH_PORT)] - sandbox_ssh_port: u16, - /// Container stop timeout in seconds (SIGTERM → SIGKILL). 
#[arg(long, env = "OPENSHELL_STOP_TIMEOUT", default_value_t = DEFAULT_STOP_TIMEOUT_SECS)] stop_timeout: u32, @@ -112,7 +109,6 @@ async fn main() -> Result<()> { gateway_port: args.gateway_port, sandbox_ssh_socket_path: args.sandbox_ssh_socket_path, network_name: args.network_name, - ssh_port: args.sandbox_ssh_port, stop_timeout_secs: args.stop_timeout, supervisor_image: args.supervisor_image, guest_tls_ca: args.podman_tls_ca, diff --git a/crates/openshell-driver-vm/README.md b/crates/openshell-driver-vm/README.md index 49f2ef005..ad2944a77 100644 --- a/crates/openshell-driver-vm/README.md +++ b/crates/openshell-driver-vm/README.md @@ -43,7 +43,7 @@ By default `mise run gateway:vm`: - Registers the CLI gateway `vm-dev` by writing `~/.config/openshell/gateways/vm-dev/metadata.json`. It does not modify the workspace `.env`. - Persists the gateway SQLite DB under `.cache/gateway-vm/gateway.db`. - Places the VM driver state (per-sandbox rootfs plus `run/compute-driver.sock`) under `/tmp/openshell-vm-driver-$USER-vm-dev/` so the AF_UNIX socket path stays under macOS `SUN_LEN`. -- Passes `--driver-dir $PWD/target/debug` so the freshly built `openshell-driver-vm` is used instead of an older installed copy from `~/.local/libexec/openshell`, `/usr/libexec/openshell`, or `/usr/local/libexec`. +- Writes `.cache/gateway-vm/gateway.toml` with `[openshell.drivers.vm].driver_dir = "$PWD/target/debug"` so the freshly built `openshell-driver-vm` is used instead of an older installed copy from `~/.local/libexec/openshell`, `/usr/libexec/openshell`, or `/usr/local/libexec`. For GPU passthrough (VFIO), pass `-- --gpu` and run with root privileges: @@ -104,36 +104,36 @@ codesign \ # 4. 
Start the gateway with the VM driver mkdir -p /tmp/openshell-vm-driver-$USER-vm-dev .cache/gateway-vm +cat > .cache/gateway-vm/gateway.toml < \ - --grpc-endpoint http://host.containers.internal:18081 \ - --port 18081 \ - --vm-driver-state-dir /tmp/openshell-vm-driver-$USER-vm-dev + --port 18081 ``` -The gateway resolves `openshell-driver-vm` in this order: `--driver-dir`, conventional install locations (`~/.local/libexec/openshell`, `/usr/libexec/openshell`, `/usr/local/libexec/openshell`, `/usr/local/libexec`), then a sibling of the gateway binary. +The gateway resolves `openshell-driver-vm` in this order: `[openshell.drivers.vm].driver_dir`, conventional install locations (`~/.local/libexec/openshell`, `/usr/libexec/openshell`, `/usr/local/libexec/openshell`, `/usr/local/libexec`), then a sibling of the gateway binary. -## Flags +## Gateway And Driver Configuration -| Flag | Env var | Default | Purpose | -|---|---|---|---| -| `--drivers vm` | `OPENSHELL_DRIVERS` | `kubernetes` | Select the VM compute driver. | -| `--grpc-endpoint URL` | `OPENSHELL_GRPC_ENDPOINT` | — | Required. URL the sandbox guest dials to reach the gateway. Use `http://host.containers.internal:` (or `host.docker.internal` / `host.openshell.internal`) so traffic flows through gvproxy's host-loopback NAT (HostIP `192.168.127.254` → host `127.0.0.1`). Loopback URLs like `http://127.0.0.1:` are rewritten automatically by the driver. The bare gateway IP (`192.168.127.1`) only carries gvproxy's own services and will not reach host-bound ports. | -| `--vm-driver-state-dir DIR` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Per-sandbox rootfs, console logs, image cache, and private `run/compute-driver.sock` UDS. | -| `--driver-dir DIR` | `OPENSHELL_DRIVER_DIR` | unset | Override the directory searched for `openshell-driver-vm`. | -| `--vm-driver-vcpus N` | `OPENSHELL_VM_DRIVER_VCPUS` | `2` | vCPUs per sandbox. 
| -| `--vm-driver-mem-mib N` | `OPENSHELL_VM_DRIVER_MEM_MIB` | `2048` | Memory per sandbox, in MiB. | -| `--vm-krun-log-level N` | `OPENSHELL_VM_KRUN_LOG_LEVEL` | `1` | libkrun verbosity (0–5). | -| `--vm-tls-ca PATH` | `OPENSHELL_VM_TLS_CA` | — | CA cert for the guest's mTLS client bundle. Required when `--grpc-endpoint` uses `https://`. | -| `--vm-tls-cert PATH` | `OPENSHELL_VM_TLS_CERT` | — | Guest client certificate. | -| `--vm-tls-key PATH` | `OPENSHELL_VM_TLS_KEY` | — | Guest client private key. | +Select the VM driver with `--drivers vm` or `OPENSHELL_DRIVERS=vm`. Configure VM-specific settings in `[openshell.drivers.vm]`: `grpc_endpoint`, `state_dir`, `driver_dir`, `vcpus`, `mem_mib`, `krun_log_level`, and `guest_tls_*`. -See [`openshell-gateway --help`](../openshell-server/src/cli.rs) for the full flag surface shared with the Kubernetes driver. +See [`openshell-gateway --help`](../openshell-server/src/cli.rs) for the gateway process flag surface. ## Verifying the gateway diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index 79420b062..b3b2c6399 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -7,7 +7,7 @@ use clap::parser::ValueSource; use clap::{ArgAction, ArgMatches, Command, CommandFactory, FromArgMatches, Parser}; use miette::{IntoDiagnostic, Result}; use openshell_core::ComputeDriverKind; -use openshell_core::config::{DEFAULT_DOCKER_NETWORK_NAME, DEFAULT_SERVER_PORT, DEFAULT_SSH_PORT}; +use openshell_core::config::DEFAULT_SERVER_PORT; use std::net::{IpAddr, SocketAddr}; use std::path::PathBuf; use tracing::info; @@ -46,8 +46,8 @@ struct RunArgs { /// Path to a TOML configuration file (see RFC 0003). /// /// When set, gateway-wide settings and per-driver tables are read from - /// the file. Command-line flags and `OPENSHELL_*` environment variables - /// continue to take precedence over file values. + /// the file. 
Gateway command-line flags and `OPENSHELL_*` environment + /// variables continue to take precedence over gateway file values. #[arg(long, env = "OPENSHELL_GATEWAY_CONFIG")] config: Option, @@ -110,140 +110,6 @@ struct RunArgs { )] drivers: Vec, - /// Kubernetes namespace for sandboxes. - #[arg(long, env = "OPENSHELL_SANDBOX_NAMESPACE", default_value = "default")] - sandbox_namespace: String, - - /// Default container image for sandboxes. - #[arg(long, env = "OPENSHELL_SANDBOX_IMAGE")] - sandbox_image: Option, - - /// Kubernetes `imagePullPolicy` for sandbox pods (Always, `IfNotPresent`, Never). - #[arg(long, env = "OPENSHELL_SANDBOX_IMAGE_PULL_POLICY")] - sandbox_image_pull_policy: Option, - - /// gRPC endpoint that sandboxes use to call back into the gateway. - /// Must be reachable from wherever the sandbox runs (Kubernetes pod, - /// Docker/Podman container, or VM), and is applied to every compute - /// driver. - #[arg(long, env = "OPENSHELL_GRPC_ENDPOINT")] - grpc_endpoint: Option, - - /// Public host for the SSH gateway. - #[arg(long, env = "OPENSHELL_SSH_GATEWAY_HOST", default_value = "127.0.0.1")] - ssh_gateway_host: String, - - /// Public port for the SSH gateway. - #[arg(long, env = "OPENSHELL_SSH_GATEWAY_PORT", default_value_t = DEFAULT_SERVER_PORT)] - ssh_gateway_port: u16, - - /// SSH port inside sandbox pods. - #[arg(long, env = "OPENSHELL_SANDBOX_SSH_PORT", default_value_t = DEFAULT_SSH_PORT)] - sandbox_ssh_port: u16, - - /// Kubernetes secret name containing client TLS materials for sandbox pods. - #[arg(long, env = "OPENSHELL_CLIENT_TLS_SECRET_NAME")] - client_tls_secret_name: Option, - - /// Host gateway IP for sandbox pod hostAliases. - /// When set, sandbox pods get hostAliases entries mapping - /// host.docker.internal and host.openshell.internal to this IP. - #[arg(long, env = "OPENSHELL_HOST_GATEWAY_IP")] - host_gateway_ip: Option, - - /// Working directory for VM driver sandbox state. 
- #[arg( - long, - env = "OPENSHELL_VM_DRIVER_STATE_DIR", - default_value_os_t = VmComputeConfig::default_state_dir() - )] - vm_driver_state_dir: PathBuf, - - /// Directory searched for compute-driver binaries (e.g. - /// `openshell-driver-vm`) when an explicit binary override isn't - /// configured. When unset, the gateway searches - /// `$HOME/.local/libexec/openshell`, `/usr/libexec/openshell`, - /// `/usr/local/libexec/openshell`, `/usr/local/libexec`, then a sibling - /// of the gateway binary. - #[arg(long, env = "OPENSHELL_DRIVER_DIR")] - driver_dir: Option, - - /// libkrun log level used by the VM helper. - #[arg( - long, - env = "OPENSHELL_VM_KRUN_LOG_LEVEL", - default_value_t = VmComputeConfig::default_krun_log_level() - )] - vm_krun_log_level: u32, - - /// Default vCPU count for VM sandboxes. - #[arg( - long, - env = "OPENSHELL_VM_DRIVER_VCPUS", - default_value_t = VmComputeConfig::default_vcpus() - )] - vm_vcpus: u8, - - /// Default memory allocation for VM sandboxes, in MiB. - #[arg( - long, - env = "OPENSHELL_VM_DRIVER_MEM_MIB", - default_value_t = VmComputeConfig::default_mem_mib() - )] - vm_mem_mib: u32, - - /// CA certificate installed into VM sandboxes for gateway mTLS. - #[arg(long, env = "OPENSHELL_VM_TLS_CA")] - vm_tls_ca: Option, - - /// Client certificate installed into VM sandboxes for gateway mTLS. - #[arg(long, env = "OPENSHELL_VM_TLS_CERT")] - vm_tls_cert: Option, - - /// Client private key installed into VM sandboxes for gateway mTLS. - #[arg(long, env = "OPENSHELL_VM_TLS_KEY")] - vm_tls_key: Option, - - /// Linux `openshell-sandbox` binary bind-mounted into Docker sandboxes. - /// - /// When unset the gateway falls back to (in order) a sibling - /// `openshell-sandbox` next to the gateway binary, a local cargo build, - /// or extracting the binary from `--docker-supervisor-image`. 
- #[arg(long, env = "OPENSHELL_DOCKER_SUPERVISOR_BIN")] - docker_supervisor_bin: Option, - - /// Image the Docker driver pulls to extract the Linux - /// `openshell-sandbox` binary when no explicit `--docker-supervisor-bin` - /// override or local build is available. Defaults to - /// `ghcr.io/nvidia/openshell/supervisor:`. - #[arg(long, env = "OPENSHELL_DOCKER_SUPERVISOR_IMAGE")] - docker_supervisor_image: Option, - - /// CA certificate bind-mounted into Docker sandboxes for gateway mTLS. - #[arg(long, env = "OPENSHELL_DOCKER_TLS_CA")] - docker_tls_ca: Option, - - /// Client certificate bind-mounted into Docker sandboxes for gateway mTLS. - #[arg(long, env = "OPENSHELL_DOCKER_TLS_CERT")] - docker_tls_cert: Option, - - /// Client private key bind-mounted into Docker sandboxes for gateway mTLS. - #[arg(long, env = "OPENSHELL_DOCKER_TLS_KEY")] - docker_tls_key: Option, - - /// Docker bridge network used for sandbox containers. - #[arg( - long, - env = "OPENSHELL_DOCKER_NETWORK_NAME", - default_value = DEFAULT_DOCKER_NETWORK_NAME - )] - docker_network_name: String, - - /// Enable Kubernetes user namespace isolation (hostUsers: false) for - /// sandbox pods. - #[arg(long, env = "OPENSHELL_ENABLE_USER_NAMESPACES")] - enable_user_namespaces: bool, - /// Disable TLS entirely — listen on plaintext HTTP. /// Use this when the gateway sits behind a reverse proxy or tunnel /// (e.g. Cloudflare Tunnel) that terminates TLS at the edge. 
@@ -446,10 +312,6 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { config = config .with_database_url(db_url) .with_compute_drivers(args.drivers.clone()) - .with_sandbox_namespace(args.sandbox_namespace.clone()) - .with_ssh_gateway_host(args.ssh_gateway_host.clone()) - .with_ssh_gateway_port(args.ssh_gateway_port) - .with_sandbox_ssh_port(args.sandbox_ssh_port) .with_server_sans(args.server_sans.clone()) .with_loopback_service_http(args.enable_loopback_service_http); @@ -460,26 +322,6 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { config = config.with_ssh_session_ttl_secs(ttl); } - if let Some(image) = args.sandbox_image.clone() { - config = config.with_sandbox_image(image); - } - - if let Some(policy) = args.sandbox_image_pull_policy.clone() { - config = config.with_sandbox_image_pull_policy(policy); - } - - if let Some(endpoint) = args.grpc_endpoint.clone() { - config = config.with_grpc_endpoint(endpoint); - } - - if let Some(name) = args.client_tls_secret_name.clone() { - config = config.with_client_tls_secret_name(name); - } - - if let Some(ip) = args.host_gateway_ip.clone() { - config = config.with_host_gateway_ip(ip); - } - if let Some(issuer) = args.oidc_issuer.clone() { config = config.with_oidc(openshell_core::OidcConfig { issuer, @@ -492,10 +334,8 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { }); } - config.enable_user_namespaces = args.enable_user_namespaces; - - let vm_config = build_vm_config(&args, &matches, &config, file.as_ref())?; - let docker_config = build_docker_config(&args, &matches, file.as_ref())?; + let vm_config = build_vm_config(file.as_ref())?; + let docker_config = build_docker_config(file.as_ref())?; if args.disable_tls { info!("TLS disabled — listening on plaintext HTTP"); @@ -592,49 +432,6 @@ fn merge_file_into_args(args: &mut RunArgs, file: &GatewayFileSection, matches: { args.drivers.clone_from(drivers); } - if let Some(ns) = 
&file.sandbox_namespace - && arg_defaulted(matches, "sandbox_namespace") - { - args.sandbox_namespace.clone_from(ns); - } - if let Some(port) = file.sandbox_ssh_port - && arg_defaulted(matches, "sandbox_ssh_port") - { - args.sandbox_ssh_port = port; - } - if let Some(host) = &file.ssh_gateway_host - && arg_defaulted(matches, "ssh_gateway_host") - { - args.ssh_gateway_host.clone_from(host); - } - if let Some(port) = file.ssh_gateway_port - && arg_defaulted(matches, "ssh_gateway_port") - { - args.ssh_gateway_port = port; - } - if let Some(image) = &file.default_image - && args.sandbox_image.is_none() - && arg_defaulted(matches, "sandbox_image") - { - args.sandbox_image = Some(image.clone()); - } - if let Some(secret) = &file.client_tls_secret_name - && args.client_tls_secret_name.is_none() - && arg_defaulted(matches, "client_tls_secret_name") - { - args.client_tls_secret_name = Some(secret.clone()); - } - if let Some(ip) = &file.host_gateway_ip - && args.host_gateway_ip.is_none() - && arg_defaulted(matches, "host_gateway_ip") - { - args.host_gateway_ip = Some(ip.clone()); - } - if let Some(enabled) = file.enable_user_namespaces - && arg_defaulted(matches, "enable_user_namespaces") - { - args.enable_user_namespaces = enabled; - } if let Some(sans) = &file.server_sans && args.server_sans.is_empty() && arg_defaulted(matches, "server_sans") @@ -692,14 +489,9 @@ fn merge_file_into_args(args: &mut RunArgs, file: &GatewayFileSection, matches: } } -/// Build [`VmComputeConfig`] by overlaying CLI args on top of the -/// `[openshell.drivers.vm]` table inherited from `[openshell.gateway]`. -fn build_vm_config( - args: &RunArgs, - matches: &ArgMatches, - config: &openshell_core::Config, - file: Option<&ConfigFile>, -) -> Result { +/// Build [`VmComputeConfig`] from the `[openshell.drivers.vm]` table +/// inherited from `[openshell.gateway]`. 
+fn build_vm_config(file: Option<&ConfigFile>) -> Result { let mut cfg = if let Some(file) = file { let merged = config_file::driver_table( ComputeDriverKind::Vm, @@ -713,80 +505,26 @@ fn build_vm_config( VmComputeConfig::default() }; - // CLI/env overrides — and `state_dir` is also pulled from RunArgs when the - // file did not set it, so the gateway always has a working directory. - if !arg_defaulted(matches, "vm_driver_state_dir") || cfg.state_dir.as_os_str().is_empty() { - cfg.state_dir.clone_from(&args.vm_driver_state_dir); - } - if !arg_defaulted(matches, "driver_dir") || cfg.driver_dir.is_none() { - cfg.driver_dir.clone_from(&args.driver_dir); - } - if !arg_defaulted(matches, "vm_krun_log_level") { - cfg.krun_log_level = args.vm_krun_log_level; - } - if !arg_defaulted(matches, "vm_vcpus") { - cfg.vcpus = args.vm_vcpus; - } - if !arg_defaulted(matches, "vm_mem_mib") { - cfg.mem_mib = args.vm_mem_mib; - } - if let Some(p) = args.vm_tls_ca.clone() { - cfg.guest_tls_ca = Some(p); - } - if let Some(p) = args.vm_tls_cert.clone() { - cfg.guest_tls_cert = Some(p); - } - if let Some(p) = args.vm_tls_key.clone() { - cfg.guest_tls_key = Some(p); - } - // Fall through: image inherited from gateway-wide `sandbox_image` when - // the merged table did not supply `default_image`. - if cfg.default_image.is_empty() { - cfg.default_image.clone_from(&config.sandbox_image); + if cfg.state_dir.as_os_str().is_empty() { + cfg.state_dir = VmComputeConfig::default_state_dir(); } Ok(cfg) } /// Build [`DockerComputeConfig`] using the same inheritance pattern as /// [`build_vm_config`]. 
-fn build_docker_config( - args: &RunArgs, - matches: &ArgMatches, - file: Option<&ConfigFile>, -) -> Result { - let mut cfg = if let Some(file) = file { +fn build_docker_config(file: Option<&ConfigFile>) -> Result { + if let Some(file) = file { let merged = config_file::driver_table( ComputeDriverKind::Docker, &file.openshell.gateway, file.openshell.drivers.get("docker"), ); - merged + return merged .try_into::() - .map_err(|e| miette::miette!("invalid [openshell.drivers.docker] table: {e}"))? - } else { - DockerComputeConfig::default() - }; - - if args.docker_supervisor_bin.is_some() { - cfg.supervisor_bin.clone_from(&args.docker_supervisor_bin); - } - if args.docker_supervisor_image.is_some() { - cfg.supervisor_image - .clone_from(&args.docker_supervisor_image); - } - if args.docker_tls_ca.is_some() { - cfg.guest_tls_ca.clone_from(&args.docker_tls_ca); - } - if args.docker_tls_cert.is_some() { - cfg.guest_tls_cert.clone_from(&args.docker_tls_cert); + .map_err(|e| miette::miette!("invalid [openshell.drivers.docker] table: {e}")); } - if args.docker_tls_key.is_some() { - cfg.guest_tls_key.clone_from(&args.docker_tls_key); - } - if !arg_defaulted(matches, "docker_network_name") { - cfg.network_name.clone_from(&args.docker_network_name); - } - Ok(cfg) + Ok(DockerComputeConfig::default()) } #[cfg(test)] @@ -946,6 +684,42 @@ mod tests { assert_eq!(cli.run.server_sans, vec!["*.apps.example.com".to_string()]); } + #[test] + fn command_rejects_removed_driver_flags() { + let err = command() + .try_get_matches_from([ + "openshell-gateway", + "--db-url", + "sqlite::memory:", + "--sandbox-image", + "example/sandbox:latest", + ]) + .expect_err("driver implementation flags should not be accepted"); + + assert_eq!(err.kind(), clap::error::ErrorKind::UnknownArgument); + } + + #[test] + fn command_rejects_removed_ssh_endpoint_flags() { + for flag in [ + "--ssh-gateway-host", + "--ssh-gateway-port", + "--sandbox-ssh-port", + ] { + let err = command() + .try_get_matches_from([ + 
"openshell-gateway", + "--db-url", + "sqlite::memory:", + flag, + "x", + ]) + .expect_err("SSH endpoint flags should not be accepted"); + + assert_eq!(err.kind(), clap::error::ErrorKind::UnknownArgument); + } + } + #[test] fn generate_certs_subcommand_parses_without_db_url() { let _lock = ENV_LOCK diff --git a/crates/openshell-server/src/compute/vm.rs b/crates/openshell-server/src/compute/vm.rs index 7dc9ee994..14e518e7b 100644 --- a/crates/openshell-server/src/compute/vm.rs +++ b/crates/openshell-server/src/compute/vm.rs @@ -73,6 +73,9 @@ pub struct VmComputeConfig { /// Default sandbox image the driver should use when a request omits one. pub default_image: String, + /// Gateway gRPC endpoint the sandbox guest connects back to. + pub grpc_endpoint: String, + /// libkrun log level used by the VM driver helper. pub krun_log_level: u32, @@ -135,7 +138,8 @@ impl Default for VmComputeConfig { Self { state_dir: Self::default_state_dir(), driver_dir: None, - default_image: String::new(), + default_image: default_sandbox_image(), + grpc_endpoint: String::new(), krun_log_level: Self::default_krun_log_level(), vcpus: Self::default_vcpus(), mem_mib: Self::default_mem_mib(), @@ -146,6 +150,13 @@ impl Default for VmComputeConfig { } } +fn default_sandbox_image() -> String { + format!( + "{}/base:latest", + openshell_core::image::DEFAULT_COMMUNITY_REGISTRY + ) +} + #[cfg(unix)] #[derive(Debug, Clone, PartialEq, Eq)] pub struct VmGuestTlsPaths { @@ -158,7 +169,7 @@ pub struct VmGuestTlsPaths { /// /// Resolution order: /// 1. `{driver_dir}/openshell-driver-vm`, where `driver_dir` comes from -/// `--driver-dir` / `OPENSHELL_DRIVER_DIR`. +/// `[openshell.drivers.vm].driver_dir`. /// 2. Conventional install directories: /// `~/.local/libexec/openshell`, `/usr/libexec/openshell`, /// `/usr/local/libexec/openshell`, `/usr/local/libexec`. 
@@ -198,7 +209,7 @@ pub fn resolve_compute_driver_bin(vm_config: &VmComputeConfig) -> Result>() .join(", "); Err(Error::config(format!( - "vm compute driver binary not found (searched {searched_display}); install it under --driver-dir / OPENSHELL_DRIVER_DIR, a conventional libexec path such as ~/.local/libexec/openshell, /usr/libexec/openshell, or /usr/local/libexec{{,/openshell}}, or place it next to the gateway binary" + "vm compute driver binary not found (searched {searched_display}); install it under [openshell.drivers.vm].driver_dir, a conventional libexec path such as ~/.local/libexec/openshell, /usr/libexec/openshell, or /usr/local/libexec{{,/openshell}}, or place it next to the gateway binary" ))) } @@ -360,10 +371,9 @@ fn remove_stale_socket(socket_path: &Path, expected_uid: u32) -> Result<()> { #[cfg(unix)] pub fn compute_driver_guest_tls_paths( - config: &Config, vm_config: &VmComputeConfig, ) -> Result> { - if !config.grpc_endpoint.starts_with("https://") { + if !vm_config.grpc_endpoint.starts_with("https://") { return Ok(None); } @@ -374,23 +384,23 @@ pub fn compute_driver_guest_tls_paths( ]; if provided.iter().all(Option::is_none) { return Err(Error::config( - "vm compute driver requires --vm-tls-ca, --vm-tls-cert, and --vm-tls-key when OPENSHELL_GRPC_ENDPOINT uses https://", + "vm compute driver requires guest_tls_ca, guest_tls_cert, and guest_tls_key when grpc_endpoint uses https://", )); } let Some(ca) = vm_config.guest_tls_ca.clone() else { return Err(Error::config( - "--vm-tls-ca is required when VM guest TLS materials are configured", + "guest_tls_ca is required when VM guest TLS materials are configured", )); }; let Some(cert) = vm_config.guest_tls_cert.clone() else { return Err(Error::config( - "--vm-tls-cert is required when VM guest TLS materials are configured", + "guest_tls_cert is required when VM guest TLS materials are configured", )); }; let Some(key) = vm_config.guest_tls_key.clone() else { return Err(Error::config( - "--vm-tls-key 
is required when VM guest TLS materials are configured", + "guest_tls_key is required when VM guest TLS materials are configured", )); }; @@ -414,7 +424,7 @@ pub async fn spawn( config: &Config, vm_config: &VmComputeConfig, ) -> Result<(Channel, Arc)> { - if config.grpc_endpoint.trim().is_empty() { + if vm_config.grpc_endpoint.trim().is_empty() { return Err(Error::config( "grpc_endpoint is required when using the vm compute driver", )); @@ -422,7 +432,7 @@ pub async fn spawn( let driver_bin = resolve_compute_driver_bin(vm_config)?; let socket_path = compute_driver_socket_path(vm_config); - let guest_tls_paths = compute_driver_guest_tls_paths(config, vm_config)?; + let guest_tls_paths = compute_driver_guest_tls_paths(vm_config)?; prepare_compute_driver_socket_path(vm_config, &socket_path)?; let mut command = Command::new(&driver_bin); @@ -437,7 +447,7 @@ pub async fn spawn( command.arg("--log-level").arg(&config.log_level); command .arg("--openshell-endpoint") - .arg(&config.grpc_endpoint); + .arg(&vm_config.grpc_endpoint); command.arg("--state-dir").arg(&vm_config.state_dir); if !vm_config.default_image.trim().is_empty() { command.arg("--default-image").arg(&vm_config.default_image); @@ -539,7 +549,6 @@ mod tests { prepare_compute_driver_socket_path, prepare_vm_state_dir, resolve_compute_driver_bin, resolve_driver_search_dirs, }; - use openshell_core::{Config, TlsConfig}; use std::os::unix::fs::PermissionsExt; use std::os::unix::net::UnixListener as StdUnixListener; use std::path::PathBuf; @@ -570,8 +579,7 @@ mod tests { let err = resolve_compute_driver_bin(&vm_config) .unwrap_err() .to_string(); - assert!(err.contains("--driver-dir")); - assert!(err.contains("OPENSHELL_DRIVER_DIR")); + assert!(err.contains("[openshell.drivers.vm].driver_dir")); assert!(err.contains("openshell-driver-vm")); } @@ -589,27 +597,16 @@ mod tests { #[test] fn vm_compute_driver_tls_requires_explicit_guest_bundle() { - let dir = tempdir().unwrap(); - let server_cert = 
dir.path().join("server.crt"); - let server_key = dir.path().join("server.key"); - let server_ca = dir.path().join("client-ca.crt"); - std::fs::write(&server_cert, "server-cert").unwrap(); - std::fs::write(&server_key, "server-key").unwrap(); - std::fs::write(&server_ca, "client-ca").unwrap(); - - let config = Config::new(Some(TlsConfig { - cert_path: server_cert, - key_path: server_key, - client_ca_path: server_ca, - allow_unauthenticated: false, - })) - .with_grpc_endpoint("https://gateway.internal:8443"); + let vm_config = VmComputeConfig { + grpc_endpoint: "https://gateway.internal:8443".to_string(), + ..Default::default() + }; - let err = compute_driver_guest_tls_paths(&config, &VmComputeConfig::default()) + let err = compute_driver_guest_tls_paths(&vm_config) .expect_err("https vm endpoints should require an explicit guest client bundle"); assert!( err.to_string() - .contains("--vm-tls-ca, --vm-tls-cert, and --vm-tls-key") + .contains("guest_tls_ca, guest_tls_cert, and guest_tls_key") ); } @@ -618,14 +615,12 @@ mod tests { let dir = tempdir().unwrap(); let server_cert = dir.path().join("server.crt"); let server_key = dir.path().join("server.key"); - let server_ca = dir.path().join("client-ca.crt"); let guest_ca = dir.path().join("guest-ca.crt"); let guest_cert = dir.path().join("guest.crt"); let guest_key = dir.path().join("guest.key"); for path in [ &server_cert, &server_key, - &server_ca, &guest_ca, &guest_cert, &guest_key, @@ -633,21 +628,15 @@ mod tests { std::fs::write(path, path.display().to_string()).unwrap(); } - let config = Config::new(Some(TlsConfig { - cert_path: server_cert.clone(), - key_path: server_key.clone(), - client_ca_path: server_ca, - allow_unauthenticated: false, - })) - .with_grpc_endpoint("https://gateway.internal:8443"); let vm_config = VmComputeConfig { + grpc_endpoint: "https://gateway.internal:8443".to_string(), guest_tls_ca: Some(guest_ca.clone()), guest_tls_cert: Some(guest_cert.clone()), guest_tls_key: Some(guest_key.clone()), 
..Default::default() }; - let guest_paths = compute_driver_guest_tls_paths(&config, &vm_config) + let guest_paths = compute_driver_guest_tls_paths(&vm_config) .unwrap() .expect("https vm endpoints should pass an explicit guest client bundle"); assert_eq!(guest_paths.ca, guest_ca); diff --git a/crates/openshell-server/src/config_file.rs b/crates/openshell-server/src/config_file.rs index 0a7afe394..2a1320a55 100644 --- a/crates/openshell-server/src/config_file.rs +++ b/crates/openshell-server/src/config_file.rs @@ -10,13 +10,15 @@ //! a `[openshell.drivers.]` table so each driver crate's //! `Deserialize` impl sees a fully-populated table. //! -//! The merge precedence at the gateway level is: +//! The merge precedence for gateway process settings is: //! ```text //! CLI flag > OPENSHELL_* env var > TOML file > built-in default //! ``` -//! Per-field application of file values happens in [`crate::cli`], which uses -//! clap's `ArgMatches::value_source` to detect arguments that fell back to -//! their default and are therefore eligible for replacement by file values. +//! Driver implementation settings are configured in the TOML driver tables. +//! Per-field application of gateway file values happens in [`crate::cli`], +//! which uses clap's `ArgMatches::value_source` to detect arguments that fell +//! back to their default and are therefore eligible for replacement by file +//! values. 
use std::collections::BTreeMap; use std::net::SocketAddr; @@ -91,12 +93,6 @@ pub struct GatewayFileSection { #[serde(default)] pub sandbox_namespace: Option, #[serde(default)] - pub sandbox_ssh_port: Option, - #[serde(default)] - pub ssh_gateway_host: Option, - #[serde(default)] - pub ssh_gateway_port: Option, - #[serde(default)] pub ssh_session_ttl_secs: Option, // ── Service routing ────────────────────────────────────────────────── @@ -245,6 +241,7 @@ pub fn driver_table( fn inheritable_keys(driver: ComputeDriverKind) -> &'static [&'static str] { match driver { ComputeDriverKind::Kubernetes => &[ + "namespace", "default_image", "supervisor_image", "client_tls_secret_name", @@ -252,7 +249,10 @@ fn inheritable_keys(driver: ComputeDriverKind) -> &'static [&'static str] { "enable_user_namespaces", ], ComputeDriverKind::Docker => &[ + "sandbox_namespace", + "default_image", "supervisor_image", + "host_gateway_ip", "guest_tls_ca", "guest_tls_cert", "guest_tls_key", @@ -275,6 +275,7 @@ fn inheritable_keys(driver: ComputeDriverKind) -> &'static [&'static str] { fn gateway_inherited_value(g: &GatewayFileSection, key: &str) -> Option { match key { + "namespace" | "sandbox_namespace" => g.sandbox_namespace.as_deref().map(string_value), "default_image" => g.default_image.as_deref().map(string_value), "supervisor_image" => g.supervisor_image.as_deref().map(string_value), "client_tls_secret_name" => g.client_tls_secret_name.as_deref().map(string_value), @@ -388,6 +389,17 @@ nonsense = true assert!(matches!(err, ConfigFileError::Parse { .. })); } + #[test] + fn rejects_removed_ssh_endpoint_fields() { + let toml = r" +[openshell.gateway] +ssh_gateway_port = 8080 +"; + let tmp = write_tmp(toml); + let err = load(tmp.path()).expect_err("removed SSH endpoint keys must be rejected"); + assert!(matches!(err, ConfigFileError::Parse { .. 
})); + } + #[test] fn rejects_unsupported_version() { let toml = r" @@ -432,6 +444,30 @@ version = 2 ); } + #[test] + fn docker_driver_table_inherits_gateway_defaults() { + let gateway = GatewayFileSection { + sandbox_namespace: Some("agents".to_string()), + default_image: Some("ghcr.io/nvidia/openshell/sandbox:0.9".to_string()), + host_gateway_ip: Some("10.0.0.1".to_string()), + ..Default::default() + }; + let merged = driver_table(ComputeDriverKind::Docker, &gateway, None); + let table = merged.as_table().expect("table"); + assert_eq!( + table.get("sandbox_namespace").and_then(|v| v.as_str()), + Some("agents") + ); + assert_eq!( + table.get("default_image").and_then(|v| v.as_str()), + Some("ghcr.io/nvidia/openshell/sandbox:0.9") + ); + assert_eq!( + table.get("host_gateway_ip").and_then(|v| v.as_str()), + Some("10.0.0.1") + ); + } + #[test] fn driver_table_specific_value_overrides_gateway_default() { let gateway = GatewayFileSection { diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index 00b172f1f..700f7bc8f 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -1141,17 +1141,10 @@ pub(super) async fn handle_revoke_ssh_session( // --------------------------------------------------------------------------- fn resolve_gateway(config: &openshell_core::Config) -> (String, u16) { - let host = if config.ssh_gateway_host.is_empty() { - config.bind_address.ip().to_string() - } else { - config.ssh_gateway_host.clone() - }; - let port = if config.ssh_gateway_port == 0 { - config.bind_address.port() - } else { - config.ssh_gateway_port - }; - (host, port) + ( + config.bind_address.ip().to_string(), + config.bind_address.port(), + ) } /// Shell-escape a value for embedding in a POSIX shell command. 
diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 7d45c3832..4a39e7f67 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -578,49 +578,7 @@ async fn build_compute_runtime( match driver { ComputeDriverKind::Kubernetes => { - let mut k8s = kubernetes_config_from_file(file)?; - // Env overrides file for fields not represented in Config. - if let Ok(v) = std::env::var("OPENSHELL_SUPERVISOR_IMAGE") - && !v.is_empty() - { - k8s.supervisor_image = v; - } - if let Ok(v) = std::env::var("OPENSHELL_SUPERVISOR_IMAGE_PULL_POLICY") - && !v.is_empty() - { - k8s.supervisor_image_pull_policy = v; - } - if let Ok(v) = std::env::var("OPENSHELL_SUPERVISOR_SIDELOAD_METHOD") - && !v.is_empty() - && let Ok(parsed) = v.parse() - { - k8s.supervisor_sideload_method = parsed; - } - // Shared fields are sourced from Config, which already merged - // file + CLI/env at startup. - k8s.namespace.clone_from(&config.sandbox_namespace); - k8s.default_image.clone_from(&config.sandbox_image); - // Only let the gateway-wide CLI/env value overwrite the per-driver - // file value when it was actually set — otherwise the empty CLI - // default would silently clobber `image_pull_policy` configured - // under `[openshell.drivers.kubernetes]`. - if !config.sandbox_image_pull_policy.is_empty() { - k8s.image_pull_policy - .clone_from(&config.sandbox_image_pull_policy); - } - // Same rationale as `image_pull_policy`: only let the gateway-wide - // CLI/env value win when it was actually set, otherwise the empty - // CLI default would clobber `grpc_endpoint` from - // `[openshell.drivers.kubernetes]`. 
- if !config.grpc_endpoint.is_empty() { - k8s.grpc_endpoint.clone_from(&config.grpc_endpoint); - } - k8s.ssh_socket_path - .clone_from(&config.sandbox_ssh_socket_path); - k8s.client_tls_secret_name - .clone_from(&config.client_tls_secret_name); - k8s.host_gateway_ip.clone_from(&config.host_gateway_ip); - k8s.enable_user_namespaces = config.enable_user_namespaces; + let k8s = kubernetes_config_from_file(file)?; ComputeRuntime::new_kubernetes( k8s, store, @@ -659,63 +617,7 @@ async fn build_compute_runtime( } ComputeDriverKind::Podman => { let mut podman = podman_config_from_file(file)?; - // Env overrides file for fields not represented in Config. - if let Ok(v) = std::env::var("OPENSHELL_PODMAN_SOCKET") - && !v.is_empty() - { - podman.socket_path = std::path::PathBuf::from(v); - } - if let Ok(v) = std::env::var("OPENSHELL_NETWORK_NAME") - && !v.is_empty() - { - podman.network_name = v; - } - if let Ok(v) = std::env::var("OPENSHELL_STOP_TIMEOUT") - && let Ok(parsed) = v.parse() - { - podman.stop_timeout_secs = parsed; - } - if let Ok(v) = std::env::var("OPENSHELL_SUPERVISOR_IMAGE") - && !v.is_empty() - { - podman.supervisor_image = v; - } - if let Ok(v) = std::env::var("OPENSHELL_PODMAN_TLS_CA") - && !v.is_empty() - { - podman.guest_tls_ca = Some(std::path::PathBuf::from(v)); - } - if let Ok(v) = std::env::var("OPENSHELL_PODMAN_TLS_CERT") - && !v.is_empty() - { - podman.guest_tls_cert = Some(std::path::PathBuf::from(v)); - } - if let Ok(v) = std::env::var("OPENSHELL_PODMAN_TLS_KEY") - && !v.is_empty() - { - podman.guest_tls_key = Some(std::path::PathBuf::from(v)); - } - // Shared fields are sourced from Config (which already merged - // file + CLI/env at startup). - podman.default_image.clone_from(&config.sandbox_image); - // The CLI/env `image_pull_policy` is K8s-shaped - // (e.g. `IfNotPresent`) and won't parse into Podman's lowercase - // enum. 
Only apply it when the operator set a Podman-shaped value - // explicitly; otherwise keep whatever `[openshell.drivers.podman]` - // (or the struct default) provided. - if !config.sandbox_image_pull_policy.is_empty() - && let Ok(policy) = config.sandbox_image_pull_policy.parse() - { - podman.image_pull_policy = policy; - } - if !config.grpc_endpoint.is_empty() { - podman.grpc_endpoint.clone_from(&config.grpc_endpoint); - } podman.gateway_port = config.bind_address.port(); - podman - .sandbox_ssh_socket_path - .clone_from(&config.sandbox_ssh_socket_path); - podman.ssh_port = config.sandbox_ssh_port; ComputeRuntime::new_podman( podman, diff --git a/deploy/deb/init-gateway-config.sh b/deploy/deb/init-gateway-config.sh new file mode 100755 index 000000000..55b07f7e5 --- /dev/null +++ b/deploy/deb/init-gateway-config.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -eu + +CONFIG_FILE="${1:?Usage: init-gateway-config.sh }" +PKI_DIR="${2:?Usage: init-gateway-config.sh }" +DRIVER_DIR="${3:?Usage: init-gateway-config.sh }" +VM_STATE_DIR="${4:?Usage: init-gateway-config.sh }" + +if [ -f "$CONFIG_FILE" ]; then + exit 0 +fi + +mkdir -p "$(dirname "$CONFIG_FILE")" "$VM_STATE_DIR" + +port="${OPENSHELL_SERVER_PORT:-17670}" +scheme="https" +if [ "${OPENSHELL_DISABLE_TLS:-false}" = "true" ]; then + scheme="http" +fi + +tmp="${CONFIG_FILE}.tmp" +{ + cat < "$tmp" + +chmod 600 "$tmp" +mv "$tmp" "$CONFIG_FILE" diff --git a/deploy/deb/openshell-gateway.service b/deploy/deb/openshell-gateway.service index 9de94da22..1b57f3e48 100644 --- a/deploy/deb/openshell-gateway.service +++ b/deploy/deb/openshell-gateway.service @@ -13,21 +13,10 @@ Environment=OPENSHELL_TLS_CERT=%S/openshell/tls/server/tls.crt Environment=OPENSHELL_TLS_KEY=%S/openshell/tls/server/tls.key Environment=OPENSHELL_TLS_CLIENT_CA=%S/openshell/tls/ca.crt 
Environment=OPENSHELL_DB_URL=sqlite:%S/openshell/gateway/openshell.db -Environment=OPENSHELL_GRPC_ENDPOINT=https://127.0.0.1:17670 -Environment=OPENSHELL_SSH_GATEWAY_HOST=127.0.0.1 -Environment=OPENSHELL_SSH_GATEWAY_PORT=17670 -Environment=OPENSHELL_VM_DRIVER_STATE_DIR=%S/openshell/vm-driver -Environment=OPENSHELL_VM_TLS_CA=%S/openshell/tls/ca.crt -Environment=OPENSHELL_VM_TLS_CERT=%S/openshell/tls/client/tls.crt -Environment=OPENSHELL_VM_TLS_KEY=%S/openshell/tls/client/tls.key -Environment=OPENSHELL_DOCKER_TLS_CA=%S/openshell/tls/ca.crt -Environment=OPENSHELL_DOCKER_TLS_CERT=%S/openshell/tls/client/tls.crt -Environment=OPENSHELL_DOCKER_TLS_KEY=%S/openshell/tls/client/tls.key -Environment=OPENSHELL_PODMAN_TLS_CA=%S/openshell/tls/ca.crt -Environment=OPENSHELL_PODMAN_TLS_CERT=%S/openshell/tls/client/tls.crt -Environment=OPENSHELL_PODMAN_TLS_KEY=%S/openshell/tls/client/tls.key +Environment=OPENSHELL_GATEWAY_CONFIG=%S/openshell/gateway/config.toml EnvironmentFile=-%h/.config/openshell/gateway.env ExecStartPre=/usr/bin/openshell-gateway generate-certs --output-dir %S/openshell/tls --server-san host.openshell.internal +ExecStartPre=/usr/libexec/openshell/init-gateway-config.sh %S/openshell/gateway/config.toml %S/openshell/tls /usr/libexec/openshell %S/openshell/vm-driver ExecStart=/usr/bin/openshell-gateway Restart=on-failure RestartSec=5s diff --git a/deploy/helm/openshell/templates/gateway-config.yaml b/deploy/helm/openshell/templates/gateway-config.yaml index e05309e51..9d95e45c1 100644 --- a/deploy/helm/openshell/templates/gateway-config.yaml +++ b/deploy/helm/openshell/templates/gateway-config.yaml @@ -31,12 +31,6 @@ data: sandbox_namespace = {{ include "openshell.sandboxNamespace" . | quote }} default_image = {{ .Values.server.sandboxImage | quote }} supervisor_image = {{ include "openshell.supervisorImage" . 
| quote }} - {{- if .Values.server.sshGatewayHost }} - ssh_gateway_host = {{ .Values.server.sshGatewayHost | quote }} - {{- end }} - {{- if .Values.server.sshGatewayPort }} - ssh_gateway_port = {{ .Values.server.sshGatewayPort }} - {{- end }} {{- if .Values.server.hostGatewayIP }} host_gateway_ip = {{ .Values.server.hostGatewayIP | quote }} {{- end }} diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index 4aa9faf20..2a87e3c10 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -101,11 +101,6 @@ server: # Override only when sandboxes must reach the gateway via a different # hostname (e.g. an external ingress or a host alias). grpcEndpoint: "" - # Public host/port returned to CLI clients for SSH proxy CONNECT requests. - # For local clusters the default 127.0.0.1:8080 is correct; for remote - # clusters these should be set to the externally reachable host and port. - sshGatewayHost: "" - sshGatewayPort: 0 # TLS configuration for the server. The server always terminates mTLS # directly and requires client certificates. # Host gateway IP for sandbox pod hostAliases. 
When set, sandbox pods get diff --git a/deploy/kube/manifests/openshell-helmchart.yaml b/deploy/kube/manifests/openshell-helmchart.yaml index ea4e370dc..d7d66d8a9 100644 --- a/deploy/kube/manifests/openshell-helmchart.yaml +++ b/deploy/kube/manifests/openshell-helmchart.yaml @@ -33,8 +33,6 @@ spec: sandboxImagePullPolicy: __SANDBOX_IMAGE_PULL_POLICY__ supervisorImage: ghcr.io/nvidia/openshell/supervisor:latest dbUrl: __DB_URL__ - sshGatewayHost: __SSH_GATEWAY_HOST__ - sshGatewayPort: __SSH_GATEWAY_PORT__ hostGatewayIP: __HOST_GATEWAY_IP__ disableGatewayAuth: __DISABLE_GATEWAY_AUTH__ disableTls: __DISABLE_TLS__ diff --git a/deploy/man/openshell-gateway.8.md b/deploy/man/openshell-gateway.8.md index f11a9d37b..64aa7c0f7 100644 --- a/deploy/man/openshell-gateway.8.md +++ b/deploy/man/openshell-gateway.8.md @@ -96,26 +96,9 @@ gRPC and HTTP, secured by mutual TLS (mTLS) by default. service URLs under that domain. Environment: **OPENSHELL_SERVER_SAN**. -**--sandbox-image** *IMAGE* -: Default container image for sandboxes. - Environment: **OPENSHELL_SANDBOX_IMAGE**. - -**--sandbox-image-pull-policy** *POLICY* -: Image pull policy: Always, IfNotPresent, Never. - Environment: **OPENSHELL_SANDBOX_IMAGE_PULL_POLICY**. - -**--ssh-gateway-host** *HOST* -: Public host for the SSH gateway endpoint. Default: **127.0.0.1**. - Environment: **OPENSHELL_SSH_GATEWAY_HOST**. - -**--ssh-gateway-port** *PORT* -: Public port for the SSH gateway endpoint. Default: **8080**. - Environment: **OPENSHELL_SSH_GATEWAY_PORT**. - -**--grpc-endpoint** *URL* -: gRPC endpoint for sandbox callbacks. Should be reachable from - within sandbox containers. - Environment: **OPENSHELL_GRPC_ENDPOINT**. +Compute driver settings such as sandbox image, callback endpoint, image +pull policy, network name, VM state directory, and guest TLS material are +configured in the TOML file passed with **--config**. 
# SYSTEMD INTEGRATION diff --git a/deploy/man/openshell-gateway.env.5.md b/deploy/man/openshell-gateway.env.5.md index 50b9b0694..f15f19015 100644 --- a/deploy/man/openshell-gateway.env.5.md +++ b/deploy/man/openshell-gateway.env.5.md @@ -93,41 +93,12 @@ exist (the unit has built-in defaults for all required settings). Wildcard DNS SANs also enable sandbox service URLs under that domain. -**OPENSHELL_PODMAN_TLS_CA** (default: auto-generated path) -: CA certificate bind-mounted into sandbox containers. +## Driver Configuration -**OPENSHELL_PODMAN_TLS_CERT** (default: auto-generated path) -: Client certificate bind-mounted into sandbox containers. - -**OPENSHELL_PODMAN_TLS_KEY** (default: auto-generated path) -: Client private key bind-mounted into sandbox containers. - -## Images - -**OPENSHELL_SUPERVISOR_IMAGE** (default: ghcr.io/nvidia/openshell/supervisor:latest) -: OCI image containing the supervisor binary, mounted read-only - into sandbox containers. - -**OPENSHELL_SANDBOX_IMAGE** (default: ghcr.io/nvidia/openshell-community/sandboxes/base:latest) -: Default OCI image for sandbox containers. - -**OPENSHELL_SANDBOX_IMAGE_PULL_POLICY** (default: missing) -: When to pull sandbox images: **always** (every sandbox creation), - **missing** (only if not cached locally), **never** (use cached - only), **newer** (pull if a newer version exists). - -## Podman Driver - -**OPENSHELL_PODMAN_SOCKET** (default: $XDG_RUNTIME_DIR/podman/podman.sock) -: Path to the Podman API Unix socket. - -**OPENSHELL_NETWORK_NAME** (default: openshell) -: Name of the Podman bridge network for sandbox containers. Created - automatically if it does not exist. - -**OPENSHELL_STOP_TIMEOUT** (default: 10) -: Seconds to wait after SIGTERM before sending SIGKILL when stopping - a sandbox container. +Compute driver settings are configured in the TOML file referenced by +**OPENSHELL_GATEWAY_CONFIG** or **--config**. 
This includes sandbox +images, image pull policy, callback endpoints, Podman socket path, +Docker network name, VM state directory, and guest TLS material. # EXAMPLES @@ -135,15 +106,6 @@ Change the API port to 9090: OPENSHELL_SERVER_PORT=9090 -Pin sandbox images to a specific version: - - OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:v0.0.37 - OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:v0.0.37 - -Air-gapped deployment (pre-loaded images, no registry access): - - OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=never - Enable debug logging: OPENSHELL_LOG_LEVEL=debug diff --git a/deploy/rpm/CONFIGURATION.md b/deploy/rpm/CONFIGURATION.md index de2e1e694..13d419477 100644 --- a/deploy/rpm/CONFIGURATION.md +++ b/deploy/rpm/CONFIGURATION.md @@ -109,8 +109,8 @@ To disable TLS (not recommended for production): OPENSHELL_DISABLE_TLS=true ``` -1. Comment out the `OPENSHELL_TLS_*` and `OPENSHELL_PODMAN_TLS_*` - variables if they are set. +1. Remove or comment out the `guest_tls_*` entries in + `~/.config/openshell/gateway.toml` if they are set. 1. Restart the gateway. @@ -120,14 +120,15 @@ When mTLS is enabled, the Podman driver bind-mounts the client certificates into each sandbox container so the supervisor process can establish an mTLS connection back to the gateway. 
-The following environment variables control the host-side paths of the -client certificates that are mounted into sandbox containers: +The following TOML fields control the host-side paths of the client +certificates that are mounted into sandbox containers: -| Variable | Description | -|----------|-------------| -| `OPENSHELL_PODMAN_TLS_CA` | CA certificate (host path) | -| `OPENSHELL_PODMAN_TLS_CERT` | Client certificate (host path) | -| `OPENSHELL_PODMAN_TLS_KEY` | Client private key (host path) | +```toml +[openshell.gateway] +guest_tls_ca = "/home/user/.local/state/openshell/tls/ca.crt" +guest_tls_cert = "/home/user/.local/state/openshell/tls/client/tls.crt" +guest_tls_key = "/home/user/.local/state/openshell/tls/client/tls.key" +``` Inside the container, the supervisor reads them from: @@ -141,13 +142,14 @@ configuration is required. ## Configuration reference -All settings are controlled via environment variables. The user unit -reads from `~/.config/openshell/gateway.env` (generated on first start) -and from `Environment=` directives in the systemd unit. +Gateway process settings are controlled via environment variables. Driver +implementation settings live in `~/.config/openshell/gateway.toml`, which is +generated on first start and selected through `OPENSHELL_GATEWAY_CONFIG`. Values in `gateway.env` override the unit defaults. Use `systemctl --user edit openshell-gateway` to add overrides that persist -across package upgrades. +across package upgrades. Gateway CLI/env values override the gateway section +of the TOML file, while driver tables are read from TOML. ### Gateway settings @@ -158,7 +160,7 @@ across package upgrades. | `OPENSHELL_HEALTH_PORT` | `0` (disabled) | Port for unauthenticated health endpoints (`/healthz`, `/readyz`). Set to a non-zero value to enable. | | `OPENSHELL_METRICS_PORT` | `0` (disabled) | Port for Prometheus metrics (`/metrics`). Set to a non-zero value to enable. 
| | `OPENSHELL_LOG_LEVEL` | `info` | Log level: `trace`, `debug`, `info`, `warn`, `error` | -| `OPENSHELL_DRIVERS` | `podman` | Compute driver (`podman`, `docker`, `kubernetes`) | +| `OPENSHELL_DRIVERS` | `podman` | Compute driver (`podman`, `docker`, `kubernetes`, `vm`) | | `OPENSHELL_DB_URL` | `sqlite://$XDG_STATE_HOME/openshell/gateway.db` | SQLite database URL for state persistence | | `OPENSHELL_DISABLE_GATEWAY_AUTH` | (unset) | Set to `true` to skip mTLS client certificate checks | @@ -170,25 +172,26 @@ across package upgrades. | `OPENSHELL_TLS_KEY` | (auto-generated path) | Server TLS private key | | `OPENSHELL_TLS_CLIENT_CA` | (auto-generated path) | CA for client certificate verification | | `OPENSHELL_DISABLE_TLS` | (unset) | Set to `true` to disable TLS | -| `OPENSHELL_PODMAN_TLS_CA` | (auto-generated path) | CA cert mounted into sandbox containers | -| `OPENSHELL_PODMAN_TLS_CERT` | (auto-generated path) | Client cert mounted into sandbox containers | -| `OPENSHELL_PODMAN_TLS_KEY` | (auto-generated path) | Client key mounted into sandbox containers | -### Sandbox settings +### Driver TOML settings -| Variable | Default | Description | -|----------|---------|-------------| -| `OPENSHELL_SUPERVISOR_IMAGE` | `ghcr.io/nvidia/openshell/supervisor:latest` | Supervisor binary OCI image | -| `OPENSHELL_SANDBOX_IMAGE` | `ghcr.io/nvidia/openshell-community/sandboxes/base:latest` | Default sandbox base image | -| `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY` | `missing` | Image pull policy: `always`, `missing`, `never`, `newer` | +The generated `gateway.toml` contains the RPM's Podman defaults: -### Podman driver settings +```toml +[openshell.gateway] +compute_drivers = ["podman"] +default_image = "ghcr.io/nvidia/openshell-community/sandboxes/base:latest" +supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +guest_tls_ca = "/home/user/.local/state/openshell/tls/ca.crt" +guest_tls_cert = "/home/user/.local/state/openshell/tls/client/tls.crt" +guest_tls_key = 
"/home/user/.local/state/openshell/tls/client/tls.key" -| Variable | Default | Description | -|----------|---------|-------------| -| `OPENSHELL_PODMAN_SOCKET` | `$XDG_RUNTIME_DIR/podman/podman.sock` | Podman API Unix socket path | -| `OPENSHELL_NETWORK_NAME` | `openshell` | Podman bridge network name for sandbox containers | -| `OPENSHELL_STOP_TIMEOUT` | `10` | Container stop timeout in seconds (SIGTERM then SIGKILL) | +[openshell.drivers.podman] +socket_path = "/run/user/1000/podman/podman.sock" +image_pull_policy = "missing" +network_name = "openshell" +stop_timeout_secs = 10 +``` ### Image management @@ -203,14 +206,14 @@ podman pull ghcr.io/nvidia/openshell/supervisor:latest podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest ``` -Or set `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=always` to pull on every -sandbox creation. +Or set `image_pull_policy = "always"` in +`[openshell.drivers.podman]` to pull on every sandbox creation. To pin specific image versions instead of `:latest`: ```shell -OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:v0.0.37 -OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:v0.0.37 +supervisor_image = "ghcr.io/nvidia/openshell/supervisor:v0.0.37" +default_image = "ghcr.io/nvidia/openshell-community/sandboxes/base:v0.0.37" ``` For air-gapped environments: @@ -233,8 +236,9 @@ For air-gapped environments: 1. 
Set pull policy to `never`: - ```shell - OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=never + ```toml + [openshell.drivers.podman] + image_pull_policy = "never" ``` ## File locations @@ -245,8 +249,9 @@ For air-gapped environments: | CLI binary | `/usr/bin/openshell` | | Systemd user unit | `/usr/lib/systemd/user/openshell-gateway.service` | | PKI bootstrap script | `/usr/libexec/openshell/init-pki.sh` | -| Env generator script | `/usr/libexec/openshell/init-gateway-env.sh` | +| Env/config generator script | `/usr/libexec/openshell/init-gateway-env.sh` | | TLS certificates | `~/.local/state/openshell/tls/` | | CLI client certs | `~/.config/openshell/gateways/openshell/mtls/` | | Gateway database | `~/.local/state/openshell/gateway.db` | -| Gateway configuration | `~/.config/openshell/gateway.env` | +| Gateway environment | `~/.config/openshell/gateway.env` | +| Gateway TOML configuration | `~/.config/openshell/gateway.toml` | diff --git a/deploy/rpm/QUICKSTART.md b/deploy/rpm/QUICKSTART.md index 4d6acb4a1..1f89bba00 100644 --- a/deploy/rpm/QUICKSTART.md +++ b/deploy/rpm/QUICKSTART.md @@ -51,8 +51,8 @@ The gateway pulls container images from ghcr.io on first sandbox creation. Ensure the host can reach ghcr.io over HTTPS (port 443). For air-gapped environments, pre-load images with `podman pull` and -set `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=never` in -`~/.config/openshell/gateway.env`. See CONFIGURATION.md for details. +set `image_pull_policy = "never"` under `[openshell.drivers.podman]` in +`~/.config/openshell/gateway.toml`. See CONFIGURATION.md for details. ## Start the gateway @@ -64,6 +64,7 @@ On first start, the gateway automatically generates: - A self-signed PKI bundle (CA, server cert, client cert) for mTLS - A commented configuration file at `~/.config/openshell/gateway.env` +- A gateway TOML file at `~/.config/openshell/gateway.toml` > **Note:** The gateway binds to all interfaces (`0.0.0.0`) by default. 
> Mutual TLS (mTLS) is enabled automatically on first start, requiring a diff --git a/deploy/rpm/TROUBLESHOOTING.md b/deploy/rpm/TROUBLESHOOTING.md index 2c33e1a57..1cc39cd8d 100644 --- a/deploy/rpm/TROUBLESHOOTING.md +++ b/deploy/rpm/TROUBLESHOOTING.md @@ -186,8 +186,8 @@ podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest podman pull ghcr.io/nvidia/openshell/supervisor:latest ``` -Or set `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=always` in -`~/.config/openshell/gateway.env` and restart the gateway. +Or set `image_pull_policy = "always"` under `[openshell.drivers.podman]` in +`~/.config/openshell/gateway.toml` and restart the gateway. ### Gateway stops on logout @@ -216,11 +216,10 @@ systemctl --user restart openshell-gateway The SQLite database schema is auto-migrated on startup. Running sandboxes are stopped during the restart. -The `gateway.env` file is not overwritten during upgrades. The -`init-gateway-env.sh` script is idempotent and only generates the file -on first start. New configuration options from newer versions can be -added manually by referencing CONFIGURATION.md or running -`openshell-gateway --help`. +The `gateway.env` and `gateway.toml` files are not overwritten during +upgrades. The `init-gateway-env.sh` script is idempotent and only generates +missing files on first start. New gateway process options can be added +manually by referencing CONFIGURATION.md or running `openshell-gateway --help`. To pick up new container images after an upgrade: diff --git a/deploy/rpm/init-gateway-env.sh b/deploy/rpm/init-gateway-env.sh index f041fa941..baf2f5564 100644 --- a/deploy/rpm/init-gateway-env.sh +++ b/deploy/rpm/init-gateway-env.sh @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# Generate the gateway environment configuration file on first start. +# Generate the gateway environment and TOML configuration files on first start. 
# # Called from the systemd ExecStartPre directive to bootstrap the # gateway configuration. Idempotent: exits immediately if the file @@ -17,14 +17,65 @@ set -euo pipefail ENV_FILE="${1:?Usage: init-gateway-env.sh }" +CONFIG_DIR="$(dirname "${ENV_FILE}")" +CONFIG_FILE="${CONFIG_DIR}/gateway.toml" +STATE_HOME="${XDG_STATE_HOME:-${HOME}/.local/state}" +RUNTIME_HOME="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}" + +write_gateway_config() { + if [ -f "${CONFIG_FILE}" ]; then + return + fi + + mkdir -p "${CONFIG_DIR}" "${STATE_HOME}/openshell/vm-driver" + cat > "${CONFIG_FILE}" << EOF +[openshell] +version = 1 + +[openshell.gateway] +compute_drivers = ["podman"] +default_image = "ghcr.io/nvidia/openshell-community/sandboxes/base:latest" +supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" +guest_tls_ca = "${STATE_HOME}/openshell/tls/ca.crt" +guest_tls_cert = "${STATE_HOME}/openshell/tls/client/tls.crt" +guest_tls_key = "${STATE_HOME}/openshell/tls/client/tls.key" + +[openshell.drivers.podman] +socket_path = "${RUNTIME_HOME}/podman/podman.sock" +image_pull_policy = "missing" +network_name = "openshell" +stop_timeout_secs = 10 + +[openshell.drivers.vm] +state_dir = "${STATE_HOME}/openshell/vm-driver" +driver_dir = "/usr/libexec/openshell" +grpc_endpoint = "https://127.0.0.1:8080" +EOF + chmod 600 "${CONFIG_FILE}" +} + +ensure_env_points_at_config() { + if grep -q '^OPENSHELL_GATEWAY_CONFIG=' "${ENV_FILE}"; then + return + fi + + cat >> "${ENV_FILE}" << EOF + +# Gateway TOML configuration. Driver implementation settings live here. 
+OPENSHELL_GATEWAY_CONFIG=${CONFIG_FILE} +EOF +} # ── Idempotent: skip if env file already exists ───────────────────── if [ -f "${ENV_FILE}" ]; then + write_gateway_config + ensure_env_points_at_config exit 0 fi # ── Create parent directory ───────────────────────────────────────── -mkdir -p "$(dirname "${ENV_FILE}")" +mkdir -p "${CONFIG_DIR}" +write_gateway_config # ── Write environment file ────────────────────────────────────────── cat > "${ENV_FILE}" << EOF @@ -34,13 +85,15 @@ cat > "${ENV_FILE}" << EOF # Run 'openshell-gateway --help' for the full list of options. # See /usr/share/doc/openshell-gateway/ for guides. +OPENSHELL_GATEWAY_CONFIG=${CONFIG_FILE} + # ---- Optional (uncomment to override defaults) ---- # Database URL for gateway state persistence. # Default for the user unit: sqlite://\$XDG_STATE_HOME/openshell/gateway.db #OPENSHELL_DB_URL=sqlite:///path/to/gateway.db -# Compute driver: podman (default for RPM), docker, kubernetes. +# Compute driver: podman (default for RPM), docker, kubernetes, vm. #OPENSHELL_DRIVERS=podman # Bind address. 0.0.0.0 listens on all interfaces; mTLS prevents @@ -53,18 +106,9 @@ cat > "${ENV_FILE}" << EOF # Log level: trace, debug, info, warn, error. #OPENSHELL_LOG_LEVEL=info -# ---- Images ---- - -# Supervisor binary OCI image (mounted read-only into sandboxes). -#OPENSHELL_SUPERVISOR_IMAGE=ghcr.io/nvidia/openshell/supervisor:latest - -# Default sandbox base image. -#OPENSHELL_SANDBOX_IMAGE=ghcr.io/nvidia/openshell-community/sandboxes/base:latest - -# Image pull policy: always, missing (default), never, newer. -# Use 'always' to pick up new tags automatically. -# Use 'never' for air-gapped environments with pre-loaded images. -#OPENSHELL_SANDBOX_IMAGE_PULL_POLICY=missing +# Driver implementation settings, including images, pull policy, Podman +# socket, TLS mounts, and VM paths, live in: +# ${CONFIG_FILE} # ---- TLS (mTLS enabled by default) ---- # PKI is auto-generated by init-pki.sh on first start. 
Client certs are @@ -89,21 +133,6 @@ cat > "${ENV_FILE}" << EOF # Example: OPENSHELL_SERVER_SAN=*.apps.example.com #OPENSHELL_SERVER_SAN= -# Podman driver: client certs bind-mounted into sandbox containers. -#OPENSHELL_PODMAN_TLS_CA=\$XDG_STATE_HOME/openshell/tls/ca.crt -#OPENSHELL_PODMAN_TLS_CERT=\$XDG_STATE_HOME/openshell/tls/client/tls.crt -#OPENSHELL_PODMAN_TLS_KEY=\$XDG_STATE_HOME/openshell/tls/client/tls.key - -# ---- Podman driver ---- - -# Podman API Unix socket path. -#OPENSHELL_PODMAN_SOCKET=\$XDG_RUNTIME_DIR/podman/podman.sock - -# Podman bridge network name for sandbox containers. -#OPENSHELL_NETWORK_NAME=openshell - -# Container stop timeout in seconds (SIGTERM then SIGKILL). -#OPENSHELL_STOP_TIMEOUT=10 EOF chmod 600 "${ENV_FILE}" diff --git a/deploy/snap/README.md b/deploy/snap/README.md index 4950f4921..ece73f680 100644 --- a/deploy/snap/README.md +++ b/deploy/snap/README.md @@ -97,9 +97,10 @@ it while sandboxes are active. Restart the service manually when you are ready to move the gateway to the refreshed snap revision. `openshell-sandbox` is staged next to `openshell-gateway` as the Docker -supervisor binary. The gateway app passes it to the in-process Docker driver -through `OPENSHELL_DOCKER_SUPERVISOR_BIN=$SNAP/bin/openshell-sandbox`. The -service stores its gateway database under `$SNAP_COMMON`. +supervisor binary. The gateway app starts through a small wrapper that writes +`$SNAP_COMMON/gateway.toml` on first start and points the in-process Docker +driver at `$SNAP/bin/openshell-sandbox`. The service stores its gateway +database under `$SNAP_COMMON`. 
## Interfaces @@ -147,16 +148,13 @@ openshell.gateway \ --disable-tls \ --port 17670 \ --db-url "sqlite:$SNAP_COMMON/gateway.db?mode=rwc" \ - --docker-supervisor-bin "$SNAP/bin/openshell-sandbox" \ - --docker-network-name openshell-snap \ - --sandbox-namespace docker-snap \ - --sandbox-image ghcr.io/nvidia/openshell-community/sandboxes/base:latest \ - --sandbox-image-pull-policy IfNotPresent \ - --grpc-endpoint http://host.openshell.internal:17670 + --config "$SNAP_COMMON/gateway.toml" ``` This stores the gateway SQLite database at -`/var/snap/openshell/common/gateway.db`. +`/var/snap/openshell/common/gateway.db`. The generated TOML stores Docker +driver settings such as the supervisor binary path, network name, sandbox +namespace, sandbox image, pull policy, and callback endpoint. ## Connect with the OpenShell CLI diff --git a/deploy/snap/bin/openshell-gateway-wrapper b/deploy/snap/bin/openshell-gateway-wrapper new file mode 100755 index 000000000..19e24b52b --- /dev/null +++ b/deploy/snap/bin/openshell-gateway-wrapper @@ -0,0 +1,27 @@ +#!/bin/sh +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -eu + +CONFIG_FILE="${OPENSHELL_GATEWAY_CONFIG:-${SNAP_COMMON}/gateway.toml}" + +if [ ! 
-f "$CONFIG_FILE" ]; then + mkdir -p "$(dirname "$CONFIG_FILE")" + cat > "$CONFIG_FILE" << EOF +[openshell] +version = 1 + +[openshell.drivers.docker] +default_image = "ghcr.io/nvidia/openshell-community/sandboxes/base:latest" +image_pull_policy = "IfNotPresent" +sandbox_namespace = "docker-snap" +grpc_endpoint = "http://host.openshell.internal:17670" +supervisor_bin = "${SNAP}/bin/openshell-sandbox" +network_name = "openshell-snap" +EOF + chmod 600 "$CONFIG_FILE" +fi + +export OPENSHELL_GATEWAY_CONFIG="$CONFIG_FILE" +exec "${SNAP}/bin/openshell-gateway" "$@" diff --git a/deploy/snap/meta/snap.yaml.in b/deploy/snap/meta/snap.yaml.in index 9444fbbf7..4175da0ac 100644 --- a/deploy/snap/meta/snap.yaml.in +++ b/deploy/snap/meta/snap.yaml.in @@ -31,23 +31,16 @@ apps: - ssh-keys - system-observe gateway: - command: bin/openshell-gateway + command: bin/openshell-gateway-wrapper daemon: simple refresh-mode: endure environment: OPENSHELL_BIND_ADDRESS: 127.0.0.1 OPENSHELL_SERVER_PORT: 17670 OPENSHELL_DB_URL: "sqlite:$SNAP_COMMON/gateway.db?mode=rwc" - OPENSHELL_GRPC_ENDPOINT: http://host.openshell.internal:17670 OPENSHELL_DISABLE_TLS: true OPENSHELL_DRIVERS: docker - OPENSHELL_DOCKER_SUPERVISOR_BIN: "$SNAP/bin/openshell-sandbox" - OPENSHELL_DOCKER_NETWORK_NAME: openshell-snap - OPENSHELL_SANDBOX_IMAGE: ghcr.io/nvidia/openshell-community/sandboxes/base:latest - OPENSHELL_SANDBOX_IMAGE_PULL_POLICY: IfNotPresent - OPENSHELL_SANDBOX_SSH_PORT: 2222 - OPENSHELL_SSH_GATEWAY_HOST: 127.0.0.1 - OPENSHELL_SSH_GATEWAY_PORT: 8080 + OPENSHELL_GATEWAY_CONFIG: "$SNAP_COMMON/gateway.toml" XDG_DATA_HOME: "$SNAP_COMMON" # Used for creating and locating certain sockets. 
XDG_RUNTIME_DIR: "$SNAP_COMMON" diff --git a/docs/kubernetes/ingress.mdx b/docs/kubernetes/ingress.mdx index e4b23101f..3ed9a4cd5 100644 --- a/docs/kubernetes/ingress.mdx +++ b/docs/kubernetes/ingress.mdx @@ -62,21 +62,9 @@ openshell gateway add http:// --name production openshell status ``` -## Configure SSH relay +## SSH Relay -For sandbox SSH connections to work through the external address, set `server.sshGatewayHost` and `server.sshGatewayPort` to the hostname and port that CLI clients can reach: - -```shell -helm upgrade openshell \ - oci://ghcr.io/nvidia/openshell/helm-chart \ - --version \ - --namespace openshell \ - --set grpcRoute.enabled=true \ - --set grpcRoute.gateway.create=true \ - --set grpcRoute.gateway.className=eg \ - --set server.sshGatewayHost= \ - --set server.sshGatewayPort= -``` +Sandbox SSH uses the gateway endpoint registered with the CLI. No separate Helm SSH host or port values are required. ## Next Steps diff --git a/docs/kubernetes/setup.mdx b/docs/kubernetes/setup.mdx index 5d9e2c089..bb9997305 100644 --- a/docs/kubernetes/setup.mdx +++ b/docs/kubernetes/setup.mdx @@ -134,7 +134,6 @@ The most commonly changed values are: | `server.sandboxNamespace` | Namespace where sandbox pods are created. Defaults to the Helm release namespace when left empty. | | `server.sandboxImage` | Default sandbox image used when a sandbox does not specify one. | | `server.grpcEndpoint` | Endpoint that sandbox supervisors use to call back to the gateway. Must be reachable from inside the cluster. | -| `server.sshGatewayHost` / `server.sshGatewayPort` | Public host and port returned to CLI clients for SSH proxy connections. Required when the gateway is exposed externally. | | `server.disableTls` | Run the gateway over plaintext HTTP. Use only behind a trusted transport. | | `server.enableLoopbackServiceHttp` | Enable local plaintext HTTP for loopback sandbox service URLs. Defaults to `true`. 
| | `pkiInitJob.serverDnsNames` / `certManager.serverDnsNames` | Additional gateway server DNS SANs. Wildcard SANs also enable sandbox service URLs under that domain. | diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index 09ded084c..ffb932f63 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -8,12 +8,12 @@ keywords: "Generative AI, Cybersecurity, AI Agents, Sandboxing, Gateway, Configu position: 5 --- -The OpenShell gateway reads its configuration from a TOML file when `--config` or `OPENSHELL_GATEWAY_CONFIG` is set. CLI flags and `OPENSHELL_*` environment variables always override the file. See [RFC 0003](https://github.com/NVIDIA/OpenShell/blob/main/rfc/0003-gateway-configuration/README.md) for the full schema. +The OpenShell gateway reads its configuration from a TOML file when `--config` or `OPENSHELL_GATEWAY_CONFIG` is set. Gateway process flags and gateway `OPENSHELL_*` environment variables override the file. Compute driver settings live in the driver TOML tables. See [RFC 0003](https://github.com/NVIDIA/OpenShell/blob/main/rfc/0003-gateway-configuration/README.md) for the full schema. ## Source Precedence ```text -CLI flag > OPENSHELL_* env var > TOML file > built-in default +Gateway CLI flag > gateway OPENSHELL_* env var > TOML file > built-in default ``` `database_url` is env-only. The loader rejects it when it appears in the file. @@ -62,9 +62,6 @@ log_level = "info" compute_drivers = ["kubernetes"] sandbox_namespace = "openshell" -sandbox_ssh_port = 2222 -ssh_gateway_host = "127.0.0.1" -ssh_gateway_port = 8080 # Subject Alternative Names baked into the gateway server certificate. # Wildcard DNS SANs (e.g. 
"*.dev.openshell.localhost") also enable sandbox @@ -151,9 +148,13 @@ guest_tls_cert = "/etc/openshell/certs/client.pem" guest_tls_key = "/etc/openshell/certs/client-key.pem" [openshell.drivers.docker] -network_name = "openshell-docker" +default_image = "ghcr.io/nvidia/openshell/sandbox:latest" +image_pull_policy = "IfNotPresent" +sandbox_namespace = "docker-dev" +grpc_endpoint = "https://host.openshell.internal:8080" +network_name = "openshell-docker" # Skip the image-pull-and-extract step by pointing at a locally built binary. -supervisor_bin = "/usr/local/libexec/openshell/openshell-sandbox" +supervisor_bin = "/usr/local/libexec/openshell/openshell-sandbox" ``` ### Podman @@ -203,6 +204,7 @@ guest_tls_cert = "/var/lib/openshell/guest-tls/client.pem" guest_tls_key = "/var/lib/openshell/guest-tls/client-key.pem" [openshell.drivers.vm] +grpc_endpoint = "https://host.containers.internal:8080" state_dir = "/var/lib/openshell/vm" # Where the gateway looks for the openshell-driver-vm subprocess binary. driver_dir = "/usr/local/libexec/openshell" diff --git a/docs/reference/sandbox-compute-drivers.mdx b/docs/reference/sandbox-compute-drivers.mdx index 84585c90a..9d59217dd 100644 --- a/docs/reference/sandbox-compute-drivers.mdx +++ b/docs/reference/sandbox-compute-drivers.mdx @@ -29,10 +29,8 @@ Common gateway options: | Option | Environment variable | Description | |---|---|---| | `--drivers ` | `OPENSHELL_DRIVERS` | Select the compute driver. Supported values are `docker`, `podman`, `kubernetes`, and `vm`. | -| `--sandbox-image ` | `OPENSHELL_SANDBOX_IMAGE` | Set the default sandbox image used when a sandbox create request does not specify one. | -| `--grpc-endpoint ` | `OPENSHELL_GRPC_ENDPOINT` | Set the gateway callback endpoint that sandbox workloads use to connect back to OpenShell. | -For the full file-based configuration surface — including gateway-wide settings, TLS, OIDC, and per-driver tables — see the [Gateway Configuration File](./gateway-config) reference. 
Each driver section below documents the CLI flags and environment variables; their TOML equivalents live in the `[openshell.drivers.]` tables on that page. +Set driver-specific values such as sandbox images, callback endpoints, network names, TLS material, and VM sizing in the gateway TOML file. See the [Gateway Configuration File](./gateway-config) reference for the full `[openshell.drivers.]` schema. Sandbox create supports `--cpu` and `--memory` for per-sandbox compute sizing. Docker and Podman apply them as runtime limits. Kubernetes applies them as both @@ -47,13 +45,7 @@ The gateway talks to the Docker daemon to create sandbox containers. Docker is a For maintainer-level implementation details, refer to the [Docker driver README](https://github.com/NVIDIA/OpenShell/blob/main/crates/openshell-driver-docker/README.md). -| Option | Environment variable | Description | -|---|---|---| -| `--drivers docker` | `OPENSHELL_DRIVERS=docker` | Select the Docker compute driver. | -| `--docker-network-name ` | `OPENSHELL_DOCKER_NETWORK_NAME` | Override the bridge network used by Docker sandbox containers. | -| `--docker-supervisor-bin ` | `OPENSHELL_DOCKER_SUPERVISOR_BIN` | Use a local Linux `openshell-sandbox` binary instead of resolving or extracting one automatically. | -| `--docker-supervisor-image ` | `OPENSHELL_DOCKER_SUPERVISOR_IMAGE` | Override the image used to extract the Linux `openshell-sandbox` binary. | -| `--docker-tls-ca`, `--docker-tls-cert`, `--docker-tls-key` | `OPENSHELL_DOCKER_TLS_CA`, `OPENSHELL_DOCKER_TLS_CERT`, `OPENSHELL_DOCKER_TLS_KEY` | Mount sandbox client TLS materials into Docker containers for mTLS callback to the gateway. | +Select Docker with `--drivers docker` or `OPENSHELL_DRIVERS=docker`. Configure Docker driver values such as `grpc_endpoint`, `network_name`, `supervisor_bin`, `supervisor_image`, `image_pull_policy`, and `guest_tls_*` in `[openshell.drivers.docker]`. 
For GPU-backed Docker sandboxes, configure Docker CDI before starting the gateway so OpenShell can detect the daemon capability. @@ -65,14 +57,7 @@ The gateway talks to the Podman API socket. The Podman driver requires Podman 5. For maintainer-level implementation details, refer to the [Podman driver README](https://github.com/NVIDIA/OpenShell/blob/main/crates/openshell-driver-podman/README.md) and [Podman networking notes](https://github.com/NVIDIA/OpenShell/blob/main/crates/openshell-driver-podman/NETWORKING.md). -| Option | Environment variable | Description | -|---|---|---| -| `--drivers podman` | `OPENSHELL_DRIVERS=podman` | Select the Podman compute driver. | -| None | `OPENSHELL_PODMAN_SOCKET` | Override the Podman API socket path. | -| None | `OPENSHELL_NETWORK_NAME` | Override the Podman bridge network. | -| None | `OPENSHELL_SUPERVISOR_IMAGE` | Override the image containing the `openshell-sandbox` supervisor binary. | -| None | `OPENSHELL_STOP_TIMEOUT` | Set the container stop timeout in seconds. | -| None | `OPENSHELL_PODMAN_TLS_CA`, `OPENSHELL_PODMAN_TLS_CERT`, `OPENSHELL_PODMAN_TLS_KEY` | Mount sandbox client TLS materials into Podman containers for mTLS callback to the gateway. | +Select Podman with `--drivers podman` or `OPENSHELL_DRIVERS=podman`. Configure Podman driver values such as `socket_path`, `network_name`, `supervisor_image`, `stop_timeout_secs`, `image_pull_policy`, `grpc_endpoint`, and `guest_tls_*` in `[openshell.drivers.podman]`. ## MicroVM Driver @@ -94,15 +79,7 @@ openshell-gateway --drivers vm For a service, set `OPENSHELL_DRIVERS=vm` in the service environment file and restart the service. Homebrew creates `$(brew --prefix)/var/openshell/gateway.env` with a commented `OPENSHELL_DRIVERS=vm` entry. Debian and RPM user services read `~/.config/openshell/gateway.env`. -| Option | Environment variable | Description | -|---|---|---| -| `--drivers vm` | `OPENSHELL_DRIVERS=vm` | Select the VM compute driver. VM is never auto-detected. 
| -| `--driver-dir ` | `OPENSHELL_DRIVER_DIR` | Search a custom directory for `openshell-driver-vm`. | -| `--vm-driver-state-dir ` | `OPENSHELL_VM_DRIVER_STATE_DIR` | Store VM rootfs, console logs, runtime state, image-rootfs cache, and the private `run/compute-driver.sock` socket under this directory. | -| `--vm-driver-vcpus ` | `OPENSHELL_VM_DRIVER_VCPUS` | Set the default vCPU count for VM sandboxes. | -| `--vm-driver-mem-mib ` | `OPENSHELL_VM_DRIVER_MEM_MIB` | Set the default memory allocation for VM sandboxes in MiB. | -| `--vm-krun-log-level ` | `OPENSHELL_VM_KRUN_LOG_LEVEL` | Set the libkrun log level for VM helper processes. | -| `--vm-tls-ca`, `--vm-tls-cert`, `--vm-tls-key` | `OPENSHELL_VM_TLS_CA`, `OPENSHELL_VM_TLS_CERT`, `OPENSHELL_VM_TLS_KEY` | Copy sandbox client TLS materials into VM guests for mTLS callback to the gateway. | +Select VM with `--drivers vm` or `OPENSHELL_DRIVERS=vm`. Configure VM driver values such as `grpc_endpoint`, `driver_dir`, `state_dir`, `vcpus`, `mem_mib`, `krun_log_level`, and `guest_tls_*` in `[openshell.drivers.vm]`. The gateway starts `openshell-driver-vm` over a private Unix socket and passes its process ID so the driver can reject unexpected local clients. The driver's standalone TCP listener is disabled unless `--allow-unauthenticated-tcp` is set for local development. @@ -114,14 +91,14 @@ Helm deployments set Kubernetes driver values through the chart. For maintainer-level implementation details, refer to the [Kubernetes driver README](https://github.com/NVIDIA/OpenShell/blob/main/crates/openshell-driver-kubernetes/README.md). -| Gateway option | Environment variable | Helm value | Description | -|---|---|---|---| -| `--drivers kubernetes` | `OPENSHELL_DRIVERS=kubernetes` | Not applicable | Select the Kubernetes compute driver. | -| `--sandbox-namespace ` | `OPENSHELL_SANDBOX_NAMESPACE` | `server.sandboxNamespace` | Set the namespace for sandbox resources. 
The Helm chart defaults to the release namespace when left empty. | -| `--sandbox-image ` | `OPENSHELL_SANDBOX_IMAGE` | `server.sandboxImage` | Set the default sandbox image. | -| `--sandbox-image-pull-policy ` | `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY` | `server.sandboxImagePullPolicy` | Set the Kubernetes image pull policy for sandbox pods. | -| `--grpc-endpoint ` | `OPENSHELL_GRPC_ENDPOINT` | `server.grpcEndpoint` | Set the gateway callback endpoint reachable from sandbox pods. | -| `--client-tls-secret-name ` | `OPENSHELL_CLIENT_TLS_SECRET_NAME` | `server.tls.clientTlsSecretName` | Mount sandbox client TLS materials from a Kubernetes secret. | -| Not applicable | Not applicable | `supervisor.sideloadMethod` | How the supervisor binary is delivered into sandbox pods. Leave empty to auto-detect from cluster version. Set to `image-volume` to mount the supervisor OCI image directly as a volume (requires Kubernetes 1.33+ with the ImageVolume feature gate; GA in 1.36), or `init-container` to copy it through an init container on older clusters. | +| Gateway configuration | Helm value | Description | +|---|---|---| +| `compute_drivers = ["kubernetes"]` or `--drivers kubernetes` | Not applicable | Select the Kubernetes compute driver. | +| `[openshell.drivers.kubernetes].namespace` | `server.sandboxNamespace` | Set the namespace for sandbox resources. The Helm chart defaults to the release namespace when left empty. | +| `default_image` | `server.sandboxImage` | Set the default sandbox image. | +| `image_pull_policy` | `server.sandboxImagePullPolicy` | Set the Kubernetes image pull policy for sandbox pods. | +| `grpc_endpoint` | `server.grpcEndpoint` | Set the gateway callback endpoint reachable from sandbox pods. | +| `client_tls_secret_name` | `server.tls.clientTlsSecretName` | Mount sandbox client TLS materials from a Kubernetes secret. | +| `supervisor_sideload_method` | `supervisor.sideloadMethod` | How the supervisor binary is delivered into sandbox pods. 
Leave empty to auto-detect from cluster version. Set to `image-volume` to mount the supervisor OCI image directly as a volume (requires Kubernetes 1.33+ with the ImageVolume feature gate; GA in 1.36), or `init-container` to copy it through an init container on older clusters. | The Kubernetes driver creates namespaced `agents.x-k8s.io/v1alpha1` `Sandbox` resources from the Kubernetes SIG Apps [agent-sandbox](https://github.com/kubernetes-sigs/agent-sandbox) project. The Agent Sandbox controller turns those resources into sandbox pods and related storage. diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 5123c8c12..0c86069e1 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -71,8 +71,8 @@ This provides defense-in-depth: even if a container escape vulnerability exists, | Aspect | Detail | |---|---| -| Default | Disabled. Set `server.enableUserNamespaces: true` in the Helm values or `OPENSHELL_ENABLE_USER_NAMESPACES=true` as an environment variable to enable cluster-wide. | -| What you can change | Enable cluster-wide through Helm or environment variable. Override per-sandbox through the `user_namespaces` field on `SandboxTemplate` in the API. | +| Default | Disabled. Set `server.enableUserNamespaces: true` in Helm values or `enable_user_namespaces = true` in the gateway config to enable cluster-wide. | +| What you can change | Enable cluster-wide through Helm or gateway config. Override per-sandbox through the `user_namespaces` field on `SandboxTemplate` in the API. | | Prerequisites | Kubernetes 1.33+ with user namespace support available (beta through 1.35, GA in 1.36+), a container runtime that supports user namespaces (containerd 2.0+, CRI-O 1.25+), and Linux 5.12+ for ID-mapped mounts. | | Risk if enabled with GPU | NVIDIA device plugin compatibility with user namespaces is unverified. OpenShell logs a warning when both GPU and user namespaces are active on the same sandbox. 
| | Recommendation | Enable on non-GPU clusters running Kubernetes with user namespace support available (1.33+ beta, 1.36+ GA) for stronger host isolation. Test GPU workloads separately before enabling on GPU clusters. | diff --git a/python/openshell/release_formula_test.py b/python/openshell/release_formula_test.py index 1f005f97e..f2f7bf787 100644 --- a/python/openshell/release_formula_test.py +++ b/python/openshell/release_formula_test.py @@ -53,17 +53,18 @@ def test_generate_homebrew_formula_uses_tagged_macos_driver_asset_without_defaul assert 'sha256 "' + "b" * 64 + '"' in formula assert "OPENSHELL_DRIVERS:" not in formula assert "#OPENSHELL_DRIVERS=vm" in formula - assert 'OPENSHELL_DRIVER_DIR: "#{opt_libexec}"' in formula - assert ( - 'OPENSHELL_DOCKER_SUPERVISOR_IMAGE: "ghcr.io/nvidia/openshell/supervisor:0.0.10"' - ) in formula + assert 'OPENSHELL_GATEWAY_CONFIG: "#{var}/openshell/gateway.toml"' in formula + assert 'driver_dir = "#{opt_libexec}"' in formula + assert 'supervisor_image = "ghcr.io/nvidia/openshell/supervisor:0.0.10"' in formula assert 'run opt_libexec/"openshell-gateway-homebrew-service"' in formula assert ( 'docker_tls_dir="${OPENSHELL_DOCKER_TLS_DIR:-${HOME}/.local/state/openshell/homebrew/tls}"' ) in formula - assert 'export OPENSHELL_DOCKER_TLS_CA="${docker_tls_dir}/ca.crt"' in formula + assert 'guest_tls_ca = "${docker_tls_dir}/ca.crt"' in formula assert 'gateway_env="#{var}/openshell/gateway.env"' in formula assert '. 
"${gateway_env}"' in formula + assert "OPENSHELL_DRIVER_DIR:" not in formula + assert "OPENSHELL_DOCKER_SUPERVISOR_IMAGE:" not in formula assert 'OPENSHELL_DOCKER_TLS_CA: "#{var}/openshell/tls/ca.crt"' not in formula assert "entitlements.atomic_write" in formula assert "brew services restart openshell" in formula diff --git a/rfc/0003-gateway-configuration/README.md b/rfc/0003-gateway-configuration/README.md index ca72b007b..00238cb0f 100644 --- a/rfc/0003-gateway-configuration/README.md +++ b/rfc/0003-gateway-configuration/README.md @@ -78,13 +78,9 @@ log_level = "info" # (kubernetes → podman → docker). VM is never auto-detected. compute_drivers = ["kubernetes"] -# SSH proxy (gateway-side; driver-side equivalents live under each driver). # Note: database_url is a secret and must be supplied via OPENSHELL_DB_URL # (or --db-url) — it is NOT permitted in the file. ssh_session_ttl_secs = 86400 -ssh_gateway_host = "127.0.0.1" -ssh_gateway_port = 8080 -sandbox_ssh_port = 2222 # Service routing — wildcard DNS SANs in `server_sans` also enable sandbox # service URLs under that domain. 
`enable_loopback_service_http` toggles @@ -134,6 +130,10 @@ host_gateway_ip = "10.0.0.1" ssh_socket_path = "/run/openshell/ssh.sock" [openshell.drivers.docker] +default_image = "ghcr.io/nvidia/openshell/sandbox:latest" +image_pull_policy = "IfNotPresent" +sandbox_namespace = "docker-dev" +grpc_endpoint = "https://host.openshell.internal:8080" network_name = "openshell" supervisor_bin = "/usr/local/libexec/openshell/openshell-sandbox" # optional override supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" # used to extract bin @@ -155,6 +155,7 @@ guest_tls_key = "/etc/openshell/certs/client-key.pem" [openshell.drivers.vm] state_dir = "/var/lib/openshell/vm" driver_dir = "/usr/local/libexec/openshell" +grpc_endpoint = "https://host.containers.internal:8080" vcpus = 2 mem_mib = 2048 krun_log_level = 1 diff --git a/snapcraft.yaml b/snapcraft.yaml index 9ac0a14c7..5f27ead1a 100644 --- a/snapcraft.yaml +++ b/snapcraft.yaml @@ -37,23 +37,16 @@ apps: - ssh-keys - system-observe gateway: - command: bin/openshell-gateway + command: bin/openshell-gateway-wrapper daemon: simple refresh-mode: endure environment: OPENSHELL_BIND_ADDRESS: 127.0.0.1 OPENSHELL_SERVER_PORT: 17670 OPENSHELL_DB_URL: "sqlite:$SNAP_COMMON/gateway.db?mode=rwc" - OPENSHELL_GRPC_ENDPOINT: http://host.openshell.internal:17670 OPENSHELL_DISABLE_TLS: "true" OPENSHELL_DRIVERS: docker - OPENSHELL_DOCKER_SUPERVISOR_BIN: "$SNAP/bin/openshell-sandbox" - OPENSHELL_DOCKER_NETWORK_NAME: openshell-snap - OPENSHELL_SANDBOX_IMAGE: ghcr.io/nvidia/openshell-community/sandboxes/base:latest - OPENSHELL_SANDBOX_IMAGE_PULL_POLICY: IfNotPresent - OPENSHELL_SANDBOX_SSH_PORT: 2222 - OPENSHELL_SSH_GATEWAY_HOST: 127.0.0.1 - OPENSHELL_SSH_GATEWAY_PORT: 8080 + OPENSHELL_GATEWAY_CONFIG: "$SNAP_COMMON/gateway.toml" XDG_DATA_HOME: "$SNAP_COMMON" XDG_RUNTIME_DIR: "$SNAP_COMMON" plugs: @@ -96,6 +89,8 @@ parts: "$CRAFT_PART_INSTALL/bin/openshell-gateway" install -D -m 0755 "$CRAFT_PROJECT_DIR/target/release/openshell-sandbox" \ 
"$CRAFT_PART_INSTALL/bin/openshell-sandbox" + install -D -m 0755 "$CRAFT_PROJECT_DIR/deploy/snap/bin/openshell-gateway-wrapper" \ + "$CRAFT_PART_INSTALL/bin/openshell-gateway-wrapper" install -D -m 0644 "$CRAFT_PROJECT_DIR/LICENSE" \ "$CRAFT_PART_INSTALL/usr/share/doc/openshell/LICENSE" install -D -m 0644 "$CRAFT_PROJECT_DIR/README.md" \ diff --git a/tasks/scripts/gateway-docker.sh b/tasks/scripts/gateway-docker.sh index c5b8d37dd..fd9ba0f6e 100644 --- a/tasks/scripts/gateway-docker.sh +++ b/tasks/scripts/gateway-docker.sh @@ -156,6 +156,22 @@ fi chmod +x "${SUPERVISOR_BIN}" mkdir -p "${STATE_DIR}" +CONFIG_PATH="${STATE_DIR}/gateway.toml" +cat >"${CONFIG_PATH}" <"${CONFIG_PATH}" <&2 + exit 2 + ;; + esac +} + explicit_driver="" while [[ "$#" -gt 0 ]]; do case "$1" in @@ -250,6 +263,7 @@ STATE_DIR="${OPENSHELL_GATEWAY_STATE_DIR:-${ROOT}/.cache/gateway-${DRIVER}}" SANDBOX_NAMESPACE="${OPENSHELL_SANDBOX_NAMESPACE:-${DRIVER}-dev}" SANDBOX_IMAGE="${OPENSHELL_SANDBOX_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/base:latest}" SANDBOX_IMAGE_PULL_POLICY="${OPENSHELL_SANDBOX_IMAGE_PULL_POLICY:-IfNotPresent}" +GRPC_ENDPOINT="${OPENSHELL_GRPC_ENDPOINT:-}" LOG_LEVEL="${OPENSHELL_LOG_LEVEL:-info}" if [[ "${DRIVER}" == "podman" ]]; then @@ -278,6 +292,42 @@ if [[ ! -x "${GATEWAY_BIN}" ]]; then fi mkdir -p "${STATE_DIR}" +CONFIG_PATH="${STATE_DIR}/gateway.toml" +cat >"${CONFIG_PATH}" <>"${CONFIG_PATH}" <>"${CONFIG_PATH}" + fi + ;; + podman) + cat >>"${CONFIG_PATH}" <>"${CONFIG_PATH}" + fi + ;; +esac GATEWAY_ENDPOINT="http://127.0.0.1:${PORT}" register_gateway_metadata "${GATEWAY_NAME}" "${GATEWAY_ENDPOINT}" "${PORT}" @@ -295,11 +345,9 @@ echo "Active gateway set to '${GATEWAY_NAME}'. 
The CLI now targets this gateway echo exec "${GATEWAY_BIN}" \ + --config "${CONFIG_PATH}" \ --port "${PORT}" \ --log-level "${LOG_LEVEL}" \ --drivers "${DRIVER}" \ --disable-tls \ - --db-url "sqlite:${STATE_DIR}/gateway.db?mode=rwc" \ - --sandbox-namespace "${SANDBOX_NAMESPACE}" \ - --sandbox-image "${SANDBOX_IMAGE}" \ - --sandbox-image-pull-policy "${SANDBOX_IMAGE_PULL_POLICY}" + --db-url "sqlite:${STATE_DIR}/gateway.db?mode=rwc" diff --git a/tasks/scripts/package-deb.sh b/tasks/scripts/package-deb.sh index 9d7e3d328..5705e3385 100755 --- a/tasks/scripts/package-deb.sh +++ b/tasks/scripts/package-deb.sh @@ -115,6 +115,8 @@ stage_binary "$OPENSHELL_DRIVER_VM_BINARY" "$pkgroot/usr/libexec/openshell/opens # Per-user systemd unit. Each user enables it via `systemctl --user`. install -D -m 0644 "$src_dir/openshell-gateway.service" \ "$pkgroot/usr/lib/systemd/user/openshell-gateway.service" +install -D -m 0755 "$src_dir/init-gateway-config.sh" \ + "$pkgroot/usr/libexec/openshell/init-gateway-config.sh" # --------------------------------------------------------------------------- # DEBIAN/ control directory diff --git a/tasks/scripts/package-snap.sh b/tasks/scripts/package-snap.sh index 4aafa4ca4..8c299d352 100755 --- a/tasks/scripts/package-snap.sh +++ b/tasks/scripts/package-snap.sh @@ -182,6 +182,8 @@ fi stage_binary "$OPENSHELL_CLI_BINARY" "$snap_root/bin/openshell" stage_binary "$OPENSHELL_GATEWAY_BINARY" "$snap_root/bin/openshell-gateway" stage_binary "$OPENSHELL_DOCKER_SUPERVISOR_BINARY" "$snap_root/bin/openshell-sandbox" +install -D -m 0755 "${repo_root}/deploy/snap/bin/openshell-gateway-wrapper" \ + "$snap_root/bin/openshell-gateway-wrapper" install -D -m 0644 "${repo_root}/LICENSE" "$snap_root/usr/share/doc/openshell/LICENSE" install -D -m 0644 "${repo_root}/README.md" "$snap_root/usr/share/doc/openshell/README.md" diff --git a/tasks/scripts/release.py b/tasks/scripts/release.py index 7406e1adb..df61e0907 100644 --- a/tasks/scripts/release.py +++ 
b/tasks/scripts/release.py @@ -289,6 +289,13 @@ def install exit 1 fi + gateway_env="#{{var}}/openshell/gateway.env" + if [ -f "${{gateway_env}}" ]; then + set -a + . "${{gateway_env}}" + set +a + fi + docker_tls_dir="${{OPENSHELL_DOCKER_TLS_DIR:-${{HOME}}/.local/state/openshell/homebrew/tls}}" mkdir -p "${{docker_tls_dir}}/client" chmod 700 "${{docker_tls_dir}}" "${{docker_tls_dir}}/client" @@ -296,17 +303,36 @@ def install /usr/bin/install -m 0644 "#{{var}}/openshell/tls/client/tls.crt" "${{docker_tls_dir}}/client/tls.crt" /usr/bin/install -m 0600 "#{{var}}/openshell/tls/client/tls.key" "${{docker_tls_dir}}/client/tls.key" - export OPENSHELL_DOCKER_TLS_CA="${{docker_tls_dir}}/ca.crt" - export OPENSHELL_DOCKER_TLS_CERT="${{docker_tls_dir}}/client/tls.crt" - export OPENSHELL_DOCKER_TLS_KEY="${{docker_tls_dir}}/client/tls.key" - - gateway_env="#{{var}}/openshell/gateway.env" - if [ -f "${{gateway_env}}" ]; then - set -a - . "${{gateway_env}}" - set +a + gateway_config="${{OPENSHELL_GATEWAY_CONFIG:-#{{var}}/openshell/gateway.toml}}" + if [ ! -f "${{gateway_config}}" ]; then + mkdir -p "$(dirname "${{gateway_config}}")" "#{{var}}/openshell/vm-driver" + cat > "${{gateway_config}}" < Starting gateway on port $PORT (state=$STATE_DIR, health=$health_port)" mkdir -p "$STATE_DIR" + cat >"$config" < "$LOG" 2>&1 &