diff --git a/architecture/gateway-single-node.md b/architecture/gateway-single-node.md index 0b2e9a485..2d2351418 100644 --- a/architecture/gateway-single-node.md +++ b/architecture/gateway-single-node.md @@ -21,14 +21,14 @@ Out of scope: - `crates/openshell-cli/src/run.rs`: CLI command implementations (`gateway_start`, `gateway_stop`, `gateway_destroy`, `gateway_info`, `doctor_logs`). - `crates/openshell-cli/src/bootstrap.rs`: Auto-bootstrap helpers for `sandbox create` (offers to deploy a gateway when one is unreachable). - `crates/openshell-bootstrap/src/lib.rs`: Gateway lifecycle orchestration (`deploy_gateway`, `deploy_gateway_with_logs`, `gateway_handle`, `check_existing_deployment`). -- `crates/openshell-bootstrap/src/docker.rs`: Docker API wrappers (network, volume, container, image operations). +- `crates/openshell-bootstrap/src/docker.rs`: Docker API wrappers (per-gateway network, volume, container, image operations). - `crates/openshell-bootstrap/src/image.rs`: Remote image registry pull with XOR-obfuscated distribution credentials. - `crates/openshell-bootstrap/src/runtime.rs`: In-container operations via `docker exec` (health polling, stale node cleanup, deployment restart). - `crates/openshell-bootstrap/src/metadata.rs`: Gateway metadata creation, storage, and active gateway tracking. - `crates/openshell-bootstrap/src/mtls.rs`: Gateway TLS detection and CLI mTLS bundle extraction. - `crates/openshell-bootstrap/src/push.rs`: Local development image push into k3s containerd. - `crates/openshell-bootstrap/src/paths.rs`: XDG path resolution. -- `crates/openshell-bootstrap/src/constants.rs`: Shared constants (image name, network name, container/volume naming). +- `crates/openshell-bootstrap/src/constants.rs`: Shared constants (image name, container/volume/network naming). - `deploy/docker/Dockerfile.cluster`: Container image definition (k3s base + Helm charts + manifests + entrypoint). 
- `deploy/docker/cluster-entrypoint.sh`: Container entrypoint (DNS proxy, registry config, manifest injection). - `deploy/docker/cluster-healthcheck.sh`: Docker HEALTHCHECK script. @@ -44,7 +44,7 @@ All gateway lifecycle commands live under `openshell gateway`: |---|---| | `openshell gateway start [--name NAME] [--remote user@host] [--ssh-key PATH]` | Provision or update a gateway | | `openshell gateway stop [--name NAME] [--remote user@host]` | Stop the container (preserves state) | -| `openshell gateway destroy [--name NAME] [--remote user@host]` | Destroy container, attached volumes, metadata, and network | +| `openshell gateway destroy [--name NAME] [--remote user@host]` | Destroy container, attached volumes, per-gateway network, and metadata | | `openshell gateway info [--name NAME]` | Show deployment details (endpoint, SSH host) | | `openshell status` | Show gateway health via gRPC/HTTP | | `openshell doctor logs [--name NAME] [--remote user@host] [--tail N]` | Fetch gateway container logs | @@ -91,7 +91,7 @@ sequenceDiagram Note over B,R: Docker socket APIs only, no extra host dependencies B->>B: resolve SSH host for extra TLS SANs - B->>R: ensure_network (bridge, attachable) + B->>R: ensure_network (per-gateway bridge, attachable) B->>R: ensure_volume B->>R: ensure_container (privileged, k3s server) B->>R: start_container @@ -159,7 +159,7 @@ Image ref resolution in `default_gateway_image_ref()`: For the target daemon (local or remote): -1. **Ensure bridge network** `openshell-cluster` (attachable, bridge driver) via `ensure_network()`. +1. **Ensure bridge network** `openshell-cluster-{name}` (attachable, bridge driver) via `ensure_network()`. Each gateway gets its own isolated Docker network. 2. **Ensure volume** `openshell-cluster-{name}` via `ensure_volume()`. 3. **Compute extra TLS SANs**: - For **local deploys**: Check `DOCKER_HOST` for a non-loopback `tcp://` endpoint (e.g., `tcp://docker:2375` in CI). If found, extract the host as an extra SAN. 
The function `local_gateway_host_from_docker_host()` skips `localhost`, `127.0.0.1`, and `::1`. @@ -168,7 +168,7 @@ For the target daemon (local or remote): - k3s server command: `server --disable=traefik --tls-san=127.0.0.1 --tls-san=localhost --tls-san=host.docker.internal` plus computed extra SANs. - Privileged mode. - Volume bind mount: `openshell-cluster-{name}:/var/lib/rancher/k3s`. - - Network: `openshell-cluster`. + - Network: `openshell-cluster-{name}` (per-gateway bridge network). - Extra host: `host.docker.internal:host-gateway`. - Port mappings: @@ -349,7 +349,7 @@ flowchart LR 1. Stop the container. 2. Remove the container (`force=true`). Tolerates 404. 3. Remove the volume (`force=true`). Tolerates 404. -4. Remove the network if no containers remain attached (`cleanup_network_if_unused()`). +4. Force-remove the per-gateway network via `force_remove_network()`, disconnecting any stale endpoints first. **CLI layer** (`gateway_destroy()` in `run.rs` additionally): @@ -359,7 +359,7 @@ flowchart LR ## Idempotency and Error Behavior - Re-running deploy is safe: - - Existing network/volume are reused (inspect before create). + - Network is recreated on each deploy to guarantee a clean state; volume is reused (inspect before create). - If a container exists with the same image ID, it is reused; if the image changed, the container is recreated. - `start_container` tolerates already-running state (409). - In interactive terminals, the CLI prompts the user to optionally destroy and recreate an existing gateway before redeploying. 
/// Name of the Docker volume backing the gateway called `name`.
///
/// The result is the fixed prefix `openshell-cluster-` followed by `name`.
pub fn volume_name(name: &str) -> String {
    ["openshell-cluster-", name].concat()
}

/// Name of the per-gateway Docker network for the gateway called `name`.
///
/// The result is the fixed prefix `openshell-cluster-` followed by `name`.
pub fn network_name(name: &str) -> String {
    ["openshell-cluster-", name].concat()
}
+/// Stale Docker networks (e.g., from a previous interrupted destroy or +/// Docker Desktop restart) can leave broken routing that causes the +/// container to fail with "no default routes found". +pub async fn ensure_network(docker: &Docker, net_name: &str) -> Result<()> { + force_remove_network(docker, net_name).await?; + + // Docker may return a 409 conflict if the previous network teardown has + // not fully completed in the daemon. Retry a few times with back-off, + // re-attempting the removal before each create. + let mut last_err = None; + for attempt in 0u64..5 { + if attempt > 0 { + tokio::time::sleep(std::time::Duration::from_millis(500 * attempt)).await; + // Re-attempt removal in case the previous teardown has now settled. + force_remove_network(docker, net_name).await?; + } + match docker + .create_network(NetworkCreateRequest { + name: net_name.to_string(), + driver: Some("bridge".to_string()), + attachable: Some(true), + ..Default::default() + }) + .await + { + Ok(_) => return Ok(()), + Err(err) if is_conflict(&err) => { + tracing::debug!( + "Network create conflict (attempt {}/5), retrying: {}", + attempt + 1, + err, + ); + last_err = Some(err); + } + Err(err) => { + return Err(err) + .into_diagnostic() + .wrap_err("failed to create Docker network"); + } + } + } + Err(last_err.expect("at least one retry attempt")) + .into_diagnostic() + .wrap_err("failed to create Docker network after retries (network still in use)") +} + pub async fn ensure_volume(docker: &Docker, name: &str) -> Result<()> { match docker.inspect_volume(name).await { Ok(_) => return Ok(()), @@ -328,6 +378,7 @@ pub async fn ensure_container( privileged: Some(true), port_bindings: Some(port_bindings), binds: Some(vec![format!("{}:/var/lib/rancher/k3s", volume_name(name))]), + network_mode: Some(network_name(name)), // Add host.docker.internal mapping for DNS resolution // This allows the entrypoint script to configure CoreDNS to use the host gateway extra_hosts: 
Some(vec!["host.docker.internal:host-gateway".to_string()]), @@ -629,6 +680,21 @@ pub async fn destroy_gateway_resources(docker: &Docker, name: &str) -> Result<() .ok() .and_then(|info| info.image); + // Explicitly disconnect the container from the per-gateway network before + // removing it. This ensures Docker tears down the network endpoint + // synchronously so port bindings are released immediately and the + // subsequent network cleanup sees zero connected containers. + let net_name = network_name(name); + let _ = docker + .disconnect_network( + &net_name, + NetworkDisconnectRequest { + container: container_name.clone(), + force: Some(true), + }, + ) + .await; + let _ = stop_container(docker, &container_name).await; let remove_container = docker @@ -700,9 +766,52 @@ pub async fn destroy_gateway_resources(docker: &Docker, name: &str) -> Result<() return Err(err).into_diagnostic(); } + // Force-remove the per-gateway network during a full destroy. First + // disconnect any stale endpoints that Docker may still report (race + // between container removal and network bookkeeping), then remove the + // network itself. + force_remove_network(docker, &net_name).await?; + Ok(()) } +/// Forcefully remove a Docker network, disconnecting any remaining +/// containers first. This ensures that stale Docker network endpoints +/// cannot prevent port bindings from being released. +async fn force_remove_network(docker: &Docker, net_name: &str) -> Result<()> { + let network = match docker + .inspect_network(net_name, None::) + .await + { + Ok(info) => info, + Err(err) if is_not_found(&err) => return Ok(()), + Err(err) => return Err(err).into_diagnostic(), + }; + + // Disconnect any containers still attached to the network. 
+ if let Some(containers) = network.containers { + for (id, _) in containers { + let _ = docker + .disconnect_network( + net_name, + NetworkDisconnectRequest { + container: id, + force: Some(true), + }, + ) + .await; + } + } + + match docker.remove_network(net_name).await { + Ok(()) => Ok(()), + Err(err) if is_not_found(&err) => Ok(()), + Err(err) => Err(err) + .into_diagnostic() + .wrap_err("failed to remove Docker network"), + } +} + fn is_not_found(err: &BollardError) -> bool { matches!( err, diff --git a/crates/openshell-bootstrap/src/lib.rs b/crates/openshell-bootstrap/src/lib.rs index dcc9ed08f..bf6599b4b 100644 --- a/crates/openshell-bootstrap/src/lib.rs +++ b/crates/openshell-bootstrap/src/lib.rs @@ -26,11 +26,12 @@ use miette::{IntoDiagnostic, Result}; use std::sync::{Arc, Mutex}; use crate::constants::{ - CLIENT_TLS_SECRET_NAME, SERVER_CLIENT_CA_SECRET_NAME, SERVER_TLS_SECRET_NAME, volume_name, + CLIENT_TLS_SECRET_NAME, SERVER_CLIENT_CA_SECRET_NAME, SERVER_TLS_SECRET_NAME, network_name, + volume_name, }; use crate::docker::{ check_existing_gateway, check_port_conflicts, destroy_gateway_resources, ensure_container, - ensure_image, ensure_volume, start_container, stop_container, + ensure_image, ensure_network, ensure_volume, start_container, stop_container, }; use crate::metadata::{ create_gateway_metadata, create_gateway_metadata_with_host, local_gateway_host, @@ -309,6 +310,7 @@ where // All subsequent operations use the target Docker (remote or local) log("[status] Initializing environment".to_string()); + ensure_network(&target_docker, &network_name(&name)).await?; ensure_volume(&target_docker, &volume_name(&name)).await?; // Compute extra TLS SANs for remote deployments so the gateway and k3s