Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions internal/providers/compute/k8s/build_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ package k8s
//
// The legacy path stores the tarball in a k8s Secret which etcd caps at ~1 MiB.
// That cap routinely defeats agents shipping anything more than a Dockerfile +
// a tiny entrypoint. This file implements the S3 path: upload the tarball
// once via minio-go, then point kaniko at the resulting s3:// URL.
// a tiny entrypoint. This file uploads the tarball to MinIO and hands the
// kaniko build Job a short-lived presigned HTTP URL — avoiding the
// AWS-SDK-v2 path-style quirks that broke the s3:// approach (vhost-style
// hostname resolution against in-cluster MinIO DNS).
//
// Practical new cap = the multipart limit enforced in the deploy handler
// (currently 50 MiB) instead of the etcd object-size limit.
Expand All @@ -14,19 +16,33 @@ import (
"bytes"
"context"
"fmt"
"net/url"
"time"

"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
)

// presignTTL is the lifetime of the presigned build-context URL; it is the
// expiry uploadBuildContext passes to PresignedGetObject. Short enough that a
// leaked link expires before it matters; long enough that a slow context
// fetch finishes. Kaniko builds typically take 30s–3min on the provisioned
// build pod (250m CPU), so 30 min is safe.
const presignTTL = 30 * time.Minute

// uploadBuildContext writes the tarball to MinIO and returns:
// - s3URL: the s3://bucket/key URL kaniko's --context flag accepts
// - contextURL: a presigned plain-HTTP GET URL for the uploaded tarball; the
//   build Job's init container downloads it and kaniko reads the local copy
// - objectKey: the bucket-relative key, so the caller can delete it post-build
//
// Returns ("", "", nil) when buildCtx is unconfigured — caller must fall back
// to the legacy Secret-based delivery.
func (p *K8sProvider) uploadBuildContext(ctx context.Context, appID string, tarball []byte) (s3URL, objectKey string, err error) {
//
// Why presigned-HTTP instead of s3://: kaniko v1.23 ships AWS SDK v2, which
// resolves S3 endpoints in vhost style by default. The env-only path-style
// switch (S3_FORCE_PATH_STYLE) was an SDK v1 knob and is silently ignored;
// AWS SDK v2 only honours a UsePathStyle option set in code, which we cannot
// inject. Generating a presigned URL on our side sidesteps the whole AWS-SDK
// path/vhost decision: the build Job only has to issue a plain HTTP GET.
func (p *K8sProvider) uploadBuildContext(ctx context.Context, appID string, tarball []byte) (contextURL, objectKey string, err error) {
if p.buildCtx.Endpoint == "" {
return "", "", nil
}
Expand Down Expand Up @@ -61,6 +77,9 @@ func (p *K8sProvider) uploadBuildContext(ctx context.Context, appID string, tarb
return "", "", fmt.Errorf("uploadBuildContext: put object: %w", err)
}

s3URL = fmt.Sprintf("s3://%s/%s", p.buildCtx.BucketName, objectKey)
return s3URL, objectKey, nil
presignedURL, err := client.PresignedGetObject(ctx, p.buildCtx.BucketName, objectKey, presignTTL, url.Values{})
if err != nil {
return "", "", fmt.Errorf("uploadBuildContext: presign get: %w", err)
}
return presignedURL.String(), objectKey, nil
}
71 changes: 46 additions & 25 deletions internal/providers/compute/k8s/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -813,30 +813,21 @@ func (p *K8sProvider) ensureRegistryAuthInNS(ctx context.Context, ns, name strin
}

// createKanikoJob spawns a one-shot Job that builds and pushes the image.
// When s3ContextURL is non-empty kaniko reads the build context directly from
// MinIO via the S3 path (no 1 MiB cap); when empty it falls back to reading a
// tar Secret mounted at /workspace.
func (p *K8sProvider) createKanikoJob(ctx context.Context, ns, jobName, ctxSecret, authSecret, imageTag, s3ContextURL string) error {
// When httpContextURL is non-empty an initContainer curls the build context
// from MinIO into a shared emptyDir; kaniko then reads via the standard
// tar:// path. When empty it falls back to a tar Secret mounted at /workspace.
//
// Why not --context=s3://: kaniko v1.23 ships AWS SDK v2 which only resolves
// S3 endpoints in vhost style; the path-style env switches are SDK v1 and
// silently ignored, so the bucket name resolves as a non-existent subdomain.
// Why not --context=https://: MinIO serves plaintext HTTP in-cluster, and the
// URL schemes kaniko accepts for --context do not include http://. The
// init-container sidesteps both — we control the fetch, and kaniko sees only
// a local tar file.
func (p *K8sProvider) createKanikoJob(ctx context.Context, ns, jobName, ctxSecret, authSecret, imageTag, httpContextURL string) error {
backoff := int32(0)
ttl := int32(300)

useS3 := s3ContextURL != ""
contextArg := "--context=tar:///workspace/context.tar.gz"
if useS3 {
contextArg = "--context=" + s3ContextURL
}

// AWS env so kaniko's S3 reader talks to in-cluster MinIO rather than the
// AWS metadata endpoint. Honored only when --context=s3://, harmless
// otherwise — applied unconditionally to keep the spec simple.
envVars := []corev1.EnvVar{
{Name: "AWS_ACCESS_KEY_ID", Value: p.buildCtx.AccessKey},
{Name: "AWS_SECRET_ACCESS_KEY", Value: p.buildCtx.SecretKey},
{Name: "AWS_REGION", Value: "us-east-1"},
{Name: "S3_FORCE_PATH_STYLE", Value: "true"},
{Name: "AWS_S3_ENDPOINT", Value: "http://" + p.buildCtx.Endpoint},
{Name: "AWS_ENDPOINT_URL_S3", Value: "http://" + p.buildCtx.Endpoint},
}
useHTTP := httpContextURL != ""

volumes := []corev1.Volume{{
Name: "registry-auth",
Expand All @@ -852,7 +843,37 @@ func (p *K8sProvider) createKanikoJob(ctx context.Context, ns, jobName, ctxSecre
mounts := []corev1.VolumeMount{
{Name: "registry-auth", MountPath: "/kaniko/.docker"},
}
if !useS3 {

var initContainers []corev1.Container
if useHTTP {
// Shared emptyDir between init-container (curl) and main kaniko container.
volumes = append(volumes, corev1.Volume{
Name: "build-context",
VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}},
})
mounts = append(mounts, corev1.VolumeMount{Name: "build-context", MountPath: "/workspace"})

initContainers = []corev1.Container{{
Name: "fetch-context",
Image: "curlimages/curl:8.10.1",
Command: []string{"sh", "-c", "curl --fail --silent --show-error --max-time 120 -o /workspace/context.tar.gz \"$URL\""},
Env: []corev1.EnvVar{{Name: "URL", Value: httpContextURL}},
VolumeMounts: []corev1.VolumeMount{
{Name: "build-context", MountPath: "/workspace"},
},
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("50m"),
corev1.ResourceMemory: resource.MustParse("32Mi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("250m"),
corev1.ResourceMemory: resource.MustParse("64Mi"),
},
},
}}
} else {
// Legacy Secret path (≤1 MiB).
volumes = append(volumes, corev1.Volume{
Name: "build-context",
VolumeSource: corev1.VolumeSource{
Expand All @@ -875,19 +896,19 @@ func (p *K8sProvider) createKanikoJob(ctx context.Context, ns, jobName, ctxSecre
TTLSecondsAfterFinished: &ttl,
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
RestartPolicy: corev1.RestartPolicyNever,
RestartPolicy: corev1.RestartPolicyNever,
InitContainers: initContainers,
Containers: []corev1.Container{{
Name: "kaniko",
Image: "gcr.io/kaniko-project/executor:v1.23.2",
Args: []string{
contextArg,
"--context=tar:///workspace/context.tar.gz",
"--destination=" + imageTag,
"--snapshot-mode=redo",
"--cache=false",
"--single-snapshot",
"--cleanup",
},
Env: envVars,
// Explicit resources override the per-namespace LimitRange
// default (hobby tier defaults to 50m/256Mi which throttles
// kaniko + npm install to 5+ minutes). 250m/512Mi keeps a
Expand Down
86 changes: 55 additions & 31 deletions internal/providers/compute/k8s/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,20 @@ func TestKanikoJobHasExplicitResources(t *testing.T) {
}
}

// TestKanikoJobUsesS3ContextWhenURLSet guards the build-context lift past the
// k8s Secret's ~1 MiB cap (etcd object size limit). When s3ContextURL is set,
// kaniko's --context arg becomes the s3:// URL and the build-context Secret
// volume is absent; AWS env vars are set so kaniko's S3 reader talks to MinIO.
func TestKanikoJobUsesS3ContextWhenURLSet(t *testing.T) {
// TestKanikoJobUsesInitContainerWhenHTTPURLSet guards the build-context lift
// past the k8s Secret's ~1 MiB cap. When httpContextURL is set, the Job grows
// an initContainer that curls the presigned URL into a shared emptyDir; the
// main kaniko container then reads the tarball via the standard tar://
// volume path.
//
// Earlier attempts (s3:// and tar.gz+http://) failed live because:
// - AWS SDK v2 ignores S3_FORCE_PATH_STYLE → vhost-style DNS lookup against
// in-cluster MinIO fails.
// - kaniko v1.23 doesn't accept tar.gz+ scheme prefix.
// - kaniko's HTTPS context fetcher rejects plaintext http://.
// The init-container path sidesteps all three: curl handles the HTTP fetch,
// kaniko sees only a local file.
func TestKanikoJobUsesInitContainerWhenHTTPURLSet(t *testing.T) {
cs := fake.NewSimpleClientset()
p := &K8sProvider{
clientset: cs,
Expand All @@ -65,49 +74,64 @@ func TestKanikoJobUsesS3ContextWhenURLSet(t *testing.T) {
}

const ns, jobName = "instant-deploy-test", "build-test"
s3URL := "s3://instant-build-contexts/abc/20260511T000000Z.tar.gz"
if err := p.createKanikoJob(context.Background(), ns, jobName, "ctx-sec", "auth-sec", "ghcr.io/x/y:latest", s3URL); err != nil {
httpURL := "http://minio.test:9000/instant-build-contexts/abc/20260511T000000Z.tar.gz?X-Amz-Signature=fake"
if err := p.createKanikoJob(context.Background(), ns, jobName, "ctx-sec", "auth-sec", "ghcr.io/x/y:latest", httpURL); err != nil {
t.Fatalf("createKanikoJob: %v", err)
}

job, err := cs.BatchV1().Jobs(ns).Get(context.Background(), jobName, metav1.GetOptions{})
if err != nil {
t.Fatalf("get job: %v", err)
}
c := job.Spec.Template.Spec.Containers[0]
podSpec := job.Spec.Template.Spec

// --context arg points at the s3:// URL, not the tar:// volume mount.
hasS3Context := false
for _, a := range c.Args {
if a == "--context="+s3URL {
hasS3Context = true
}
if a == "--context=tar:///workspace/context.tar.gz" {
t.Errorf("kaniko still references tar:// mount when s3ContextURL is set; args=%v", c.Args)
// Init-container exists, uses curl, and points at the URL.
if len(podSpec.InitContainers) != 1 {
t.Fatalf("expected 1 init-container (curl fetch); got %d", len(podSpec.InitContainers))
}
ic := podSpec.InitContainers[0]
if ic.Image == "" || ic.Image[:7] != "curlima" {
t.Errorf("init-container image %q does not look like a curl image", ic.Image)
}
gotURL := ""
for _, e := range ic.Env {
if e.Name == "URL" {
gotURL = e.Value
}
}
if !hasS3Context {
t.Errorf("kaniko --context flag missing for s3URL %q; args=%v", s3URL, c.Args)
if gotURL != httpURL {
t.Errorf("init-container URL env = %q; want %q", gotURL, httpURL)
}

// AWS env so kaniko talks to MinIO, not the AWS metadata endpoint.
env := map[string]string{}
for _, e := range c.Env {
env[e.Name] = e.Value
}
for _, must := range []string{"AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_S3_ENDPOINT", "S3_FORCE_PATH_STYLE"} {
if env[must] == "" {
t.Errorf("kaniko env var %s is empty — S3 reader will fall back to AWS metadata", must)
// Main kaniko reads from the local tar volume.
c := podSpec.Containers[0]
hasTarContext := false
for _, a := range c.Args {
if a == "--context=tar:///workspace/context.tar.gz" {
hasTarContext = true
}
}
if got := env["AWS_S3_ENDPOINT"]; got != "http://minio.test:9000" {
t.Errorf("AWS_S3_ENDPOINT = %q; want http://minio.test:9000", got)
if !hasTarContext {
t.Errorf("kaniko must read --context=tar:///workspace/context.tar.gz when init-container delivers the tarball; got args=%v", c.Args)
}

// No build-context Secret volume when using S3.
for _, v := range job.Spec.Template.Spec.Volumes {
// build-context volume is emptyDir, not a Secret.
for _, v := range podSpec.Volumes {
if v.Name == "build-context" {
t.Errorf("build-context Secret volume should be absent when using S3, but found one")
if v.EmptyDir == nil {
t.Errorf("build-context volume must be emptyDir under the init-container path; got %#v", v.VolumeSource)
}
if v.Secret != nil {
t.Errorf("build-context volume must not be a Secret under the init-container path")
}
}
}

// No AWS_ env vars on the main kaniko container — they were the failed v1
// switches and serve no purpose in the init-container path.
for _, e := range c.Env {
if e.Name == "AWS_ACCESS_KEY_ID" || e.Name == "S3_FORCE_PATH_STYLE" {
t.Errorf("kaniko env should not include legacy AWS S3 envs; found %s", e.Name)
}
}
}
Expand Down