diff --git a/cmd/wfctl/type_registry.go b/cmd/wfctl/type_registry.go index 3aef6f1b..272d4640 100644 --- a/cmd/wfctl/type_registry.go +++ b/cmd/wfctl/type_registry.go @@ -600,6 +600,11 @@ func KnownStepTypes() map[string]StepTypeInfo { Plugin: "pipelinesteps", ConfigKeys: []string{"provider", "scheme", "secret", "secret_from", "header", "signature_header", "url_reconstruction", "include_form_params", "error_status"}, }, + "step.base64_decode": { + Type: "step.base64_decode", + Plugin: "pipelinesteps", + ConfigKeys: []string{"input_from", "format", "allowed_types", "max_size_bytes", "validate_magic_bytes"}, + }, "step.cache_get": { Type: "step.cache_get", Plugin: "pipelinesteps", diff --git a/module/pipeline_step_base64_decode.go b/module/pipeline_step_base64_decode.go new file mode 100644 index 00000000..1104dce4 --- /dev/null +++ b/module/pipeline_step_base64_decode.go @@ -0,0 +1,333 @@ +package module + +import ( + "context" + "encoding/base64" + "fmt" + "mime" + "net/http" + "strings" + + "github.com/CrisisTextLine/modular" +) + +const ( + base64DecodeFormatDataURI = "data_uri" + base64DecodeFormatRawBase64 = "raw_base64" +) + +// mimeToExtension maps common MIME types to their canonical file extensions. 
//
// Besides the registered names, the table carries aliases that reach
// extensionForMIME in practice: "image/jpg" (accepted by mimeTypesCompatible
// as a synonym for "image/jpeg") and several names reported by
// net/http.DetectContentType that differ from the registered ones (for
// example "audio/wave" and "application/x-gzip"). Without them such content
// would fall through to the platform MIME database or the ".bin" fallback.
var mimeToExtension = map[string]string{
	// Images.
	"image/jpeg":    ".jpg",
	"image/jpg":     ".jpg", // non-standard alias of image/jpeg
	"image/png":     ".png",
	"image/gif":     ".gif",
	"image/webp":    ".webp",
	"image/bmp":     ".bmp",
	"image/tiff":    ".tiff",
	"image/svg+xml": ".svg",
	"image/x-icon":  ".ico",

	// Documents and text.
	"application/pdf":  ".pdf",
	"text/plain":       ".txt",
	"text/html":        ".html",
	"text/css":         ".css",
	"text/javascript":  ".js",
	"application/json": ".json",
	"application/xml":  ".xml",
	"text/xml":         ".xml", // sniffer name for XML

	// Audio.
	"audio/mpeg": ".mp3",
	"audio/ogg":  ".ogg",
	"audio/wav":  ".wav",
	"audio/wave": ".wav", // sniffer name for RIFF/WAVE
	"audio/aiff": ".aiff",
	"audio/midi": ".mid",

	// Video.
	"video/mp4":  ".mp4",
	"video/webm": ".webm",
	"video/ogg":  ".ogv",
	"video/avi":  ".avi",

	// Archives and generic binary.
	"application/zip":          ".zip",
	"application/gzip":         ".gz",
	"application/x-gzip":       ".gz", // sniffer name for gzip
	"application/x-tar":        ".tar",
	"application/ogg":          ".ogg", // sniffer name for any Ogg container
	"application/octet-stream": ".bin",

	// Office documents.
	"application/vnd.ms-excel": ".xls",
	"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
	"application/msword": ".doc",
	"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
}

// Base64DecodeStep decodes base64-encoded content (raw or data-URI), optionally
// validating the MIME type and decoded size.
type Base64DecodeStep struct {
	name          string   // step name as given to the factory
	inputFrom     string   // dotted path of the encoded value in the pipeline context
	format        string   // base64DecodeFormatDataURI or base64DecodeFormatRawBase64
	allowedTypes  []string // lower-cased MIME whitelist; empty allows every type
	maxSizeBytes  int      // decoded-size limit in bytes; 0 disables the check
	validateMagic bool     // compare the sniffed type with the data-URI's claimed type
}

// NewBase64DecodeStepFactory returns a StepFactory that creates Base64DecodeStep instances.
+func NewBase64DecodeStepFactory() StepFactory { + return func(name string, config map[string]any, _ modular.Application) (PipelineStep, error) { + inputFrom, _ := config["input_from"].(string) + if inputFrom == "" { + return nil, fmt.Errorf("base64_decode step %q: 'input_from' is required", name) + } + + format, _ := config["format"].(string) + if format == "" { + format = base64DecodeFormatDataURI + } + if format != base64DecodeFormatDataURI && format != base64DecodeFormatRawBase64 { + return nil, fmt.Errorf("base64_decode step %q: 'format' must be %q or %q", name, base64DecodeFormatDataURI, base64DecodeFormatRawBase64) + } + + var allowedTypes []string + if raw, ok := config["allowed_types"].([]any); ok { + for _, t := range raw { + if s, ok := t.(string); ok && s != "" { + allowedTypes = append(allowedTypes, strings.ToLower(s)) + } + } + } + + maxSizeBytes := 0 + switch v := config["max_size_bytes"].(type) { + case int: + maxSizeBytes = v + case int64: + maxSizeBytes = int(v) + case float64: + maxSizeBytes = int(v) + } + + validateMagic, _ := config["validate_magic_bytes"].(bool) + + return &Base64DecodeStep{ + name: name, + inputFrom: inputFrom, + format: format, + allowedTypes: allowedTypes, + maxSizeBytes: maxSizeBytes, + validateMagic: validateMagic, + }, nil + } +} + +// Name returns the step name. +func (s *Base64DecodeStep) Name() string { return s.name } + +// Execute decodes the base64 content from the pipeline context, validates it, +// and returns structured metadata plus the re-encoded base64 data. +func (s *Base64DecodeStep) Execute(_ context.Context, pc *PipelineContext) (*StepResult, error) { + // Resolve the input value from the pipeline context. + // A missing or unresolvable path is treated as invalid input rather than a + // hard error, consistent with the step's non-fatal validation semantics. 
+ raw, err := s.resolveInput(pc) + if err != nil { + return s.invalid(fmt.Sprintf("could not resolve input_from %q: %v", s.inputFrom, err)) + } + + encoded, ok := raw.(string) + if !ok { + return s.invalid(fmt.Sprintf("input at %q is not a string (got %T)", s.inputFrom, raw)) + } + + // Parse the encoded string and determine the claimed MIME type. + var claimedMIME, b64data string + switch s.format { + case base64DecodeFormatDataURI: + claimedMIME, b64data, err = parseDataURI(encoded) + if err != nil { + return s.invalid(fmt.Sprintf("invalid data-URI: %v", err)) + } + case base64DecodeFormatRawBase64: + b64data = encoded + } + + // Guard against excessively large allocations when max_size_bytes is set. + // Base64 encodes 3 bytes into 4 characters, so the decoded length is at + // most ceil(len(b64data)/4)*3. If that upper bound already exceeds the + // limit we can reject without decoding the full payload. + if s.maxSizeBytes > 0 { + estimatedMax := (len(b64data)/4 + 1) * 3 + if estimatedMax > s.maxSizeBytes { + // Perform a precise check only when the estimate exceeds the limit. + // We still need to decode to get the exact size, but we use the + // estimate as an early-exit hint for clearly oversized inputs. + if len(b64data) > (s.maxSizeBytes/3+1)*4 { + return s.invalid(fmt.Sprintf("encoded length indicates decoded size would exceed max_size_bytes %d", s.maxSizeBytes)) + } + } + } + + // Decode the base64 payload. + decoded, err := base64.StdEncoding.DecodeString(b64data) + if err != nil { + // Try URL-safe / padded variants used in some base64 encoders. + decoded, err = base64.RawStdEncoding.DecodeString(b64data) + if err != nil { + decoded, err = base64.URLEncoding.DecodeString(b64data) + if err != nil { + decoded, err = base64.RawURLEncoding.DecodeString(b64data) + if err != nil { + return s.invalid("base64 decode failed: not valid base64") + } + } + } + } + + // Enforce max size with exact decoded length. 
+ if s.maxSizeBytes > 0 && len(decoded) > s.maxSizeBytes { + return s.invalid(fmt.Sprintf("decoded size %d exceeds max_size_bytes %d", len(decoded), s.maxSizeBytes)) + } + + // Detect actual MIME type via magic bytes (using Go's built-in sniffer). + detectedMIME := http.DetectContentType(decoded) + // DetectContentType may include parameters (e.g. "text/plain; charset=utf-8"); strip them. + detectedMIME, _, _ = mime.ParseMediaType(detectedMIME) + if detectedMIME == "" { + detectedMIME = "application/octet-stream" + } + + // Determine the effective content type: prefer the claimed type from the + // data-URI when not validating magic bytes; otherwise use the detected type. + contentType := detectedMIME + if s.format == base64DecodeFormatDataURI && claimedMIME != "" && !s.validateMagic { + contentType = claimedMIME + } + + // Validate magic bytes: the detected MIME should match the claimed one. + if s.validateMagic && s.format == base64DecodeFormatDataURI && claimedMIME != "" { + if !mimeTypesCompatible(detectedMIME, claimedMIME) { + return s.invalid(fmt.Sprintf("magic bytes indicate %q but data-URI claims %q", detectedMIME, claimedMIME)) + } + } + + // Check against the allowed-types whitelist. + if len(s.allowedTypes) > 0 { + if !mimeAllowed(contentType, s.allowedTypes) { + return s.invalid(fmt.Sprintf("content type %q is not in allowed_types", contentType)) + } + } + + ext := extensionForMIME(contentType) + + return &StepResult{ + Output: map[string]any{ + "content_type": contentType, + "extension": ext, + "size_bytes": len(decoded), + "data": base64.StdEncoding.EncodeToString(decoded), + "valid": true, + }, + }, nil +} + +// invalid returns a StepResult with valid=false and a reason field (no error). +// All output keys are present with zero/empty defaults so that downstream +// template expressions that reference e.g. {{ .content_type }} do not fail. 
+func (s *Base64DecodeStep) invalid(reason string) (*StepResult, error) { + return &StepResult{ + Output: map[string]any{ + "valid": false, + "reason": reason, + "content_type": "", + "extension": "", + "size_bytes": 0, + "data": "", + }, + }, nil +} + +// resolveInput reads the value at s.inputFrom from the pipeline context. +func (s *Base64DecodeStep) resolveInput(pc *PipelineContext) (any, error) { + data := make(map[string]any) + for k, v := range pc.Current { + data[k] = v + } + if len(pc.StepOutputs) > 0 { + steps := make(map[string]any, len(pc.StepOutputs)) + for k, v := range pc.StepOutputs { + steps[k] = v + } + data["steps"] = steps + } + return resolveDottedPath(data, s.inputFrom) +} + +// parseDataURI splits a data-URI string (data:[;base64],) into its +// MIME type and base64-encoded payload. Returns an error if the format is wrong +// or if the encoding is not ";base64". +func parseDataURI(s string) (mimeType, b64data string, err error) { + if !strings.HasPrefix(s, "data:") { + return "", "", fmt.Errorf("missing 'data:' prefix") + } + s = s[len("data:"):] + + commaIdx := strings.IndexByte(s, ',') + if commaIdx < 0 { + return "", "", fmt.Errorf("missing ',' separator") + } + + meta := s[:commaIdx] + b64data = s[commaIdx+1:] + + parts := strings.Split(meta, ";") + mimeType = strings.ToLower(strings.TrimSpace(parts[0])) + if mimeType == "" { + mimeType = "text/plain" + } + + // Verify that the encoding is base64 (";base64" must be present). + isBase64 := false + for _, p := range parts[1:] { + if strings.TrimSpace(p) == "base64" { + isBase64 = true + break + } + } + if !isBase64 { + return "", "", fmt.Errorf("only base64-encoded data-URIs are supported (missing ';base64')") + } + + return mimeType, b64data, nil +} + +// mimeTypesCompatible returns true when detected and claimed MIME types are +// considered to represent the same file format. It handles common equivalences +// (e.g. "image/jpg" vs "image/jpeg") and also accepts an exact match. 
+func mimeTypesCompatible(detected, claimed string) bool { + if detected == claimed { + return true + } + // Normalise jpeg variants. + normalize := func(m string) string { + m = strings.ToLower(m) + if m == "image/jpg" { + return "image/jpeg" + } + return m + } + return normalize(detected) == normalize(claimed) +} + +// mimeAllowed returns true when contentType matches one of the allowed types. +// The comparison is case-insensitive and strips any parameters. +func mimeAllowed(contentType string, allowed []string) bool { + ct := strings.ToLower(strings.TrimSpace(contentType)) + for _, a := range allowed { + if strings.ToLower(strings.TrimSpace(a)) == ct { + return true + } + } + return false +} + +// extensionForMIME returns a canonical file extension for a MIME type, falling +// back to the standard library's mime.ExtensionsByType, and ultimately ".bin". +func extensionForMIME(mimeType string) string { + if ext, ok := mimeToExtension[strings.ToLower(mimeType)]; ok { + return ext + } + // Try stdlib + exts, err := mime.ExtensionsByType(mimeType) + if err == nil && len(exts) > 0 { + return exts[0] + } + return ".bin" +} diff --git a/module/pipeline_step_base64_decode_test.go b/module/pipeline_step_base64_decode_test.go new file mode 100644 index 00000000..d9f2fddd --- /dev/null +++ b/module/pipeline_step_base64_decode_test.go @@ -0,0 +1,480 @@ +package module + +import ( + "context" + "encoding/base64" + "testing" +) + +// helper to produce a valid PNG data-URI (1x1 transparent PNG). +func testPNGDataURI() string { + // Minimal valid 1x1 white PNG bytes (67 bytes). 
+ pngBytes := []byte{ + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, // PNG signature + 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, // IHDR length + type + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, // 1x1 + 0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x77, 0x53, // 8-bit RGB + 0xde, 0x00, 0x00, 0x00, 0x0c, 0x49, 0x44, 0x41, // IDAT length + type + 0x54, 0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0x00, // IDAT data + 0x00, 0x00, 0x02, 0x00, 0x01, 0xe2, 0x21, 0xbc, // IDAT data cont + 0x33, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, // IEND length + type + 0x44, 0xae, 0x42, 0x60, 0x82, // IEND data + } + return "data:image/png;base64," + base64.StdEncoding.EncodeToString(pngBytes) +} + +func testJPEGDataURI() string { + // Minimal JPEG: SOI marker + EOI marker. + jpegBytes := []byte{0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01} + return "data:image/jpeg;base64," + base64.StdEncoding.EncodeToString(jpegBytes) +} + +// ---- Factory validation tests ---- + +func TestBase64DecodeStep_FactoryRequiresInputFrom(t *testing.T) { + factory := NewBase64DecodeStepFactory() + _, err := factory("test-step", map[string]any{}, nil) + if err == nil { + t.Fatal("expected error when input_from is missing") + } +} + +func TestBase64DecodeStep_FactoryRejectsUnknownFormat(t *testing.T) { + factory := NewBase64DecodeStepFactory() + _, err := factory("test-step", map[string]any{ + "input_from": "data", + "format": "hex", + }, nil) + if err == nil { + t.Fatal("expected error for unknown format") + } +} + +func TestBase64DecodeStep_FactoryAcceptsValidFormats(t *testing.T) { + factory := NewBase64DecodeStepFactory() + for _, fmt := range []string{"data_uri", "raw_base64"} { + _, err := factory("test-step", map[string]any{ + "input_from": "data", + "format": fmt, + }, nil) + if err != nil { + t.Errorf("format %q should be accepted, got error: %v", fmt, err) + } + } +} + +// ---- data_uri format tests ---- + +func TestBase64DecodeStep_DataURI_PNG(t *testing.T) { + factory 
:= NewBase64DecodeStepFactory() + step, err := factory("decode-png", map[string]any{ + "input_from": "image_data", + "format": "data_uri", + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + pc := NewPipelineContext(map[string]any{"image_data": testPNGDataURI()}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("execute error: %v", err) + } + + if result.Output["valid"] != true { + t.Errorf("expected valid=true, got %v (reason: %v)", result.Output["valid"], result.Output["reason"]) + } + if result.Output["content_type"] != "image/png" { + t.Errorf("expected content_type='image/png', got %v", result.Output["content_type"]) + } + if result.Output["extension"] != ".png" { + t.Errorf("expected extension='.png', got %v", result.Output["extension"]) + } + if result.Output["data"] == nil || result.Output["data"] == "" { + t.Error("expected non-empty data field") + } + if sz, ok := result.Output["size_bytes"].(int); !ok || sz == 0 { + t.Errorf("expected positive size_bytes, got %v", result.Output["size_bytes"]) + } +} + +func TestBase64DecodeStep_DataURI_JPEG(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-jpeg", map[string]any{ + "input_from": "image_data", + "format": "data_uri", + "allowed_types": []any{"image/jpeg"}, + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + pc := NewPipelineContext(map[string]any{"image_data": testJPEGDataURI()}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("execute error: %v", err) + } + + if result.Output["valid"] != true { + t.Errorf("expected valid=true for JPEG, got %v (reason: %v)", result.Output["valid"], result.Output["reason"]) + } + if result.Output["extension"] != ".jpg" { + t.Errorf("expected extension='.jpg', got %v", result.Output["extension"]) + } +} + +// ---- raw_base64 format tests ---- + +func TestBase64DecodeStep_RawBase64(t *testing.T) { + factory := 
NewBase64DecodeStepFactory() + step, err := factory("decode-raw", map[string]any{ + "input_from": "payload", + "format": "raw_base64", + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + original := "hello, world" + encoded := base64.StdEncoding.EncodeToString([]byte(original)) + pc := NewPipelineContext(map[string]any{"payload": encoded}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("execute error: %v", err) + } + + if result.Output["valid"] != true { + t.Errorf("expected valid=true, got %v", result.Output["valid"]) + } + // Verify that re-encoding the output produces the same base64 + if result.Output["data"] != encoded { + t.Errorf("expected data=%q, got %v", encoded, result.Output["data"]) + } +} + +// ---- allowed_types tests ---- + +func TestBase64DecodeStep_AllowedTypes_Accepts(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-img", map[string]any{ + "input_from": "image_data", + "format": "data_uri", + "allowed_types": []any{"image/png", "image/jpeg"}, + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + pc := NewPipelineContext(map[string]any{"image_data": testPNGDataURI()}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("execute error: %v", err) + } + if result.Output["valid"] != true { + t.Errorf("expected valid=true for allowed PNG, got %v (reason: %v)", result.Output["valid"], result.Output["reason"]) + } +} + +func TestBase64DecodeStep_AllowedTypes_Rejects(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-img", map[string]any{ + "input_from": "image_data", + "format": "data_uri", + "allowed_types": []any{"image/jpeg"}, + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + pc := NewPipelineContext(map[string]any{"image_data": testPNGDataURI()}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + 
t.Fatalf("execute error: %v", err) + } + if result.Output["valid"] != false { + t.Errorf("expected valid=false when PNG not in allowed_types (jpeg only), got %v", result.Output["valid"]) + } +} + +// ---- max_size_bytes test ---- + +func TestBase64DecodeStep_MaxSizeBytes_Exceeded(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-size", map[string]any{ + "input_from": "payload", + "format": "raw_base64", + "max_size_bytes": 5, + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + encoded := base64.StdEncoding.EncodeToString([]byte("hello, world")) // 12 bytes + pc := NewPipelineContext(map[string]any{"payload": encoded}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("execute error: %v", err) + } + if result.Output["valid"] != false { + t.Errorf("expected valid=false when size exceeds max, got %v", result.Output["valid"]) + } +} + +func TestBase64DecodeStep_MaxSizeBytes_WithinLimit(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-size", map[string]any{ + "input_from": "payload", + "format": "raw_base64", + "max_size_bytes": 100, + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + encoded := base64.StdEncoding.EncodeToString([]byte("hello")) + pc := NewPipelineContext(map[string]any{"payload": encoded}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("execute error: %v", err) + } + if result.Output["valid"] != true { + t.Errorf("expected valid=true within size limit, got %v", result.Output["valid"]) + } +} + +// ---- invalid input tests ---- + +func TestBase64DecodeStep_InvalidBase64(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-bad", map[string]any{ + "input_from": "payload", + "format": "raw_base64", + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + pc := NewPipelineContext(map[string]any{"payload": 
"not!!valid@base64$$"}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("execute error: %v", err) + } + if result.Output["valid"] != false { + t.Errorf("expected valid=false for invalid base64, got %v", result.Output["valid"]) + } +} + +func TestBase64DecodeStep_InvalidDataURI_MissingComma(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-bad-uri", map[string]any{ + "input_from": "payload", + "format": "data_uri", + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + pc := NewPipelineContext(map[string]any{"payload": "data:image/png;base64:AAAA"}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("execute error: %v", err) + } + if result.Output["valid"] != false { + t.Errorf("expected valid=false for malformed data-URI, got %v", result.Output["valid"]) + } +} + +func TestBase64DecodeStep_InvalidDataURI_NoBase64Tag(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-bad-uri", map[string]any{ + "input_from": "payload", + "format": "data_uri", + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + // Missing ";base64" in data URI + pc := NewPipelineContext(map[string]any{"payload": "data:image/png,AAAA"}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("execute error: %v", err) + } + if result.Output["valid"] != false { + t.Errorf("expected valid=false for data-URI without ;base64, got %v", result.Output["valid"]) + } +} + +// ---- input_from path resolution tests ---- + +func TestBase64DecodeStep_InputFromStepOutput(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-from-step", map[string]any{ + "input_from": "steps.upload.encoded", + "format": "raw_base64", + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + encoded := base64.StdEncoding.EncodeToString([]byte("data from 
step")) + pc := NewPipelineContext(nil, nil) + pc.MergeStepOutput("upload", map[string]any{"encoded": encoded}) + + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("execute error: %v", err) + } + if result.Output["valid"] != true { + t.Errorf("expected valid=true, got %v", result.Output["valid"]) + } +} + +func TestBase64DecodeStep_InputFrom_MissingPath(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-missing", map[string]any{ + "input_from": "steps.missing.encoded", + "format": "raw_base64", + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + pc := NewPipelineContext(nil, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("unexpected error (should return valid=false, not error): %v", err) + } + if result.Output["valid"] != false { + t.Errorf("expected valid=false for missing input_from path, got %v", result.Output["valid"]) + } + if result.Output["reason"] == nil || result.Output["reason"] == "" { + t.Error("expected non-empty reason when input_from path does not exist") + } +} + +func TestBase64DecodeStep_NonStringInput(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-non-string", map[string]any{ + "input_from": "payload", + "format": "raw_base64", + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + // Integer value at the input path — not a string + pc := NewPipelineContext(map[string]any{"payload": 12345}, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("unexpected error (should return valid=false, not error): %v", err) + } + if result.Output["valid"] != false { + t.Errorf("expected valid=false for non-string input, got %v", result.Output["valid"]) + } +} + +func TestBase64DecodeStep_InvalidResult_HasAllOutputKeys(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("decode-invalid-keys", 
map[string]any{ + "input_from": "steps.missing.value", + "format": "raw_base64", + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + + pc := NewPipelineContext(nil, nil) + result, err := step.Execute(context.Background(), pc) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Output["valid"] != false { + t.Errorf("expected valid=false, got %v", result.Output["valid"]) + } + + // All output keys must be present even on failure to allow safe template access + for _, key := range []string{"content_type", "extension", "size_bytes", "data", "valid", "reason"} { + if _, exists := result.Output[key]; !exists { + t.Errorf("expected output key %q to be present in invalid result", key) + } + } +} + +// ---- name test ---- + +func TestBase64DecodeStep_Name(t *testing.T) { + factory := NewBase64DecodeStepFactory() + step, err := factory("my-decode-step", map[string]any{ + "input_from": "data", + }, nil) + if err != nil { + t.Fatalf("factory error: %v", err) + } + if step.Name() != "my-decode-step" { + t.Errorf("expected name 'my-decode-step', got %q", step.Name()) + } +} + +// ---- helper function tests ---- + +func TestParseDataURI_Valid(t *testing.T) { + mimeType, data, err := parseDataURI("data:image/jpeg;base64,/9j/abc123") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if mimeType != "image/jpeg" { + t.Errorf("expected mime 'image/jpeg', got %q", mimeType) + } + if data != "/9j/abc123" { + t.Errorf("expected data '/9j/abc123', got %q", data) + } +} + +func TestParseDataURI_MissingPrefix(t *testing.T) { + _, _, err := parseDataURI("image/png;base64,abc") + if err == nil { + t.Error("expected error for missing data: prefix") + } +} + +func TestMimeAllowed(t *testing.T) { + if !mimeAllowed("image/png", []string{"image/png", "image/jpeg"}) { + t.Error("expected image/png to be allowed") + } + if mimeAllowed("image/gif", []string{"image/png", "image/jpeg"}) { + t.Error("expected image/gif to not be allowed") + } +} + 
+func TestExtensionForMIME(t *testing.T) { + tests := []struct { + mime string + ext string + }{ + {"image/png", ".png"}, + {"image/jpeg", ".jpg"}, + {"application/pdf", ".pdf"}, + {"application/octet-stream", ".bin"}, + } + for _, tt := range tests { + got := extensionForMIME(tt.mime) + if got != tt.ext { + t.Errorf("extensionForMIME(%q) = %q, want %q", tt.mime, got, tt.ext) + } + } +} + +func TestBase64DecodeStep_DefaultFormat(t *testing.T) { + factory := NewBase64DecodeStepFactory() + // No format specified — should default to data_uri + step, err := factory("decode-default", map[string]any{ + "input_from": "data", + }, nil) + if err != nil { + t.Fatalf("factory error with default format: %v", err) + } + if step == nil { + t.Fatal("expected non-nil step") + } +} diff --git a/plugins/pipelinesteps/plugin.go b/plugins/pipelinesteps/plugin.go index e3e06375..52a46b69 100644 --- a/plugins/pipelinesteps/plugin.go +++ b/plugins/pipelinesteps/plugin.go @@ -2,7 +2,7 @@ // types: validate, transform, conditional, set, log, delegate, jq, publish, // http_call, request_parse, db_query, db_exec, json_response, raw_response, // validate_path_param, validate_pagination, validate_request_body, -// foreach, webhook_verify, ui_scaffold, ui_scaffold_analyze, +// foreach, webhook_verify, base64_decode, ui_scaffold, ui_scaffold_analyze, // dlq_send, dlq_replay, retry_with_backoff, circuit_breaker (wrapping), // s3_upload, auth_validate. // It also provides the PipelineWorkflowHandler for composable pipelines. 
@@ -43,13 +43,13 @@ func New() *Plugin { BaseNativePlugin: plugin.BaseNativePlugin{ PluginName: "pipeline-steps", PluginVersion: "1.0.0", - PluginDescription: "Generic pipeline step types (validate, transform, conditional, set, log, delegate, jq, validate_path_param, validate_pagination, validate_request_body, foreach, webhook_verify, etc.)", + PluginDescription: "Generic pipeline step types (validate, transform, conditional, set, log, delegate, jq, base64_decode, validate_path_param, validate_pagination, validate_request_body, foreach, webhook_verify, etc.)", }, Manifest: plugin.PluginManifest{ Name: "pipeline-steps", Version: "1.0.0", Author: "GoCodeAlone", - Description: "Generic pipeline step types, pre-processing validators, and pipeline workflow handler", + Description: "Generic pipeline step types, pre-processing validators, and pipeline workflow handler (including base64_decode)", Tier: plugin.TierCore, StepTypes: []string{ "step.validate", @@ -73,6 +73,7 @@ func New() *Plugin { "step.validate_request_body", "step.foreach", "step.webhook_verify", + "step.base64_decode", "step.cache_get", "step.cache_set", "step.cache_delete", @@ -130,7 +131,8 @@ func (p *Plugin) StepFactories() map[string]plugin.StepFactory { "step.foreach": wrapStepFactory(module.NewForEachStepFactory(func() *module.StepRegistry { return p.concreteStepRegistry })), - "step.webhook_verify": wrapStepFactory(module.NewWebhookVerifyStepFactory()), + "step.webhook_verify": wrapStepFactory(module.NewWebhookVerifyStepFactory()), + "step.base64_decode": wrapStepFactory(module.NewBase64DecodeStepFactory()), "step.cache_get": wrapStepFactory(module.NewCacheGetStepFactory()), "step.cache_set": wrapStepFactory(module.NewCacheSetStepFactory()), "step.cache_delete": wrapStepFactory(module.NewCacheDeleteStepFactory()), diff --git a/plugins/pipelinesteps/plugin_test.go b/plugins/pipelinesteps/plugin_test.go index 05f9a6e2..a0e98c11 100644 --- a/plugins/pipelinesteps/plugin_test.go +++ 
b/plugins/pipelinesteps/plugin_test.go @@ -62,6 +62,7 @@ func TestStepFactories(t *testing.T) { "step.resilient_circuit_breaker", "step.s3_upload", "step.auth_validate", + "step.base64_decode", } for _, stepType := range expectedSteps { diff --git a/schema/module_schema.go b/schema/module_schema.go index 0108f352..92bb9889 100644 --- a/schema/module_schema.go +++ b/schema/module_schema.go @@ -1011,7 +1011,23 @@ func (r *ModuleSchemaRegistry) registerBuiltins() { }) r.Register(&ModuleSchema{ - Type: "step.s3_upload", + Type: "step.base64_decode", + Label: "Base64 Decode", + Category: "pipeline", + Description: "Decodes base64-encoded content (raw or data-URI), validates MIME type and size, and returns structured metadata", + Inputs: []ServiceIODef{{Name: "context", Type: "PipelineContext", Description: "Pipeline context containing the encoded data at the path specified by input_from"}}, + Outputs: []ServiceIODef{{Name: "result", Type: "StepResult", Description: "Decoded content metadata: content_type, extension, size_bytes, data (base64), valid, reason (on failure)"}}, + ConfigFields: []ConfigFieldDef{ + {Key: "input_from", Label: "Input From", Type: FieldTypeString, Required: true, Description: "Dotted path to the encoded data in the pipeline context (e.g., steps.upload.file_data)", Placeholder: "steps.upload.file_data"}, + {Key: "format", Label: "Format", Type: FieldTypeSelect, Options: []string{"data_uri", "raw_base64"}, DefaultValue: "data_uri", Description: "Encoding format: 'data_uri' expects a data:mime/type;base64,... string; 'raw_base64' expects plain base64"}, + {Key: "allowed_types", Label: "Allowed MIME Types", Type: FieldTypeArray, ArrayItemType: "string", Description: "Whitelist of allowed MIME types (e.g., [\"image/jpeg\", \"image/png\"]). Omit to allow all types."}, + {Key: "max_size_bytes", Label: "Max Size (bytes)", Type: FieldTypeNumber, Description: "Maximum allowed decoded size in bytes. 
0 means unlimited."}, + {Key: "validate_magic_bytes", Label: "Validate Magic Bytes", Type: FieldTypeBool, DefaultValue: "false", Description: "When true, verifies the decoded content matches the MIME type claimed in the data-URI header"}, + }, + }) + + r.Register(&ModuleSchema{ + Type: "step.s3_upload", Label: "S3 Upload", Category: "pipeline", Description: "Uploads base64-encoded binary data from the pipeline context to AWS S3 or S3-compatible storage (MinIO, LocalStack). Returns the public URL, resolved object key, and bucket name.", diff --git a/schema/schema.go b/schema/schema.go index 90ad0148..2874a4f5 100644 --- a/schema/schema.go +++ b/schema/schema.go @@ -107,6 +107,7 @@ var coreModuleTypes = []string{ "step.ai_extract", "step.artifact_pull", "step.artifact_push", + "step.base64_decode", "step.build_ui", "step.circuit_breaker", "step.conditional",