package baker
import (
"bytes"
"fmt"
"io"
"os"
"reflect"
"regexp"
"strings"
"unicode"
"github.com/rasky/toml"
)
// The configuration for the topology is parsed from TOML format.
// Each of Input, Filter and Output has a separate section (table)
// in the file containing a key called "Name" that specifies
// which component will be used in the pipeline, plus a couple
// of other generic keys.
//
// Then, there are sub-tables called [input.config], [filter.config] and
// [output.config] that contain the configuration specific to each
// component, and directly map to the *Config structure in the code,
// as specified in the component descriptions (see input.AllInputs(),
// filter.AllFilters(), output.AllOutputs(), upload.AllUploads()).
//
// To match this with the TOML parser, we need to use deferred parsing,
// because initially we don't know which component has been chosen and
// thus we cannot provide the toml decoder with the correct config
// instances. toml.Primitive is how deferred parsing is implemented
// (it's the equivalent of encoding/json.RawMessage). Then, in a second
// step, the DecodedConfig structure is populated with the component
// configuration.
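//
// For illustration only, a minimal topology could look like the following
// sketch (component names and the keys inside the .config sub-tables are
// hypothetical and depend on the components actually registered):
//
//	[input]
//	name = "MyInput"
//	[input.config]
//	path = "/tmp/in"
//
//	[[filter]]
//	name = "MyFilter"
//	[filter.config]
//	field = "address"
//
//	[output]
//	name = "MyOutput"
//	procs = 4
//	fields = ["address"]
//	[output.config]
//	path = "/tmp/out"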
// ConfigInput specifies the configuration for the input component.
type ConfigInput struct {
Name string
ChanSize int // ChanSize is the size of the channel used to send records from the input to the filters; it defaults to 1024.
DecodedConfig interface{}
Config *toml.Primitive
desc *InputDesc
}
// ConfigFilterChain specifies the configuration for the whole filter chain.
type ConfigFilterChain struct {
// Procs specifies the number of baker filters running concurrently.
// When set to a value greater than 1, filtering may be faster but
// record ordering is not guaranteed anymore.
// The default value is 16
Procs int
}
// ConfigFilter specifies the configuration for a single filter component.
type ConfigFilter struct {
Name string
DecodedConfig interface{}
Config *toml.Primitive
desc *FilterDesc
}
// ConfigOutput specifies the configuration for the output component.
type ConfigOutput struct {
Name string
// Procs defines the number of baker outputs running concurrently.
// Only set Procs to a value greater than 1 if the output is safe for concurrent use.
Procs int
ChanSize int // ChanSize is the size of the channel used to send records to the output component(s); it defaults to 16384.
Sharding string // Sharding is the name of the field used for sharding
Fields []string // Fields holds the name of the record fields the output receives
DecodedConfig interface{}
Config *toml.Primitive
desc *OutputDesc
}
// ConfigUpload specifies the configuration for the upload component.
type ConfigUpload struct {
Name string
DecodedConfig interface{}
Config *toml.Primitive
desc *UploadDesc
}
// A ConfigUser defines a user-specific configuration entry.
type ConfigUser struct {
Name string
Config *toml.Primitive
}
// ConfigCSV defines configuration for CSV records
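//
// As a sketch, assuming a comma-separated format, the corresponding TOML
// section could be:
//
//	[csv]
//	field_separator = ","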
type ConfigCSV struct {
// FieldSeparator defines the field separator used in the records.
FieldSeparator string `toml:"field_separator"`
}
// A ConfigGeneral specifies general configuration for the whole topology.
type ConfigGeneral struct {
// DontValidateFields reports whether record validation is skipped (by not calling Components.Validate).
DontValidateFields bool `toml:"dont_validate_fields"`
}
// ConfigMetrics holds metrics configuration.
type ConfigMetrics struct {
Name string
DecodedConfig interface{}
Config *toml.Primitive
desc *MetricsDesc
}
// ConfigFields specifies the names of the record fields. In addition to being
// a list of names, the position of each name in the slice also indicates the
// FieldIndex for that name. In other words, if Names[0] == "address", then
// FieldIndex 0 refers to that field, and "address" is its name.
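//
// For example (field names are hypothetical), with the following TOML:
//
//	[fields]
//	names = ["timestamp", "source", "address"]
//
// "timestamp" gets FieldIndex 0, "source" gets FieldIndex 1 and
// "address" gets FieldIndex 2.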
type ConfigFields struct {
Names []string
}
// ConfigValidation specifies regular expressions for field names, used to generate
// the Record validation function.
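//
// For example (field names and patterns below are hypothetical sketches):
//
//	[validation]
//	source = '^[a-z]+$'
//	timestamp = '^\d+$'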
type ConfigValidation map[string]string
// A Config specifies the configuration for a topology.
type Config struct {
Input ConfigInput
FilterChain ConfigFilterChain
Filter []ConfigFilter
Output ConfigOutput
Upload ConfigUpload
General ConfigGeneral
Fields ConfigFields
Validation ConfigValidation
Metrics ConfigMetrics
CSV ConfigCSV
User []ConfigUser
shardingFuncs map[FieldIndex]ShardingFunc
validate ValidationFunc
createRecord func() Record
fieldByName func(string) (FieldIndex, bool)
fieldNames []string
}
// String returns a string representation of the exported fields of c.
func (c *Config) String() string {
s := fmt.Sprintf("Input:{Name:%s, ChanSize:%d} ", c.Input.Name, c.Input.ChanSize)
s += fmt.Sprintf("FilterChain:{Procs:%d} ", c.FilterChain.Procs)
for i, f := range c.Filter {
s += fmt.Sprintf("Filter-%d:{Name:%s} ", i, f.Name)
}
s += fmt.Sprintf("Output:{Name:%s, Procs:%d, ChanSize:%d, Sharding:%s, Fields:[%s]} ", c.Output.Name, c.Output.Procs, c.Output.ChanSize, c.Output.Sharding, strings.Join(c.Output.Fields, ","))
s += fmt.Sprintf("Upload:{Name:%s}", c.Upload.Name)
return s
}
func (c *Config) fillDefaults() error {
c.Input.fillDefaults()
c.FilterChain.fillDefaults()
c.Output.fillDefaults()
c.Upload.fillDefaults()
if err := c.fillCreateRecordDefault(); err != nil {
return err
}
return nil
}
func (c *Config) fillCreateRecordDefault() error {
if c.createRecord == nil {
fieldSeparator := DefaultLogLineFieldSeparator
if c.CSV.FieldSeparator != "" {
sep := []rune(c.CSV.FieldSeparator)
if len(sep) != 1 || sep[0] > unicode.MaxASCII {
return fmt.Errorf("Separator must be a 1-byte string or hex char")
}
fieldSeparator = byte(sep[0])
}
// For now, leave Logline as the default
c.createRecord = func() Record {
return &LogLine{
FieldSeparator: fieldSeparator,
}
}
}
return nil
}
func (c *ConfigInput) fillDefaults() {
if c.ChanSize == 0 {
c.ChanSize = 1024
}
}
func (c *ConfigFilterChain) fillDefaults() {
if c.Procs == 0 {
c.Procs = 16
}
}
func (c *ConfigOutput) fillDefaults() {
if c.ChanSize == 0 {
c.ChanSize = 16384
}
if c.Procs == 0 {
c.Procs = 32
}
}
func (c *ConfigUpload) fillDefaults() {}
// cloneConfig returns a newly allocated zero value with the same concrete
// type as the value i points to.
func cloneConfig(i interface{}) interface{} {
return reflect.New(reflect.ValueOf(i).Elem().Type()).Interface()
}
// replaceEnvVars expands any reference in the form ${VALUE} or $VALUE found in f
// with the string returned by mapper (typically os.Getenv, so environment
// variables are substituted).
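//
// For example, assuming FOO=bar is set in the environment and mapper is
// os.Getenv, the TOML line:
//
//	path = "${FOO}/data"
//
// becomes:
//
//	path = "bar/data"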
func replaceEnvVars(f io.Reader, mapper func(string) string) (io.Reader, error) {
buf := new(bytes.Buffer)
_, err := buf.ReadFrom(f)
if err != nil {
return nil, fmt.Errorf("Error reading input: %v", err)
}
return strings.NewReader(os.Expand(buf.String(), mapper)), nil
}
func decodeAndCheckConfig(md toml.MetaData, compCfg interface{}) error {
var (
cfg *toml.Primitive // config
dcfg interface{} // decoded config
name string // component name
typ string // component type
)
switch t := compCfg.(type) {
case ConfigInput:
cfg, dcfg = t.Config, t.DecodedConfig
name, typ = t.Name, "input"
case ConfigFilter:
cfg, dcfg = t.Config, t.DecodedConfig
name, typ = t.Name, "filter"
case ConfigOutput:
cfg, dcfg = t.Config, t.DecodedConfig
name, typ = t.Name, "output"
case ConfigUpload:
cfg, dcfg = t.Config, t.DecodedConfig
name, typ = t.Name, "upload"
case ConfigMetrics:
cfg, dcfg = t.Config, t.DecodedConfig
name, typ = t.Name, "metrics"
default:
panic(fmt.Sprintf("unexpected type %#v", compCfg))
}
if cfg == nil {
// No config section was given in the TOML, so we create a pointer to
// the zero value of our config struct to check required fields.
tcfg := reflect.TypeOf(dcfg).Elem()
dcfg = reflect.New(tcfg).Interface()
} else {
if err := md.PrimitiveDecode(*cfg, dcfg); err != nil {
return fmt.Errorf("%s %q: error parsing config: %v", typ, name, err)
}
}
if req := CheckRequiredFields(dcfg); req != "" {
return fmt.Errorf("%s %q: %w", typ, name, ErrorRequiredField{req})
}
return nil
}
// NewConfigFromToml creates a Config from a reader reading from a TOML
// configuration. comp describes all the existing components.
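//
// A typical call sequence could look like the following sketch, where comp
// is an assumed, already-populated Components value:
//
//	f, err := os.Open("topology.toml")
//	if err != nil {
//		return err
//	}
//	defer f.Close()
//	cfg, err := baker.NewConfigFromToml(f, comp)
//	if err != nil {
//		return err
//	}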
func NewConfigFromToml(f io.Reader, comp Components) (*Config, error) {
f, err := replaceEnvVars(f, os.Getenv)
if err != nil {
return nil, fmt.Errorf("Can't replace config with env vars: %v", err)
}
// Parse the configuration. Part of the configuration will be
// captured as toml.Primitive for deferred parsing (see the comment
// at the top of the file).
cfg := Config{}
md, err := toml.DecodeReader(f, &cfg)
if err != nil {
return nil, fmt.Errorf("error parsing topology: %v", err)
}
// We now go through inputs, filters and outputs, and match the names
// to the actual object descriptions provided by each respective package.
// Through the description, we also acquire an instance of the actual
// custom configuration structure, which will be filled later.
for _, inp := range comp.Inputs {
if strings.EqualFold(inp.Name, cfg.Input.Name) {
cfg.Input.desc = &inp
break
}
}
if cfg.Input.desc == nil {
return nil, fmt.Errorf("input does not exist: %q", cfg.Input.Name)
}
for idx := range cfg.Filter {
cfgfil := &cfg.Filter[idx]
for _, fil := range comp.Filters {
if strings.EqualFold(fil.Name, cfgfil.Name) {
cfgfil.desc = &fil
break
}
}
if cfgfil.desc == nil {
return nil, fmt.Errorf("filter does not exist: %q", cfgfil.Name)
}
}
for _, out := range comp.Outputs {
if strings.EqualFold(out.Name, cfg.Output.Name) {
cfg.Output.desc = &out
break
}
}
if cfg.Output.desc == nil {
return nil, fmt.Errorf("output does not exist: %q", cfg.Output.Name)
}
// Upload can be empty
for _, upl := range comp.Uploads {
if strings.EqualFold(upl.Name, cfg.Upload.Name) {
cfg.Upload.desc = &upl
break
}
}
if cfg.Metrics.Name != "" {
for _, mtr := range comp.Metrics {
if strings.EqualFold(mtr.Name, cfg.Metrics.Name) {
cfg.Metrics.desc = &mtr
break
}
}
if cfg.Metrics.desc == nil {
return nil, fmt.Errorf("metrics does not exist: %q", cfg.Metrics.Name)
}
}
// Copy custom configuration structure, to prepare for re-reading
cfg.Input.DecodedConfig = cfg.Input.desc.Config
if err := decodeAndCheckConfig(md, cfg.Input); err != nil {
return nil, err
}
for idx := range cfg.Filter {
// Clone the configuration object to allow the use of multiple instances of the same filter
cfg.Filter[idx].DecodedConfig = cloneConfig(cfg.Filter[idx].desc.Config)
if err := decodeAndCheckConfig(md, cfg.Filter[idx]); err != nil {
return nil, err
}
}
cfg.Output.DecodedConfig = cfg.Output.desc.Config
if err := decodeAndCheckConfig(md, cfg.Output); err != nil {
return nil, err
}
if cfg.Upload.Name != "" {
cfg.Upload.DecodedConfig = cfg.Upload.desc.Config
if err := decodeAndCheckConfig(md, cfg.Upload); err != nil {
return nil, err
}
}
if cfg.Metrics.Name != "" {
cfg.Metrics.DecodedConfig = cfg.Metrics.desc.Config
if err := decodeAndCheckConfig(md, cfg.Metrics); err != nil {
return nil, err
}
}
// Decode user-specific configuration entries.
for _, cfgUser := range cfg.User {
found := false
for idx := range comp.User {
if strings.EqualFold(cfgUser.Name, comp.User[idx].Name) {
found = true
if err := md.PrimitiveDecode(*cfgUser.Config, comp.User[idx].Config); err != nil {
return nil, fmt.Errorf("error parsing user config: %v", err)
}
break
}
}
if !found {
return nil, fmt.Errorf("user configuration does not exist: %q", cfgUser.Name)
}
}
// Abort if there's any unknown key in the configuration file
if keys := md.Undecoded(); len(keys) > 0 {
return nil, fmt.Errorf("invalid keys in configuration file: %v", keys)
}
if err := assignFieldMapping(&cfg, comp); err != nil {
return nil, err
}
if err := assignValidationMapping(&cfg, comp); err != nil {
return nil, err
}
// Copy pluggable functions
cfg.shardingFuncs = comp.ShardingFuncs
cfg.createRecord = comp.CreateRecord
// Fill in missing defaults
return &cfg, cfg.fillDefaults()
}
// hasConfig returns true if the underlying structure has at least one field.
func hasConfig(cfg interface{}) bool {
tf := reflect.TypeOf(cfg).Elem()
return tf.NumField() != 0
}
// assignFieldMapping verifies that the field mapping has been set exactly once
// (either in cfg or in comp). If that is the case, assignFieldMapping sets both
// fieldByName and fieldNames in cfg.
func assignFieldMapping(cfg *Config, comp Components) error {
cfgOk := len(cfg.Fields.Names) != 0
compOk := comp.FieldByName != nil && comp.FieldNames != nil
if (comp.FieldByName == nil) != (comp.FieldNames == nil) {
return fmt.Errorf("FieldByName and FieldName must be either both set or both unset")
}
// First, get inconsistent cases out of the way.
if !cfgOk && !compOk {
return fmt.Errorf("field indexes/names have not been set")
}
if cfgOk && compOk {
return fmt.Errorf("field indexes/names can't both be set in TOML and in Components")
}
if compOk {
// Ok, mapping has been set from Components.
cfg.fieldByName = comp.FieldByName
cfg.fieldNames = comp.FieldNames
return nil
}
// Mapping has been set from Config, create both closures and assign them.
m := make(map[string]FieldIndex, len(cfg.Fields.Names))
for f, s := range cfg.Fields.Names {
_, ok := m[s]
if ok {
return fmt.Errorf("duplicated field name %q", s)
}
m[s] = FieldIndex(f)
}
cfg.fieldNames = cfg.Fields.Names
cfg.fieldByName = func(name string) (FieldIndex, bool) {
f, ok := m[name]
return f, ok
}
return nil
}
// assignValidationMapping verifies that validation has been set at most once,
// either in comp or in cfg. If validation has not been set, assignValidationMapping
// generates a dummy function. Finally, assignValidationMapping sets validate in cfg.
// assignValidationMapping requires that the fieldByName function in cfg has been set.
func assignValidationMapping(cfg *Config, comp Components) error {
cfgOk := len(cfg.Validation) != 0
compOk := comp.Validate != nil
if cfgOk && compOk {
return fmt.Errorf("validation can't both be set in TOML and in Components")
}
if !cfgOk && !compOk {
// Ok, validation not present, set a dummy function.
cfg.validate = func(r Record) (bool, FieldIndex) {
return true, 0
}
return nil
}
if compOk {
// Ok, validation has been set from Components.
cfg.validate = comp.Validate
return nil
}
// Field validation has been set from Config, check errors and create the closures.
idxs := make([]FieldIndex, 0, len(cfg.Validation))
regs := make([]*regexp.Regexp, 0, len(cfg.Validation))
for k, v := range cfg.Validation {
i, ok := cfg.fieldByName(k)
if !ok {
return fmt.Errorf("validation field %q not exists", k)
}
reg, err := regexp.Compile(v)
if err != nil {
return fmt.Errorf("validation regex %q of field %q: %v", v, k, err)
}
idxs = append(idxs, i)
regs = append(regs, reg)
}
cfg.validate = func(r Record) (bool, FieldIndex) {
for i, idx := range idxs {
reg := regs[i]
if !reg.Match(r.Get(idx)) {
return false, idx
}
}
return true, 0
}
return nil
}
// RequiredFields returns the names of the underlying configuration structure
// fields which are tagged as required. To tag a field as being required, a
// "required" struct tag must be present and set to "true".
//
// RequiredFields doesn't support structs embedding other structs.
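//
// For example, given the following (hypothetical) configuration struct:
//
//	type MyFilterConfig struct {
//		Field string `required:"true"`
//		Limit int
//	}
//
// RequiredFields(&MyFilterConfig{}) returns []string{"Field"}.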
func RequiredFields(cfg interface{}) []string {
var fields []string
tf := reflect.TypeOf(cfg).Elem()
for i := 0; i < tf.NumField(); i++ {
field := tf.Field(i)
req := field.Tag.Get("required")
if req != "true" {
continue
}
fields = append(fields, field.Name)
}
return fields
}
// CheckRequiredFields checks that all fields that are tagged as required in
// cfg's type have actually been set to a value other than the field type's zero
// value. If not, CheckRequiredFields returns the name of the first required
// field that is not set; it returns an empty string if all required fields
// are set or the struct doesn't have any required fields (or any fields at all).
//
// CheckRequiredFields doesn't support structs embedding other structs.
func CheckRequiredFields(cfg interface{}) string {
fields := RequiredFields(cfg)
for _, name := range fields {
rv := reflect.ValueOf(cfg).Elem()
fv := rv.FieldByName(name)
if fv.IsZero() {
return name
}
}
return ""
}
// ErrorRequiredField describes the absence of a required field
// in a component configuration.
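//
// Since configuration errors wrap ErrorRequiredField, callers can detect a
// missing required field with errors.As, e.g. (a sketch):
//
//	var reqErr baker.ErrorRequiredField
//	if errors.As(err, &reqErr) {
//		log.Printf("missing required field %s", reqErr.Field)
//	}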
type ErrorRequiredField struct {
Field string // Field is the name of the missing field
}
func (e ErrorRequiredField) Error() string {
return fmt.Sprintf("%q is a required field", e.Field)
}