
Enhance filter parser #41

Open · wants to merge 24 commits into base: main

Changes from all commits (24 commits):
9cfc06a
Introduce filter `Chain` helper methods
yhabteab Oct 23, 2023
7847585
tests: Use `~` as an operator for wildcard matching
yhabteab Oct 26, 2023
a45bec4
Introduce `NewChain()` & `NewCondition()` funcs
yhabteab May 2, 2023
d166796
`spew.Sdump()` filter & add some assertions in `FuzzParser()`
julianbrost May 2, 2023
61d1b6d
Introduce `goyacc` filter parsing grammar rules
yhabteab Oct 23, 2023
8909111
Introduce `Lexer` type and other parsing utils
yhabteab Oct 23, 2023
b0d5cae
Add extended filter/parser tests & adjust expected error messages
yhabteab May 2, 2023
89320eb
Add auto-generated goyacc filter `Parser`
yhabteab Nov 20, 2023
5107934
Set `yylex.rule` only once
yhabteab Oct 27, 2023
d6768fe
Reset `Lexer#rule` on error & use `init()` for initializing global config
yhabteab Oct 27, 2023
ab4c51d
Name regex variable after its usage
yhabteab Oct 27, 2023
8f74a9a
Fix access to non-existent logical operators
yhabteab Oct 27, 2023
f0444f0
Set the scanner mode to `ScanIdents`
yhabteab Nov 6, 2023
94f92c6
Drop superfluous `T_STRING` token
yhabteab Nov 6, 2023
0cae943
Add test cases that trigger scan errors
yhabteab Nov 6, 2023
5d6c645
URL-decode `identifier`s in place & clean up some type/token defs
yhabteab Nov 6, 2023
0ca652d
Resolve `filter_rule` in brackets instead of `filter_chain`
yhabteab Nov 6, 2023
c627cd7
Provide friendly token names in error messages
yhabteab Nov 6, 2023
1b70c78
Fully & correctly resolve complex filter strings
yhabteab Nov 6, 2023
271703f
Ignore auto generated parser actions file
yhabteab Nov 6, 2023
cef2478
Add test case for all the filter evaluation methods
yhabteab Nov 20, 2023
82d3141
Fix mixed tab & space indentation
yhabteab Apr 10, 2024
755b6ae
Fix confusing yacc rule names
yhabteab Apr 17, 2024
7142584
Disable `//line` directives for `goyacc` generated code
yhabteab Aug 14, 2024
3 changes: 3 additions & 0 deletions .gitignore
@@ -1 +1,4 @@
/build/

# Exclude the autogenerated parser.output file
parser.output
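
For context: parser.output is the state/conflict report that goyacc writes because of the -v flag in the go:generate directive in lexer.go below; only the generated parser.go is meant to be committed. Assuming the package path shown in this diff, it is regenerated with:

go generate ./internal/filter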
140 changes: 140 additions & 0 deletions internal/filter/filter_test.go
@@ -0,0 +1,140 @@
package filter

import (
"errors"
"github.com/stretchr/testify/assert"
"regexp"
"strings"
"testing"
)

const unknown string = "unknown"

var evalError = errors.New("evaluation error")

func TestFilter(t *testing.T) {
t.Parallel()

filterable := &filterableType{
key: "domain",
value: "example.com",
}

t.Run("InvalidOperator", func(t *testing.T) {
chain, err := NewChain("unknown", nil)
assert.Nil(t, chain)
assert.EqualError(t, err, "invalid logical operator provided: \"unknown\"")

condition, err := NewCondition("column", "unknown", "value")
assert.Nil(t, condition)
assert.EqualError(t, err, "invalid comparison operator provided: \"unknown\"")
})

t.Run("EvaluationError", func(t *testing.T) {
t.Parallel()

testInvalidData := []struct {
Expression string
}{
{"domain=" + unknown},
{"domain!=" + unknown},
{"domain<" + unknown},
{"domain<=" + unknown},
{"domain>" + unknown},
{"domain>=" + unknown},
{"domain~" + unknown},
{"domain!~" + unknown},
{"!(domain!=" + unknown + ")"},
{"domain=" + unknown + "&domain<=test.example.com"},
{"domain<=" + unknown + "|domain<=test.example.com"},
}

for _, td := range testInvalidData {
f, err := Parse(td.Expression)
assert.NoError(t, err)

matched, err := f.Eval(filterable)
assert.EqualError(t, err, evalError.Error())
assert.Equal(t, false, matched, "unexpected filter result for %q", td.Expression)
}
})

t.Run("EvaluateFilter", func(t *testing.T) {
t.Parallel()

testdata := []struct {
Expression string
Expected bool
}{
{"domain=example.com", true},
{"domain!=example.com", false},
{"domain=test.example.com", false},
{"name!=example.com", false},
{"domain", true},
{"name", false},
{"display_name", false},
{"!name", true},
{"domain~example*", true},
{"domain!~example*", false},
{"domain~example*&!domain", false},
{"domain>a", true},
{"domain<a", false},
{"domain>z", false},
{"domain<z", true},
{"domain>=example&domain<=test.example.com", true},
{"domain<=example|domain<=test.example.com", true},
{"domain<=example|domain>=test.example.com", false},
}

for _, td := range testdata {
f, err := Parse(td.Expression)
if assert.NoError(t, err, "parsing %q should not return an error", td.Expression) {
matched, err := f.Eval(filterable)
assert.NoError(t, err)
assert.Equal(t, td.Expected, matched, "unexpected filter result for %q", td.Expression)
}
}
})
}

type filterableType struct {
key string
value string
}

func (f *filterableType) EvalEqual(_ string, value string) (bool, error) {
if value == unknown {
return false, evalError
}

return strings.EqualFold(f.value, value), nil
}

func (f *filterableType) EvalLess(_ string, value string) (bool, error) {
if value == unknown {
return false, evalError
}

return f.value < value, nil
}

func (f *filterableType) EvalLike(_ string, value string) (bool, error) {
if value == unknown {
return false, evalError
}

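// Test stub: the table-driven cases above only ever use the wildcard
// pattern "example*", so a fixed regex stands in for a real translation
// of the value argument into a regular expression.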
regex := regexp.MustCompile("^example.*$")
return regex.MatchString(f.value), nil
}

func (f *filterableType) EvalLessOrEqual(_ string, value string) (bool, error) {
if value == unknown {
return false, evalError
}

return f.value <= value, nil
}

func (f *filterableType) EvalExists(key string) bool {
return f.key == key
}
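
A minimal usage sketch (same package as the tests, since filterableType is unexported) of how a parsed filter drives the Filterable callbacks above:

f, err := Parse("domain~example*&!name")
// err == nil; the expression parses into, roughly, an AND chain of a
// wildcard condition and a negated existence check.

matched, _ := f.Eval(&filterableType{key: "domain", value: "example.com"})
// matched == true: EvalExists("domain") and EvalLike both hold, while
// EvalExists("name") is false, so "!name" is satisfied.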
177 changes: 177 additions & 0 deletions internal/filter/lexer.go
@@ -0,0 +1,177 @@
//go:generate goyacc -l -v parser.output -o parser.go parser.y

package filter

import (
"errors"
"fmt"
"regexp"
"strings"
"text/scanner"
)

// identifiersMatcher contains a compiled regexp and is used by the Lexer to match filter identifiers.
// Currently, it matches any character that is not part of a LogicalOp or CompOperator.
var identifiersMatcher = regexp.MustCompile("[^!&|~<>=()]")
Comment on lines +13 to +15

Collaborator:

I think it would make sense to work with a positive list here. Doing it like this, there is only very limited room for expansion. For example, if the regex had been [^!&|<>=()] before (missing the ~), adding ~ there would have made previously valid identifiers invalid. Yes, this means that more characters have to be escaped/urlencoded, but I don't think that would be bad; it would just have to be consistent with what the web module writes into the database.

Member Author:

I don't really see why we shouldn't do it the way it is, and I don't want to list all the valid characters, because I don't know them all.

// tokenDisplayNames maps each defined parser token to the friendly
// name used for it in error messages.
var tokenDisplayNames = map[string]string{
"$unk": `"unknown"`,
"T_EQUAL": `"="`,
"T_UNEQUAL": `"!="`,
"T_LIKE": `"~"`,
"T_UNLIKE": `"!~"`,
"T_LESS_THAN": `"<"`,
"T_GREATER_THAN": `">"`,
"T_LESS_THAN_OR_EQUAL": `"<="`,
"T_GREATER_THAN_OR_EQUAL": `">="`,
"T_IDENTIFIER": `"column or value"`,
}

// init just sets the global yyErrorVerbose variable to true.
func init() {
// Enable the parser's verbose errors to get more context on parsing failures.
yyErrorVerbose = true
}

// Parse wraps the auto generated yyParse function.
// It parses the given filter string and, on success, returns a Filter instance.
func Parse(expr string) (rule Filter, err error) {
lex := new(Lexer)
lex.IsIdentRune = isIdentRune
lex.Init(strings.NewReader(expr))

// Set the scanner mode to recognize only identifiers. This way, all unrecognized tokens are returned
// as-is, and our Lexer#Lex() method then recognizes whatever valid input remains.
// Note: this is not strictly necessary, since our custom `isIdentRune` function accepts any rune that
// matches the `identifiersMatcher` pattern, so the scanner would never match any of the other
// scanner.GoTokens anyway.
lex.Mode = scanner.ScanIdents

// scanner.Init sets the error function to nil, therefore, we have to register
// our error function after the scanner initialization.
lex.Scanner.Error = lex.ScanError

defer func() {
// All the grammar rules panics when encountering any errors while reducing the filter rules, so try
// to recover from it and return an error instead. Since we're using a named return values, we can set
// the err value even in deferred function. See https://go.dev/blog/defer-panic-and-recover
if r := recover(); r != nil {
err = errors.New(fmt.Sprint(r))
}

if err != nil {
// The lexer may contain an incomplete filter rule constructed before the parser panicked, so reset it.
rule = nil
}
}()

yyParse(lex)

rule, err = lex.rule, lex.err
return
}

// Lexer is used to tokenize the filter input into a set of literals.
// This is just a wrapper around the Scanner type and implements the yyLexer interface used by the parser.
type Lexer struct {
scanner.Scanner

rule Filter
err error
}

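// Lex scans the next token, stores its literal in yyval and returns its parser token ID.
// It implements the yyLexer interface consumed by the generated yyParse function.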
func (l *Lexer) Lex(yyval *yySymType) int {
token := l.Scan()
lit := l.TokenText()
yyval.text = lit
if token == scanner.Ident {
return T_IDENTIFIER
}

switch lit {
case "&":
return '&'
case "|":
return '|'
case "~":
return T_LIKE
case "=":
return T_EQUAL
case "(":
return '('
case ")":
return ')'
case "!":
next := l.Peek()
switch next {
case '=', '~':
yyval.text = "!" + string(next)
// Since we acted on the peeked rune, advance the internal scanner state by calling Scan.
// Otherwise, the same rune would be scanned again on the next Lex call.
l.Scan()

if next == '~' {
return T_UNLIKE
} else {
return T_UNEQUAL
}
default:
return '!'
}
case "<":
if next := l.Peek(); next == '=' {
yyval.text = "<="
// Since we acted on the peeked rune, advance the internal scanner state by calling Scan.
// Otherwise, the same rune would be scanned again on the next Lex call.
l.Scan()

return T_LESS_THAN_OR_EQUAL
}

return T_LESS_THAN
case ">":
if next := l.Peek(); next == '=' {
yyval.text = ">="
// Since we acted on the peeked rune, advance the internal scanner state by calling Scan.
// Otherwise, the same rune would be scanned again on the next Lex call.
l.Scan()

return T_GREATER_THAN_OR_EQUAL
}

return T_GREATER_THAN
}

// No more input that we are interested in.
// Scan also returns EOF once there are no more tokens to stream, but we want to be explicit.
return scanner.EOF
}

// Error receives any syntax/semantic errors produced by the parser.
//
// The parser itself never returns an error when it fails to parse; instead, it forwards the error to our
// lexer. This function wraps the provided message and adds the line, column number, and offset to the
// error string. Error is the equivalent of "yyerror" in the original yacc.
func (l *Lexer) Error(s string) {
// Replace all parser token names by their corresponding friendly names.
for token, name := range tokenDisplayNames {
s = strings.ReplaceAll(s, token, name)
}

l.err = fmt.Errorf("%d:%d (%d): %s", l.Line, l.Column, l.Offset, s)

// Always reset the current filter rule when encountering an error.
l.rule = nil
}

// ScanError is used to capture all errors the Scanner encounters.
//
// It is rare for the scanner to actually fail to scan the input string, but when it does, it just
// prints to os.Stderr and we would never notice. Hence, filter.Parse registers this function
// after the Lexer initialisation.
func (l *Lexer) ScanError(_ *scanner.Scanner, msg string) { l.Error(msg) }

// isIdentRune provides custom implementation of scanner.IsIdentRune.
// This function determines whether a given character is allowed to be part of an identifier.
func isIdentRune(ch rune, _ int) bool { return identifiersMatcher.MatchString(string(ch)) }
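
Putting the pieces together, a sketch of the error path; the snippet assumes the filter package itself, and the exact goyacc wording is illustrative, only the line:column (offset) prefix and the friendly token names are guaranteed by Error above:

f, err := Parse("domain=&")
// f == nil; err reads roughly like:
//   1:8 (7): syntax error: unexpected "&", expecting "column or value"
// where "column or value" is the tokenDisplayNames substitution for T_IDENTIFIER.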