From 46d302088ee15b9a2d7da001f13676544b32e65a Mon Sep 17 00:00:00 2001 From: Patrick Mann Date: Wed, 8 May 2024 16:06:20 +0200 Subject: [PATCH 1/3] New pipeline functions `remove_single_field` and `remove_multiple_fields` (#19268) * new pipeline functions remove_single_field and remove_multiple_fields * CL * improve rulebuilder descriptions * compile regex and other feedback changes * pre-compile pattern * UI string improvements (cherry picked from commit 4a1cbb4dcb8563a6686e0709040dea75ed831359) --- changelog/unreleased/issue-19098.toml | 16 ++++ .../functions/ProcessorFunctionsModule.java | 4 + .../functions/messages/RemoveField.java | 17 ++-- .../messages/RemoveMultipleFields.java | 88 +++++++++++++++++++ .../functions/messages/RemoveSingleField.java | 63 +++++++++++++ .../functions/FunctionsSnippetsTest.java | 41 ++++++++- .../functions/removeFieldsByName.txt | 15 ++++ .../functions/removeFieldsByRegex.txt | 10 +++ .../functions/removeSingleField.txt | 15 ++++ 9 files changed, 262 insertions(+), 7 deletions(-) create mode 100644 changelog/unreleased/issue-19098.toml create mode 100644 graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveMultipleFields.java create mode 100644 graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveSingleField.java create mode 100644 graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeFieldsByName.txt create mode 100644 graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeFieldsByRegex.txt create mode 100644 graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeSingleField.txt diff --git a/changelog/unreleased/issue-19098.toml b/changelog/unreleased/issue-19098.toml new file mode 100644 index 000000000000..70b2dbe6ace6 --- /dev/null +++ b/changelog/unreleased/issue-19098.toml @@ -0,0 +1,16 @@ +type="a" +message="Introduce new pipeline functions `remove_single_field` and `remove_multiple_fields` to (eventually) replace `remove_field`." + +details.user=""" +GL 5.1 added regex-matching to the pipeline function `remove_field`. This breaks existing pipeline rules that call +`remove_field` with a field name containing a regex reserved character, notably `.`. Performance of existing rules +may also be degraded. +Both issues are addressed by introducing alternate, more specific functions: +`remove_single_field` removes just a single field specified by name. It is simple and fast. +`remove_multiple_fields` removes fields matching a regex pattern and/or list of names. Depending on the +complexity of the matching it is slower. +'remove_field' will be deprecated and removed in the next major version. Do not use it. +""" + +issues=["19098"] +pulls=["19268"] diff --git a/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/ProcessorFunctionsModule.java b/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/ProcessorFunctionsModule.java index 5510a1b93ae8..4fa2efa9fb81 100644 --- a/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/ProcessorFunctionsModule.java +++ b/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/ProcessorFunctionsModule.java @@ -109,6 +109,8 @@ import org.graylog.plugins.pipelineprocessor.functions.messages.NormalizeFields; import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveField; import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveFromStream; +import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveMultipleFields; +import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveSingleField; import org.graylog.plugins.pipelineprocessor.functions.messages.RenameField; import org.graylog.plugins.pipelineprocessor.functions.messages.RouteToStream; import org.graylog.plugins.pipelineprocessor.functions.messages.SetField; @@ -179,6 +181,8 @@ protected void configure() { addMessageProcessorFunction(SetFields.NAME, SetFields.class); addMessageProcessorFunction(RenameField.NAME, RenameField.class); addMessageProcessorFunction(RemoveField.NAME, RemoveField.class); + addMessageProcessorFunction(RemoveSingleField.NAME, RemoveSingleField.class); + addMessageProcessorFunction(RemoveMultipleFields.NAME, RemoveMultipleFields.class); addMessageProcessorFunction(NormalizeFields.NAME, NormalizeFields.class); addMessageProcessorFunction(DropMessage.NAME, DropMessage.class); diff --git a/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveField.java b/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveField.java index c8f2f39eabf8..50226f0b18b9 100644 --- a/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveField.java +++ b/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveField.java @@ -25,6 +25,7 @@ import org.graylog.plugins.pipelineprocessor.rulebuilder.RuleBuilderFunctionGroup; import org.graylog2.plugin.Message; +import java.util.regex.Pattern; import java.util.stream.Collectors; import static org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor.type; @@ -33,25 +34,27 @@ public class RemoveField extends AbstractFunction { public static final String NAME = "remove_field"; public static final String FIELD = "field"; public static final String INVERT = "invert"; - private final ParameterDescriptor fieldParam; + private final ParameterDescriptor fieldParam; private final ParameterDescriptor messageParam; private final ParameterDescriptor invertParam; public RemoveField() { - fieldParam = ParameterDescriptor.string(FIELD).description("The field(s) to remove (name or regex)").build(); + fieldParam = ParameterDescriptor.string(FIELD, Pattern.class) + .transform(Pattern::compile) + .description("The field(s) to remove (name or regex)").build(); messageParam = type("message", Message.class).optional().description("The message to use, defaults to '$message'").build(); invertParam = ParameterDescriptor.bool(INVERT).optional().description("Invert: keep matching field(s) and remove all others").build(); } @Override public Void evaluate(FunctionArgs args, EvaluationContext context) { - final String fieldOrPattern = fieldParam.required(args, context); + final Pattern pattern = fieldParam.required(args, context); final Message message = messageParam.optional(args, context).orElse(context.currentMessage()); final Boolean invert = invertParam.optional(args, context).orElse(false); message.getFieldNames().stream() .filter(f -> { - boolean condition = f.matches(fieldOrPattern); + boolean condition = pattern.matcher(f).matches(); return invert ? !condition : condition; }) .collect(Collectors.toList()) // required to avoid ConcurrentModificationException @@ -67,9 +70,11 @@ public FunctionDescriptor descriptor() { .name(NAME) .returnType(Void.class) .params(ImmutableList.of(fieldParam, messageParam, invertParam)) - .description("Removes the named field from message, unless the field is reserved. If no specific message is provided, it removes the field from the currently processed message.") + .description("Removes the named field from message, unless the field is reserved. " + + "If no specific message is provided, it uses the currently processed message. " + + "This function is deprecated - use the more performant remove_single_field or remove_multiple_fields.") .ruleBuilderEnabled() - .ruleBuilderName("Remove field") + .ruleBuilderName("Remove field (deprecated)") .ruleBuilderTitle("Remove field '${field}'") .ruleBuilderFunctionGroup(RuleBuilderFunctionGroup.MESSAGE) .build(); diff --git a/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveMultipleFields.java b/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveMultipleFields.java new file mode 100644 index 000000000000..138ee598c33d --- /dev/null +++ b/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveMultipleFields.java @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2020 Graylog, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + */ +package org.graylog.plugins.pipelineprocessor.functions.messages; + +import com.google.common.collect.ImmutableList; +import org.graylog.plugins.pipelineprocessor.EvaluationContext; +import org.graylog.plugins.pipelineprocessor.ast.functions.AbstractFunction; +import org.graylog.plugins.pipelineprocessor.ast.functions.FunctionArgs; +import org.graylog.plugins.pipelineprocessor.ast.functions.FunctionDescriptor; +import org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor; +import org.graylog.plugins.pipelineprocessor.rulebuilder.RuleBuilderFunctionGroup; +import org.graylog2.plugin.Message; + +import java.util.List; +import java.util.regex.Pattern; + +import static org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor.type; + +public class RemoveMultipleFields extends AbstractFunction { + public static final String NAME = "remove_multiple_fields"; + private static final String REGEX_PATTERN = "pattern"; + private static final String LIST_OF_NAMES = "names"; + private final ParameterDescriptor regexParam; + private final ParameterDescriptor namesParam; + private final ParameterDescriptor messageParam; + + public RemoveMultipleFields() { + regexParam = ParameterDescriptor.string(REGEX_PATTERN, Pattern.class) + .optional() + .transform(Pattern::compile) + .description("A regex specifying field names to be removed").build(); + namesParam = type(LIST_OF_NAMES, List.class).optional().description("A list of field names to be removed").build(); + messageParam = type("message", Message.class).optional().description("The message to use, defaults to '$message'").build(); + } + + @Override + public Void evaluate(FunctionArgs args, EvaluationContext context) { + final Message message = messageParam.optional(args, context).orElse(context.currentMessage()); + if (regexParam.optional(args, context).isPresent()) { + removeRegex(message, regexParam.optional(args, context).get()); + } + if (namesParam.optional(args, context).isPresent()) { + removeNames(message, namesParam.optional(args, context).get()); + } + return null; + } + + private void removeRegex(Message message, Pattern pattern) { + message.getFieldNames().stream() + .filter(name -> pattern.matcher(name).matches()) + .toList() // required to avoid ConcurrentModificationException + .forEach(message::removeField); + } + + private void removeNames(Message message, List names) { + for (Object name : names) { + message.removeField(String.valueOf(name)); + } + } + + @Override + public FunctionDescriptor descriptor() { + return FunctionDescriptor.builder() + .name(NAME) + .returnType(Void.class) + .params(ImmutableList.of(regexParam, namesParam, messageParam)) + .description("Removes the specified field(s) from message, unless the field name is reserved. If no specific message is provided, it uses the currently processed message.") + .ruleBuilderEnabled() + .ruleBuilderName("Remove field - multiple") + .ruleBuilderTitle("Remove multiple fields by<#if pattern??> regex '${pattern}'<#if pattern?? && names??> or<#if names??> name list '${names}'") + .ruleBuilderFunctionGroup(RuleBuilderFunctionGroup.MESSAGE) + .build(); + } +} diff --git a/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveSingleField.java b/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveSingleField.java new file mode 100644 index 000000000000..4838ca8fff98 --- /dev/null +++ b/graylog2-server/src/main/java/org/graylog/plugins/pipelineprocessor/functions/messages/RemoveSingleField.java @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2020 Graylog, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + */ +package org.graylog.plugins.pipelineprocessor.functions.messages; + +import com.google.common.collect.ImmutableList; +import org.graylog.plugins.pipelineprocessor.EvaluationContext; +import org.graylog.plugins.pipelineprocessor.ast.functions.AbstractFunction; +import org.graylog.plugins.pipelineprocessor.ast.functions.FunctionArgs; +import org.graylog.plugins.pipelineprocessor.ast.functions.FunctionDescriptor; +import org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor; +import org.graylog.plugins.pipelineprocessor.rulebuilder.RuleBuilderFunctionGroup; +import org.graylog2.plugin.Message; + +import static org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor.type; + +public class RemoveSingleField extends AbstractFunction { + public static final String NAME = "remove_single_field"; + public static final String FIELD = "field"; + private final ParameterDescriptor fieldParam; + private final ParameterDescriptor messageParam; + + public RemoveSingleField() { + fieldParam = ParameterDescriptor.string(FIELD).description("The field to remove").build(); + messageParam = type("message", Message.class).optional().description("The message to use, defaults to '$message'").build(); + } + + @Override + public Void evaluate(FunctionArgs args, EvaluationContext context) { + final String field = fieldParam.required(args, context); + final Message message = messageParam.optional(args, context).orElse(context.currentMessage()); + + message.removeField(field); + return null; + } + + @Override + public FunctionDescriptor descriptor() { + return FunctionDescriptor.builder() + .name(NAME) + .returnType(Void.class) + .params(ImmutableList.of(fieldParam, messageParam)) + .description("Removes a field from a message") + .ruleBuilderEnabled() + .ruleBuilderName("Remove field - single") + .ruleBuilderTitle("Remove the single field '${field}'") + .ruleBuilderFunctionGroup(RuleBuilderFunctionGroup.MESSAGE) + .build(); + } +} diff --git a/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java b/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java index 843313f59922..4586090ee8f9 100644 --- a/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java +++ b/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java @@ -112,6 +112,8 @@ import org.graylog.plugins.pipelineprocessor.functions.messages.NormalizeFields; import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveField; import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveFromStream; +import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveMultipleFields; +import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveSingleField; import org.graylog.plugins.pipelineprocessor.functions.messages.RenameField; import org.graylog.plugins.pipelineprocessor.functions.messages.RouteToStream; import org.graylog.plugins.pipelineprocessor.functions.messages.SetField; @@ -219,7 +221,6 @@ public class FunctionsSnippetsTest extends BaseParserTest { private static LookupTableService lookupTableService; private static LookupTableService.Function lookupServiceFunction; private static LookupTable lookupTable; - private static Map aMap; private static Logger loggerMock; @@ -240,6 +241,8 @@ public static void registerFunctions() { functions.put(SetFields.NAME, new SetFields()); functions.put(RenameField.NAME, new RenameField()); functions.put(RemoveField.NAME, new RemoveField()); + functions.put(RemoveSingleField.NAME, new RemoveSingleField()); + functions.put(RemoveMultipleFields.NAME, new RemoveMultipleFields()); functions.put(NormalizeFields.NAME, new NormalizeFields()); functions.put(DropMessage.NAME, new DropMessage()); @@ -1563,6 +1566,42 @@ public void removeField() { assertThat(message.getField("f3")).isEqualTo("f3"); } + @Test + public void removeSingleField() { + final Rule rule = parser.parseRule(ruleForTest(), true); + final Message message = messageFactory.createMessage("test", "test", Tools.nowUTC()); + evaluateRule(rule, message); + + assertThat(message.getField("a.1")).isNull(); + assertThat(message.getField("f1")).isNull(); + assertThat(message.getField("a_1")).isEqualTo("a_1"); + assertThat(message.getField("f2")).isEqualTo("f2"); + } + + @Test + public void removeFieldsByName() { + final Rule rule = parser.parseRule(ruleForTest(), true); + final Message message = messageFactory.createMessage("test", "test", Tools.nowUTC()); + evaluateRule(rule, message); + + assertThat(message.getField("a.1")).isNull(); + assertThat(message.getField("f1")).isNull(); + assertThat(message.getField("a_1")).isEqualTo("a_1"); + assertThat(message.getField("f2")).isEqualTo("f2"); + } + + @Test + public void removeFieldsByRegex() { + final Rule rule = parser.parseRule(ruleForTest(), true); + final Message message = messageFactory.createMessage("test", "test", Tools.nowUTC()); + evaluateRule(rule, message); + + assertThat(message.getField("a.1")).isNull(); + assertThat(message.getField("a_1")).isNull(); + assertThat(message.getField("f2")).isNull(); + assertThat(message.getField("f1")).isEqualTo("f1"); + } + @Test public void setField() { final Rule rule = parser.parseRule(ruleForTest(), true); diff --git a/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeFieldsByName.txt b/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeFieldsByName.txt new file mode 100644 index 000000000000..538d3e29813d --- /dev/null +++ b/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeFieldsByName.txt @@ -0,0 +1,15 @@ +rule "remove_fields_by_name" +when true +then + set_field(field: "a.1", value: "a.1"); + set_field(field: "a_1", value: "a_1"); + set_field(field: "f1", value: "f1"); + set_field(field: "f2", value: "f2"); + + remove_multiple_fields(names:["a.1", "f1"]); + + // invalid - should be NOOP + remove_multiple_fields(names:["dummy"]); + remove_multiple_fields(names:[]); + remove_multiple_fields(); +end diff --git a/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeFieldsByRegex.txt b/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeFieldsByRegex.txt new file mode 100644 index 000000000000..5ec49d8ddea3 --- /dev/null +++ b/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeFieldsByRegex.txt @@ -0,0 +1,10 @@ +rule "remove_fields_by_regex_and_name)" +when true +then + set_field(field: "a.1", value: "a.1"); + set_field(field: "a_1", value: "a_1"); + set_field(field: "f1", value: "f1"); + set_field(field: "f2", value: "f2"); + + remove_multiple_fields(pattern:"a.1", names:["f2"]); +end diff --git a/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeSingleField.txt b/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeSingleField.txt new file mode 100644 index 000000000000..c84edbb92e14 --- /dev/null +++ b/graylog2-server/src/test/resources/org/graylog/plugins/pipelineprocessor/functions/removeSingleField.txt @@ -0,0 +1,15 @@ +rule "remove_single_field" +when true +then + set_field(field: "a.1", value: "a.1"); + set_field(field: "a_1", value: "a_1"); + set_field(field: "f1", value: "f1"); + set_field(field: "f2", value: "f2"); + + remove_single_field(field:"a.1"); + remove_single_field(field:"f1"); + + // invalid - should be NO-OP + remove_single_field(field:"f."); + remove_single_field(field:"dummy"); +end From e9a6aabde182c709b5a52cfc6fe7a0344b9646f7 Mon Sep 17 00:00:00 2001 From: patrickmann Date: Wed, 8 May 2024 17:14:27 +0200 Subject: [PATCH 2/3] adjust unit test for 5.2 --- .../pipelineprocessor/functions/FunctionsSnippetsTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java b/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java index 4586090ee8f9..26dfc2705752 100644 --- a/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java +++ b/graylog2-server/src/test/java/org/graylog/plugins/pipelineprocessor/functions/FunctionsSnippetsTest.java @@ -1569,7 +1569,7 @@ public void removeField() { @Test public void removeSingleField() { final Rule rule = parser.parseRule(ruleForTest(), true); - final Message message = messageFactory.createMessage("test", "test", Tools.nowUTC()); + final Message message = new Message("test", "test", Tools.nowUTC()); evaluateRule(rule, message); assertThat(message.getField("a.1")).isNull(); @@ -1581,7 +1581,7 @@ public void removeSingleField() { @Test public void removeFieldsByName() { final Rule rule = parser.parseRule(ruleForTest(), true); - final Message message = messageFactory.createMessage("test", "test", Tools.nowUTC()); + final Message message = new Message("test", "test", Tools.nowUTC()); evaluateRule(rule, message); assertThat(message.getField("a.1")).isNull(); @@ -1593,7 +1593,7 @@ public void removeFieldsByName() { @Test public void removeFieldsByRegex() { final Rule rule = parser.parseRule(ruleForTest(), true); - final Message message = messageFactory.createMessage("test", "test", Tools.nowUTC()); + final Message message = new Message("test", "test", Tools.nowUTC()); evaluateRule(rule, message); assertThat(message.getField("a.1")).isNull(); From 0a9971be68026ad0930a631c021b7f8657bc94a9 Mon Sep 17 00:00:00 2001 From: Patrick Mann Date: Wed, 5 Jun 2024 09:59:55 +0200 Subject: [PATCH 3/3] Update issue-19098.toml --- changelog/unreleased/issue-19098.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog/unreleased/issue-19098.toml b/changelog/unreleased/issue-19098.toml index 70b2dbe6ace6..c35dd7edc5c9 100644 --- a/changelog/unreleased/issue-19098.toml +++ b/changelog/unreleased/issue-19098.toml @@ -13,4 +13,4 @@ complexity of the matching it is slower. """ issues=["19098"] -pulls=["19268"] +pulls=["19300"]