Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport 5.1: New pipeline functions remove_single_field and remove_multiple_fields #19301

Merged
merged 3 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 16 additions & 0 deletions changelog/unreleased/issue-19098.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
type="a"
message="Introduce new pipeline functions `remove_single_field` and `remove_multiple_fields` to (eventually) replace `remove_field`."

details.user="""
GL 5.1 added regex-matching to the pipeline function `remove_field`. This breaks existing pipeline rules that call
`remove_field` with a field name containing a regex reserved character, notably `.`. Performance of existing rules
may also be degraded.
Both issues are addressed by introducing alternate, more specific functions:
`remove_single_field` removes just a single field specified by name. It is simple and fast.
`remove_multiple_fields` removes fields matching a regex pattern and/or list of names. Depending on the
complexity of the matching it is slower.
'remove_field' will be deprecated and removed in the next major version. Do not use it.
"""

issues=["19098"]
pulls=["19301"]
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@
import org.graylog.plugins.pipelineprocessor.functions.messages.NormalizeFields;
import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveField;
import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveFromStream;
import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveMultipleFields;
import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveSingleField;
import org.graylog.plugins.pipelineprocessor.functions.messages.RenameField;
import org.graylog.plugins.pipelineprocessor.functions.messages.RouteToStream;
import org.graylog.plugins.pipelineprocessor.functions.messages.SetField;
Expand Down Expand Up @@ -164,6 +166,8 @@ protected void configure() {
addMessageProcessorFunction(SetFields.NAME, SetFields.class);
addMessageProcessorFunction(RenameField.NAME, RenameField.class);
addMessageProcessorFunction(RemoveField.NAME, RemoveField.class);
addMessageProcessorFunction(RemoveSingleField.NAME, RemoveSingleField.class);
addMessageProcessorFunction(RemoveMultipleFields.NAME, RemoveMultipleFields.class);
addMessageProcessorFunction(NormalizeFields.NAME, NormalizeFields.class);

addMessageProcessorFunction(DropMessage.NAME, DropMessage.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,28 +24,31 @@
import org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor;
import org.graylog2.plugin.Message;

import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor.type;

public class RemoveField extends AbstractFunction<Void> {
public static final String NAME = "remove_field";
public static final String FIELD = "field";
private final ParameterDescriptor<String, String> fieldParam;
private final ParameterDescriptor<String, Pattern> fieldParam;
private final ParameterDescriptor<Message, Message> messageParam;

public RemoveField() {
fieldParam = ParameterDescriptor.string(FIELD).description("The field(s) to remove (name or regex)").build();
fieldParam = ParameterDescriptor.string(FIELD, Pattern.class)
.transform(Pattern::compile)
.description("The field(s) to remove (name or regex)").build();
messageParam = type("message", Message.class).optional().description("The message to use, defaults to '$message'").build();
}

@Override
public Void evaluate(FunctionArgs args, EvaluationContext context) {
final String fieldOrPattern = fieldParam.required(args, context);
final Pattern pattern = fieldParam.required(args, context);
final Message message = messageParam.optional(args, context).orElse(context.currentMessage());

message.getFieldNames().stream()
.filter(f -> f.matches(fieldOrPattern))
.filter(f -> pattern.matcher(f).matches())
.collect(Collectors.toList()) // required to avoid ConcurrentModificationException
.forEach(message::removeField);

Expand All @@ -59,7 +62,9 @@ public FunctionDescriptor<Void> descriptor() {
.name(NAME)
.returnType(Void.class)
.params(ImmutableList.of(fieldParam, messageParam))
.description("Removes a field from a message")
.description("Removes the named field from message, unless the field is reserved. " +
"If no specific message is provided, it uses the currently processed message. " +
"This function is deprecated - use the more performant remove_single_field or remove_multiple_fields.")
.build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright (C) 2020 Graylog, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*/
package org.graylog.plugins.pipelineprocessor.functions.messages;

import com.google.common.collect.ImmutableList;
import org.graylog.plugins.pipelineprocessor.EvaluationContext;
import org.graylog.plugins.pipelineprocessor.ast.functions.AbstractFunction;
import org.graylog.plugins.pipelineprocessor.ast.functions.FunctionArgs;
import org.graylog.plugins.pipelineprocessor.ast.functions.FunctionDescriptor;
import org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor;
import org.graylog2.plugin.Message;

import java.util.List;
import java.util.regex.Pattern;

import static org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor.type;

public class RemoveMultipleFields extends AbstractFunction<Void> {
public static final String NAME = "remove_multiple_fields";
private static final String REGEX_PATTERN = "pattern";
private static final String LIST_OF_NAMES = "names";
private final ParameterDescriptor<String, Pattern> regexParam;
private final ParameterDescriptor<List, List> namesParam;
private final ParameterDescriptor<Message, Message> messageParam;

public RemoveMultipleFields() {
regexParam = ParameterDescriptor.string(REGEX_PATTERN, Pattern.class)
.optional()
.transform(Pattern::compile)
.description("A regex specifying field names to be removed").build();
namesParam = type(LIST_OF_NAMES, List.class).optional().description("A list of field names to be removed").build();
messageParam = type("message", Message.class).optional().description("The message to use, defaults to '$message'").build();
}

@Override
public Void evaluate(FunctionArgs args, EvaluationContext context) {
final Message message = messageParam.optional(args, context).orElse(context.currentMessage());
if (regexParam.optional(args, context).isPresent()) {
removeRegex(message, regexParam.optional(args, context).get());
}
if (namesParam.optional(args, context).isPresent()) {
removeNames(message, namesParam.optional(args, context).get());
}
return null;
}

private void removeRegex(Message message, Pattern pattern) {
message.getFieldNames().stream()
.filter(name -> pattern.matcher(name).matches())
.toList() // required to avoid ConcurrentModificationException
.forEach(message::removeField);
}

private void removeNames(Message message, List names) {
for (Object name : names) {
message.removeField(String.valueOf(name));
}
}

@Override
public FunctionDescriptor<Void> descriptor() {
return FunctionDescriptor.<Void>builder()
.name(NAME)
.returnType(Void.class)
.params(ImmutableList.of(regexParam, namesParam, messageParam))
.description("Removes the specified field(s) from message, unless the field name is reserved. If no specific message is provided, it uses the currently processed message.")
.build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (C) 2020 Graylog, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*/
package org.graylog.plugins.pipelineprocessor.functions.messages;

import com.google.common.collect.ImmutableList;
import org.graylog.plugins.pipelineprocessor.EvaluationContext;
import org.graylog.plugins.pipelineprocessor.ast.functions.AbstractFunction;
import org.graylog.plugins.pipelineprocessor.ast.functions.FunctionArgs;
import org.graylog.plugins.pipelineprocessor.ast.functions.FunctionDescriptor;
import org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor;
import org.graylog2.plugin.Message;

import static org.graylog.plugins.pipelineprocessor.ast.functions.ParameterDescriptor.type;

public class RemoveSingleField extends AbstractFunction<Void> {
public static final String NAME = "remove_single_field";
public static final String FIELD = "field";
private final ParameterDescriptor<String, String> fieldParam;
private final ParameterDescriptor<Message, Message> messageParam;

public RemoveSingleField() {
fieldParam = ParameterDescriptor.string(FIELD).description("The field to remove").build();
messageParam = type("message", Message.class).optional().description("The message to use, defaults to '$message'").build();
}

@Override
public Void evaluate(FunctionArgs args, EvaluationContext context) {
final String field = fieldParam.required(args, context);
final Message message = messageParam.optional(args, context).orElse(context.currentMessage());

message.removeField(field);
return null;
}

@Override
public FunctionDescriptor<Void> descriptor() {
return FunctionDescriptor.<Void>builder()
.name(NAME)
.returnType(Void.class)
.params(ImmutableList.of(fieldParam, messageParam))
.description("Removes a field from a message")
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@
import org.graylog.plugins.pipelineprocessor.functions.messages.NormalizeFields;
import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveField;
import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveFromStream;
import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveMultipleFields;
import org.graylog.plugins.pipelineprocessor.functions.messages.RemoveSingleField;
import org.graylog.plugins.pipelineprocessor.functions.messages.RenameField;
import org.graylog.plugins.pipelineprocessor.functions.messages.RouteToStream;
import org.graylog.plugins.pipelineprocessor.functions.messages.SetField;
Expand Down Expand Up @@ -221,6 +223,8 @@ public static void registerFunctions() {
functions.put(SetFields.NAME, new SetFields());
functions.put(RenameField.NAME, new RenameField());
functions.put(RemoveField.NAME, new RemoveField());
functions.put(RemoveSingleField.NAME, new RemoveSingleField());
functions.put(RemoveMultipleFields.NAME, new RemoveMultipleFields());
functions.put(NormalizeFields.NAME, new NormalizeFields());

functions.put(DropMessage.NAME, new DropMessage());
Expand Down Expand Up @@ -1388,7 +1392,7 @@ public void stringEntropy() {
assertThat(message.getField("one_entropy")).isEqualTo(1.0D);
}

@ Test
@Test
public void notExpressionTypeCheck() {
try {
Rule rule = parser.parseRule(ruleForTest(), true);
Expand Down Expand Up @@ -1451,4 +1455,53 @@ public void setFields() {
assertThat(message.getField("k_5")).isEqualTo("v_5");
assertThat(message.getField("k_6")).isEqualTo("will be added with clean_fields param");
}

@Test
public void removeField() {
final Rule rule = parser.parseRule(ruleForTest(), true);
final Message message = new Message("test", "test", Tools.nowUTC());
evaluateRule(rule, message);

assertThat(message.getField("f1")).isNull();
assertThat(message.getField("f2")).isNull();
assertThat(message.getField("f3")).isNull();
assertThat(message.getField("i1")).isEqualTo("i1");
assertThat(message.getField("i2")).isEqualTo("i2");
}

@Test
public void removeSingleField() {
final Rule rule = parser.parseRule(ruleForTest(), true);
final Message message = new Message("test", "test", Tools.nowUTC());
evaluateRule(rule, message);

assertThat(message.getField("a.1")).isNull();
assertThat(message.getField("f1")).isNull();
assertThat(message.getField("a_1")).isEqualTo("a_1");
assertThat(message.getField("f2")).isEqualTo("f2");
}

@Test
public void removeFieldsByName() {
final Rule rule = parser.parseRule(ruleForTest(), true);
final Message message = new Message("test", "test", Tools.nowUTC());
evaluateRule(rule, message);

assertThat(message.getField("a.1")).isNull();
assertThat(message.getField("f1")).isNull();
assertThat(message.getField("a_1")).isEqualTo("a_1");
assertThat(message.getField("f2")).isEqualTo("f2");
}

@Test
public void removeFieldsByRegex() {
final Rule rule = parser.parseRule(ruleForTest(), true);
final Message message = new Message("test", "test", Tools.nowUTC());
evaluateRule(rule, message);

assertThat(message.getField("a.1")).isNull();
assertThat(message.getField("a_1")).isNull();
assertThat(message.getField("f2")).isNull();
assertThat(message.getField("f1")).isEqualTo("f1");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
rule "remove_field"
when true
then
set_field(field: "f1", value: "f1");
set_field(field: "f2", value: "f2");
set_field(field: "f3", value: "f3");
set_field(field: "i1", value: "i1");
set_field(field: "i2", value: "i2");

remove_field(field:"f1");
remove_field(field:"f.");
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
rule "remove_fields_by_name"
when true
then
set_field(field: "a.1", value: "a.1");
set_field(field: "a_1", value: "a_1");
set_field(field: "f1", value: "f1");
set_field(field: "f2", value: "f2");

remove_multiple_fields(names:["a.1", "f1"]);

// invalid - should be NOOP
remove_multiple_fields(names:["dummy"]);
remove_multiple_fields(names:[]);
remove_multiple_fields();
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
rule "remove_fields_by_regex_and_name)"
when true
then
set_field(field: "a.1", value: "a.1");
set_field(field: "a_1", value: "a_1");
set_field(field: "f1", value: "f1");
set_field(field: "f2", value: "f2");

remove_multiple_fields(pattern:"a.1", names:["f2"]);
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
rule "remove_single_field"
when true
then
set_field(field: "a.1", value: "a.1");
set_field(field: "a_1", value: "a_1");
set_field(field: "f1", value: "f1");
set_field(field: "f2", value: "f2");

remove_single_field(field:"a.1");
remove_single_field(field:"f1");

// invalid - should be NO-OP
remove_single_field(field:"f.");
remove_single_field(field:"dummy");
end