Skip to content

Commit

Permalink
Using consistent collection of non displayable fields to filter again…
Browse files Browse the repository at this point in the history
…st. (#3585)

Before this change, the "source" field was suppressed from being
returned (and therefore) displayed in decorated results. This was caused
by some fields included in the `RESERVED_FIELDS` set, which were not
added back from the decorator processor before returning the list of fields included
in the search result.

This change could have extracted the method used in `SearchResult.java`
into a commonly used helper method used in both code points. This would
have involved a lot of casting/conversion between different
(`ResultMessage`/`ResultMessageSummary`) data types for each returned
message which is decorated, so the impact of deduplication would be
bigger than its benefits. Instead, I cleaned up and (hopefully) made the
sets of fields more expressive and easier to use.

Fixes #3584
(cherry picked from commit a724a27)
  • Loading branch information
dennisoelkers committed Mar 28, 2017
1 parent 448d032 commit eb0878b
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 65 deletions.
Expand Up @@ -95,7 +95,7 @@ private String getMessageKey(ResultMessageSummary messageSummary) {
private Set<String> extractFields(List<ResultMessageSummary> messages) {
return messages.stream()
.flatMap(message -> message.message().keySet().stream())
.filter(field -> !Message.RESERVED_FIELDS.contains(field))
.filter(field -> !Message.FILTERED_FIELDS.contains(field))
.collect(Collectors.toSet());
}

Expand Down
Expand Up @@ -16,14 +16,14 @@
*/
package org.graylog2.indexer.results;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.search.SearchHits;
import org.graylog2.indexer.ranges.IndexRange;
import org.graylog2.plugin.Message;

import java.util.Iterator;
import java.util.List;
import java.util.Set;

Expand All @@ -45,45 +45,31 @@ public SearchResult(SearchHits searchHits, Set<IndexRange> usedIndices, String o
this.totalResults = searchHits.getTotalHits();
this.usedIndices = usedIndices;
}

public long getTotalResults() {
return totalResults;
}

public List<ResultMessage> getResults() {
return results;
}

public Set<String> getFields() {
return fields;
}

private Set<String> extractFields(List<ResultMessage> hits) {
@VisibleForTesting
Set<String> extractFields(List<ResultMessage> hits) {
Set<String> filteredFields = Sets.newHashSet();
Set<String> allFields = Sets.newHashSet();

Iterator<ResultMessage> i = hits.iterator();
while(i.hasNext()) {
final Message message = i.next().getMessage();
allFields.addAll(message.getFieldNames());

hits.forEach(hit -> {
final Message message = hit.getMessage();
for (String field : message.getFieldNames()) {
if (!Message.RESERVED_FIELDS.contains(field)) {
if (!Message.FILTERED_FIELDS.contains(field)) {
filteredFields.add(field);
}
}
}

// Because some fields actually make sense in this result and some don't.
// TODO: This is super awkward. First we do not include RESERVED_FIELDS, then we add some back...
if (allFields.contains("message")) {
filteredFields.add("message");
}
if (allFields.contains("source")) {
filteredFields.add("source");
}
filteredFields.remove("streams");
filteredFields.remove("full_message");
});

return filteredFields;
}
Expand Down
87 changes: 47 additions & 40 deletions graylog2-server/src/main/java/org/graylog2/plugin/Message.java
Expand Up @@ -68,49 +68,56 @@ public class Message implements Messages {
private static final Pattern VALID_KEY_CHARS = Pattern.compile("^[\\w\\.\\-@]*$");
private static final char KEY_REPLACEMENT_CHAR = '_';

public static final ImmutableSet<String> RESERVED_FIELDS = ImmutableSet.of(
// ElasticSearch fields.
FIELD_ID,
"_ttl",
"_source",
"_all",
"_index",
"_type",
"_score",

// Our reserved fields.
FIELD_MESSAGE,
FIELD_SOURCE,
FIELD_TIMESTAMP,
"gl2_source_node",
"gl2_source_input",
"gl2_source_collector",
"gl2_source_collector_input",
"gl2_remote_ip",
"gl2_remote_port",
"gl2_remote_hostname",
// TODO Due to be removed in Graylog 3.x
"gl2_source_radio",
"gl2_source_radio_input"
private static final ImmutableSet<String> GRAYLOG_FIELDS = ImmutableSet.of(
"gl2_source_node",
"gl2_source_input",
// TODO Due to be removed in Graylog 3.x
"gl2_source_radio",
"gl2_source_radio_input",

"gl2_source_collector",
"gl2_source_collector_input",
"gl2_remote_ip",
"gl2_remote_port",
"gl2_remote_hostname"
);

public static final ImmutableSet<String> RESERVED_SETTABLE_FIELDS = ImmutableSet.of(
FIELD_MESSAGE,
FIELD_SOURCE,
FIELD_TIMESTAMP,
"gl2_source_node",
"gl2_source_input",
"gl2_source_radio",
"gl2_source_radio_input",
"gl2_source_collector",
"gl2_source_collector_input",
"gl2_remote_ip",
"gl2_remote_port",
"gl2_remote_hostname"
private static final ImmutableSet<String> CORE_MESSAGE_FIELDS = ImmutableSet.of(
FIELD_MESSAGE,
FIELD_SOURCE,
FIELD_TIMESTAMP
);

private static final ImmutableSet<String> ES_FIELDS = ImmutableSet.of(
// ElasticSearch fields.
FIELD_ID,
"_ttl",
"_source",
"_all",
"_index",
"_type",
"_score"
);

public static final ImmutableSet<String> RESERVED_SETTABLE_FIELDS = new ImmutableSet.Builder<String>()
.addAll(GRAYLOG_FIELDS)
.addAll(CORE_MESSAGE_FIELDS)
.build();

public static final ImmutableSet<String> RESERVED_FIELDS = new ImmutableSet.Builder<String>()
.addAll(RESERVED_SETTABLE_FIELDS)
.addAll(ES_FIELDS)
.build();

public static final ImmutableSet<String> FILTERED_FIELDS = new ImmutableSet.Builder<String>()
.addAll(GRAYLOG_FIELDS)
.addAll(ES_FIELDS)
.add(FIELD_STREAMS)
.add(FIELD_FULL_MESSAGE)
.build();

private static final ImmutableSet<String> REQUIRED_FIELDS = ImmutableSet.of(
FIELD_MESSAGE, FIELD_ID
FIELD_MESSAGE, FIELD_ID
);

public static final Function<Message, String> ID_FUNCTION = new MessageIdFunction();
Expand Down Expand Up @@ -198,7 +205,7 @@ public Map<String, Object> toElasticSearchObject(@Nonnull final Meter invalidTim
obj.put(newKey, entry.getValue());
} else {
LOG.warn("Keys must not contain a \".\" character! Ignoring field \"{}\"=\"{}\" in message [{}] - Unable to replace \".\" with a \"{}\" because of key conflict: \"{}\"=\"{}\"",
key, entry.getValue(), getId(), KEY_REPLACEMENT_CHAR, newKey, obj.get(newKey));
key, entry.getValue(), getId(), KEY_REPLACEMENT_CHAR, newKey, obj.get(newKey));
LOG.debug("Full message with \".\" in message key: {}", this);
}
} else {
Expand All @@ -207,7 +214,7 @@ public Map<String, Object> toElasticSearchObject(@Nonnull final Meter invalidTim
// Deliberate warning duplicates because the key with the "." might be transformed before reaching
// the duplicate original key with a "_". Otherwise we would silently overwrite the transformed key.
LOG.warn("Keys must not contain a \".\" character! Ignoring field \"{}\"=\"{}\" in message [{}] - Unable to replace \".\" with a \"{}\" because of key conflict: \"{}\"=\"{}\"",
newKey, fields.get(newKey), getId(), KEY_REPLACEMENT_CHAR, key, entry.getValue());
newKey, fields.get(newKey), getId(), KEY_REPLACEMENT_CHAR, key, entry.getValue());
LOG.debug("Full message with \".\" in message key: {}", this);
}
obj.put(key, entry.getValue());
Expand Down
@@ -0,0 +1,91 @@
/**
* This file is part of Graylog.
*
* Graylog is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Graylog is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Graylog. If not, see <http://www.gnu.org/licenses/>.
*/
package org.graylog2.indexer.results;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.elasticsearch.search.SearchHits;
import org.graylog2.plugin.Message;
import org.junit.Before;
import org.junit.Test;

import java.util.Collections;
import java.util.Set;

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class SearchResultTest {
private SearchResult searchResult;

@Before
public void setUp() throws Exception {
final SearchHits searchHits = mock(SearchHits.class);
when(searchHits.iterator()).thenReturn(Collections.emptyIterator());
this.searchResult = new SearchResult(searchHits, null, null, null, null);
}

@Test
public void extractFieldsForEmptyResult() throws Exception {
final Set<String> result = searchResult.extractFields(Collections.emptyList());

assertThat(result)
.isNotNull()
.isEmpty();
}

@Test
public void extractFieldsForTwoMessagesContainingDifferentFields() throws Exception {
final ResultMessage r1 = mock(ResultMessage.class);
final Message m1 = mock(Message.class);
when(m1.getFieldNames()).thenReturn(ImmutableSet.of(
"message",
"source",
"timestamp",
"http_response",
"gl2_source_node",
"_index"
));
when(r1.getMessage()).thenReturn(m1);

final ResultMessage r2 = mock(ResultMessage.class);
final Message m2 = mock(Message.class);
when(m2.getFieldNames()).thenReturn(ImmutableSet.of(
"message",
"source",
"timestamp",
"took_ms",
"gl2_source_collector"
));
when(r2.getMessage()).thenReturn(m2);

final Set<String> result = searchResult.extractFields(ImmutableList.of(r1, r2));

assertThat(result)
.isNotNull()
.isNotEmpty()
.hasSize(5)
.containsExactlyInAnyOrder(
"message",
"source",
"timestamp",
"http_response",
"took_ms"
);
}
}

0 comments on commit eb0878b

Please sign in to comment.