Skip to content

Commit

Permalink
initial support for grok patterns in extractors
Browse files Browse the repository at this point in the history
* change the internal extractor interfaces to support multiple results, and make the original behavior a special case of that
* no externally visible changes are made; all existing extractors will work just as before
* the opportunity to support multiple matcher groups is there, but it needs significant UI work
* the grok pattern list is static right now
* missing tests for corner cases
  • Loading branch information
kroepke committed Jan 4, 2015
1 parent 60335d1 commit 640a8e4
Show file tree
Hide file tree
Showing 13 changed files with 440 additions and 23 deletions.
Expand Up @@ -24,13 +24,18 @@


import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer; import com.codahale.metrics.Timer;
import com.google.common.base.Predicate;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap;
import org.graylog2.plugin.Message; import org.graylog2.plugin.Message;
import org.graylog2.plugin.database.EmbeddedPersistable; import org.graylog2.plugin.database.EmbeddedPersistable;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;


import java.util.Arrays;
import java.util.Comparator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
Expand All @@ -55,12 +60,14 @@ public abstract class Extractor implements EmbeddedPersistable {
public static final String FIELD_CONVERTERS = "converters"; public static final String FIELD_CONVERTERS = "converters";
public static final String FIELD_CONVERTER_TYPE = "type"; public static final String FIELD_CONVERTER_TYPE = "type";
public static final String FIELD_CONVERTER_CONFIG = "config"; public static final String FIELD_CONVERTER_CONFIG = "config";
public static final ResultPredicate VALUE_NULL_PREDICATE = new ResultPredicate();


public enum Type { public enum Type {
SUBSTRING, SUBSTRING,
REGEX, REGEX,
SPLIT_AND_INDEX, SPLIT_AND_INDEX,
COPY_INPUT COPY_INPUT,
GROK
} }


public enum CursorStrategy { public enum CursorStrategy {
Expand Down Expand Up @@ -96,7 +103,7 @@ public enum ConditionType {
private final String totalTimerName; private final String totalTimerName;
private final String converterTimerName; private final String converterTimerName;


protected abstract Result run(String field); protected abstract Result[] run(String field);


protected final MetricRegistry metricRegistry; protected final MetricRegistry metricRegistry;


Expand Down Expand Up @@ -148,7 +155,7 @@ public void runExtractor(Message msg) {
return; return;
} }


String field = (String) msg.getField(sourceField); final String field = (String) msg.getField(sourceField);


// Decide if to extract at all. // Decide if to extract at all.
if (conditionType.equals(ConditionType.STRING)) { if (conditionType.equals(ConditionType.STRING)) {
Expand All @@ -163,28 +170,44 @@ public void runExtractor(Message msg) {


final Timer.Context timerContext = metricRegistry.timer(getTotalTimerName()).time(); final Timer.Context timerContext = metricRegistry.timer(getTotalTimerName()).time();


Result result = run(field); final Result[] results = run(field);


if (result == null || result.getValue() == null) { if (results == null || results.length == 0 || FluentIterable.of(results).anyMatch(VALUE_NULL_PREDICATE)) {
timerContext.close(); timerContext.close();
return; return;
} else if (results.length == 1) {
msg.addField(targetField, results[0].getValue());
} else { } else {
msg.addField(targetField, result.getValue()); for (final Result result : results) {
msg.addField(result.getTarget(), result.getValue());
}
} }


// Remove original from message? // Remove original from message?
if (cursorStrategy.equals(CursorStrategy.CUT) && !targetField.equals(sourceField) && !Message.RESERVED_FIELDS.contains(sourceField)) { if (cursorStrategy.equals(CursorStrategy.CUT) && !targetField.equals(sourceField) && !Message.RESERVED_FIELDS.contains(sourceField) && results[0].beginIndex != -1) {
StringBuilder sb = new StringBuilder(field); final StringBuilder sb = new StringBuilder(field);


sb.delete(result.getBeginIndex(), result.getEndIndex()); final ImmutableList<Result> reverseList = FluentIterable.from(Arrays.asList(results)).toSortedList(new Comparator<Result>() {
@Override
public int compare(Result left, Result right) {
    // Descending order by end index: the operands are swapped so that the
    // result ending furthest to the right sorts first.
    return Integer.compare(right.endIndex, left.endIndex);
}
});
// remove all from reverse so that the indices still match
for (final Result result : reverseList) {
sb.delete(result.getBeginIndex(), result.getEndIndex());
}


String finalResult = sb.toString(); String finalResult = sb.toString();


if (finalResult.isEmpty()) { // also ignore pure whitespace
if (finalResult.trim().isEmpty()) {
finalResult = "fullyCutByExtractor"; finalResult = "fullyCutByExtractor";
} }


msg.removeField(sourceField); msg.removeField(sourceField);
// TODO don't add an empty field back, or rather don't add fullyCutByExtractor
msg.addField(sourceField, finalResult); msg.addField(sourceField, finalResult);
} }


Expand Down Expand Up @@ -212,7 +235,7 @@ public void runConverters(Message msg) {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
final Map<String, Object> convert = final Map<String, Object> convert =
(Map<String, Object>) converter.convert((String) msg.getField(targetField)); (Map<String, Object>) converter.convert((String) msg.getField(targetField));
for (String reservedField : Message.RESERVED_FIELDS) { for (final String reservedField : Message.RESERVED_FIELDS) {
if (convert.containsKey(reservedField)) { if (convert.containsKey(reservedField)) {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug( LOG.debug(
Expand Down Expand Up @@ -347,11 +370,17 @@ public void incrementExceptions() {
public static class Result { public static class Result {


private final String value; private final String value;
private final String target;
private final int beginIndex; private final int beginIndex;
private final int endIndex; private final int endIndex;


public Result(String value, int beginIndex, int endIndex) { public Result(String value, int beginIndex, int endIndex) {
this(value, null, beginIndex, endIndex);
}

public Result(String value, String target, int beginIndex, int endIndex) {
this.value = value; this.value = value;
this.target = target;
this.beginIndex = beginIndex; this.beginIndex = beginIndex;
this.endIndex = endIndex; this.endIndex = endIndex;
} }
Expand All @@ -360,6 +389,10 @@ public String getValue() {
return value; return value;
} }


/**
 * Returns the target name that was supplied when this result was constructed.
 * <p>
 * Results created through the three-argument constructor carry no target,
 * so this returns {@code null} for them.
 *
 * @return the target name, or {@code null} if none was given
 */
public String getTarget() {
return target;
}

public int getBeginIndex() { public int getBeginIndex() {
return beginIndex; return beginIndex;
} }
Expand All @@ -369,4 +402,9 @@ public int getEndIndex() {
} }


} }

/**
 * Predicate that matches extraction results which carry no usable value.
 * <p>
 * A result matches when it is {@code null} itself or when its value is
 * {@code null}. Treating a {@code null} element as "no value" keeps callers
 * that scan a {@code Result[]} from failing with a
 * {@link NullPointerException} on a sparsely filled array.
 */
private static class ResultPredicate implements Predicate<Result> {
    /**
     * @param input the result to inspect, may be {@code null}
     * @return {@code true} if {@code input} is {@code null} or has a {@code null} value
     */
    @Override
    public boolean apply(Result input) {
        return input == null || input.getValue() == null;
    }
}
} }
Expand Up @@ -55,7 +55,8 @@ public enum Type {
SUBSTRING("Substring"), SUBSTRING("Substring"),
REGEX("Regular expression"), REGEX("Regular expression"),
SPLIT_AND_INDEX("Split & Index"), SPLIT_AND_INDEX("Split & Index"),
COPY_INPUT("Copy Input"); COPY_INPUT("Copy Input"),
GROK("Grok pattern");
private final String description; private final String description;


Type(String description) { Type(String description) {
Expand Down Expand Up @@ -201,6 +202,8 @@ public void loadConfigFromForm(Type extractorType, Map<String,String[]> form) {
break; break;
case SPLIT_AND_INDEX: case SPLIT_AND_INDEX:
loadSplitAndIndexConfig(form); loadSplitAndIndexConfig(form);
case GROK:
loadGrokConfig(form);
break; break;
} }
} }
Expand Down Expand Up @@ -333,6 +336,14 @@ private void loadSplitAndIndexConfig(Map<String,String[]> form) {
extractorConfig.put("split_by", form.get("split_by")[0]); extractorConfig.put("split_by", form.get("split_by")[0]);
extractorConfig.put("index", Integer.parseInt(form.get("index")[0])); extractorConfig.put("index", Integer.parseInt(form.get("index")[0]));
} }

/**
 * Copies the grok pattern from the submitted form into the extractor configuration.
 *
 * @param form the submitted form values, keyed by field name
 * @throws RuntimeException if the form has no non-empty {@code grok_pattern} entry
 */
private void loadGrokConfig(Map<String,String[]> form) {
    final boolean patternPresent = formFieldSet(form, "grok_pattern");
    if (patternPresent) {
        // Only the first submitted value is used.
        final String[] submitted = form.get("grok_pattern");
        extractorConfig.put("grok_pattern", submitted[0]);
        return;
    }

    throw new RuntimeException("Missing extractor config: grok_pattern");
}


private boolean formFieldSet(Map<String,String[]> form, String key) { private boolean formFieldSet(Map<String,String[]> form, String key) {
return form.get(key) != null && form.get(key)[0] != null && !form.get(key)[0].isEmpty(); return form.get(key) != null && form.get(key)[0] != null && !form.get(key)[0].isEmpty();
Expand Down
@@ -0,0 +1,34 @@
/*
* Copyright 2015 TORCH GmbH
*
* This file is part of Graylog2.
*
* Graylog2 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Graylog2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Graylog2. If not, see <http://www.gnu.org/licenses/>.
*/
package org.graylog2.restclient.models.api.responses;

import java.util.List;

/**
 * Response model for a grok pattern test.
 * <p>
 * Plain data holder with public fields and no behavior.
 * NOTE(review): the field meanings below are inferred from their names; confirm
 * against the server-side resource that produces this response.
 */
public class GrokTestResponse {
// presumably the grok pattern that was tested
public String pattern;
// presumably whether the pattern matched the input
public boolean matched;
// presumably the input string the pattern was tested against
public String string;
// the individual matches reported for the test
public List<Match> matches;

// Must be a static nested class so Jackson can instantiate it during deserialization.
public static class Match {
// presumably the name of the matched capture — TODO confirm
public String name;
// presumably the text that was matched — TODO confirm
public String match;
}
}
4 changes: 4 additions & 0 deletions graylog2-server/pom.xml
Expand Up @@ -277,6 +277,10 @@
<artifactId>commons-codec</artifactId> <artifactId>commons-codec</artifactId>
</dependency> </dependency>


<dependency>
<groupId>io.thekraken</groupId>
<artifactId>grok</artifactId>
</dependency>
<dependency> <dependency>
<groupId>com.atlassian.ip</groupId> <groupId>com.atlassian.ip</groupId>
<artifactId>atlassian-ip</artifactId> <artifactId>atlassian-ip</artifactId>
Expand Down
Expand Up @@ -41,7 +41,7 @@ public CopyInputExtractor(MetricRegistry metricRegistry, String id, String title
} }


@Override @Override
protected Result run(String value) { protected Result[] run(String value) {
return new Result(value, 0, value.length()); return new Result[] { new Result(value, 0, value.length())};
} }
} }
Expand Up @@ -17,11 +17,11 @@
package org.graylog2.inputs.extractors; package org.graylog2.inputs.extractors;


import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.MetricRegistry;
import javax.inject.Inject;
import org.graylog2.ConfigurationException; import org.graylog2.ConfigurationException;
import org.graylog2.plugin.inputs.Converter; import org.graylog2.plugin.inputs.Converter;
import org.graylog2.plugin.inputs.Extractor; import org.graylog2.plugin.inputs.Extractor;


import javax.inject.Inject;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;


Expand All @@ -36,7 +36,6 @@ public ExtractorFactory(MetricRegistry metricRegistry) {
this.metricRegistry = metricRegistry; this.metricRegistry = metricRegistry;
} }


// TODO: This parameter list is growing a bit out of control.
public Extractor factory(String id, public Extractor factory(String id,
String title, String title,
int order, int order,
Expand All @@ -50,6 +49,7 @@ public Extractor factory(String id,
String conditionValue) String conditionValue)
throws NoSuchExtractorException, Extractor.ReservedFieldException, ConfigurationException { throws NoSuchExtractorException, Extractor.ReservedFieldException, ConfigurationException {


// TODO convert to guice factory
switch (type) { switch (type) {
case REGEX: case REGEX:
return new RegexExtractor(metricRegistry, id, title, order, cursorStrategy, sourceField, targetField, extractorConfig, creatorUserId, converters, conditionType, conditionValue); return new RegexExtractor(metricRegistry, id, title, order, cursorStrategy, sourceField, targetField, extractorConfig, creatorUserId, converters, conditionType, conditionValue);
Expand All @@ -59,6 +59,8 @@ public Extractor factory(String id,
return new SplitAndIndexExtractor(metricRegistry, id, title, order, cursorStrategy, sourceField, targetField, extractorConfig, creatorUserId, converters, conditionType, conditionValue); return new SplitAndIndexExtractor(metricRegistry, id, title, order, cursorStrategy, sourceField, targetField, extractorConfig, creatorUserId, converters, conditionType, conditionValue);
case COPY_INPUT: case COPY_INPUT:
return new CopyInputExtractor(metricRegistry, id, title, order, cursorStrategy, sourceField, targetField, extractorConfig, creatorUserId, converters, conditionType, conditionValue); return new CopyInputExtractor(metricRegistry, id, title, order, cursorStrategy, sourceField, targetField, extractorConfig, creatorUserId, converters, conditionType, conditionValue);
case GROK:
return new GrokExtrator(metricRegistry, id, title, order, cursorStrategy, sourceField, targetField, extractorConfig, creatorUserId, converters, conditionType, conditionValue);
default: default:
throw new NoSuchExtractorException(); throw new NoSuchExtractorException();
} }
Expand Down

0 comments on commit 640a8e4

Please sign in to comment.