forked from elastic/elasticsearch
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Don't reject full document in case of invalid metadata
From original PR elastic#17 from @fcamblor. If you try to index a document with invalid metadata, the full document is rejected. For example: ```html <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> <html lang="fr"> <head> <title>Hello</title> <meta name="date" content=""> <meta name="Author" content="kimchy"> <meta name="Keywords" content="elasticsearch,cool,bonsai"> </head> <body>World</body> </html> ``` has a non-parseable date. This fix adds a new option that ignores parsing errors: `"index.mapping.attachment.ignore_errors":true` (defaults to `true`). Closes elastic#17, elastic#38.
- Loading branch information
Showing
7 changed files
with
194 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
89 changes: 89 additions & 0 deletions
89
src/test/java/org/elasticsearch/index/mapper/xcontent/MetadataMapperTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
package org.elasticsearch.index.mapper.xcontent; | ||
|
||
import org.apache.lucene.document.Document; | ||
import org.elasticsearch.common.bytes.BytesReference; | ||
import org.elasticsearch.common.settings.ImmutableSettings; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.index.Index; | ||
import org.elasticsearch.index.analysis.AnalysisService; | ||
import org.elasticsearch.index.mapper.DocumentMapper; | ||
import org.elasticsearch.index.mapper.DocumentMapperParser; | ||
import org.elasticsearch.index.mapper.MapperParsingException; | ||
import org.elasticsearch.index.mapper.attachment.AttachmentMapper; | ||
import org.testng.annotations.Test; | ||
|
||
import java.io.IOException; | ||
|
||
import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath; | ||
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath; | ||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; | ||
import static org.hamcrest.MatcherAssert.assertThat; | ||
import static org.hamcrest.Matchers.*; | ||
|
||
/** | ||
* Test for https://github.com/elasticsearch/elasticsearch-mapper-attachments/issues/38 | ||
*/ | ||
public class MetadataMapperTest { | ||
|
||
protected void checkDate(String filename, Settings settings, Long expected) throws IOException { | ||
DocumentMapperParser mapperParser = new DocumentMapperParser(new Index("test"), settings, new AnalysisService(new Index("test")), null, null); | ||
mapperParser.putTypeParser(AttachmentMapper.CONTENT_TYPE, new AttachmentMapper.TypeParser()); | ||
|
||
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/xcontent/test-mapping.json"); | ||
DocumentMapper docMapper = mapperParser.parse(mapping); | ||
byte[] html = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/xcontent/" + filename); | ||
|
||
BytesReference json = jsonBuilder() | ||
.startObject() | ||
.field("_id", 1) | ||
.startObject("file") | ||
.field("_name", "htmlWithoutDateMeta.html") | ||
.field("content", html) | ||
.endObject() | ||
.endObject().bytes(); | ||
|
||
Document doc = docMapper.parse(json).rootDoc(); | ||
assertThat(doc.get(docMapper.mappers().smartName("file").mapper().names().indexName()), containsString("World")); | ||
assertThat(doc.get(docMapper.mappers().smartName("file.name").mapper().names().indexName()), equalTo("htmlWithoutDateMeta.html")); | ||
if (expected == null) { | ||
assertThat(doc.getField(docMapper.mappers().smartName("file.date").mapper().names().indexName()), nullValue()); | ||
} else { | ||
assertThat(doc.getField(docMapper.mappers().smartName("file.date").mapper().names().indexName()).numericValue().longValue(), is(expected)); | ||
} | ||
assertThat(doc.get(docMapper.mappers().smartName("file.title").mapper().names().indexName()), equalTo("Hello")); | ||
assertThat(doc.get(docMapper.mappers().smartName("file.author").mapper().names().indexName()), equalTo("kimchy")); | ||
assertThat(doc.get(docMapper.mappers().smartName("file.keywords").mapper().names().indexName()), equalTo("elasticsearch,cool,bonsai")); | ||
assertThat(doc.get(docMapper.mappers().smartName("file.content_type").mapper().names().indexName()), equalTo("text/html; charset=ISO-8859-1")); | ||
} | ||
|
||
@Test | ||
public void testIgnoreWithoutDate() throws Exception { | ||
checkDate("htmlWithoutDateMeta.html", ImmutableSettings.builder().build(), null); | ||
} | ||
|
||
@Test | ||
public void testIgnoreWithEmptyDate() throws Exception { | ||
checkDate("htmlWithEmptyDateMeta.html", ImmutableSettings.builder().build(), null); | ||
} | ||
|
||
@Test | ||
public void testIgnoreWithCorrectDate() throws Exception { | ||
checkDate("htmlWithValidDateMeta.html", ImmutableSettings.builder().build(), 1354233600000L); | ||
} | ||
|
||
@Test | ||
public void testWithoutDate() throws Exception { | ||
checkDate("htmlWithoutDateMeta.html", ImmutableSettings.builder().put("index.mapping.attachment.ignore_errors", false).build(), null); | ||
} | ||
|
||
@Test(expectedExceptions = MapperParsingException.class) | ||
public void testWithEmptyDate() throws Exception { | ||
checkDate("htmlWithEmptyDateMeta.html", ImmutableSettings.builder().put("index.mapping.attachment.ignore_errors", false).build(), null); | ||
} | ||
|
||
@Test | ||
public void testWithCorrectDate() throws Exception { | ||
checkDate("htmlWithValidDateMeta.html", ImmutableSettings.builder().put("index.mapping.attachment.ignore_errors", false).build(), 1354233600000L); | ||
} | ||
|
||
} |
11 changes: 11 additions & 0 deletions
11
src/test/resources/org/elasticsearch/index/mapper/xcontent/htmlWithEmptyDateMeta.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" | ||
"http://www.w3.org/TR/html4/loose.dtd"> | ||
<html lang="fr"> | ||
<head> | ||
<title>Hello</title> | ||
<meta name="date" content=""> | ||
<meta name="Author" content="kimchy"> | ||
<meta name="Keywords" content="elasticsearch,cool,bonsai"> | ||
</head> | ||
<body>World</body> | ||
</html> |
11 changes: 11 additions & 0 deletions
11
src/test/resources/org/elasticsearch/index/mapper/xcontent/htmlWithValidDateMeta.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" | ||
"http://www.w3.org/TR/html4/loose.dtd"> | ||
<html lang="fr"> | ||
<head> | ||
<title>Hello</title> | ||
<meta name="date" content="2012-11-30"> | ||
<meta name="Author" content="kimchy"> | ||
<meta name="Keywords" content="elasticsearch,cool,bonsai"> | ||
</head> | ||
<body>World</body> | ||
</html> |
10 changes: 10 additions & 0 deletions
10
src/test/resources/org/elasticsearch/index/mapper/xcontent/htmlWithoutDateMeta.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" | ||
"http://www.w3.org/TR/html4/loose.dtd"> | ||
<html lang="fr"> | ||
<head> | ||
<title>Hello</title> | ||
<meta name="Author" content="kimchy"> | ||
<meta name="Keywords" content="elasticsearch,cool,bonsai"> | ||
</head> | ||
<body>World</body> | ||
</html> |
8 changes: 4 additions & 4 deletions
8
src/test/resources/org/elasticsearch/index/mapper/xcontent/test-mapping.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,9 @@ | ||
{ | ||
person:{ | ||
properties:{ | ||
"person":{ | ||
"properties":{ | ||
"file":{ | ||
type:"attachment" | ||
"type":"attachment" | ||
} | ||
} | ||
} | ||
} | ||
} |