Skip to content

Commit

Permalink
[PIG] Case sensitivity is now preserved from Pig to ES
Browse files Browse the repository at this point in the history
relates elastic#380
  • Loading branch information
costin committed Jun 1, 2015
1 parent 5212a9f commit befb758
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ static Collection<String> columnToAlias(Settings settings) {
}

static FieldAlias alias(Settings settings) {
Map<String, String> aliasMap = SettingsUtils.aliases(settings.getProperty(HiveConstants.MAPPING_NAMES));
Map<String, String> aliasMap = SettingsUtils.aliases(settings.getProperty(HiveConstants.MAPPING_NAMES), true);

// add default aliases for serialization (_colX -> mapping name)
Map<String, String> columnMap = columnMap(settings);
Expand All @@ -114,7 +114,7 @@ static FieldAlias alias(Settings settings) {
aliasMap.put(columnIndex, columnName);
}

return new FieldAlias(aliasMap);
return new FieldAlias(aliasMap, true);
}

static Map<String, String> columnMap(Settings settings) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public class HiveValueWriter extends FilteringValueWriter<HiveType> {
public HiveValueWriter() {
this.writeUnknownTypes = false;
this.writableWriter = new HiveWritableValueWriter(false);
this.alias = new FieldAlias();
this.alias = new FieldAlias(true);
}

@Override
Expand Down
11 changes: 6 additions & 5 deletions mr/src/main/java/org/elasticsearch/hadoop/util/FieldAlias.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,21 @@
public class FieldAlias {

private final Map<String, String> fieldToAlias;
private final boolean caseInsensitive;

public FieldAlias() {
this.fieldToAlias = new LinkedHashMap<String, String>();
public FieldAlias(boolean caseInsensitive) {
this(new LinkedHashMap<String, String>(), caseInsensitive);
}

public FieldAlias(Map<String, String> alias) {
public FieldAlias(Map<String, String> alias, boolean caseInsensitive) {
this.fieldToAlias = alias;
this.caseInsensitive = caseInsensitive;
}

public String toES(String string) {
String alias = fieldToAlias.get(string);
if (alias == null) {
// ES fields are all lowercase
alias = string.toLowerCase(Locale.ENGLISH);
alias = (caseInsensitive ? string.toLowerCase(Locale.ENGLISH) : string);
fieldToAlias.put(string, alias);
}
return alias;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public static List<String> discoveredOrDeclaredNodes(Settings settings) {
return (StringUtils.hasText(discoveredNodes) ? StringUtils.tokenize(discoveredNodes) : declaredNodes(settings));
}

public static Map<String, String> aliases(String definition) {
public static Map<String, String> aliases(String definition, boolean caseInsensitive) {
List<String> aliases = StringUtils.tokenize(definition);

Map<String, String> aliasMap = new LinkedHashMap<String, String>();
Expand All @@ -99,9 +99,8 @@ public static Map<String, String> aliases(String definition) {
int index = string.indexOf(":");
if (index > 0) {
String key = string.substring(0, index);
// save the lower case version as well since Hive does that for top-level keys
aliasMap.put(key, string.substring(index + 1));
aliasMap.put(key.toLowerCase(Locale.ENGLISH), string.substring(index + 1));
aliasMap.put(caseInsensitive ? key.toLowerCase(Locale.ENGLISH) : key, string.substring(index + 1));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,25 @@ public void testFieldAlias() throws Exception {

@Test
public void testFieldAliasMapping() throws Exception {
assertThat(RestUtils.getMapping("pig/fieldalias").skipHeaders().toString(), is("fieldalias=[@name=STRING, @timestamp=DATE, picture=STRING, url=STRING]"));
assertThat(RestUtils.getMapping("pig/fieldalias").skipHeaders().toString(), is("fieldalias=[@timestamp=DATE, name=STRING, picture=STRING, url=STRING]"));
}

@Test
public void testCaseSensitivity() throws Exception {
    // The schema uses mixed-case field names (Name, uRL, pIctUre) and only those
    // three are projected and stored into the 'pig/casesensitivity' resource.
    String register = "REGISTER " + Provisioner.ESHADOOP_TESTING_JAR + ";";
    String load = "A = LOAD '" + TestUtils.sampleArtistsDat() + "' USING PigStorage() AS (id:long, Name:chararray, uRL:chararray, pIctUre: chararray, timestamp: chararray); ";
    String project = "B = FOREACH A GENERATE Name, uRL, pIctUre;";
    String illustrate = "ILLUSTRATE B;";
    String store = "STORE B INTO 'pig/casesensitivity' USING org.elasticsearch.hadoop.pig.EsStorage();";

    pig.executeScript(register + load + project + illustrate + store);
}

@Test
public void testCaseSensitivityMapping() throws Exception {
    // Reads back the mapping of 'pig/casesensitivity' and expects the field
    // names with their original mixed casing (Name, pIctUre, uRL).
    String expected = "casesensitivity=[Name=STRING, pIctUre=STRING, uRL=STRING]";
    String actual = RestUtils.getMapping("pig/casesensitivity").skipHeaders().toString();
    assertThat(actual, is(expected));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ static Object convertFromES(Long esDate) {
}

static FieldAlias alias(Settings settings) {
return new FieldAlias(SettingsUtils.aliases(settings.getProperty(MAPPING_NAMES)));
return new FieldAlias(SettingsUtils.aliases(settings.getProperty(MAPPING_NAMES), false), false);
}

static String asProjection(Schema schema, Properties props) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public PigValueWriter() {
public PigValueWriter(boolean useTupleFieldNames) {
writeUnknownTypes = false;
this.useTupleFieldNames = useTupleFieldNames;
alias = new FieldAlias();
alias = new FieldAlias(false);
}

@Override
Expand Down

0 comments on commit befb758

Please sign in to comment.