Skip to content

Commit

Permalink
Improve detection of Hadoop taskID
Browse files Browse the repository at this point in the history
Hadoop 2.5.x introduced a bug where the task attempt id is reported as
the task id. To work around this, the code first looks for the task
attempt id and only then falls back to the task id.

relates #280

(cherry picked from commit a2084e2)
  • Loading branch information
costin committed Sep 25, 2014
1 parent 7118caf commit 11e558b
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion mr/src/main/java/org/elasticsearch/hadoop/mr/HeartBeat.java
Expand Up @@ -24,9 +24,11 @@

import org.apache.commons.logging.Log;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.util.Progressable;
import org.elasticsearch.hadoop.util.Assert;
import org.elasticsearch.hadoop.util.StringUtils;
import org.elasticsearch.hadoop.util.unit.TimeValue;

/**
Expand All @@ -48,7 +50,21 @@ class HeartBeat {
this.progressable = progressable;
this.rate = new TimeValue(tv.getMillis() - delay.getMillis(), TimeUnit.MILLISECONDS);
this.log = log;
TaskID taskID = TaskID.forName(HadoopCfgUtils.getTaskId(cfg));

TaskID taskID = null;
// first try with the attempt since some Hadoop versions mix the two

String taskAttemptId = HadoopCfgUtils.getTaskAttemptId(cfg);
if (StringUtils.hasText(taskAttemptId)) {
taskID = TaskAttemptID.forName(taskAttemptId).getTaskID();
}
else {
String taskIdProp = HadoopCfgUtils.getTaskId(cfg);
// double-check task id bug in Hadoop 2.5.x
if (StringUtils.hasText(taskIdProp) && !taskIdProp.contains("attempt")) {
taskID = TaskID.forName(taskIdProp);
}
}

String taskId;
if (taskID == null) {
Expand Down

0 comments on commit 11e558b

Please sign in to comment.