Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: NathanNeff/njn-hadoop
base: 90e369b6bd
...
head fork: NathanNeff/njn-hadoop
compare: 6d2b32af3c
Checking mergeability… Don't worry, you can still create the pull request.
  • 2 commits
  • 7 files changed
  • 0 commit comments
  • 2 contributors
Commits on Nov 08, 2012
training Added wordcount and build.gradle! 7b0dbed
Commits on Dec 06, 2012
@NathanNeff groovy wordcount 6d2b32a
View
26 examples/groovy-wordcount/SumReducer.java
@@ -0,0 +1,26 @@
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+public class SumReducer extends MapReduceBase implements
+ Reducer<Text, IntWritable, Text, IntWritable> {
+
+ @Override
+ public void reduce(Text key, Iterator<IntWritable> values,
+ OutputCollector<Text, IntWritable> output, Reporter reporter)
+ throws IOException {
+
+ int wordCount = 0;
+ while (values.hasNext()) {
+ IntWritable value = values.next();
+ wordCount += value.get();
+ }
+ output.collect(key, new IntWritable(wordCount));
+ }
+}
View
49 examples/groovy-wordcount/WordCount.java
@@ -0,0 +1,49 @@
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordCount extends Configured implements Tool {
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 2) {
+ System.out.printf(
+ "Usage: %s [generic options] <input dir> <output dir>\n", getClass()
+ .getSimpleName());
+ ToolRunner.printGenericCommandUsage(System.out);
+ return -1;
+ }
+
+ JobConf conf = new JobConf(getConf(), WordCount.class);
+ conf.setJobName(this.getClass().getName());
+
+ FileInputFormat.setInputPaths(conf, new Path(args[0]));
+ FileOutputFormat.setOutputPath(conf, new Path(args[1]));
+
+ conf.setMapperClass(WordMapper.class);
+ conf.setReducerClass(SumReducer.class);
+
+ conf.setMapOutputKeyClass(Text.class);
+ conf.setMapOutputValueClass(IntWritable.class);
+
+ conf.setOutputKeyClass(Text.class);
+ conf.setOutputValueClass(IntWritable.class);
+
+ JobClient.runJob(conf);
+ return 0;
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new WordCount(), args);
+ System.exit(exitCode);
+ }
+}
View
25 examples/groovy-wordcount/WordMapper.groovy
@@ -0,0 +1,25 @@
+import java.io.IOException;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
/**
 * Mapper for the word-count job: tokenizes each input line and emits a
 * (word, 1) pair for every token.
 *
 * Keys are byte offsets into the input file (LongWritable) and are ignored.
 */
public class WordMapper extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        // Split on runs of non-word characters; a leading separator
        // produces an empty first token, hence the emptiness guard.
        final String[] tokens = value.toString().split("\\W+");
        for (final String token : tokens) {
            if (!token.isEmpty()) {
                output.collect(new Text(token), new IntWritable(1));
            }
        }
    }
}
View
21 wordcount/build.gradle
@@ -0,0 +1,21 @@
// http://docs.codehaus.org/display/GRADLE/Cookbook
// Example of the "too lazy to create dirs"
apply plugin: 'java'

repositories {
    mavenCentral()
    maven {
        // Fix: set the repository URL directly. The original wrapped the
        // string in a closure (`url { "..." }`), which assigns a Closure
        // object rather than the repository location.
        url "https://repository.cloudera.com/artifactory/cloudera-repos"
    }
}

dependencies {
    // CDH4 MR1 client; resolved from the Cloudera repository above.
    compile 'org.apache.hadoop:hadoop-client:2.0.0-mr1-cdh4.1.1'
}

// Convenience task: create the conventional source/resource directories
// for every source set so the project layout exists before first use.
task "create-dirs" << {
    sourceSets*.java.srcDirs*.each { it.mkdirs() }
    sourceSets*.resources.srcDirs*.each { it.mkdirs() }
}
View
26 wordcount/src/main/java/SumReducer.java
@@ -0,0 +1,26 @@
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+public class SumReducer extends MapReduceBase implements
+ Reducer<Text, IntWritable, Text, IntWritable> {
+
+ @Override
+ public void reduce(Text key, Iterator<IntWritable> values,
+ OutputCollector<Text, IntWritable> output, Reporter reporter)
+ throws IOException {
+
+ int wordCount = 0;
+ while (values.hasNext()) {
+ IntWritable value = values.next();
+ wordCount += value.get();
+ }
+ output.collect(key, new IntWritable(wordCount));
+ }
+}
View
49 wordcount/src/main/java/WordCount.java
@@ -0,0 +1,49 @@
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class WordCount extends Configured implements Tool {
+
+ @Override
+ public int run(String[] args) throws Exception {
+
+ if (args.length != 2) {
+ System.out.printf(
+ "Usage: %s [generic options] <input dir> <output dir>\n", getClass()
+ .getSimpleName());
+ ToolRunner.printGenericCommandUsage(System.out);
+ return -1;
+ }
+
+ JobConf conf = new JobConf(getConf(), WordCount.class);
+ conf.setJobName(this.getClass().getName());
+
+ FileInputFormat.setInputPaths(conf, new Path(args[0]));
+ FileOutputFormat.setOutputPath(conf, new Path(args[1]));
+
+ conf.setMapperClass(WordMapper.class);
+ conf.setReducerClass(SumReducer.class);
+
+ conf.setMapOutputKeyClass(Text.class);
+ conf.setMapOutputValueClass(IntWritable.class);
+
+ conf.setOutputKeyClass(Text.class);
+ conf.setOutputValueClass(IntWritable.class);
+
+ JobClient.runJob(conf);
+ return 0;
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new WordCount(), args);
+ System.exit(exitCode);
+ }
+}
View
25 wordcount/src/main/java/WordMapper.java
@@ -0,0 +1,25 @@
+import java.io.IOException;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+public class WordMapper extends MapReduceBase implements
+ Mapper<LongWritable, Text, Text, IntWritable> {
+
+ @Override
+ public void map(LongWritable key, Text value,
+ OutputCollector<Text, IntWritable> output, Reporter reporter)
+ throws IOException {
+ String s = value.toString();
+ for (String word : s.split("\\W+")) {
+ if (word.length() > 0) {
+ output.collect(new Text(word), new IntWritable(1));
+ }
+ }
+ }
+}

No commit comments for this range

Something went wrong with that request. Please try again.