Permalink
Browse files

Update examples for API changes.

Update PageRank and WordCount examples for changes in the API,
specifically the change to ScoobiApp and the introduction of DObject.

Also remove all other examples as they have been ported to be acceptance
test specs.
  • Loading branch information...
1 parent 7dafbb9 commit c1fb514ea4f77c773c532a5e83cfeabedf34eebf @blever blever committed with espringe May 29, 2012
View
@@ -27,7 +27,7 @@ To use the sbt-scoobi plugin we need to include a `project/project/scoobi.scala`
}
```
-And, we can add a pretty standard `build.sbt` that has a dependency on Scoobi:
+And, we can add a `build.sbt` that has a dependency on Scoobi:
```scala
name := "MyApp"
@@ -38,7 +38,9 @@ And, we can add a pretty standard `build.sbt` that has a dependency on Scoobi:
libraryDependencies += "com.nicta" %% "scoobi" % "0.4.0-SNAPSHOT" % "provided"
- scalacOptions += "-deprecation"
+ scalacOptions ++= Seq("-Ydependent-method-types", "-deprecation")
+
+ resolvers += "snapshots" at "http://oss.sonatype.org/content/repositories/snapshots"
```
The `provided` is added to the `scoobi` dependency to let sbt know that Scoobi
@@ -1,7 +0,0 @@
-name := "Average Age Calculator"
-
-version := "0.1"
-
-scalaVersion := "2.9.2"
-
-libraryDependencies += "com.nicta" %% "scoobi" % "0.4.0-SNAPSHOT" % "provided"
@@ -1,7 +0,0 @@
-import sbt._
-
-object Plugins extends Build {
- lazy val root = Project("root", file(".")) dependsOn(
- uri("git://github.com/NICTA/sbt-scoobi.git#master")
- )
-}
@@ -1,94 +0,0 @@
-/**
- * Copyright 2011 National ICT Australia Limited
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.nicta.scoobi.examples
-
-import com.nicta.scoobi.Scoobi._
-import java.io._
-
-/*
- * This example takes a list of names and ages in the form: <id>, <firstName>, <secondName>, <age>
- * then gets the average age for each first name.
- */
-
-object AverageAge extends ScoobiApp {
-
- if (!new File("output-dir").mkdir) {
- sys.error("Could not make output-dir for results. Perhaps it already exists (and you should delete/rename the old one)")
- }
-
- val fileName = "output-dir/names.txt"
-
- // write some names to a file (so this example has no external requirements)
- generateNames(fileName)
-
- case class Person(val id: Long,
- val secondName: String,
- val firstName: String,
- val age: Int)
-
- // With this implicit conversion, we let Scoobi know the apply and unapply function, which it uses
- // to construct and deconstruct Person objects. Now it can very efficiently serialize them (i.e. no overhead)
- implicit val PersonFmt = mkCaseWireFormat(Person, Person.unapply _)
-
-
- // Read in lines of the form: 234242, Bob, Smith, 31.
- val persons : DList[Person] = fromDelimitedTextFile(fileName, ",") {
- case ALong(id) :: fN :: sN :: AnInt(age) :: _ => Person(id, sN, fN, age)
- }
-
- // The only thing we're interested in, is the firstName and age
- val nameAndAge: DList[(String, Int)] = persons.map { p => (p.firstName, p.age) }
-
- // Let's group everyone with the same name together
- val grouped: DList[(String, Iterable[Int])] = nameAndAge groupByKey
-
- // And for every name, we will average all the ages
- val avgAgeForName: DList[(String, Int)] = grouped map { case (n, ages) => (n, average(ages)) }
-
- // Execute everything, and throw it into a directory
- DList.persist (toTextFile(avgAgeForName, "output-dir/avg-age"))
-
- private def average[A](values: Iterable[A])(implicit ev: Numeric[A]) = {
- import ev._
-
- var value: Int = 0
- var count = 0
-
- for (i <- values) {
- value = value + toInt(i)
- count = count + 1
- }
-
- value / count
- }
-
- private def generateNames(filename: String) {
- val fstream = new FileWriter(filename)
-
- fstream write ("""100,Ben,Lever,31
-101,Tom,Smith,45
-102,Michael,Robson,33
-103,Rami,Mukhatar,34
-104,Sean,Seefried,33
-105,Ben,Cool,27
-106,Tom,Selleck,66
-107,Michael,Jordan,48
-108,Rami,Yacoub,36
-109,Sean,Connery,81""")
-
- fstream close()
- }
-}
@@ -1,8 +1,10 @@
name := "Java Word Count"
-version := "0.1"
+version := "1.0"
scalaVersion := "2.9.2"
libraryDependencies += "com.nicta" %% "scoobi" % "0.4.0-SNAPSHOT" % "provided"
+resolvers += "snapshots" at "http://oss.sonatype.org/content/repositories/snapshots"
+
@@ -14,26 +14,12 @@
* limitations under the License.
*/
package com.nicta.scoobij.examples;
-/**
- * Copyright 2011 National ICT Australia Limited
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
import com.nicta.scoobij.*;
import java.io.*;
import com.nicta.scoobij.io.text.*;
+
public class WordCount {
public static void main(String[] args) throws java.io.IOException {
@@ -88,7 +74,7 @@ public Integer apply(Integer a, Integer b) {
}, WireFormats.string(), WireFormats.integer());
// We can evaluate this, and write it to a text file
- Scoobi.persist(TextOutput.toTextFile(reduced, outputPath,
+ Scoobi.persist(TextOutput.toTextFile(reduced, outputPath, false,
WireFormats.string(), WireFormats.integer()));
}
@@ -1,7 +0,0 @@
-name := "Join Examples"
-
-version := "0.1"
-
-scalaVersion := "2.9.2"
-
-libraryDependencies += "com.nicta" %% "scoobi" % "0.4.0-SNAPSHOT" % "provided"
@@ -1,7 +0,0 @@
-import sbt._
-
-object Plugins extends Build {
- lazy val root = Project("root", file(".")) dependsOn(
- uri("git://github.com/NICTA/sbt-scoobi.git#master")
- )
-}
@@ -1,105 +0,0 @@
-/**
- * Copyright 2011 National ICT Australia Limited
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.nicta.scoobi.examples
-
-import com.nicta.scoobi.Scoobi._
-import java.io._
-
-/*
- * This example will show you how to apply different types of joins using the sample
- * dataset from the Wikipedia page on SQL Joins (http://en.wikipedia.org/wiki/Join_(SQL)),
- * slightly adjusted to be more OO/Scala.
- */
-
-object JoinExamples {
- def main(args: Array[String]) = withHadoopArgs(args) { _ =>
-
- if (!new File("output-dir").mkdir) {
- sys.error("Could not make output-dir for results. Perhaps it already exists (and you should delete/rename the old one)")
- }
-
- val employeesFile = "output-dir/employees.txt"
- val departmentsFile = "output-dir/departments.txt"
-
- // write some names to a file (so this example has no external requirements)
- generateDataSet(employeesFile, departmentsFile)
-
- case class Employee(val name: String, val departmentId: Long)
- case class Department(val id: Long, val name: String)
-
- // With this implicit conversion, we let Scoobi know the apply and unapply function, which it uses
- // to construct and deconstruct Employee and Department objects.
- // Now it can very efficiently serialize them (i.e. no overhead)
- implicit val EmployeeFmt = mkCaseWireFormat(Employee, Employee.unapply _)
- implicit val DepartmentFmt = mkCaseWireFormat(Department, Department.unapply _)
-
- // Read in lines of the form: Bob Smith, 31
- val employees : DList[Employee] = fromDelimitedTextFile(employeesFile, ",") {
- case name :: ALong(departmentId) :: _ => Employee(name, departmentId)
- }
-
- // Read in lines of the form: 31, Finance
- val departments : DList[Department] = fromDelimitedTextFile(departmentsFile, ",") {
- case ALong(id) :: name :: _ => Department(id, name)
- }
-
- val employeesByDepartmentId: DList[(Long, Employee)] = employees.by(_.departmentId)
- val departmentsById: DList[(Long, Department)] = departments.by(_.id)
-
- // Perform an inner (equi)join
- val inner: DList[(Long, (Employee, Department))] = join(employeesByDepartmentId, departmentsById)
-
- // Perform a left outer join and specify what to do when the left has an
- // entry without a matching entry on the right
- val left: DList[(Long, (Employee, Department))] =
- joinLeft(employeesByDepartmentId,
- departmentsById,
- (departmentId, employee) => Department(departmentId, "Unknown"))
-
- // Perform a right outer join and specify what to do when the right has an
- // entry without a matching entry on the left
- val right: DList[(Long, (Employee, Department))] =
- joinRight(employeesByDepartmentId, departmentsById, (id, department) => Employee("Unknown", id))
-
- // Execute everything, and throw it into a directory
- DList.persist(
- toTextFile(inner, "output-dir/inner"),
- toTextFile(left, "output-dir/left"),
- toTextFile(right, "output-dir/right")
- )
- }
-
- private def generateDataSet(employeesFile: String, departmentsFile: String) {
- val e = new FileWriter(employeesFile)
- val d = new FileWriter(departmentsFile)
-
- e.write("""Rafferty,31
-Jones,33
-Steinberg,33
-Robinson,34
-Smith,34
-John,-1""")
-
- e.close()
-
- d.write ("""31,Sales
-33,Engineering
-34,Clerical
-35,Marketing""")
-
- d.close()
- }
-}
@@ -1,7 +0,0 @@
-name := "Scoobi Number Filter"
-
-version := "0.1"
-
-scalaVersion := "2.9.2"
-
-libraryDependencies += "com.nicta" %% "scoobi" % "0.4.0-SNAPSHOT" % "provided"
@@ -1,8 +0,0 @@
-import sbt._
-
-object Plugins extends Build {
- lazy val root = Project("root", file(".")) dependsOn(
- uri("git://github.com/NICTA/sbt-scoobi.git#master")
- )
-}
-
@@ -1,48 +0,0 @@
-/**
- * Copyright 2011 National ICT Australia Limited
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.nicta.scoobi.examples
-
-import com.nicta.scoobi.Scoobi._
-import java.io._
-
-
-object NumberPartitioner extends ScoobiApp {
- val fileName = "output-dir/all-ints.txt"
-
- // Write 50 (newline-separated) ints to a file. We do this to make the example self-contained
- generateInts(fileName, 50)
-
- // fromTextFile creates a list of Strings, where each String is a line
- val data : DList[String] = fromTextFile(fileName);
-
- // since they're numbers, we can easily parse them
- val intData : DList[Int] = data.map(_.toInt)
-
- // Now we can partition this data into two lists, one where they're even and one where they're odd
- val (evens, odds) = intData.partition(_ % 2 == 0)
-
- DList.persist (
- TextOutput.toTextFile(evens, "output-dir/evens"),
- TextOutput.toTextFile(odds, "output-dir/odds")
- )
-
- private def generateInts(filename: String, count: Int) {
- val fstream = new FileWriter(filename)
- val r = new scala.util.Random()
- (1 to count) foreach { _ => fstream write ( r.nextInt(count * 2).toString ++ "\n" ) }
- fstream.close()
- }
-}
@@ -1,7 +1,11 @@
name := "PageRank"
-version := "0.1"
+version := "1.0"
scalaVersion := "2.9.2"
+scalacOptions ++= Seq("-Ydependent-method-types", "-deprecation")
+
libraryDependencies += "com.nicta" %% "scoobi" % "0.4.0-SNAPSHOT" % "provided"
+
+resolvers += "snapshots" at "http://oss.sonatype.org/content/repositories/snapshots"
Oops, something went wrong.

0 comments on commit c1fb514

Please sign in to comment.