Commit: Merge branch 'mesos'
haitaoyao committed Jan 24, 2013
2 parents df9ae8a + 548856a, commit 97e2420
Showing 246 changed files with 17,206 additions and 2,297 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -12,6 +12,7 @@ third_party/libmesos.so
third_party/libmesos.dylib
conf/java-opts
conf/spark-env.sh
+conf/streaming-env.sh
conf/log4j.properties
docs/_site
docs/api
@@ -31,6 +32,7 @@ project/plugins/src_managed/
logs/
log/
spark-tests.log
+streaming-tests.log
dependency-reduced-pom.xml
.ensime
.ensime_lucene
11 changes: 11 additions & 0 deletions bagel/pom.xml
@@ -45,6 +45,11 @@
<profiles>
<profile>
<id>hadoop1</id>
+<activation>
+<property>
+<name>!hadoopVersion</name>
+</property>
+</activation>
<dependencies>
<dependency>
<groupId>org.spark-project</groupId>
@@ -72,6 +77,12 @@
</profile>
<profile>
<id>hadoop2</id>
+<activation>
+<property>
+<name>hadoopVersion</name>
+<value>2</value>
+</property>
+</activation>
<dependencies>
<dependency>
<groupId>org.spark-project</groupId>
4 changes: 2 additions & 2 deletions bagel/src/test/resources/log4j.properties
@@ -1,8 +1,8 @@
-# Set everything to be logged to the console
+# Set everything to be logged to the file bagel/target/unit-tests.log
log4j.rootCategory=INFO, file
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.append=false
-log4j.appender.file.file=spark-tests.log
+log4j.appender.file.file=bagel/target/unit-tests.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n

17 changes: 16 additions & 1 deletion core/pom.xml
@@ -71,6 +71,10 @@
<groupId>cc.spray</groupId>
<artifactId>spray-server</artifactId>
</dependency>
+<dependency>
+<groupId>cc.spray</groupId>
+<artifactId>spray-json_${scala.version}</artifactId>
+</dependency>
<dependency>
<groupId>org.tomdz.twirl</groupId>
<artifactId>twirl-api</artifactId>
@@ -159,6 +163,11 @@
<profiles>
<profile>
<id>hadoop1</id>
+<activation>
+<property>
+<name>!hadoopVersion</name>
+</property>
+</activation>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
@@ -211,6 +220,12 @@
</profile>
<profile>
<id>hadoop2</id>
+<activation>
+<property>
+<name>hadoopVersion</name>
+<value>2</value>
+</property>
+</activation>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
@@ -267,4 +282,4 @@
</build>
</profile>
</profiles>
-</project>
+</project>
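
A note on the pom.xml changes above: the new <activation> blocks use Maven's standard property-based profile activation, so the hadoop1 profile is active whenever no hadoopVersion property is defined (the !hadoopVersion condition) and the hadoop2 profile is selected by defining it as 2 on the command line, for example with mvn package -DhadoopVersion=2 (the command itself is illustrative and not part of this commit).
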
41 changes: 32 additions & 9 deletions core/src/main/scala/spark/Accumulators.scala
@@ -25,8 +25,7 @@ class Accumulable[R, T] (
extends Serializable {

val id = Accumulators.newId
-@transient
-private var value_ = initialValue // Current value on master
+@transient private var value_ = initialValue // Current value on master
val zero = param.zero(initialValue) // Zero value to be passed to workers
var deserialized = false

@@ -38,20 +37,37 @@ class Accumulable[R, T] (
*/
def += (term: T) { value_ = param.addAccumulator(value_, term) }

+/**
+* Add more data to this accumulator / accumulable
+* @param term the data to add
+*/
+def add(term: T) { value_ = param.addAccumulator(value_, term) }
+
/**
* Merge two accumulable objects together
-*
+*
* Normally, a user will not want to use this version, but will instead call `+=`.
-* @param term the other Accumulable that will get merged with this
+* @param term the other `R` that will get merged with this
*/
def ++= (term: R) { value_ = param.addInPlace(value_, term)}

+/**
+* Merge two accumulable objects together
+*
+* Normally, a user will not want to use this version, but will instead call `add`.
+* @param term the other `R` that will get merged with this
+*/
+def merge(term: R) { value_ = param.addInPlace(value_, term)}
+
/**
* Access the accumulator's current value; only allowed on master.
*/
-def value = {
-if (!deserialized) value_
-else throw new UnsupportedOperationException("Can't read accumulator value in task")
+def value: R = {
+if (!deserialized) {
+value_
+} else {
+throw new UnsupportedOperationException("Can't read accumulator value in task")
+}
}

/**
@@ -68,10 +84,17 @@ class Accumulable[R, T] (
/**
* Set the accumulator's value; only allowed on master.
*/
-def value_= (r: R) {
-if (!deserialized) value_ = r
+def value_= (newValue: R) {
+if (!deserialized) value_ = newValue
else throw new UnsupportedOperationException("Can't assign accumulator value in task")
}

+/**
+* Set the accumulator's value; only allowed on master
+*/
+def setValue(newValue: R) {
+this.value = newValue
+}
+
// Called by Java when deserializing an object
private def readObject(in: ObjectInputStream) {
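
To see how the new Java-friendly methods relate to the existing operators, here is a minimal driver-side sketch. It assumes an AccumulableParam-style object providing the three operations the class body above calls (zero, addAccumulator and addInPlace) and assumes Accumulable can be constructed from an initial value plus that param; the names and the construction are illustrative, not part of this commit.

// Sketch only: a param object exposing the operations Accumulable uses above.
object SetOfIntParam extends AccumulableParam[Set[Int], Int] {
  def zero(initialValue: Set[Int]): Set[Int] = Set.empty
  def addAccumulator(current: Set[Int], elem: Int): Set[Int] = current + elem
  def addInPlace(r1: Set[Int], r2: Set[Int]): Set[Int] = r1 ++ r2
}

val acc = new Accumulable(Set.empty[Int], SetOfIntParam) // hypothetical construction
acc += 1             // existing Scala operator
acc.add(2)           // new: same as +=, but callable from Java
acc ++= Set(3, 4)    // existing Scala operator
acc.merge(Set(5, 6)) // new: same as ++=, but callable from Java
acc.setValue(Set(7)) // new: equivalent to acc.value = Set(7); master only
println(acc.value)   // reading the value is only allowed on the master, not in tasks
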
118 changes: 0 additions & 118 deletions core/src/main/scala/spark/BoundedMemoryCache.scala

This file was deleted.

65 changes: 65 additions & 0 deletions core/src/main/scala/spark/CacheManager.scala
@@ -0,0 +1,65 @@
package spark

import scala.collection.mutable.{ArrayBuffer, HashSet}
import spark.storage.{BlockManager, StorageLevel}


/** Spark class responsible for passing RDDs split contents to the BlockManager and making
sure a node doesn't load two copies of an RDD at once.
*/
private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
private val loading = new HashSet[String]

/** Gets or computes an RDD split. Used by RDD.iterator() when a RDD is cached. */
def getOrCompute[T](rdd: RDD[T], split: Split, context: TaskContext, storageLevel: StorageLevel)
: Iterator[T] = {
val key = "rdd_%d_%d".format(rdd.id, split.index)
logInfo("Cache key is " + key)
blockManager.get(key) match {
case Some(cachedValues) =>
// Split is in cache, so just return its values
logInfo("Found partition in cache!")
return cachedValues.asInstanceOf[Iterator[T]]

case None =>
// Mark the split as loading (unless someone else marks it first)
loading.synchronized {
if (loading.contains(key)) {
logInfo("Loading contains " + key + ", waiting...")
while (loading.contains(key)) {
try {loading.wait()} catch {case _ =>}
}
logInfo("Loading no longer contains " + key + ", so returning cached result")
// See whether someone else has successfully loaded it. The main way this would fail
// is for the RDD-level cache eviction policy if someone else has loaded the same RDD
// partition but we didn't want to make space for it. However, that case is unlikely
// because it's unlikely that two threads would work on the same RDD partition. One
// downside of the current code is that threads wait serially if this does happen.
blockManager.get(key) match {
case Some(values) =>
return values.asInstanceOf[Iterator[T]]
case None =>
logInfo("Whoever was loading " + key + " failed; we'll try it ourselves")
loading.add(key)
}
} else {
loading.add(key)
}
}
try {
// If we got here, we have to load the split
val elements = new ArrayBuffer[Any]
logInfo("Computing partition " + split)
elements ++= rdd.compute(split, context)
// Try to put this block in the blockManager
blockManager.put(key, elements, storageLevel, true)
return elements.iterator.asInstanceOf[Iterator[T]]
} finally {
loading.synchronized {
loading.remove(key)
loading.notifyAll()
}
}
}
}
}
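
The heart of getOrCompute above is a per-key guard built from the loading set plus wait()/notifyAll(): only one thread computes a given partition while the others block and then re-check the cache. The same pattern, distilled into a standalone sketch, where lookup and compute stand in for blockManager.get and rdd.compute (these names are illustrative, not part of the commit):

// Simplified, illustrative version of the guard implemented by CacheManager above.
object LoadingGuardSketch {
  private val loading = new scala.collection.mutable.HashSet[String]

  def getOrCompute[T](key: String)(lookup: => Option[T])(compute: => T): T = {
    lookup match {
      case Some(v) => return v // fast path: the value is already cached
      case None =>
    }
    loading.synchronized {
      while (loading.contains(key)) loading.wait() // another thread is computing this key
      lookup match {
        case Some(v) => return v // it finished successfully, so reuse its result
        case None => loading.add(key) // nobody has it, so we compute it ourselves
      }
    }
    try {
      compute // the real code also hands the result to the BlockManager here
    } finally {
      loading.synchronized { loading.remove(key); loading.notifyAll() }
    }
  }
}
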
