apache · longliveenduro · Oct 15, 2018 · Oct 15, 2018 · Oct 17, 2018 · Oct 23, 2018
diff --git a/core/src/main/scala/org/apache/predictionio/controller/LAlgorithm.scala b/core/src/main/scala/org/apache/predictionio/controller/LAlgorithm.scala
@@ -24,6 +24,9 @@ import org.apache.predictionio.workflow.PersistentModelManifest
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 
+import scala.concurrent.duration._
+import scala.concurrent.{Await, ExecutionContext, Future, blocking}
+import scala.language.postfixOps
 import scala.reflect._
 
 /** Base class of a local algorithm.
@@ -72,13 +75,40 @@ abstract class LAlgorithm[PD, M : ClassTag, Q, P]
     val glomQs: RDD[Array[(Long, Q)]] = qs.glom()
     val cartesian: RDD[(M, Array[(Long, Q)])] = mRDD.cartesian(glomQs)
     cartesian.flatMap { case (m, qArray) =>
-      qArray.map { case (qx, q) => (qx, predict(m, q)) }
+      qArray.map {
+        case (qx, q) =>
+          (qx,
+            Await.result(predictAsync(m, q)(scala.concurrent.ExecutionContext.global), 60 minutes) )
+      }
     }
   }
 
-  def predictBase(localBaseModel: Any, q: Q): P = {
+  override def predictBaseAsync(localBaseModel: Any, q: Q)(implicit ec: ExecutionContext)
+      : Future[P] =
+    predictAsync(localBaseModel.asInstanceOf[M], q)(ec)
+
+  @deprecated(message =
+    "this method is just here for backward compatibility, predictBaseAsync() is called now",
+    since = "0.14.0")
+  override def predictBase(localBaseModel: Any, q: Q): P =
     predict(localBaseModel.asInstanceOf[M], q)
-  }
+
+  /** Implement this method to produce a Future of a prediction in a non blocking way
+    * from a query and trained model.
+    *
+    * This method is implemented to just delegate to blocking predict() for
+    * backward compatibility reasons.
+    * Definitely overwrite it to implement your blocking prediction method, and leave
+    * the old blocking predict() as it is (throwing an exception), it won't be called from
+    * now on.
+    *
+    * @param model Trained model produced by [[train]].
+    * @param query An input query.
+    * @param ec ExecutionContext to use for async operations
+    * @return A Future of a prediction.
+    */
+  def predictAsync(model: M, query: Q)(implicit ec: ExecutionContext): Future[P] =
+    Future.successful(blocking(predict(model, query)))
 
   /** Implement this method to produce a prediction from a query and trained
     * model.
@@ -87,7 +117,9 @@ abstract class LAlgorithm[PD, M : ClassTag, Q, P]
     * @param q An input query.
     * @return A prediction.
     */
-  def predict(m: M, q: Q): P
+  @deprecated(message = "override non blocking predictAsync() instead", since = "0.14.0")
+  def predict(m: M, q: Q): P =
+    throw new NotImplementedError("predict() is deprecated, override predictAsync() instead")
 
   /** :: DeveloperApi ::
     * Engine developers should not use this directly (read on to see how local

diff --git a/core/src/main/scala/org/apache/predictionio/controller/P2LAlgorithm.scala b/core/src/main/scala/org/apache/predictionio/controller/P2LAlgorithm.scala
@@ -25,6 +25,9 @@ import org.apache.spark.SparkContext
 import org.apache.spark.SparkContext._
 import org.apache.spark.rdd.RDD
 
+import scala.concurrent.duration._
+import scala.concurrent.{Await, ExecutionContext, Future, blocking}
+import scala.language.postfixOps
 import scala.reflect._
 
 /** Base class of a parallel-to-local algorithm.
@@ -67,10 +70,35 @@ abstract class P2LAlgorithm[PD, M: ClassTag, Q: ClassTag, P]
     * @return Batch of predicted results
     */
   def batchPredict(m: M, qs: RDD[(Long, Q)]): RDD[(Long, P)] = {
-    qs.mapValues { q => predict(m, q) }
+    qs.mapValues { q =>
+      Await.result(predictAsync(m, q)(scala.concurrent.ExecutionContext.global), 60 minutes)
+    }
   }
 
-  def predictBase(bm: Any, q: Q): P = predict(bm.asInstanceOf[M], q)
+  override def predictBaseAsync(bm: Any, q: Q)(implicit ec: ExecutionContext): Future[P] =
+    predictAsync(bm.asInstanceOf[M], q)(ec)
+
+  @deprecated(message =
+    "this method is just here for backward compatibility, predictBaseAsync() is called now",
+     since = "0.14.0")
+  override def predictBase(bm: Any, q: Q): P = predict(bm.asInstanceOf[M], q)
+
+  /** Implement this method to produce a Future of a prediction in a non blocking way
+    * from a query and trained model.
+    *
+    * This method is implemented to just delegate to blocking predict() for
+    * backward compatibility reasons.
+    * Definitely overwrite it to implement your blocking prediction method, and leave
+    * the old blocking predict() as it is (throwing an exception), it won't be called from
+    * now on.
+    *
+    * @param model Trained model produced by [[train]].
+    * @param query An input query.
+    * @param ec ExecutionContext to use for async operations
+    * @return A Future of a prediction.
+    */
+  def predictAsync(model: M, query: Q)(implicit ec: ExecutionContext): Future[P] =
+    Future.successful(blocking(predict(model, query)))
 
   /** Implement this method to produce a prediction from a query and trained
     * model.
@@ -79,7 +107,9 @@ abstract class P2LAlgorithm[PD, M: ClassTag, Q: ClassTag, P]
     * @param query An input query.
     * @return A prediction.
     */
-  def predict(model: M, query: Q): P
+  @deprecated(message = "override non blocking predictAsync() instead", since = "0.14.0")
+  def predict(model: M, query: Q): P =
+    throw new NotImplementedError("predict() is deprecated, override predictAsync() instead")
 
   /** :: DeveloperApi ::
     * Engine developers should not use this directly (read on to see how

diff --git a/core/src/main/scala/org/apache/predictionio/controller/PAlgorithm.scala b/core/src/main/scala/org/apache/predictionio/controller/PAlgorithm.scala
@@ -24,6 +24,8 @@ import org.apache.predictionio.workflow.PersistentModelManifest
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 
+import scala.concurrent.{ExecutionContext, Future, blocking}
+
 /** Base class of a parallel algorithm.
   *
   * A parallel algorithm can be run in parallel on a cluster and produces a
@@ -72,9 +74,31 @@ abstract class PAlgorithm[PD, M, Q, P]
   def batchPredict(m: M, qs: RDD[(Long, Q)]): RDD[(Long, P)] =
     throw new NotImplementedError("batchPredict not implemented")
 
-  def predictBase(baseModel: Any, query: Q): P = {
+  override def predictBaseAsync(bm: Any, q: Q)(implicit ec: ExecutionContext): Future[P] =
+    predictAsync(bm.asInstanceOf[M], q)(ec)
+
+  @deprecated(message =
+    "this method is just here for backward compatibility, predictBaseAsync() is called now",
+    since = "0.14.0")
+  override def predictBase(baseModel: Any, query: Q): P =
     predict(baseModel.asInstanceOf[M], query)
-  }
+
+  /** Implement this method to produce a Future of a prediction in a non blocking way
+    * from a query and trained model.
+    *
+    * This method is implemented to just delegate to blocking predict() for
+    * backward compatibility reasons.
+    * Definitely overwrite it to implement your blocking prediction method, and leave
+    * the old blocking predict() as it is (throwing an exception), it won't be called from
+    * now on.
+    *
+    * @param model Trained model produced by [[train]].
+    * @param query An input query.
+    * @param ec ExecutionContext to use for async operations
+    * @return A Future of a prediction.
+    */
+  def predictAsync(model: M, query: Q)(implicit ec: ExecutionContext): Future[P] =
+    Future.successful(blocking(predict(model, query)))
 
   /** Implement this method to produce a prediction from a query and trained
     * model.
@@ -83,7 +107,9 @@ abstract class PAlgorithm[PD, M, Q, P]
     * @param query An input query.
     * @return A prediction.
     */
-  def predict(model: M, query: Q): P
+  @deprecated(message = "override non blocking predictAsync() instead", since = "0.14.0")
+  def predict(model: M, query: Q): P =
+    throw new NotImplementedError("predict() is deprecated, override predictAsync() instead")
 
   /** :: DeveloperApi ::
     * Engine developers should not use this directly (read on to see how parallel

diff --git a/core/src/main/scala/org/apache/predictionio/core/BaseAlgorithm.scala b/core/src/main/scala/org/apache/predictionio/core/BaseAlgorithm.scala
@@ -26,6 +26,8 @@ import net.jodah.typetools.TypeResolver
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 
+import scala.concurrent.{ExecutionContext, Future, blocking}
+
 /** :: DeveloperApi ::
   * Base trait with default custom query serializer, exposed to engine developer
   * via [[org.apache.predictionio.controller.CustomQuerySerializer]]
@@ -81,6 +83,19 @@ abstract class BaseAlgorithm[PD, M, Q, P]
   def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)])
   : RDD[(Long, P)]
 
+  /** :: DeveloperApi ::
+    * Engine developers should not use this directly. Called by serving to
+    * perform a single prediction.
+    *
+    * @param bm Model
+    * @param q Query
+    * @param ec ExecutionContext to use for async operations
+    * @return Future of a Predicted result
+    */
+  @DeveloperApi
+  def predictBaseAsync(bm: Any, q: Q)(implicit ec: ExecutionContext): Future[P] =
+    Future.successful(blocking {predictBase(bm, q)})
+
   /** :: DeveloperApi ::
     * Engine developers should not use this directly. Called by serving to
     * perform a single prediction.
@@ -90,7 +105,11 @@ abstract class BaseAlgorithm[PD, M, Q, P]
     * @return Predicted result
     */
   @DeveloperApi
-  def predictBase(bm: Any, q: Q): P
+  @deprecated(message = "override non blocking predictBaseAsync() instead", since = "0.14.0")
+  def predictBase(bm: Any, q: Q): P =
+    throw new NotImplementedError(
+      "predictBase() is deprecated, override predictBaseAsync() instead"
+    )
 
   /** :: DeveloperApi ::
     * Engine developers should not use this directly. Prepare a model for

diff --git a/core/src/main/scala/org/apache/predictionio/workflow/BatchPredict.scala b/core/src/main/scala/org/apache/predictionio/workflow/BatchPredict.scala
@@ -32,7 +32,12 @@ import org.apache.predictionio.workflow.CleanupFunctions
 import org.apache.spark.rdd.RDD
 import org.json4s._
 import org.json4s.native.JsonMethods._
+import scala.concurrent.duration._
+import scala.language.postfixOps
+import scala.concurrent.blocking
+import scala.concurrent.{Await, Future}
 import scala.language.existentials
+import scala.concurrent.ExecutionContext.Implicits.global
 
 case class BatchPredictConfig(
   inputFilePath: String = "batchpredict-input.json",
@@ -207,23 +212,26 @@ object BatchPredict extends Logging {
         // Deploy logic. First call Serving.supplement, then Algo.predict,
         // finally Serving.serve.
         val supplementedQuery = serving.supplementBase(query)
-        // TODO: Parallelize the following.
-        val predictions = algorithms.zip(models).map { case (a, m) =>
-          a.predictBase(m, supplementedQuery)
-        }
+        val predictionsFuture = Future.sequence(algorithms.zip(models).map { case (a, m) =>
+          a.predictBaseAsync(m, supplementedQuery)
+        })
         // Notice that it is by design to call Serving.serve with the
         // *original* query.
-        val prediction = serving.serveBase(query, predictions)
-        // Combine query with prediction, so the batch results are
-        // self-descriptive.
-        val predictionJValue = JsonExtractor.toJValue(
-          jsonExtractorOption,
-          Map("query" -> query,
-              "prediction" -> prediction),
-          algorithms.head.querySerializer,
-          algorithms.head.gsonTypeAdapterFactories)
-        // Return JSON string
-        compact(render(predictionJValue))
+        val predFutureRdds = predictionsFuture.map {
+          predictions =>
+            val prediction = serving.serveBase(query, predictions)
+            // Combine query with prediction, so the batch results are
+            // self-descriptive.
+            val predictionJValue = JsonExtractor.toJValue(
+              jsonExtractorOption,
+              Map("query" -> query,
+                  "prediction" -> prediction),
+              algorithms.head.querySerializer,
+              algorithms.head.gsonTypeAdapterFactories)
+            // Return JSON string
+            compact(render(predictionJValue))
+        }
+        Await.result(predFutureRdds, 60 minutes)
       }
 
       predictionsRDD.saveAsTextFile(config.outputFilePath)