From 36aae3b569af58cd94a49f8b574abd6a9cc9c205 Mon Sep 17 00:00:00 2001 From: jtengyp Date: Tue, 17 Oct 2017 13:33:05 +0800 Subject: [PATCH 1/2] Keep only ml.linear --- travis/benchmarks.lst | 8 -------- 1 file changed, 8 deletions(-) diff --git a/travis/benchmarks.lst b/travis/benchmarks.lst index 678db67d8..aaaf32b34 100644 --- a/travis/benchmarks.lst +++ b/travis/benchmarks.lst @@ -1,9 +1 @@ -micro.sort -ml.bayes -ml.gbt -ml.rf -ml.svd ml.linear -websearch.nutchindexing -sql.scan -graph.nweight From b661a1ca30f091bb9c218a065cf602b8c9573b80 Mon Sep 17 00:00:00 2001 From: jtengyp Date: Wed, 18 Oct 2017 14:53:32 +0800 Subject: [PATCH 2/2] Change the Optimizer of LDA to Online method --- .../src/main/scala/com/intel/sparkbench/ml/LDAExample.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/LDAExample.scala b/sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/LDAExample.scala index db0e8ff70..d568bc92a 100644 --- a/sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/LDAExample.scala +++ b/sparkbench/ml/src/main/scala/com/intel/sparkbench/ml/LDAExample.scala @@ -19,7 +19,7 @@ package com.intel.hibench.sparkbench.ml import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.mllib.clustering.{DistributedLDAModel, LDA} +import org.apache.spark.mllib.clustering.{LDA, DistributedLDAModel, LocalLDAModel} import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD @@ -51,11 +51,11 @@ object LDAExample { val corpus: RDD[(Long, Vector)] = sc.objectFile(inputPath) // Cluster the documents into numTopics topics using LDA - val ldaModel = new LDA().setK(numTopics).run(corpus) + val ldaModel = new LDA().setK(numTopics).setOptimizer("online").run(corpus) // Save and load model. ldaModel.save(sc, outputPath) - val sameModel = DistributedLDAModel.load(sc, outputPath) + val savedModel = LocalLDAModel.load(sc, outputPath) sc.stop() }