Intel-bigdata · carsonwang · Oct 12, 2016 · Oct 11, 2016 · Oct 12, 2016
diff --git a/bin/workloads/graph/nweight/prepare/prepare.sh b/bin/workloads/graph/nweight/prepare/prepare.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+current_dir=$(dirname "$0")
+root_dir=${current_dir}/../../../../../    # five levels up from bin/workloads/graph/nweight/prepare
+workload_config=${root_dir}/conf/workloads/graph/nweight.conf
+. "${root_dir}/bin/functions/load-bench-config.sh"
+
+enter_bench NWeightPrepare "${workload_config}"    # quoted: the path derives from $0 and may contain spaces
+show_bannar start
+
+rmr-hdfs $INPUT_HDFS || true    # failure is deliberately ignored; presumably the input dir may not exist yet
+START_TIME=$(timestamp)
+
+run-spark-job com.intel.hibench.sparkbench.graph.nweight.NWeightDataGenerator $MODEL_INPUT $INPUT_HDFS $EDGES
+
+END_TIME=$(timestamp)
+
+show_bannar finish
+leave_bench
+
diff --git a/bin/workloads/graph/nweight/spark/run.sh b/bin/workloads/graph/nweight/spark/run.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+current_dir=$(dirname "$0")
+root_dir=${current_dir}/../../../../../    # five levels up from bin/workloads/graph/nweight/spark
+workload_config=${root_dir}/conf/workloads/graph/nweight.conf
+. "${root_dir}/bin/functions/load-bench-config.sh"
+
+enter_bench ScalaSparkNWeight "${workload_config}"    # quoted: the path derives from $0 and may contain spaces
+show_bannar start
+
+rmr-hdfs $OUTPUT_HDFS || true    # failure is deliberately ignored; presumably the output dir may not exist yet
+
+SIZE=$(dir_size $INPUT_HDFS)    # input size is captured before the job and handed to gen_report below
+START_TIME=$(timestamp)
+run-spark-job com.intel.hibench.sparkbench.graph.nweight.NWeight $INPUT_HDFS $OUTPUT_HDFS $DEGREE $MAX_OUT_EDGES $NUM_PARTITION $STORAGE_LEVEL $DISABLE_KRYO $MODEL
+END_TIME=$(timestamp)
+
+gen_report ${START_TIME} ${END_TIME} ${SIZE}
+show_bannar finish
+leave_bench
diff --git a/conf/hibench.conf b/conf/hibench.conf
@@ -43,13 +43,13 @@ hibench.hive.release		hive-0.12.0-bin
 hibench.hivebench.template.dir	${hibench.dependency.dir}/hivebench/hive_template
 hibench.hive.dir.name.input	${hibench.workload.dir.name.input}
 hibench.hive.dir.name.ouput	${hibench.workload.dir.name.output}
+
 hibench.bayes.dir.name.input	${hibench.workload.dir.name.input}
 hibench.bayes.dir.name.output	${hibench.workload.dir.name.output}
 hibench.pagerank.dir.name.input		${hibench.workload.dir.name.input}
 hibench.pagerank.dir.name.output	${hibench.workload.dir.name.output}
 hibench.pagerank.pegasus.dir		${hibench.dependency.dir}/pegasus/target/pegasus-2.0-SNAPSHOT.jar
 
-
 hibench.mahout.release.apache   mahout-distribution-0.9
 hibench.mahout.release.hdp      mahout-distribution-0.9
 hibench.mahout.release.cdh5	    mahout-0.9-cdh5.1.0
@@ -66,4 +66,3 @@ hibench.nutch.home		${hibench.dependency.dir}/nutchindexing/target/${hibench.nut
 hibench.dfsioe.dir.name.input	${hibench.workload.dir.name.input}
 hibench.dfsioe.dir.name.output	${hibench.workload.dir.name.output}
 
-hibench.nweight.model_path	${hibench.dependency.dir}/sparkbench/src/main/scala/com/intel/sparkbench/nweight/model/user-features
diff --git a/conf/workloads/graph/nweight.conf b/conf/workloads/graph/nweight.conf
@@ -0,0 +1,38 @@
+hibench.nweight.tiny.edges			                100000
+hibench.nweight.tiny.degree                     3
+hibench.nweight.tiny.max_out_edges              30
+hibench.nweight.small.edges                     1000000
+hibench.nweight.small.degree                    3
+hibench.nweight.small.max_out_edges             30
+hibench.nweight.large.edges                     10000000
+hibench.nweight.large.degree                    3
+hibench.nweight.large.max_out_edges             30
+hibench.nweight.huge.edges                      100000000
+hibench.nweight.huge.degree                     3
+hibench.nweight.huge.max_out_edges              30
+hibench.nweight.gigantic.edges                  425000000
+hibench.nweight.gigantic.degree                 3
+hibench.nweight.gigantic.max_out_edges          30
+hibench.nweight.bigdata.edges                   4250000000
+hibench.nweight.bigdata.degree                  3
+hibench.nweight.bigdata.max_out_edges           30
+
+hibench.nweight.edges			${hibench.nweight.${hibench.scale.profile}.edges}
+hibench.nweight.degree			${hibench.nweight.${hibench.scale.profile}.degree}
+hibench.nweight.max_out_edges		${hibench.nweight.${hibench.scale.profile}.max_out_edges}
+
+hibench.nweight.partitions		${hibench.default.map.parallelism}
+# Storage level for the RDDs: 0 = off-heap, 1 = disk_only, 3 = memory_only, 5 = memory_ser, 7 = memory_and_disk, 9 = memory_and_disk_ser
+hibench.nweight.storage_level           7
+hibench.nweight.disable_kryo            false
+# NWeight implementation to run: graphx or pregel
+hibench.nweight.model                   graphx
+
+hibench.nweight.model_path	${hibench.home}/sparkbench/graph/src/main/resources/nweight-user-features
+
+hibench.sparkbench.jar      ${hibench.home}/sparkbench/graph/target/sparkbench-graph-6.0-SNAPSHOT-${hibench.spark.version}-jar-with-dependencies.jar
+
+# Values exported for the shell scripts
+hibench.workload.input			${hibench.hdfs.data.dir}/NWeight/Input
+hibench.workload.output			${hibench.hdfs.data.dir}/NWeight/Output
+hibench.workload.edges			${hibench.nweight.edges}
diff --git a/sparkbench/graph/pom.xml b/sparkbench/graph/pom.xml
@@ -13,4 +13,233 @@
   <packaging>jar</packaging>
   <version>6.0-SNAPSHOT</version>
   <name>sparkbench-graph</name>
+
+
+    <dependencies>
+      <dependency>
+            <groupId>com.intel.hibench.sparkbench</groupId>
+            <artifactId>sparkbench-common</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.scala-lang</groupId>
+            <artifactId>scala-library</artifactId>
+            <version>${scala.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>${junit.version}</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <scope>provided</scope>
+        </dependency>
+	<dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-yarn_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <scope>provided</scope>
+	</dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-streaming_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-mllib_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-hive_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-graphx_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.github.scopt</groupId>
+            <artifactId>scopt_${scala.binary.version}</artifactId> <!-- same Scala binary suffix as the Spark artifacts above -->
+            <version>${scopt.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>log4j</groupId>
+            <artifactId>log4j</artifactId>
+            <version>${log4j.version}</version>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.mahout</groupId>
+            <artifactId>mahout-core</artifactId>
+            <version>${mahout.version}</version>
+            <exclusions>
+              <exclusion>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-core</artifactId>
+              </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.mahout</groupId>
+            <artifactId>mahout-math</artifactId>
+            <version>${mahout.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>it.unimi.dsi</groupId>
+            <artifactId>fastutil</artifactId>
+            <version>${fastutil.version}</version>
+        </dependency>
+    </dependencies>
+    <build>
+        <plugins>
+            <plugin>
+	      <groupId>org.apache.maven.plugins</groupId>
+	      <artifactId>maven-compiler-plugin</artifactId>
+	      <version>${maven-compiler-plugin.version}</version>
+            </plugin>
+            <plugin>
+                <groupId>net.alchim31.maven</groupId>
+                <artifactId>scala-maven-plugin</artifactId>
+                <configuration>
+                    <scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
+                    <scalaVersion>${scala.version}</scalaVersion>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>scala-compile-first</id>
+                        <phase>process-resources</phase>
+                        <goals>
+                            <goal>add-source</goal>
+                            <goal>compile</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>scala-test-compile</id>
+                        <phase>process-test-resources</phase>
+                        <goals>
+                            <goal>testCompile</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+	    <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>default-jar</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>jar</goal>
+                        </goals>
+                        <configuration>
+                            <finalName>${project.artifactId}-${project.version}-spark${spark.bin.version}</finalName>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>${maven-assembly-plugin.version}</version>
+                <configuration>
+                    <finalName>${project.build.finalName}-spark${spark.bin.version}</finalName>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+
+        </plugins>
+    </build>
+<profiles>
+      <profile>
+        <id>spark1.6</id>
+        <properties>
+          <spark.version>1.6.0</spark.version>
+          <spark.bin.version>1.6</spark.bin.version>
+        </properties>
+        <activation>
+        <property>
+                <name>spark</name>
+                <value>1.6</value>
+        </property>
+        </activation>
+      </profile>
+
+      <profile>
+        <id>spark2.0</id>
+        <properties>
+          <spark.version>2.0.0</spark.version>
+          <spark.bin.version>2.0</spark.bin.version>
+        </properties>
+        <activation>
+        <property>
+                <name>spark</name>
+                <value>2.0</value>
+        </property>
+        </activation>
+      </profile>
+
+      <profile>
+        <id>defaultspark</id>
+        <properties>
+          <spark.version>1.6.0</spark.version>
+          <spark.bin.version>1.6</spark.bin.version>
+        </properties>
+        <activation>
+        <property>
+                <name>!spark</name>
+        </property>
+        </activation>
+      </profile>
+
+      <profile>
+            <id>MR2</id>
+            <dependencies>
+                <dependency>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-mapreduce-examples</artifactId>
+                    <version>${hadoop.mr2.version}</version>
+                </dependency>
+                <dependency>
+                    <groupId>org.apache.hadoop</groupId>
+                    <artifactId>hadoop-client</artifactId>
+                    <version>${hadoop.mr2.version}</version>
+                    <exclusions>
+                        <exclusion>
+                            <groupId>org.apache.hadoop</groupId>
+                            <artifactId>hadoop-hdfs</artifactId>
+                        </exclusion>
+                    </exclusions>
+                </dependency>
+            </dependencies>
+            <activation>
+                <property><name>!MR1</name></property>
+            </activation>
+      </profile>
+
+</profiles>
 </project>
+
diff --git a/...el/sparkbench/nweight/model/user-features → .../src/main/resources/nweight-user-features b/...el/sparkbench/nweight/model/user-features → .../src/main/resources/nweight-user-features
diff --git a/...com/intel/sparkbench/nweight/Driver.scala → ...nch/sparkbench/graph/nweight/Driver.scala b/...com/intel/sparkbench/nweight/Driver.scala → ...nch/sparkbench/graph/nweight/Driver.scala
@@ -1,15 +1,26 @@
-package com.intel.sparkbench.nweight 
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.hibench.sparkbench.graph.nweight
 
 import org.apache.spark.{SparkContext, SparkConf}
-import org.apache.spark.SparkContext._
-import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.scheduler.{JobLogger, StatsReportListener}
 
-import com.esotericsoftware.kryo.{Kryo, Serializer => KSerializer}
-import com.esotericsoftware.kryo.io.{Input => KryoInput, Output => KryoOutput}
-import com.esotericsoftware.kryo.serializers.{JavaSerializer => KryoJavaSerializer}
-
 /** 
  * Compute NWeight for Graph G(V, E) as defined below
  *     Weight(1)(u, v) = edge(u, v)