# HiBench common settings.
# Format: one `key value` pair per line, key and value separated by whitespace.
# Lines starting with `#` are comments. `${other.key}` refers to another setting.

# Data scale profile. Available values are tiny, small, large, huge, gigantic and bigdata.
# The definition of these profiles can be found in the workload's conf file, e.g. conf/workloads/micro/wordcount.conf
hibench.scale.profile tiny
# Mapper number in hadoop, partition number in Spark
hibench.default.map.parallelism 500
# Reducer number in hadoop, shuffle partition number in Spark
hibench.default.shuffle.parallelism 500

#======================================================
# Report files
#======================================================
# default report formats
hibench.report.formats "%-12s %-10s %-8s %-20s %-20s %-20s %-20s\n"
# default report dir path
hibench.report.dir ${hibench.home}/report
# default report file name
hibench.report.name hibench.report
# input/output format settings. Available formats: Text, Sequence, Null.
sparkbench.inputformat Sequence
sparkbench.outputformat Sequence
# hibench config folder
hibench.configure.dir ${hibench.home}/conf
# default hibench HDFS root
hibench.hdfs.data.dir ${hibench.hdfs.master}/HiBench
# path of hibench jars
hibench.hibench.datatool.dir ${hibench.home}/autogen/target/autogen-8.0-SNAPSHOT-jar-with-dependencies.jar
hibench.common.jar ${hibench.home}/common/target/hibench-common-8.0-SNAPSHOT-jar-with-dependencies.jar
hibench.sparkbench.jar ${hibench.home}/sparkbench/assembly/target/sparkbench-assembly-8.0-SNAPSHOT-dist.jar
hibench.streambench.stormbench.jar ${hibench.home}/stormbench/streaming/target/stormbench-streaming-8.0-SNAPSHOT.jar
hibench.streambench.gearpump.jar ${hibench.home}/gearpumpbench/streaming/target/gearpumpbench-streaming-8.0-SNAPSHOT-jar-with-dependencies.jar
hibench.streambench.flinkbench.jar ${hibench.home}/flinkbench/streaming/target/flinkbench-streaming-8.0-SNAPSHOT-jar-with-dependencies.jar
# NOTE(review): absolute, machine-specific path pinned to Hadoop 3.2.2; it must be
# adjusted on any other host or Hadoop version.
hibench.hadoop.examples.test.jar /home/ruroy/hadoop/op/hadoop-3.2.2/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.2.2-tests.jar

#======================================================
# workload home/input/output path
#======================================================
hibench.hive.home ${hibench.home}/hadoopbench/sql/target/${hibench.hive.release}
hibench.hive.release apache-hive-3.0.0-bin
hibench.hivebench.template.dir ${hibench.home}/hadoopbench/sql/hive_template
hibench.bayes.dir.name.input ${hibench.workload.dir.name.input}
hibench.bayes.dir.name.output ${hibench.workload.dir.name.output}
hibench.mahout.release.apache apache-mahout-distribution-0.11.0
hibench.mahout.release.hdp apache-mahout-distribution-0.11.0
hibench.mahout.release.cdh5 mahout-0.9-cdh5.1.0
# selects one of the hibench.mahout.release.* entries above via ${hibench.hadoop.release}
hibench.mahout.release ${hibench.mahout.release.${hibench.hadoop.release}}
# NOTE(review): site-specific addresses; master and slave are the same host here.
hibench.masters.hostnames 10.10.15.78
hibench.slaves.hostnames 10.10.15.78
# no value is set for the next two keys in this file
# (presumably supplied elsewhere, e.g. per workload -- verify)
hibench.workload.input
hibench.workload.output
hibench.workload.dir.name.input Input
hibench.workload.dir.name.output Output
hibench.nutch.dir.name.input ${hibench.workload.dir.name.input}
hibench.nutch.dir.name.output ${hibench.workload.dir.name.output}
hibench.nutch.nutchindexing.dir ${hibench.home}/hadoopbench/nutchindexing/
hibench.nutch.release nutch-1.2
hibench.nutch.home ${hibench.home}/hadoopbench/nutchindexing/target/${hibench.nutch.release}
hibench.dfsioe.dir.name.input ${hibench.workload.dir.name.input}
hibench.dfsioe.dir.name.output ${hibench.workload.dir.name.output}

#======================================================
# Streaming General
#======================================================
# Indicate whether in debug mode for correctness verification (default: false)
hibench.streambench.debugMode false
hibench.streambench.sampleProbability 0.1
hibench.streambench.fixWindowDuration 10000
hibench.streambench.fixWindowSlideStep 10000

#======================================================
# Kafka for streaming benchmarks
#======================================================
# NOTE(review): placeholder value; replace with the real Kafka installation path.
hibench.streambench.kafka.home /PATH/TO/YOUR/KAFKA/HOME
# zookeeper host:port of kafka cluster, host1:port1,host2:port2...
hibench.streambench.zkHost
# Kafka broker lists, written in mode host:port,host:port,..
hibench.streambench.kafka.brokerList
# number of partitions of generated topic (default 20)
hibench.streambench.kafka.topicPartitions 20
# consumer group of the consumer for kafka (default: HiBench)
hibench.streambench.kafka.consumerGroup HiBench
# Set the starting offset of kafkaConsumer (default: largest)
hibench.streambench.kafka.offsetReset largest

#======================================================
# Data generator for streaming benchmarks
#======================================================
# Interval span in millisecond (default: 50)
hibench.streambench.datagen.intervalSpan 50
# Number of records to generate per interval span (default: 5)
hibench.streambench.datagen.recordsPerInterval 5
# fixed length of record (default: 200)
hibench.streambench.datagen.recordLength 200
# Number of KafkaProducer running on different thread (default: 1)
hibench.streambench.datagen.producerNumber 1
# Total round count of data send (default: -1 means infinity)
hibench.streambench.datagen.totalRounds -1
# Number of total records that will be generated (default: -1 means infinity)
hibench.streambench.datagen.totalRecords -1
# default path to store seed files (default: ${hibench.hdfs.data.dir}/Streaming)
hibench.streambench.datagen.dir ${hibench.hdfs.data.dir}/Streaming
# default path setting for generated data1 & data2
hibench.streambench.datagen.data1.name Seed
hibench.streambench.datagen.data1.dir ${hibench.streambench.datagen.dir}/${hibench.streambench.datagen.data1.name}
hibench.streambench.datagen.data2_cluster.dir ${hibench.streambench.datagen.dir}/Kmeans/Cluster
hibench.streambench.datagen.data2_samples.dir ${hibench.streambench.datagen.dir}/Kmeans/Samples

#======================================================
# MetricsReader for streaming benchmarks
#======================================================
# Number of sample records for `MetricsReader` (default: 5000000)
hibench.streambench.metricsReader.sampleNum 5000000
# Number of thread for `MetricsReader` (default: 20)
hibench.streambench.metricsReader.threadNum 20
# The dir where stored the report of benchmarks (default: ${hibench.home}/report)
hibench.streambench.metricsReader.outputDir ${hibench.home}/report

# Mahout home; built from hibench.mahout.release, which is defined in the
# workload path section of this file.
hibench.mahout.home ${hibench.home}/hadoopbench/mahout/target/${hibench.mahout.release}