Permalink
Fetching contributors…
Cannot retrieve contributors at this time
89 lines (81 sloc) 2.47 KB
# Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
#
# Cloudera, Inc. licenses this file to you under the Apache License,
# Version 2.0 (the "License"). You may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations under the
# License.
# A very basic example config file configuring only the essential elements to
# run a decision forest-based application.
# It's possible to specify reusable values:
# Comma-separated host:port list; substituted into oryx.input-topic.broker
# and oryx.update-topic.broker below.
kafka-brokers = "b03.example.com:9092,b04.example.com:9092"
# Comma-separated host:port list; substituted into the lock.master setting
# of both the input and update topics below.
zk-servers = "b01.example.com:2181,b02.example.com:2181"
# Base path; oryx.batch.storage.data-dir and model-dir are built by appending
# "/data/" and "/model/" to it.
hdfs-base = "hdfs:///user/example/Oryx"
oryx {
  id = "RDFRegressionExample"

  # The input and update topics share the same broker list and lock master,
  # both pulled in by substitution from the reusable values at the top of
  # this file.
  input-topic {
    broker = ${kafka-brokers}
    lock {
      master = ${zk-servers}
    }
  }
  update-topic {
    broker = ${kafka-brokers}
    lock {
      master = ${zk-servers}
    }
  }

  batch {
    streaming {
      generation-interval-sec = 300
      num-executors = 4
      executor-cores = 8
      executor-memory = "4g"
    }
    update-class = "com.cloudera.oryx.app.batch.mllib.rdf.RDFUpdate"
    # Both directories are rooted at hdfs-base via value concatenation
    storage {
      data-dir = ${hdfs-base}"/data/"
      model-dir = ${hdfs-base}"/model/"
    }
    ui {
      port = 4040
    }
  }

  speed {
    model-manager-class = "com.cloudera.oryx.app.speed.rdf.RDFSpeedModelManager"
    # Uses a different port from batch.ui.port (4040)
    ui {
      port = 4041
    }
  }

  serving {
    model-manager-class = "com.cloudera.oryx.app.serving.rdf.model.RDFServingModelManager"
    application-resources = "com.cloudera.oryx.app.serving,com.cloudera.oryx.app.serving.classreg,com.cloudera.oryx.app.serving.rdf"
    api {
      port = 8080
    }
  }

  # The schema depends on the input data; this example describes a fictional
  # dataset. All 8 names in feature-names (matching num-features) appear in
  # exactly one of the categorical or numeric lists, and the target, "rate",
  # is listed as numeric.
  input-schema {
    feature-names = ["county", "route", "start", "end", "length", "exposure", "threshold", "rate"]
    categorical-features = ["county", "route", "threshold"]
    numeric-features = ["start", "end", "length", "exposure", "rate"]
    target-feature = "rate"
    num-features = 8
  }

  # Decision forest (rdf) parameters. For regression, the impurity measure
  # must be variance.
  rdf {
    hyperparams {
      "max-depth" = 8
      "min-node-size" = 16
      "max-split-candidates" = 100
      impurity = variance
      "min-info-gain-nats" = 0.001
    }
    "num-trees" = 20
  }
}