This repository was archived by the owner on May 3, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 405
/
Copy pathrdf-regression-example.conf
88 lines (81 loc) · 2.47 KB
/
rdf-regression-example.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Copyright (c) 2015, Cloudera, Inc. All Rights Reserved.
#
# Cloudera, Inc. licenses this file to you under the Apache License,
# Version 2.0 (the "License"). You may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations under the
# License.
# A very basic example config file configuring only the essential elements to
# run a decision forest-based application.
# It's possible to specify reusable values:
kafka-brokers = "b03.example.com:9092,b04.example.com:9092"
zk-servers = "b01.example.com:2181,b02.example.com:2181"
hdfs-base = "hdfs:///user/example/Oryx"
# Root of the application configuration. The ${...} references below are
# substitutions that resolve to the reusable values defined at the top of
# this file (kafka-brokers, zk-servers, hdfs-base).
oryx {
# Identifier for this deployment (presumably used to tell instances apart;
# confirm against the application's reference configuration).
id = "RDFRegressionExample"
# Topic carrying input; uses the shared Kafka broker list and ZooKeeper servers.
input-topic {
broker = ${kafka-brokers}
lock = {
master = ${zk-servers}
}
}
# Topic carrying updates; points at the same brokers and ZooKeeper servers
# as input-topic.
update-topic {
broker = ${kafka-brokers}
lock = {
master = ${zk-servers}
}
}
# Settings for the batch component.
batch {
streaming {
# Interval between generations; the key name indicates seconds (300 = 5 minutes).
generation-interval-sec = 300
# Executor sizing; presumably passed through to the underlying Spark job
# (TODO confirm).
num-executors = 4
executor-cores = 8
executor-memory = "4g"
}
# Fully-qualified class that implements the batch update.
update-class = "com.cloudera.oryx.app.batch.mllib.rdf.RDFUpdate"
storage {
# Value concatenation: with hdfs-base as defined above these resolve to
# "hdfs:///user/example/Oryx/data/" and "hdfs:///user/example/Oryx/model/".
data-dir = ${hdfs-base}"/data/"
model-dir = ${hdfs-base}"/model/"
}
ui {
# Differs from the speed component's UI port (4041) below.
port = 4040
}
}
# Settings for the speed component.
speed {
# Fully-qualified class of the speed component's model manager.
model-manager-class = "com.cloudera.oryx.app.speed.rdf.RDFSpeedModelManager"
ui {
port = 4041
}
}
# Settings for the serving component.
serving {
# Fully-qualified class of the serving component's model manager.
model-manager-class = "com.cloudera.oryx.app.serving.rdf.model.RDFServingModelManager"
# Comma-separated list of packages (presumably scanned for serving
# endpoints; confirm against the application's documentation).
application-resources = "com.cloudera.oryx.app.serving,com.cloudera.oryx.app.serving.classreg,com.cloudera.oryx.app.serving.rdf"
api {
port = 8080
}
}
# This depends on the input data; example is a fictional dataset
input-schema = {
# Eight feature names, matching num-features below.
feature-names = ["county", "route", "start", "end", "length", "exposure", "threshold", "rate"]
# The three categorical and five numeric features together cover every
# name in feature-names exactly once.
categorical-features = ["county", "route", "threshold"]
numeric-features = ["start", "end", "length", "exposure", "rate"]
# The target is listed among the numeric features, i.e. this is a
# regression problem.
target-feature = "rate"
num-features = 8
}
# Parameters for rdf; for regression you must specify variance as the impurity measure
rdf {
# Quoted keys such as "max-depth" are equivalent to their unquoted form.
hyperparams {
"max-depth" = 8
"min-node-size" = 16
"max-split-candidates" = 100
impurity = variance
# Key name indicates the minimum information gain is expressed in nats.
"min-info-gain-nats" = 0.001
}
# Number of trees in the forest.
"num-trees" = 20
}
}