; envelope-00.3.train.cfg
; Run-wide settings: the domain list, tracing switch and random seed.
[GENERAL]
; Single domain; the same name is repeated in [exec_config] domain.
domains = CamRestaurants
singledomain = True
; NOTE(review): 0 presumably disables dialogue tracing -- confirm against the consumer.
tracedialog = 0
; The same number is embedded in the [logging] file name (envelope-seed30061522-...);
; changing one without the other leaves the log name out of step with the seed.
seed = 30061522
; Execution parameters for the train/test run.
; NOTE(review): the consumer of this file is not visible here; the notes below only
; record relationships between values in this file. Confirm key semantics upstream.
[exec_config]
; Same domain name as [GENERAL] domains.
domain = CamRestaurants
configdir = _morlconfigs
; The [logging] file path points into this directory.
logfiledir = _morllogs
; 5 batches with 1000 dialogues per batch. The 1000 equals [morlpolicy] episode_num,
; and the 5 equals the trailing "-5" in the [logging] file name (00.1-5).
; NOTE(review): presumably the log name encodes the batch range 1-5 -- TODO confirm.
numtrainbatches = 5
traindialogsperbatch = 1000
numbatchtestdialogs = 10
trainsourceiteration = 0
numtestdialogs = 100
; Both error rates are 0. NOTE(review): the "00" in the policy and log names
; (envelope-00.x) presumably encodes this error rate -- TODO confirm.
trainerrorrate = 0
testerrorrate = 0
testeverybatch = False
; Logging output: separate levels for the screen and for the log file.
[logging]
usecolor = False
; NOTE(review): "results" and "dial" look like named log levels defined by the
; consumer; the set of valid level names is not visible in this file.
screen_level = results
file_level = dial
; Path sits under [exec_config] logfiledir (_morllogs) and embeds [GENERAL] seed (30061522).
; The name carries "00.1-5" while [policy] moves from envelope-00.2 to envelope-00.3,
; so this log is presumably shared by iterations 1 through 5 -- TODO confirm.
file = _morllogs/envelope-seed30061522-00.1-5.train.log
; Dialogue agent settings.
[agent]
; NOTE(review): presumably the cap on turns per dialogue -- confirm against the consumer.
maxturns = 25
; User-model settings.
; NOTE(review): presumably configures a simulated user (the config file is named
; sampledUM.cfg) -- confirm against the consumer.
[usermodel]
usenewgoalscenarios = True
; oldstylepatience is off while patience is set to 5.
; NOTE(review): how the two keys interact is not visible in this file.
oldstylepatience = False
patience = 5
; Relative path; the base directory it is resolved against is not visible in this file.
configfile = config/sampledUM.cfg
; Error-model settings.
[errormodel]
; NOTE(review): presumably the N-best list length (1 here) -- confirm.
nbestsize = 1
; Selects a generator and a confidence scorer by name; the available names are
; defined by the consumer and are not visible in this file.
nbestgeneratormodel = SampledNBestGenerator
confscorer = additive
; Summary-action settings. All three mask switches in this section
; (informmask, requestmask, byemask) are set to True.
[summaryacts]
maxinformslots = 5
informmask = True
requestmask = True
; NOTE(review): presumably a threshold used together with informmask -- confirm.
informcountaccepted = 4
byemask = True
; Policy selection and policy-file locations.
[policy]
; Both inpolicyfile and outpolicyfile below point into this directory.
policydir = _morlpolicies
belieftype = focus
useconfreq = False
learning = True
; NOTE(review): "morl" presumably selects the policy whose hyperparameters are in
; [morlpolicy]; the [gppolicy] / [gpsarsa] sections are also present in this file.
policytype = morl
startwithhello = False
; Input carries suffix 00.2 and output carries suffix 00.3.
; NOTE(review): presumably the run loads iteration 2 and saves iteration 3 -- confirm.
inpolicyfile = _morlpolicies/envelope-00.2
outpolicyfile = _morlpolicies/envelope-00.3
; Hyperparameters for the "morl" policy (see [policy] policytype = morl).
; NOTE(review): key meanings are defined by the consumer, which is not visible here;
; the notes below record only what this file shows.
[morlpolicy]
; NOTE(review): presumably the number of reward components -- confirm.
n_rew = 2
learning_rate = 0.001
; epsilon starts at 0.5 with epsilon_decay switched on; the decay schedule itself
; is not visible in this file.
epsilon = 0.5
epsilon_decay = True
gamma = 0.999
batch_size = 64
weight_num = 32
mem_size = 1000
; Same value as [exec_config] traindialogsperbatch (1000).
episode_num = 1000
optimizer = Adam
save_step = 100
update_freq = 50
training_freq = 1
; Matches the "envelope" prefix used in the policy and log file names.
algorithm = envelope
beta = 0.9
homotopy = True
; GP policy settings.
; NOTE(review): [policy] policytype is "morl", so this section is presumably not
; used by this run -- confirm before relying on it or removing it.
[gppolicy]
kernel = polysort
; GP-SARSA settings.
; NOTE(review): [policy] policytype is "morl", so this section is presumably not
; used by this run -- confirm before relying on it or removing it.
[gpsarsa]
random = False
scale = 3
; Evaluation / reward settings.
[eval]
rewardvenuerecommended = 0
penaliseallturns = True
; Both explicit penalties are 0 in this configuration.
wrongvenuepenalty = 0
notmentionedvaluepenalty = 0
; NOTE(review): "objective" is a consumer-defined measure name; alternatives are
; not visible in this file.
successmeasure = objective
successreward = 20