/
sampler.proto
70 lines (63 loc) · 2.61 KB
/
sampler.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
// This file defines the protos for specifying CLgen samplers.
//
// Please ignore the "optional" proto syntax, ALL FIELDS MUST BE SET.
// This is to ensure consistent hashing of proto instances to a unique checksum,
// as default values are omitted in serialized protos. Unfortunately, this
// means setting a value for any new field in all of the proto files across this
// entire repository (and any which are not tracked in this repo).
//
// Copyright (c) 2016-2020 Chris Cummins.
//
// clgen is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// clgen is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with clgen. If not, see <https://www.gnu.org/licenses/>.
// Schema syntax version and the proto package that scopes the messages
// defined in this file.
syntax = "proto2";
package clgen;
// Code-generation options for the Go and Java targets.
option go_package = "clgenpb";
option java_multiple_files = true;
option java_outer_classname = "SamplerProto";
option java_package = "com.clgen";
// Configuration describing a single CLgen sampler.
message Sampler {
  // The initial text used to seed the language model. Every sample begins
  // with this text.
  optional string start_text = 1;

  // The sampling batch size.
  // TODO(cec): Always sample with max batch size.
  optional int32 batch_size = 2;

  // The length of sampling sequences.
  optional int32 sequence_length = 5;

  // The sampling temperature, expressed in millionths. Must be >= 0. A
  // recommended starting value is 1000000 (i.e. 1.0 in real values).
  optional int32 temperature_micros = 3;

  // The criteria that decide when a sample is terminated, listed in the order
  // in which they will be executed. Duplicates are allowed, for example to
  // have symmetrical token depth counters for two pairs of tokens.
  repeated SampleTerminationCriterion termination_criteria = 4;
}
// A single criterion used for determining when to stop sampling.
message SampleTerminationCriterion {
  // The kind of criterion. As a oneof, at most one member is set at a time.
  oneof criterion {
    // Terminate based on a maximum number of tokens (see MaxTokenLength).
    MaxTokenLength maxlen = 1;
    // Terminate based on a pair of depth-changing tokens (see
    // SymmetricalTokenDepth).
    SymmetricalTokenDepth symtok = 2;
  }
}
// Termination criterion that bounds the length of a sample.
message MaxTokenLength {
  // Upper bound on the length of a sample, measured in tokens. The tokens of
  // Sampler.start_text are included in this count.
  optional int32 maximum_tokens_in_sample = 1;
}
// Termination criterion described by a pair of tokens: one that increases a
// depth counter and one that decreases it.
// NOTE(review): the exact stopping condition (presumably when the depth
// returns to zero) is not defined in this file; confirm against the sampler
// implementation.
message SymmetricalTokenDepth {
  // The token that increases the depth.
  optional string depth_increase_token = 1;
  // The token that decreases the depth.
  optional string depth_decrease_token = 2;
}