/
sg_env.go
388 lines (355 loc) · 11.9 KB
/
sg_env.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
// Copyright (c) 2019, The Emergent Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"fmt"
"log"
"math/rand"
"strings"
"github.com/emer/emergent/env"
"github.com/emer/emergent/erand"
"github.com/emer/emergent/esg"
"github.com/emer/etable/etensor"
)
// SentGenEnv generates sentences using a grammar that is parsed from a
// text file. The core of the grammar is rules with various items
// chosen at random during generation -- these items can be
// more rules or terminal tokens.
//
// Each generated sentence is expanded into a sequence of inputs
// (SentInputs), where every entry holds four strings: the word, the role
// being queried, the filler for that role, and a question-type status
// ("curq" or "revq"). The current entry is rendered into the WordState,
// RoleState and FillerState tensors by setting a single unit to 1 in each.
type SentGenEnv struct {
	Nm            string            `desc:"name of this environment"`
	Dsc           string            `desc:"description of this environment"`
	Rules         esg.Rules         `desc:"core sent-gen rules -- loaded from a grammar / rules file -- Gen() here generates one sentence"`
	PPassive      float64           `desc:"probability of generating passive sentence forms"`
	WordTrans     map[string]string `desc:"translate unambiguous words into ambiguous words"`
	Words         []string          `desc:"list of words used for activating state units according to index"`
	WordMap       map[string]int    `desc:"map of words onto index in Words list"`
	Roles         []string          `desc:"list of roles used for activating state units according to index"`
	RoleMap       map[string]int    `desc:"map of roles onto index in Roles list"`
	Fillers       []string          `desc:"list of filler concepts used for activating state units according to index"`
	FillerMap     map[string]int    `desc:"map of fillers onto index in Fillers list"`
	AmbigVerbs    []string          `desc:"ambiguous verbs"`
	AmbigNouns    []string          `desc:"ambiguous nouns"`
	AmbigVerbsMap map[string]int    `desc:"map of ambiguous verbs"`
	AmbigNounsMap map[string]int    `desc:"map of ambiguous nouns"`
	CurSentOrig   []string          `desc:"original current sentence as generated from Rules"`
	CurSent       []string          `desc:"current sentence, potentially transformed to passive form"`
	NAmbigNouns   int               `desc:"number of ambiguous nouns"`
	NAmbigVerbs   int               `desc:"number of ambiguous verbs (0 or 1)"`
	SentInputs    [][]string        `desc:"generated sequence of sentence inputs including role-filler queries"`
	SentIdx       env.CurPrvInt     `desc:"current index within sentence inputs"`
	QType         string            `desc:"current question type -- from 4th value of SentInputs"`
	WordState     etensor.Float32   `desc:"current sentence activation state"`
	RoleState     etensor.Float32   `desc:"current role query activation state"`
	FillerState   etensor.Float32   `desc:"current filler query activation state"`
	Run           env.Ctr           `view:"inline" desc:"current run of model as provided during Init"`
	Epoch         env.Ctr           `view:"inline" desc:"number of times through Seq.Max number of sequences"`
	Seq           env.Ctr           `view:"inline" desc:"sequence counter within epoch"`
	Tick          env.Ctr           `view:"inline" desc:"tick counter within sequence"`
	Trial         env.Ctr           `view:"inline" desc:"trial is the step counter within sequence - how many steps taken within current sequence -- it resets to 0 at start of each sequence"`
}
// Name returns the name of this environment, as stored in the Nm field.
func (ev *SentGenEnv) Name() string {
	return ev.Nm
}
// Desc returns the description of this environment, as stored in the Dsc field.
func (ev *SentGenEnv) Desc() string {
	return ev.Dsc
}
// Validate calls Validate on the sentence-generation Rules.
// NOTE(review): whatever ev.Rules.Validate() reports is not inspected here,
// so this method always returns nil -- confirm that callers do not rely on
// the returned error to detect a bad grammar.
func (ev *SentGenEnv) Validate() error {
ev.Rules.Validate()
return nil
}
// Counters lists the time scales for which this environment keeps a counter:
// Run, Epoch, Sequence, Tick and Trial, in that order.
func (ev *SentGenEnv) Counters() []env.TimeScales {
	scales := []env.TimeScales{
		env.Run,
		env.Epoch,
		env.Sequence,
		env.Tick,
		env.Trial,
	}
	return scales
}
// States describes the state elements this environment provides: "Input",
// "Role" and "Filler". Each is one-dimensional, sized by the current length
// of the Words, Roles and Fillers lists respectively.
func (ev *SentGenEnv) States() env.Elements {
	return env.Elements{
		{"Input", []int{len(ev.Words)}, nil},
		{"Role", []int{len(ev.Roles)}, nil},
		{"Filler", []int{len(ev.Fillers)}, nil},
	}
}
// State returns the tensor holding the current state for the named element:
// "Input" gives the word state, "Role" the role query state and "Filler" the
// filler query state. Any other name returns nil.
func (ev *SentGenEnv) State(element string) etensor.Tensor {
	if element == "Input" {
		return &ev.WordState
	}
	if element == "Role" {
		return &ev.RoleState
	}
	if element == "Filler" {
		return &ev.FillerState
	}
	return nil
}
// Actions returns nil: this environment defines no action elements.
func (ev *SentGenEnv) Actions() env.Elements {
	return nil
}
// OpenRulesFromAsset loads the grammar rules from the named asset embedded
// in the executable (retrieved via Asset) and reads them into ev.Rules.
//
// If the asset cannot be retrieved the error is logged and the method
// returns without touching ev.Rules, instead of going on to parse an
// empty buffer.
func (ev *SentGenEnv) OpenRulesFromAsset(fnm string) {
	ab, err := Asset(fnm) // embedded in executable
	if err != nil {
		log.Println(err)
		return
	}
	ev.Rules.ReadRules(bytes.NewBuffer(ab))
}
// Init prepares the environment for the given run: every counter is tagged
// with its time scale and reset, the run counter is set to run, the rules
// are initialized, the word/role/filler lookup maps are rebuilt, and the
// three state tensors are shaped to match the Words, Roles and Fillers lists.
func (ev *SentGenEnv) Init(run int) {
	// Tag each counter with its time scale, then reset it.
	ev.Run.Scale = env.Run
	ev.Run.Init()
	ev.Epoch.Scale = env.Epoch
	ev.Epoch.Init()
	ev.Seq.Scale = env.Sequence
	ev.Seq.Init()
	ev.Tick.Scale = env.Tick
	ev.Tick.Init()
	ev.Trial.Scale = env.Trial
	ev.Trial.Init()

	ev.Run.Cur = run
	ev.Trial.Cur = -1 // init state -- key so that first Step() = 0
	ev.SentIdx.Set(-1) // negative index makes NextState generate a sentence first

	ev.Rules.Init()
	ev.MapsFmWords()

	nWords, nRoles, nFillers := len(ev.Words), len(ev.Roles), len(ev.Fillers)
	ev.WordState.SetShape([]int{nWords}, nil, []string{"Words"})
	ev.RoleState.SetShape([]int{nRoles}, nil, []string{"Roles"})
	ev.FillerState.SetShape([]int{nFillers}, nil, []string{"Fillers"})
}
// MapsFmWords rebuilds the string-to-index lookup maps (WordMap, RoleMap,
// FillerMap, AmbigVerbsMap, AmbigNounsMap) from their corresponding lists.
// Each map is freshly allocated; if a string occurs more than once in a
// list, the map keeps its last index.
func (ev *SentGenEnv) MapsFmWords() {
	// indexOf maps every item of a list to its position in that list.
	indexOf := func(items []string) map[string]int {
		idx := make(map[string]int, len(items))
		for pos, item := range items {
			idx[item] = pos
		}
		return idx
	}
	ev.WordMap = indexOf(ev.Words)
	ev.RoleMap = indexOf(ev.Roles)
	ev.FillerMap = indexOf(ev.Fillers)
	ev.AmbigVerbsMap = indexOf(ev.AmbigVerbs)
	ev.AmbigNounsMap = indexOf(ev.AmbigNouns)
}
// CurInputs returns the SentInputs entry at the current sentence index
// (word, role, filler, question type), or nil when that index lies outside
// SentInputs.
func (ev *SentGenEnv) CurInputs() []string {
	idx := ev.SentIdx.Cur
	if idx < 0 || idx >= len(ev.SentInputs) {
		return nil
	}
	return ev.SentInputs[idx]
}
// String formats the current inputs as "word role=filler qtype".
// It returns the empty string when there are no current inputs.
func (ev *SentGenEnv) String() string {
	in := ev.CurInputs()
	if in == nil {
		return ""
	}
	return fmt.Sprintf("%s %s=%s %s", in[0], in[1], in[2], in[3])
}
// NextSent generates the next sentence from the Rules, updates the
// ambiguity statistics, resets the sentence index to 0 and builds the
// full sequence of inputs and queries for the sentence.
//
// The passive sequence is used when the rules set the "Case" state to
// "Passive". When the rules set no "Case" state at all, the form is drawn
// at random using PPassive. In every other case the active sequence is used.
func (ev *SentGenEnv) NextSent() {
	// ev.Rules.Trace = true
	ev.CurSent = ev.Rules.Gen()
	// fmt.Printf("%v\n", ev.CurSent)
	ev.Rules.States.TrimQualifiers()
	ev.SentStats()
	ev.SentIdx.Set(0)

	// An explicit "Case" state wins; the random draw happens only without one.
	var passive bool
	if form, ok := ev.Rules.States["Case"]; ok {
		passive = form == "Passive"
	} else {
		passive = erand.BoolProb(ev.PPassive, -1)
	}

	if passive {
		ev.SentSeqPassive()
		return
	}
	ev.SentSeqActive()
}
// TransWord lower-cases word and returns its translation from WordTrans.
// When WordTrans has no entry for it, the lower-cased word itself is returned.
func (ev *SentGenEnv) TransWord(word string) string {
	lower := strings.ToLower(word)
	trans, found := ev.WordTrans[lower]
	if !found {
		return lower
	}
	return trans
}
// SentStats recounts how many words of the current sentence, after
// translation by TransWord, appear in the ambiguous-verb and
// ambiguous-noun maps, and stores the totals in NAmbigVerbs and
// NAmbigNouns. A word present in both maps is counted in both totals.
func (ev *SentGenEnv) SentStats() {
	nouns, verbs := 0, 0
	for i := range ev.CurSent {
		word := ev.TransWord(ev.CurSent[i])
		if _, isVerb := ev.AmbigVerbsMap[word]; isVerb {
			verbs++
		}
		if _, isNoun := ev.AmbigNounsMap[word]; isNoun {
			nouns++
		}
	}
	ev.NAmbigNouns = nouns
	ev.NAmbigVerbs = verbs
}
// CheckWords verifies that wrd, role and fill are keys of WordMap, RoleMap
// and FillerMap respectively. One error is produced per missing entry, in
// that order; every error is printed to standard output and the collected
// errors are returned. The result is nil when all three are found.
func (ev *SentGenEnv) CheckWords(wrd, role, fill string) []error {
	var problems []error
	if _, known := ev.WordMap[wrd]; !known {
		problems = append(problems, fmt.Errorf("word not found in WordMap: %s, sent: %v", wrd, ev.CurSent))
	}
	if _, known := ev.RoleMap[role]; !known {
		problems = append(problems, fmt.Errorf("word not found in RoleMap: %s, sent: %v", role, ev.CurSent))
	}
	if _, known := ev.FillerMap[fill]; !known {
		problems = append(problems, fmt.Errorf("word not found in FillerMap: %s, sent: %v", fill, ev.CurSent))
	}
	// Ranging over a nil slice is a no-op, so no emptiness guard is needed.
	for _, problem := range problems {
		fmt.Println(problem)
	}
	return problems
}
// NewInputs discards any existing sentence inputs and starts a fresh,
// empty SentInputs list with room for 16 entries.
func (ev *SentGenEnv) NewInputs() {
	fresh := make([][]string, 0, 16)
	ev.SentInputs = fresh
}
// AddRawInput appends one entry to SentInputs exactly as given, with no
// translation or checking: the word, the role queried, the filler for that
// role, and the status string.
func (ev *SentGenEnv) AddRawInput(word, role, fill, stat string) {
	entry := []string{word, role, fill, stat}
	ev.SentInputs = append(ev.SentInputs, entry)
}
// AddInput appends an input for the word at index sidx of the current
// sentence, paired with a query on the given role. The word is passed
// through TransWord, the filler is looked up in the rule states under
// role, and the triple is checked with CheckWords before being added.
// stat is an extra status var: "revq" or "curq" (review question, vs. current question).
// sidx must be a valid index into CurSent.
func (ev *SentGenEnv) AddInput(sidx int, role string, stat string) {
	word := ev.TransWord(ev.CurSent[sidx])
	filler := ev.Rules.States[role]
	ev.CheckWords(word, role, filler) // reports unknown entries; the input is added regardless
	ev.AddRawInput(word, role, filler, stat)
}
// AddQuestion appends an input whose word is the literal "question",
// paired with a query on the given role. The filler is looked up in the
// rule states under role, the triple is checked with CheckWords, and the
// entry is always marked as a "revq".
func (ev *SentGenEnv) AddQuestion(role string) {
	const questionWord = "question"
	filler := ev.Rules.States[role]
	ev.CheckWords(questionWord, role, filler)
	ev.AddRawInput(questionWord, role, filler, "revq")
}
// SentSeqActive builds the input sequence for the active sentence form,
// with incremental review questions.
//
// The sequence opens with a raw "start" input. Words 0, 1 and 2 of the
// current sentence are then presented with current questions on Agent,
// Action and Patient, with a review question on the previous role after
// words 1 and 2. A three-word sentence ends there. Otherwise each
// remaining word except the last gets a review question on a randomly
// chosen one of those three roles. The last word gets a current question
// on the role named by the "Mod" rule state, followed by a review question
// on a random role -- or on the role named by the "FinalQ" rule state when
// that state is set and matches one of the four candidate roles.
func (ev *SentGenEnv) SentSeqActive() {
	ev.NewInputs()
	ev.AddRawInput("start", "Action", "None", "curq") // start question helps in long run!
	mod := ev.Rules.States["Mod"]
	roles := []string{"Agent", "Action", "Patient", mod}

	// Core words, with the additional review questions that are key for revq perf.
	ev.AddInput(0, roles[0], "curq")
	ev.AddInput(1, roles[1], "curq")
	ev.AddInput(1, "Agent", "revq")
	ev.AddInput(2, roles[2], "curq")
	ev.AddInput(2, "Action", "revq")

	nWords := len(ev.CurSent)
	if nWords == 3 {
		return
	}
	last := nWords - 1

	// Modifier words before the last one: review question on a random core role.
	for wi := 3; wi < last; wi++ {
		pick := rand.Intn(3) // choose a role to query at random
		ev.AddInput(wi, roles[pick], "revq")
	}
	ev.AddInput(last, mod, "curq")

	// The random draw is made even when FinalQ ends up overriding it.
	final := rand.Intn(3)
	if want, ok := ev.Rules.States["FinalQ"]; ok {
		for i, r := range roles {
			if r == want {
				final = i
				break
			}
		}
	}
	ev.AddInput(last, roles[final], "revq")
}
// SentSeqPassive builds the input sequence for the passive sentence form,
// with incremental review questions.
//
// The current sentence is stored in active word order, so the words are
// presented as patient (index 2), "was", action (index 1), "by", agent
// (index 0). The "was" and "by" inputs double as review questions on the
// role just presented. A three-word sentence ends there, matching
// SentSeqActive: it has no modifier words, so the final modifier query
// must not be issued against the patient word. Otherwise each remaining
// word except the last gets a review question on a randomly chosen core
// role, and the last word gets a current question on the role named by
// the "Mod" rule state followed by a review question on a random core role.
func (ev *SentGenEnv) SentSeqPassive() {
	ev.NewInputs()
	ev.AddRawInput("start", "Action", "None", "curq") // start question helps in long run!
	mod := ev.Rules.States["Mod"]
	seq := []string{"Agent", "Action", "Patient", mod}
	ev.AddInput(2, "Patient", "curq") // 2 = patient word in active form
	ev.AddRawInput("was", "Patient", ev.Rules.States["Patient"], "revq")
	ev.AddInput(1, "Action", "curq") // 1 = action word in active form
	ev.AddRawInput("by", "Action", ev.Rules.States["Action"], "revq")
	ev.AddInput(0, "Agent", "curq") // 0 = agent word in active form
	// note: we already get review questions for free with was and by

	slen := len(ev.CurSent)
	if slen == 3 {
		// No modifier words: stop here, as the active form does.
		return
	}
	// get any modifier words with random query
	for si := 3; si < slen-1; si++ {
		ri := rand.Intn(3) // choose a role to query at random
		ev.AddInput(si, seq[ri], "revq")
	}
	ev.AddInput(slen-1, mod, "curq")
	ri := rand.Intn(3) // choose a role to query at random
	// ev.AddQuestion(seq[ri])
	ev.AddInput(slen-1, seq[ri], "revq")
}
// RenderState writes the current inputs into the state tensors. All three
// tensors are zeroed first. If there are current inputs, the unit indexed
// by the word, role and filler (via WordMap, RoleMap and FillerMap) is set
// to 1 in the matching tensor and QType takes the entry's fourth value.
// A string missing from its map resolves to index 0. With no current
// inputs the tensors stay zeroed and QType is left unchanged.
func (ev *SentGenEnv) RenderState() {
	ev.WordState.SetZeros()
	ev.RoleState.SetZeros()
	ev.FillerState.SetZeros()

	in := ev.CurInputs()
	if in == nil {
		return
	}
	ev.WordState.SetFloat1D(ev.WordMap[in[0]], 1)
	ev.RoleState.SetFloat1D(ev.RoleMap[in[1]], 1)
	ev.FillerState.SetFloat1D(ev.FillerMap[in[2]], 1)
	ev.QType = in[3]
}
// NextState advances to the next input and renders it. A negative sentence
// index means no sentence has been generated yet, so one is generated;
// otherwise the index is incremented. If the index then lies at or beyond
// the end of SentInputs, a new sentence is generated.
func (ev *SentGenEnv) NextState() {
	if ev.SentIdx.Cur >= 0 {
		ev.SentIdx.Incr()
	} else {
		ev.NextSent()
	}
	if len(ev.SentInputs) <= ev.SentIdx.Cur {
		ev.NextSent() // ran off the end of the current sentence
	}
	ev.RenderState()
}
// Step advances the environment by one input and updates the counters.
// Trial and Tick are incremented on every step. When the step lands on the
// first input of a sentence (sentence index 0), Tick is re-initialized and
// Seq is incremented; if that increment reports true, Epoch is incremented
// as well. Step always returns true.
func (ev *SentGenEnv) Step() bool {
	ev.Epoch.Same() // good idea to just reset all non-inner-most counters at start
	ev.NextState()
	ev.Trial.Incr()
	ev.Tick.Incr()

	if ev.SentIdx.Cur != 0 {
		return true
	}
	// Start of a new sentence.
	ev.Tick.Init()
	if ev.Seq.Incr() {
		ev.Epoch.Incr()
	}
	return true
}
// Action does nothing: both arguments are ignored.
func (ev *SentGenEnv) Action(element string, input etensor.Tensor) {
	// nop
}
// Counter returns the result of Query on the counter kept for the given
// time scale: Run, Epoch, Sequence, Tick or Trial. For any other scale
// it returns -1, -1, false.
func (ev *SentGenEnv) Counter(scale env.TimeScales) (cur, prv int, chg bool) {
	var ctr *env.Ctr
	switch scale {
	case env.Run:
		ctr = &ev.Run
	case env.Epoch:
		ctr = &ev.Epoch
	case env.Sequence:
		ctr = &ev.Seq
	case env.Tick:
		ctr = &ev.Tick
	case env.Trial:
		ctr = &ev.Trial
	default:
		return -1, -1, false
	}
	return ctr.Query()
}
// Compile-time check that *SentGenEnv implements the env.Env interface:
// the build fails on this line if a required method is missing or has
// a mismatched signature.
var _ env.Env = (*SentGenEnv)(nil)