-
Notifications
You must be signed in to change notification settings - Fork 0
/
grammar.go
129 lines (115 loc) · 3.57 KB
/
grammar.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
package search
import (
"bufio"
"fmt"
"os"
"path"
"path/filepath"
"regexp"
"strings"
"github.com/pkg/errors"
"gopkg.in/olivere/elastic.v6"
"github.com/Bnei-Baruch/archive-backend/consts"
)
// Grammar holds all patterns read from grammar files for one language and one
// intent, together with the resources attached to it when it was created.
type Grammar struct {
// HitType is the <hit-type> part of the "<hit-type>.grammar" file name the
// grammar was first read from.
HitType string
// Language is the language field of the grammar lines ("<lang>,<intent> => <pattern>").
Language string
// Intent is the intent field of the grammar lines.
Intent string
// Patterns has one entry per pattern line; each entry is the token sequence
// MakeTokensFromPhrase produced for that pattern.
Patterns [][]*TokenNode
// Filters is the consts.GRAMMAR_INTENTS_TO_FILTER_VALUES entry for Intent.
Filters map[string][]string
// Esc is the Elasticsearch client that was passed to the grammar reader.
Esc *elastic.Client
// Variables is the per-language entry of the VariablesByLang passed to the
// grammar reader.
Variables VariablesByName
}
// Grammars indexes grammars first by language and then by intent, i.e.
// grammars[lang][intent].
type Grammars = map[string]map[string]*Grammar
// FoldGrammars merges second into first, modifying first in place.
//
// For every (language, intent) pair in second:
//   - if first has no grammar for the pair, the *Grammar from second is stored
//     in first as-is (the pointer is shared, not copied);
//   - otherwise the patterns from second are appended to the Patterns of the
//     grammar already in first; its other fields are left untouched.
//
// A language whose intent map in second is empty is not added to first.
func FoldGrammars(first Grammars, second Grammars) {
	for lang, incomingByIntent := range second {
		for intent, incoming := range incomingByIntent {
			// Create the per-language map lazily, only once there is
			// something to put into it.
			byIntent, known := first[lang]
			if !known {
				byIntent = make(map[string]*Grammar)
				first[lang] = byIntent
			}

			current, known := byIntent[intent]
			if !known {
				byIntent[intent] = incoming
				continue
			}
			current.Patterns = append(current.Patterns, incoming.Patterns...)
		}
	}
}
// ReadGrammarFile parses a single grammar file and returns the grammars it
// defines, keyed by language and then by intent.
//
// The base name of grammarFile must have the form "<hit-type>.grammar" with a
// non-empty <hit-type>; that part becomes HitType of every Grammar created
// from the file. Lines that are blank or start with '#' (after trimming
// surrounding whitespace) are ignored. Every other line must look like
//
//	<lang>,<intent> => <pattern>
//
// with all three parts non-empty. The pattern is turned into tokens with
// MakeTokensFromPhrase and appended to Patterns of the grammar for that
// language and intent. The grammar is created on first use, with Filters taken
// from consts.GRAMMAR_INTENTS_TO_FILTER_VALUES[intent], Esc set to esc and
// Variables set to variables[lang].
//
// An error is returned when the file name is malformed, the file cannot be
// opened or read, a line cannot be parsed, an intent has no filters
// configured, or tokenization of a pattern fails. Line numbers in error
// messages are 1-based and count every line of the file, including comments
// and blank lines.
func ReadGrammarFile(grammarFile string, esc *elastic.Client, tc *TokensCache, variables VariablesByLang) (Grammars, error) {
	// The dot is escaped so only a literal ".grammar" suffix is accepted, and
	// the hit type must contain at least one character.
	fileNameRe := regexp.MustCompile(`^(.+)\.grammar$`)
	nameMatch := fileNameRe.FindStringSubmatch(path.Base(grammarFile))
	if len(nameMatch) != 2 {
		return nil, errors.New(fmt.Sprintf("Bad grammar file: %s, expected: <hit-type>.grammar", grammarFile))
	}
	hitType := nameMatch[1]

	file, err := os.Open(grammarFile)
	if err != nil {
		return nil, errors.Wrapf(err, "Error reading grammar file: %s", grammarFile)
	}
	defer file.Close()

	// Compiled once per file instead of once per line.
	lineRe := regexp.MustCompile(`^(.*),(.*) => (.*)$`)

	grammars := make(Grammars)
	scanner := bufio.NewScanner(file) // splits on lines by default
	lineNum := 0
	for scanner.Scan() {
		// Count every physical line, including the ones skipped below, so
		// that reported positions match what an editor shows.
		lineNum++

		line := strings.TrimSpace(scanner.Text())
		// Ignore comments and empty lines.
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		fields := lineRe.FindStringSubmatch(line)
		if len(fields) != 4 || fields[1] == "" || fields[2] == "" || fields[3] == "" {
			return nil, errors.New(fmt.Sprintf("[%s:%d] Error reading pattern: [%s]", grammarFile, lineNum, line))
		}
		lang, intent, pattern := fields[1], fields[2], fields[3]

		byIntent, ok := grammars[lang]
		if !ok {
			byIntent = make(map[string]*Grammar)
			grammars[lang] = byIntent
		}

		grammar, ok := byIntent[intent]
		if !ok {
			filters, filterExist := consts.GRAMMAR_INTENTS_TO_FILTER_VALUES[intent]
			if !filterExist {
				return nil, errors.New(fmt.Sprintf("[%s:%d] Filters not found for intent: [%s]", grammarFile, lineNum, intent))
			}
			grammar = &Grammar{
				HitType:   hitType,
				Language:  lang,
				Intent:    intent,
				Patterns:  [][]*TokenNode{},
				Filters:   filters,
				Esc:       esc,
				Variables: variables[lang],
			}
			byIntent[intent] = grammar
		}

		tokens, err := MakeTokensFromPhrase(pattern, lang, esc, tc)
		if err != nil {
			return nil, errors.Wrapf(err, "Error generating tokens from pattern: [%s] in %s.", pattern, lang)
		}
		grammar.Patterns = append(grammar.Patterns, tokens)
	}
	// Scan returns false both at EOF and on failure; tell the two apart here.
	if err := scanner.Err(); err != nil {
		return nil, errors.Wrapf(err, "Error reading grammar file: %s", grammarFile)
	}
	return grammars, nil
}
// MakeGrammars reads every file matching "*.grammar" directly inside
// grammarsDir with ReadGrammarFile and folds the results into one Grammars
// value using FoldGrammars.
//
// esc, tc and variables are passed through unchanged to ReadGrammarFile. The
// first error from globbing or from reading a file is returned as-is,
// together with a nil result. When no file matches, an empty, non-nil
// Grammars is returned.
func MakeGrammars(grammarsDir string, esc *elastic.Client, tc *TokensCache, variables VariablesByLang) (Grammars, error) {
	globPattern := filepath.Join(grammarsDir, "*.grammar")
	grammarFiles, globErr := filepath.Glob(globPattern)
	if globErr != nil {
		return nil, globErr
	}

	merged := make(Grammars)
	for i := range grammarFiles {
		fromFile, readErr := ReadGrammarFile(grammarFiles[i], esc, tc, variables)
		if readErr != nil {
			return nil, readErr
		}
		FoldGrammars(merged, fromFile)
	}
	return merged, nil
}