-
Notifications
You must be signed in to change notification settings - Fork 0
/
grammar.go
120 lines (106 loc) · 3.4 KB
/
grammar.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
package search
import (
"bufio"
"fmt"
"os"
"path"
"path/filepath"
"regexp"
"strings"
"github.com/pkg/errors"
"gopkg.in/olivere/elastic.v6"
"github.com/Bnei-Baruch/archive-backend/consts"
)
// Grammar groups every pattern that was read for one (language, intent)
// pair of a single hit type.
type Grammar struct {
// HitType is the grammar file's base name without the ".grammar" extension.
HitType string
// Language is the language field of the pattern lines ("<language>,<intent> => <pattern>").
Language string
// Intent is the intent field of the pattern lines.
Intent string
// Patterns holds one token sequence per pattern line, as produced by MakeTokensFromPhrase.
Patterns [][]*TokenNode
// Filters is the value found for Intent in consts.GRAMMAR_INTENTS_TO_FILTER_VALUES.
Filters map[string][]string
// Esc is the Elasticsearch client that was handed to ReadGrammarFile.
Esc *elastic.Client
}
// Grammars indexes Grammar values first by language and then by intent.
type Grammars = map[string]map[string]*Grammar
// FoldGrammars merges second into first, modifying first in place.
//
// A (language, intent) pair that first does not yet contain is taken over from
// second as-is, so the *Grammar is shared between both maps. For a pair that
// first already contains, only the patterns of second's grammar are appended
// to the existing grammar; its other fields are left untouched.
func FoldGrammars(first Grammars, second Grammars) {
	for lang, incoming := range second {
		for intent, addition := range incoming {
			// Create the per-language map lazily, only once an intent
			// actually has to be stored under it.
			byIntent, known := first[lang]
			if !known {
				byIntent = make(map[string]*Grammar)
				first[lang] = byIntent
			}

			existing, found := byIntent[intent]
			if !found {
				byIntent[intent] = addition
				continue
			}
			existing.Patterns = append(existing.Patterns, addition.Patterns...)
		}
	}
}
// ReadGrammarFile parses a single grammar file into a Grammars map.
//
// The file must be named "<hit-type>.grammar"; the part before the extension
// becomes the HitType of every Grammar read from it. Each meaningful line has
// the form
//
//	<language>,<intent> => <pattern>
//
// Blank lines and lines starting with '#' are skipped. Every pattern is turned
// into tokens with MakeTokensFromPhrase and appended to the Grammar of its
// (language, intent) pair. The filters of a new Grammar are looked up by
// intent in consts.GRAMMAR_INTENTS_TO_FILTER_VALUES.
//
// An error is returned when the file name or a line is malformed, when an
// intent has no filters, when tokenizing a pattern fails, or when the file
// cannot be opened or read. Line numbers in error messages are the 1-based
// physical line numbers of the file, comments and blank lines included.
func ReadGrammarFile(grammarFile string, esc *elastic.Client, tc *TokensCache) (Grammars, error) {
	// The dot is escaped so that only a real ".grammar" extension matches.
	// ToSlash lets the slash-only path.Base cope with OS-specific separators.
	fileNameRe := regexp.MustCompile(`^(.*)\.grammar$`)
	nameParts := fileNameRe.FindStringSubmatch(path.Base(filepath.ToSlash(grammarFile)))
	if len(nameParts) != 2 {
		return nil, errors.New(fmt.Sprintf("Bad gramamr file: %s, expected: <hit-type>.grammar", grammarFile))
	}
	hitType := nameParts[1]

	file, err := os.Open(grammarFile)
	if err != nil {
		return nil, errors.Wrapf(err, "Error reading grammar file: %s", grammarFile)
	}
	defer file.Close()

	// Compiled once per file instead of once per line.
	lineRe := regexp.MustCompile(`^(.*),(.*) => (.*)$`)

	scanner := bufio.NewScanner(file)
	scanner.Split(bufio.ScanLines)
	grammars := make(Grammars)
	lineNum := 0
	for scanner.Scan() {
		// Count every physical line, skipped ones included, so that the
		// position reported in error messages matches the file.
		lineNum++

		line := strings.TrimSpace(scanner.Text())
		// Ignore comments and empty lines.
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		fields := lineRe.FindStringSubmatch(line)
		if len(fields) != 4 {
			return nil, errors.New(fmt.Sprintf("[%s:%d] Error reading pattern: [%s]", grammarFile, lineNum, line))
		}
		lang, intent, pattern := fields[1], fields[2], fields[3]
		if lang == "" || intent == "" || pattern == "" {
			return nil, errors.New(fmt.Sprintf("[%s:%d] Error reading pattern: [%s]", grammarFile, lineNum, line))
		}

		byIntent, ok := grammars[lang]
		if !ok {
			byIntent = make(map[string]*Grammar)
			grammars[lang] = byIntent
		}
		grammar, ok := byIntent[intent]
		if !ok {
			filters, filterExist := consts.GRAMMAR_INTENTS_TO_FILTER_VALUES[intent]
			if !filterExist {
				return nil, errors.New(fmt.Sprintf("[%s:%d] Filters not found for intent: [%s]", grammarFile, lineNum, intent))
			}
			grammar = &Grammar{HitType: hitType, Language: lang, Intent: intent, Patterns: [][]*TokenNode{}, Filters: filters, Esc: esc}
			byIntent[intent] = grammar
		}

		tokens, err := MakeTokensFromPhrase(pattern, lang, esc, tc)
		if err != nil {
			return nil, errors.Wrapf(err, "Error generating tokens from pattern: [%s] in %s.", pattern, lang)
		}
		grammar.Patterns = append(grammar.Patterns, tokens)
	}
	if err := scanner.Err(); err != nil {
		return nil, errors.Wrapf(err, "Error reading grammar file: %s", grammarFile)
	}
	return grammars, nil
}
// MakeGrammars reads every "*.grammar" file directly inside grammarsDir with
// ReadGrammarFile and folds the results into a single Grammars map using
// FoldGrammars.
//
// The first error from globbing or from reading a file is returned unchanged,
// together with a nil map.
func MakeGrammars(grammarsDir string, esc *elastic.Client, tc *TokensCache) (Grammars, error) {
	files, globErr := filepath.Glob(filepath.Join(grammarsDir, "*.grammar"))
	if globErr != nil {
		return nil, globErr
	}

	merged := Grammars{}
	for i := range files {
		parsed, readErr := ReadGrammarFile(files[i], esc, tc)
		if readErr != nil {
			return nil, readErr
		}
		FoldGrammars(merged, parsed)
	}
	return merged, nil
}