-
Notifications
You must be signed in to change notification settings - Fork 43
/
pre-ranking.go
123 lines (109 loc) · 3.23 KB
/
pre-ranking.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// vim:ts=4:sw=4:noexpandtab
// XXX: Unsure whether it actually *should* happen on dcs-web or on the index
// backends. a pro argument for ranking on dcs-web is that we could batch
// queries to the database together and maybe have an advantage that way?
// Pre-ranking happens on dcs-web after the index backends provided their
// results. Without looking at file contents at all, we assign a preliminary
// ranking to each file based on its database entries and whether the query
// string matches parts of the filename.
package ranking
import (
"encoding/json"
"log"
"os"
"path"
"strings"
)
// Represents an entry from our ranking database (determined by using the
// meta information about source packages).
type StoredRanking struct {
Inst float32
Rdep float32
}
// Consumes a few hundred kilobytes of memory
// ((sizeof(StoredRanking) = 8) * ≈ 17000).
var storedRanking = make(map[string]StoredRanking)
// ReadRankingData reads the pre-computed rankings from |path|. It must be
// called before ResultPath.Rank() is called, otherwise Rank() won’t return
// meaningful results.
func ReadRankingData(path string) error {
f, err := os.Open(path)
if err != nil {
return err
}
defer f.Close()
return json.NewDecoder(f).Decode(&storedRanking)
}
// The regular expression trigram index provides us a path to a potential
// result. This data structure represents such a path and allows for ranking
// and sorting each path.
type ResultPath struct {
Path string
Position int
SourcePkgIdx [2]int
Ranking float32
}
func (rp *ResultPath) Rank(opts *RankingOpts) {
// No ranking at all: 807ms
// query.Match(&rp.Path): 4.96s
// query.Match(&rp.Path) * query.Match(&sourcePackage): 6.7s
// full ranking: 24s
// lookup table: 6.8s
rp.SourcePkgIdx[0] = 0
rp.SourcePkgIdx[1] = 0
for i := 0; i < len(rp.Path); i++ {
if rp.Path[i] == '_' {
rp.SourcePkgIdx[1] = i
break
}
}
if rp.SourcePkgIdx[1] == 0 {
log.Fatalf("Invalid path in result: %q", rp.Path)
}
sourcePackage := rp.Path[rp.SourcePkgIdx[0]:rp.SourcePkgIdx[1]]
ranking := storedRanking[sourcePackage]
rp.Ranking = 1
if opts.Inst {
rp.Ranking += ranking.Inst
}
if opts.Rdep {
rp.Ranking += ranking.Rdep
}
if (opts.Filetype || opts.Weighted) && len(opts.Suffixes) > 0 {
suffix := strings.ToLower(path.Ext(rp.Path))
if val, exists := opts.Suffixes[suffix]; exists {
rp.Ranking += val
} else {
// With a ranking of -1, the result will be thrown away.
rp.Ranking = -1
return
}
}
if (opts.Filetype || opts.Weighted) && len(opts.Nsuffixes) > 0 {
suffix := strings.ToLower(path.Ext(rp.Path))
if _, exists := opts.Nsuffixes[suffix]; exists {
rp.Ranking = -1
return
}
}
if opts.Weighted {
rp.Ranking += 0.3840 * ranking.Inst
rp.Ranking += 0.3427 * ranking.Rdep
}
}
type ResultPaths []ResultPath
func (r ResultPaths) Len() int {
return len(r)
}
func (r ResultPaths) Less(i, j int) bool {
if r[i].Ranking == r[j].Ranking {
// On a tie, we use the path to make the order of results stable over
// multiple queries (which can have different results depending on
// which index backend reacts quicker).
return r[i].Path > r[j].Path
}
return r[i].Ranking > r[j].Ranking
}
func (r ResultPaths) Swap(i, j int) {
r[i], r[j] = r[j], r[i]
}