-
-
Notifications
You must be signed in to change notification settings - Fork 22
/
ResultValuatorTest.java
92 lines (73 loc) · 3.54 KB
/
ResultValuatorTest.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
package nu.marginalia.ranking;
import nu.marginalia.index.client.model.results.ResultRankingContext;
import nu.marginalia.index.client.model.results.ResultRankingParameters;
import nu.marginalia.index.client.model.results.SearchResultKeywordScore;
import nu.marginalia.model.idx.DocumentFlags;
import nu.marginalia.model.idx.WordFlags;
import nu.marginalia.model.crawl.PubDate;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.model.idx.WordMetadata;
import nu.marginalia.ranking.factors.*;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import java.util.*;
import static org.mockito.Mockito.when;
/**
 * Exploratory test for {@code ResultValuator}: builds keyword-score fixtures with
 * differing word/document metadata and prints the resulting ranking values for
 * manual comparison.
 *
 * <p>NOTE(review): this test has no assertions — the printed values must be
 * eyeballed. Consider pinning expected orderings (e.g. title match vs. high-count
 * non-title) once the intended ranking semantics are confirmed.
 */
class ResultValuatorTest {

    TermFrequencyDict dict;
    ResultValuator valuator;

    @BeforeEach
    public void setUp() {
        // Mock the term frequency dictionary so the valuator sees a fixed corpus size.
        dict = Mockito.mock(TermFrequencyDict.class);
        when(dict.docCount()).thenReturn(100_000);

        valuator = new ResultValuator(
                new Bm25Factor(),
                new TermCoherenceFactor(),
                new PriorityTermBonus()
        );
    }

    // Fixture: a single occurrence of "bob", flagged as appearing in the title.
    List<SearchResultKeywordScore> titleOnlyLowCountSet = List.of(
            new SearchResultKeywordScore(0, "bob",
                    wordMetadata(Set.of(1), EnumSet.of(WordFlags.Title)),
                    docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)),
                    0, false)
    );

    // Fixture: many occurrences with high tf-idf, but not in the title.
    List<SearchResultKeywordScore> highCountNoTitleSet = List.of(
            new SearchResultKeywordScore(0, "bob",
                    wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh)),
                    docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)),
                    0, false)
    );

    // Fixture: same positions as above, additionally flagged as a subject term.
    List<SearchResultKeywordScore> highCountSubjectSet = List.of(
            new SearchResultKeywordScore(0, "bob",
                    wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh, WordFlags.Subjects)),
                    docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)),
                    0, false)
    );

    @Test
    void evaluateTerms() {
        when(dict.getTermFreq("bob")).thenReturn(10);

        ResultRankingContext context = new ResultRankingContext(100000,
                ResultRankingParameters.sensibleDefaults(),
                Map.of("bob", 10), Collections.emptyMap());

        // FIX(review): the original computed a second value, "titleLongOnlyLowCount",
        // from the *same* titleOnlyLowCountSet — a copy-paste duplicate guaranteed to
        // equal titleOnlyLowCount. Removed; if a "long title" fixture was intended,
        // it needs its own keyword-score set.
        double titleOnlyLowCount = valuator.calculateSearchResultValue(titleOnlyLowCountSet, 10_000, context);
        double highCountNoTitle = valuator.calculateSearchResultValue(highCountNoTitleSet, 10_000, context);
        double highCountSubject = valuator.calculateSearchResultValue(highCountSubjectSet, 10_000, context);

        System.out.println(titleOnlyLowCount);
        System.out.println(highCountNoTitle);
        System.out.println(highCountSubject);
    }

    /** Encodes document metadata (topology, publication year, quality, flags) into its packed long form. */
    private long docMetadata(int topology, int year, int quality, EnumSet<DocumentFlags> flags) {
        return new DocumentMetadata(topology, PubDate.toYearByte(year), quality, flags).encode();
    }

    /**
     * Encodes word metadata: sets one position bit per entry in {@code positions},
     * then packs the bitmask together with {@code wordFlags}.
     */
    private long wordMetadata(Set<Integer> positions, Set<WordFlags> wordFlags) {
        long posBits = positions.stream()
                .mapToLong(i -> 1L << i)
                .reduce(0L, (a, b) -> a | b)
                // Mask once after combining (equivalent to the original per-element
                // mask): the position field is 56 bits wide.
                & 0xFF_FFFF_FFFF_FFFFL;

        return new WordMetadata(posBits, wordFlags).encode();
    }
}