forked from isoboroff/trec-demo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
TMPL.java
133 lines (114 loc) · 2.95 KB
/
TMPL.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SmallFloat;
import java.io.IOException;
import java.util.Collections;
/**
 * Skeleton {@link Similarity} that gathers the usual term-weighting inputs.
 *
 * <p>{@link #computeWeight} derives an inverse document frequency (base-2
 * logarithm of {@code maxDoc / docFreq}) and an average document length for
 * the field; {@link #computeNorm} stores each document's field length as its
 * norm; {@link TFIDFScorer#score} reads all of them back. The scorer
 * currently returns the constant {@code 1.0f}: the statistics are collected
 * but not yet combined into a formula.
 * NOTE(review): presumably this class is a template whose weighting formula
 * is meant to be filled in inside {@code score} -- confirm.
 */
public class TMPL extends Similarity {

    /** Creates the similarity; it has no configuration. */
    public TMPL() {}

    /**
     * Computes the base-2 logarithm of {@code x}.
     *
     * @param x value to take the logarithm of
     * @return {@code log2(x)} narrowed to {@code float}
     */
    public float log(double x)
    {
        return (float) (Math.log(x) / Math.log(2.0D));
    }

    /**
     * Coordination factor; this implementation ignores both arguments.
     *
     * @param overlap    unused
     * @param maxOverlap unused
     * @return always {@code 1}
     */
    public float coord(int overlap, int maxOverlap)
    {
        return 1f;
    }

    /**
     * Query normalization factor; this implementation ignores its argument.
     *
     * @param valueForNormalization unused
     * @return always {@code 1}
     */
    public float queryNorm(float valueForNormalization)
    {
        return 1f;
    }

    /**
     * Builds the per-query weight holding the field name, the idf and the
     * average document length.
     *
     * <p>With exactly one term the idf is {@code log2(maxDoc / docFreq)}.
     * Otherwise the per-term values are added to an accumulator that starts
     * at {@code 1.0}.
     *
     * @param collectionStats statistics of the field being searched
     * @param termStats       statistics of the query term(s)
     * @return a {@link TFIDFWeight} carrying field, idf and average length
     */
    @Override
    public final SimWeight computeWeight(CollectionStatistics collectionStats,
                                         TermStatistics... termStats)
    {
        // Clamp to 1 so an empty index cannot produce 0/0 = NaN below.
        final float N = Math.max(collectionStats.maxDoc(), 1L);
        final float adl = averageDocLength(collectionStats.sumTotalTermFreq(), N);

        float idf;
        if (termStats.length == 1) {
            idf = termIdf(N, termStats[0].docFreq());
        } else {
            // NOTE(review): the accumulator is seeded with 1.0, so the result
            // is 1 + the sum of the per-term values (and 1.0 when there are
            // no terms). Kept unchanged to preserve existing values -- confirm
            // whether a seed of 0 was intended.
            idf = 1.0f;
            for (final TermStatistics stat : termStats) {
                idf += termIdf(N, stat.docFreq());
            }
        }
        return new TFIDFWeight(collectionStats.field(), idf, adl);
    }

    /** Returns log2(docCount / docFreq), treating a document frequency below 1 as 1. */
    private float termIdf(float docCount, long docFreq)
    {
        // docFreq == 0 would make the ratio infinite (or NaN); clamp instead.
        final float n = Math.max(docFreq, 1L);
        return log(docCount / n);
    }

    /** Returns sumTotalTermFreq / docCount, or 1 when the sum is unavailable or zero. */
    private static float averageDocLength(long sumTotalTermFreq, float docCount)
    {
        // A non-positive sum (e.g. -1 when the statistic is not available)
        // would yield a zero or negative average; fall back to a neutral 1.
        if (sumTotalTermFreq <= 0) {
            return 1.0f;
        }
        return sumTotalTermFreq / docCount;
    }

    /**
     * Creates the per-segment scorer for a weight produced by
     * {@link #computeWeight}.
     *
     * @param sw      weight to score with; must be a {@link TFIDFWeight}
     * @param context segment whose norms are read for the weight's field
     * @return a {@link TFIDFScorer} bound to that segment's norms
     * @throws IOException if the norms cannot be read
     */
    @Override
    public final SimScorer simScorer(SimWeight sw, LeafReaderContext context)
        throws IOException
    {
        final TFIDFWeight tw = (TFIDFWeight) sw;
        return new TFIDFScorer(tw, context.reader().getNormValues(tw.field));
    }

    /** Scorer that exposes idf, average length and document length to {@link #score}. */
    public class TFIDFScorer extends SimScorer
    {
        private final TFIDFWeight tw;

        /** Per-document field lengths; {@code null} when the field has no norms. */
        private final NumericDocValues norms;

        /**
         * @param tw    weight holding idf and average document length
         * @param norms per-document norms for the field, or {@code null}
         * @throws IOException declared for callers; not thrown by this body
         */
        TFIDFScorer(TFIDFWeight tw, NumericDocValues norms)
            throws IOException
        {
            this.tw = tw;
            this.norms = norms;
        }

        /**
         * Scores one document. The inputs a weighting formula would need are
         * loaded into locals, but the returned value is currently the
         * constant {@code 1.0f}.
         *
         * @param doc document id within the segment
         * @param tf  term frequency in the document (currently unused)
         * @return always {@code 1.0f}
         */
        @Override
        public float score(int doc, float tf)
        {
            float idf, dl, adl, K, w;
            idf = tw.idf;
            adl = tw.adl;
            // Without norms the length is unknown; use the average length
            // instead of dereferencing null.
            dl = (norms == null) ? adl : (float) norms.get(doc);
            K = 1.0f;
            w = 1.0f;
            return w;
        }

        /**
         * @param distance distance passed in by the caller
         * @return {@code 1 / (distance + 1)}
         */
        @Override
        public float computeSlopFactor(int distance)
        {
            return 1.0f / (distance + 1);
        }

        /**
         * Payloads are ignored.
         *
         * @return always {@code 1.0f}
         */
        @Override
        public float computePayloadFactor(int doc, int start, int end, BytesRef payload)
        {
            return 1.0f;
        }
    }

    /** Immutable holder for the field name, idf and average document length. */
    public static class TFIDFWeight extends SimWeight
    {
        private final String field;
        private final float idf;
        private final float adl;

        /**
         * @param field name of the field the weight applies to
         * @param idf   inverse document frequency computed for the query term(s)
         * @param adl   average document length of the field
         */
        public TFIDFWeight(String field, float idf, float adl)
        {
            this.field = field;
            this.idf = idf;
            this.adl = adl;
        }

        /** @return always {@code 1.0f} */
        @Override
        public float getValueForNormalization()
        {
            return 1.0f;
        }

        /** Does nothing; both arguments are ignored. */
        @Override
        public void normalize(float queryNorm, float boost) {}
    }

    /**
     * Stores the field length reported by {@code state} as the document's norm.
     *
     * @param state inversion state of the field being indexed
     * @return {@code state.getLength()} widened to {@code long}
     */
    @Override
    public final long computeNorm(FieldInvertState state)
    {
        return state.getLength();
    }
}