forked from apache/lucenenet
-
Notifications
You must be signed in to change notification settings - Fork 3
/
MergeState.cs
285 lines (248 loc) · 9.75 KB
/
MergeState.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
using Lucene.Net.Support;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using IBits = Lucene.Net.Util.IBits;
using Directory = Lucene.Net.Store.Directory;
using InfoStream = Lucene.Net.Util.InfoStream;
using MonotonicAppendingInt64Buffer = Lucene.Net.Util.Packed.MonotonicAppendingInt64Buffer;
/// <summary>
/// Holds common state used during segment merging.
/// <para/>
/// @lucene.experimental
/// </summary>
public class MergeState
{
/// <summary>
/// Remaps docids around deletes during merge.
/// <para/>
/// Instances are obtained through <see cref="Build(AtomicReader)"/>; the
/// constructor is internal, so the type cannot be subclassed from outside
/// this assembly.
/// </summary>
public abstract class DocMap
{
    internal DocMap()
    {
    }

    /// <summary>
    /// Returns the mapped docID corresponding to the provided one. </summary>
    /// <param name="docID">The docID to remap.</param>
    /// <returns>
    /// The remapped docID. The deletion-aware map created by <c>Build</c>
    /// returns <c>-1</c> for a document that is not live.
    /// </returns>
    public abstract int Get(int docID);

    /// <summary>
    /// Returns the total number of documents, ignoring
    /// deletions.
    /// </summary>
    public abstract int MaxDoc { get; }

    /// <summary>
    /// Returns the number of not-deleted documents, computed as
    /// <see cref="MaxDoc"/> minus <see cref="NumDeletedDocs"/>. </summary>
    public int NumDocs
    {
        get { return MaxDoc - NumDeletedDocs; }
    }

    /// <summary>
    /// Returns the number of deleted documents. </summary>
    public abstract int NumDeletedDocs { get; }

    /// <summary>
    /// Returns <c>true</c> if there are any deletions, i.e. if
    /// <see cref="NumDeletedDocs"/> is greater than zero. </summary>
    public virtual bool HasDeletions
    {
        get { return NumDeletedDocs > 0; }
    }

    /// <summary>
    /// Creates a <see cref="DocMap"/> instance appropriate for
    /// this reader.
    /// </summary>
    /// <param name="reader">The reader whose documents are to be remapped.</param>
    /// <returns>
    /// An identity map when <paramref name="reader"/> reports no deletions;
    /// otherwise a map built from the reader's live docs.
    /// </returns>
    public static DocMap Build(AtomicReader reader)
    {
        int maxDoc = reader.MaxDoc;
        if (!reader.HasDeletions)
        {
            // Nothing was deleted, so every docID maps to itself.
            return new NoDelDocMap(maxDoc);
        }
        IBits liveDocs = reader.LiveDocs;
        return Build(maxDoc, liveDocs);
    }

    /// <summary>
    /// Builds a deletion-aware <see cref="DocMap"/> from a live-docs bit set.
    /// </summary>
    /// <param name="maxDoc">Number of documents to cover (docIDs <c>0</c> to <c>maxDoc - 1</c>).</param>
    /// <param name="liveDocs">
    /// Bit set in which a <c>false</c> bit marks the document as deleted.
    /// Must not be <c>null</c> (checked with a debug assertion only).
    /// </param>
    /// <returns>A map that compacts live docIDs and returns <c>-1</c> for deleted ones.</returns>
    internal static DocMap Build(int maxDoc, IBits liveDocs)
    {
        Debug.Assert(liveDocs != null);
        MonotonicAppendingInt64Buffer docMap = new MonotonicAppendingInt64Buffer();
        int del = 0;
        for (int i = 0; i < maxDoc; ++i)
        {
            // The new id of doc i is its old id minus the number of deleted
            // docs that precede it. The entry is recorded for deleted docs
            // too; Get() masks those with -1.
            docMap.Add(i - del);
            if (!liveDocs.Get(i))
            {
                ++del;
            }
        }
        docMap.Freeze();
        Debug.Assert(docMap.Count == maxDoc);
        // After the loop, del is the total number of deleted documents.
        return new DocMapAnonymousInnerClassHelper(maxDoc, liveDocs, docMap, del);
    }

    /// <summary>
    /// Deletion-aware <see cref="DocMap"/> backed by a precomputed buffer of
    /// remapped docIDs and the live-docs bits it was computed from.
    /// </summary>
    private sealed class DocMapAnonymousInnerClassHelper : DocMap
    {
        // All state is fixed at construction time.
        private readonly int maxDoc;
        private readonly IBits liveDocs;
        private readonly MonotonicAppendingInt64Buffer docMap;
        private readonly int numDeletedDocs;

        public DocMapAnonymousInnerClassHelper(int maxDoc, IBits liveDocs, MonotonicAppendingInt64Buffer docMap, int numDeletedDocs)
        {
            this.maxDoc = maxDoc;
            this.liveDocs = liveDocs;
            this.docMap = docMap;
            this.numDeletedDocs = numDeletedDocs;
        }

        /// <summary>
        /// Returns <c>-1</c> when <paramref name="docID"/> is not live,
        /// otherwise the remapped docID stored for it.
        /// </summary>
        public override int Get(int docID)
        {
            if (!liveDocs.Get(docID))
            {
                return -1;
            }
            return (int)docMap.Get(docID);
        }

        public override int MaxDoc
        {
            get { return maxDoc; }
        }

        public override int NumDeletedDocs
        {
            get { return numDeletedDocs; }
        }
    }
}
/// <summary>
/// Identity <see cref="DocMap"/>: every docID maps to itself and the number
/// of deleted documents is always zero.
/// </summary>
private sealed class NoDelDocMap : DocMap
{
    // Total document count reported through MaxDoc.
    private readonly int docCount;

    /// <summary>
    /// Creates an identity map over <paramref name="maxDoc"/> documents.
    /// </summary>
    internal NoDelDocMap(int maxDoc)
    {
        docCount = maxDoc;
    }

    /// <summary>
    /// Returns the document count supplied at construction. </summary>
    public override int MaxDoc
    {
        get { return docCount; }
    }

    /// <summary>
    /// Always zero. </summary>
    public override int NumDeletedDocs
    {
        get { return 0; }
    }

    /// <summary>
    /// Returns <paramref name="docID"/> unchanged. </summary>
    public override int Get(int docID)
    {
        return docID;
    }
}
/// <summary>
/// <see cref="Index.SegmentInfo"/> of the newly merged segment.
/// Assigned by the constructor; the setter is private. </summary>
public SegmentInfo SegmentInfo { get; private set; }
/// <summary>
/// <see cref="Index.FieldInfos"/> of the newly merged segment.
/// Not assigned by the constructor; publicly settable. </summary>
public FieldInfos FieldInfos { get; set; }
/// <summary>
/// Readers being merged.
/// Assigned by the constructor; the setter is private. </summary>
public IList<AtomicReader> Readers { get; private set; }
/// <summary>
/// Maps docIDs around deletions.
/// Not assigned by the constructor; publicly settable. </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public DocMap[] DocMaps { get; set; }
/// <summary>
/// New docID base per reader.
/// Not assigned by the constructor; publicly settable. </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public int[] DocBase { get; set; }
/// <summary>
/// Holds the <see cref="Index.CheckAbort"/> instance, which is invoked
/// periodically to see if the merge has been aborted.
/// Assigned by the constructor; the setter is private.
/// </summary>
public CheckAbort CheckAbort { get; private set; }
/// <summary>
/// <see cref="Util.InfoStream"/> for debugging messages.
/// Assigned by the constructor; the setter is private. </summary>
public InfoStream InfoStream { get; private set; }
// TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
// but is this really so expensive to compute again in different components, versus once in SM?
/// <summary>
/// <see cref="SegmentReader"/>s that have identical field
/// name/number mapping, so their stored fields and term
/// vectors may be bulk merged.
/// Not assigned by the constructor; publicly settable.
/// </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public SegmentReader[] MatchingSegmentReaders { get; set; }
/// <summary>
/// How many <see cref="MatchingSegmentReaders"/> are set.
/// Not assigned by the constructor; publicly settable. </summary>
public int MatchedCount { get; set; }
/// <summary>
/// Sole constructor. Stores each argument in the property of the same name;
/// no other property is initialized here.
/// </summary>
/// <param name="readers">Readers being merged.</param>
/// <param name="segmentInfo">Segment info of the newly merged segment.</param>
/// <param name="infoStream">Stream for debugging messages.</param>
/// <param name="checkAbort">Instance used to check whether the merge has been aborted.</param>
internal MergeState(IList<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, CheckAbort checkAbort)
{
    SegmentInfo = segmentInfo;
    Readers = readers;
    CheckAbort = checkAbort;
    InfoStream = infoStream;
}
}
/// <summary>
/// Class for recording units of work when merging segments.
/// <para/>
/// Work is accumulated through <see cref="Work(double)"/>; each time the
/// accumulated amount reaches an internal threshold, the merge is asked
/// whether it has been aborted and the accumulator is reset.
/// </summary>
public class CheckAbort // LUCENENET Specific: De-nested this class to fix CLS naming issue
{
    // Accumulated work units that trigger an abort check.
    private const double WorkUnitsPerCheck = 10000.0;

    // Work units accumulated since the last abort check.
    private double workCount;
    private readonly MergePolicy.OneMerge merge;
    private readonly Directory dir;

    /// <summary>
    /// Creates a <see cref="CheckAbort"/> instance. </summary>
    /// <param name="merge">The merge whose <c>CheckAborted</c> method is invoked periodically.</param>
    /// <param name="dir">The directory passed to <c>CheckAborted</c>.</param>
    public CheckAbort(MergePolicy.OneMerge merge, Directory dir)
    {
        this.merge = merge;
        this.dir = dir;
    }

    /// <summary>
    /// Records the fact that roughly units amount of work
    /// have been done since this method was last called.
    /// When adding time-consuming code into <see cref="SegmentMerger"/>,
    /// you should test different values for units to ensure
    /// that the time in between calls to merge.CheckAborted
    /// is up to ~ 1 second.
    /// </summary>
    /// <param name="units">Approximate amount of work done since the previous call.</param>
    public virtual void Work(double units)
    {
        workCount += units;
        if (workCount >= WorkUnitsPerCheck)
        {
            // The accumulator is reset only after CheckAborted returns
            // normally, which matches the original statement order.
            merge.CheckAborted(dir);
            workCount = 0;
        }
    }

    /// <summary>
    /// If you use this: IW.Dispose(false) cannot abort your merge!
    /// <para/>
    /// @lucene.internal
    /// </summary>
    public static readonly CheckAbort NONE = new CheckAbortAnonymousInnerClassHelper();

    /// <summary>
    /// <see cref="CheckAbort"/> whose <see cref="Work(double)"/> does nothing.
    /// It is constructed with a <c>null</c> merge and directory, which is
    /// safe because the overridden method never touches them.
    /// </summary>
    private sealed class CheckAbortAnonymousInnerClassHelper : CheckAbort
    {
        public CheckAbortAnonymousInnerClassHelper()
            : base(null, null)
        {
        }

        public override void Work(double units)
        {
            // do nothing
        }
    }
}
}