forked from apache/lucenenet
-
Notifications
You must be signed in to change notification settings - Fork 3
/
FSDirectory.cs
564 lines (514 loc) · 22.4 KB
/
FSDirectory.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
using Lucene.Net.Support.IO;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq; // Provides ToArray(), used by ListAll(DirectoryInfo)
namespace Lucene.Net.Store
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Constants = Lucene.Net.Util.Constants;
using IOUtils = Lucene.Net.Util.IOUtils;
/// <summary>
/// Base class for <see cref="Directory"/> implementations that store index
/// files in the file system.
/// <para/>
/// There are currently three core
/// subclasses:
///
/// <list type="bullet">
///
/// <item><description> <see cref="SimpleFSDirectory"/> is a straightforward
/// implementation using <see cref="System.IO.FileStream"/>.
/// However, it has poor concurrent performance
/// (multiple threads will bottleneck) as it
/// synchronizes when multiple threads read from the
/// same file.</description></item>
///
/// <item><description> <see cref="NIOFSDirectory"/> uses java.nio's
/// FileChannel's positional io when reading to avoid
/// synchronization when reading from the same file.
/// Unfortunately, due to a Windows-only <a
/// href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265734">Sun
/// JRE bug</a> this is a poor choice for Windows, but
/// on all other platforms this is the preferred
/// choice. Applications using <see cref="System.Threading.Thread.Interrupt()"/> or
/// <see cref="System.Threading.Tasks.Task{TResult}"/> should use
/// <see cref="SimpleFSDirectory"/> instead. See the <see cref="NIOFSDirectory"/> documentation
/// for details.</description></item>
///
/// <item><description> <see cref="MMapDirectory"/> uses memory-mapped IO when
/// reading. This is a good choice if you have plenty
/// of virtual memory relative to your index size, eg
/// if you are running on a 64 bit runtime, or you are
/// running on a 32 bit runtime but your index sizes are
/// small enough to fit into the virtual memory space.
/// <para/>
/// Applications using <see cref="System.Threading.Thread.Interrupt()"/> or
/// <see cref="System.Threading.Tasks.Task"/> should use
/// <see cref="SimpleFSDirectory"/> instead. See <see cref="MMapDirectory"/>
/// doc for details.</description></item>
/// </list>
///
/// Unfortunately, because of system peculiarities, there is
/// no single overall best implementation. Therefore, we've
/// added the <see cref="Open(string)"/> method (or one of its overloads), to allow Lucene to choose
/// the best <see cref="FSDirectory"/> implementation given your
/// environment, and the known limitations of each
/// implementation. For users who have no reason to prefer a
/// specific implementation, it's best to simply use
/// <see cref="Open(string)"/> (or one of its overloads). For all others, you should instantiate the
/// desired implementation directly.
///
/// <para/>The locking implementation is by default
/// <see cref="NativeFSLockFactory"/>, but can be changed by
/// passing in a custom <see cref="LockFactory"/> instance.
/// </summary>
/// <seealso cref="Directory"/>
public abstract class FSDirectory : BaseDirectory
{
/// <summary>
/// Default read chunk size: 8192 bytes (this is the size up to which the runtime
/// does not allocate additional arrays while reading/writing) </summary>
[Obsolete("this constant is no longer used since Lucene 4.5.")]
public const int DEFAULT_READ_CHUNK_SIZE = 8192;
/// <summary>
/// The underlying filesystem directory. Assigned once in the constructor from the
/// canonical form of the path that was passed in.
/// </summary>
protected readonly DirectoryInfo m_directory; // The underlying filesystem directory
// LUCENENET specific: No such thing as "stale files" in .NET, since Flush(true) writes everything to disk before
// our FileStream is disposed.
//protected readonly ISet<string> m_staleFiles = new ConcurrentHashSet<string>(); // Files written, but not yet sync'ed
// Backing field for the obsolete ReadChunkSize property. The pragmas silence the
// "member is obsolete" warnings (CS0612/CS0618) raised by referencing the obsolete constant.
#pragma warning disable 612, 618
private int chunkSize = DEFAULT_READ_CHUNK_SIZE;
#pragma warning restore 612, 618
/// <summary>
/// Create a new <see cref="FSDirectory"/> for the given location using the default
/// lock factory (ctor for subclasses). Delegates to the two-argument constructor
/// with a <c>null</c> lock factory.
/// </summary>
/// <param name="dir"> the path of the directory </param>
protected FSDirectory(DirectoryInfo dir)
    : this(dir, lockFactory: null)
{
}
/// <summary>
/// Create a new <see cref="FSDirectory"/> for the named location (ctor for subclasses).
/// </summary>
/// <param name="path"> the path of the directory; must not be <c>null</c> </param>
/// <param name="lockFactory"> the lock factory to use, or <c>null</c> for the default
/// (<see cref="NativeFSLockFactory"/>) </param>
/// <exception cref="ArgumentNullException"> if <paramref name="path"/> is <c>null</c> </exception>
/// <exception cref="DirectoryNotFoundException"> if <paramref name="path"/> names an
/// existing regular file rather than a directory </exception>
/// <exception cref="IOException"> if there is a low-level I/O error </exception>
protected internal FSDirectory(DirectoryInfo path, LockFactory lockFactory)
{
    // Fail with a descriptive exception instead of a NullReferenceException
    // from the first dereference of path below.
    if (path == null)
    {
        throw new ArgumentNullException("path");
    }

    // new ctors use always NativeFSLockFactory as default:
    if (lockFactory == null)
    {
        lockFactory = new NativeFSLockFactory();
    }

    m_directory = new DirectoryInfo(path.GetCanonicalPath());

    if (File.Exists(path.FullName))
    {
        throw new DirectoryNotFoundException("file '" + path.FullName + "' exists but is not a directory");
    }

    // Must run after m_directory is assigned: SetLockFactory reads it.
    SetLockFactory(lockFactory);
}
/// <summary>
/// Creates an <see cref="FSDirectory"/> instance, trying to pick the
/// best implementation for the current environment. No lock factory is
/// passed, so the implementation's default is used.
///
/// <para/>Currently this returns <see cref="MMapDirectory"/> on 64-bit
/// Windows, Solaris and Linux runtimes, <see cref="SimpleFSDirectory"/> on
/// other Windows runtimes, and <see cref="NIOFSDirectory"/> everywhere else.
/// It is highly recommended that you consult the implementation's
/// documentation for your platform before using this method.
///
/// <para/><b>NOTE</b>: the implementation returned may change from release
/// to release if higher performance defaults become possible. If the precise
/// implementation matters to your application, instantiate it directly
/// instead. For optimal performance you should consider using
/// <see cref="MMapDirectory"/> on 64 bit runtimes.
///
/// <para/>See <see cref="FSDirectory"/>.
/// </summary>
/// <param name="path">The directory to open</param>
/// <returns>An open <see cref="FSDirectory"/></returns>
public static FSDirectory Open(DirectoryInfo path)
{
    return Open(path, lockFactory: null);
}
/// <summary>
/// String-based convenience form of <see cref="Open(DirectoryInfo)"/>: wraps
/// <paramref name="path"/> in a <see cref="DirectoryInfo"/> and opens it
/// without a custom lock factory.
/// </summary>
/// <param name="path">The path (to a directory) to open</param>
/// <returns>An open <see cref="FSDirectory"/></returns>
public static FSDirectory Open(string path) // LUCENENET specific overload for ease of use with .NET
{
    DirectoryInfo location = new DirectoryInfo(path);
    return Open(location, null);
}
/// <summary>
/// Just like <see cref="Open(DirectoryInfo)"/>, but also lets the caller
/// supply a custom <see cref="LockFactory"/>.
/// </summary>
/// <param name="path">The directory to open</param>
/// <param name="lockFactory">The lock factory handed to the chosen implementation (may be <c>null</c>)</param>
/// <returns>
/// A <see cref="MMapDirectory"/> on 64-bit Windows, Solaris or Linux runtimes;
/// a <see cref="SimpleFSDirectory"/> on any other Windows runtime;
/// otherwise a <see cref="NIOFSDirectory"/>.
/// </returns>
public static FSDirectory Open(DirectoryInfo path, LockFactory lockFactory)
{
    // LUCENENET specific - the Java check for MMapDirectory.UNMAP_SUPPORTED is
    // not needed here, so only the platform and bitness are consulted.
    bool platformPrefersMMap = Constants.WINDOWS || Constants.SUN_OS || Constants.LINUX;
    if (platformPrefersMMap && Constants.RUNTIME_IS_64BIT)
    {
        return new MMapDirectory(path, lockFactory);
    }

    if (Constants.WINDOWS)
    {
        return new SimpleFSDirectory(path, lockFactory);
    }

    return new NIOFSDirectory(path, lockFactory);
}
/// <summary>
/// String-based convenience form of <see cref="Open(DirectoryInfo, LockFactory)"/>:
/// wraps <paramref name="path"/> in a <see cref="DirectoryInfo"/> and forwards
/// the lock factory unchanged.
/// </summary>
/// <param name="path">The path (to a directory) to open</param>
/// <param name="lockFactory">The lock factory to use, passed through as-is</param>
/// <returns>An open <see cref="FSDirectory"/></returns>
public static FSDirectory Open(string path, LockFactory lockFactory) // LUCENENET specific overload for ease of use with .NET
{
    DirectoryInfo location = new DirectoryInfo(path);
    return Open(location, lockFactory);
}
/// <summary>
/// Installs the lock factory and, for file-system based factories, aligns it
/// with this directory: a factory without a lock directory is pointed at this
/// directory, and whenever the locks live in this directory the lock prefix
/// is cleared.
/// </summary>
/// <param name="lockFactory">the lock factory to install</param>
public override void SetLockFactory(LockFactory lockFactory)
{
    base.SetLockFactory(lockFactory);

    // Only file-system based factories carry a lock directory / prefix.
    FSLockFactory fsLockFactory = lockFactory as FSLockFactory;
    if (fsLockFactory == null)
    {
        return;
    }

    DirectoryInfo lockDir = fsLockFactory.LockDir;
    if (lockDir == null)
    {
        // No lock directory configured: place the locks in this directory.
        fsLockFactory.SetLockDir(m_directory);
        fsLockFactory.LockPrefix = null;
        return;
    }

    // Locks already live in the index directory: drop the prefix as well.
    string lockDirPath = lockDir.GetCanonicalPath();
    string indexDirPath = m_directory.GetCanonicalPath();
    if (lockDirPath.Equals(indexDirPath, StringComparison.Ordinal))
    {
        fsLockFactory.LockPrefix = null;
    }
}
/// <summary>
/// Lists the names of all files (not subdirectories) in the given
/// directory. This method never returns <c>null</c>; it throws instead.
/// </summary>
/// <param name="dir">the directory to list</param>
/// <returns>the simple names (no path) of the files directly inside <paramref name="dir"/></returns>
/// <exception cref="DirectoryNotFoundException"> if the directory
/// does not exist, or does exist but is not a
/// directory or is invalid (for example, it is on an unmapped drive). </exception>
/// <exception cref="System.Security.SecurityException">The caller does not have the required permission.</exception>
public static string[] ListAll(DirectoryInfo dir)
{
    string fullPath = dir.FullName;

    // The "is a regular file" test has to come first: for a regular file
    // Directory.Exists is false, so testing that first would always report
    // "does not exist" and this more precise message could never be produced.
    if (File.Exists(fullPath))
    {
        throw new DirectoryNotFoundException("file '" + dir + "' exists but is not a directory");
    }

    if (!System.IO.Directory.Exists(fullPath))
    {
        throw new DirectoryNotFoundException("directory '" + dir + "' does not exist");
    }

    // EnumerateFiles yields files only, so subdirectories are excluded.
    return dir.EnumerateFiles().Select(file => file.Name).ToArray();
}
/// <summary>
/// Lists the names of all files (not subdirectories) in this directory,
/// after calling <c>EnsureOpen()</c>.
/// </summary>
/// <returns>the file names, as produced by <see cref="ListAll(DirectoryInfo)"/></returns>
/// <seealso cref="ListAll(DirectoryInfo)"/>
public override string[] ListAll()
{
    EnsureOpen();
    string[] fileNames = ListAll(m_directory);
    return fileNames;
}
/// <summary>
/// Returns <c>true</c> iff a file with the given name exists in this directory.
/// </summary>
/// <param name="name">file name, relative to this directory</param>
[Obsolete("this method will be removed in 5.0")]
public override bool FileExists(string name)
{
    EnsureOpen();
    string fullPath = Path.Combine(m_directory.FullName, name);
    return File.Exists(fullPath);
}
/// <summary>
/// Returns the length in bytes of a file in the directory.
/// </summary>
/// <param name="name">file name, relative to this directory</param>
/// <returns>the file's length in bytes</returns>
/// <exception cref="FileNotFoundException">if no such file exists</exception>
public override long FileLength(string name)
{
    EnsureOpen();
    FileInfo file = new FileInfo(Path.Combine(m_directory.FullName, name));

    // Check existence before reading the length: FileInfo.Length itself throws
    // for a missing file, so a check placed after it can never be reached.
    if (!file.Exists)
    {
        throw new FileNotFoundException(name, file.FullName);
    }

    return file.Length;
}
/// <summary>
/// Removes an existing file in the directory.
/// </summary>
/// <param name="name">file name, relative to this directory</param>
/// <exception cref="FileNotFoundException">if the file does not exist</exception>
/// <exception cref="IOException">if the delete fails, or the file is still present afterwards</exception>
public override void DeleteFile(string name)
{
    EnsureOpen();
    FileInfo file = new FileInfo(Path.Combine(m_directory.FullName, name));

    // LUCENENET specific: We need to explicitly throw when the file has already been deleted,
    // since FileInfo doesn't do that for us.
    // (An enhancement carried over from Lucene 8.2.0)
    if (!File.Exists(file.FullName))
    {
        throw new FileNotFoundException("Cannot delete " + file + " because it doesn't exist.");
    }

    try
    {
        file.Delete();
    }
    catch (Exception e)
    {
        // Surface every failure as an IOException, keeping the root cause.
        throw new IOException("Cannot delete " + file, e);
    }

    // Verified outside the try block so this IOException is not caught and
    // re-wrapped in a second, identical IOException by the handler above.
    if (File.Exists(file.FullName))
    {
        throw new IOException("Cannot delete " + file);
    }

    // LUCENENET specific: No such thing as "stale files" in .NET, since Flush(true) writes everything to disk before
    // our FileStream is disposed.
    //m_staleFiles.Remove(name);
}
/// <summary>
/// Creates an <see cref="IndexOutput"/> for the file with the given name.
/// <see cref="EnsureCanWrite(string)"/> runs first, so the directory exists and
/// any previous file of that name has been removed before the output is opened.
/// </summary>
/// <param name="name">file name, relative to this directory</param>
/// <param name="context">not used by this implementation</param>
/// <returns>a new <see cref="FSIndexOutput"/> for the file</returns>
public override IndexOutput CreateOutput(string name, IOContext context)
{
    EnsureOpen();
    EnsureCanWrite(name);
    FSIndexOutput output = new FSIndexOutput(this, name);
    return output;
}
/// <summary>
/// Prepares the directory for writing a file called <paramref name="name"/>:
/// creates the directory if it does not exist and deletes any existing file
/// with that name.
/// </summary>
/// <param name="name">file name, relative to this directory</param>
/// <exception cref="IOException">if the directory cannot be created or the
/// existing file cannot be removed; the underlying failure is available
/// through <see cref="Exception.InnerException"/></exception>
protected virtual void EnsureCanWrite(string name)
{
    if (!m_directory.Exists)
    {
        try
        {
            m_directory.Create();
        }
        catch (Exception e)
        {
            // Keep the root cause instead of discarding it.
            throw new IOException("Cannot create directory: " + m_directory, e);
        }
    }

    FileInfo file = new FileInfo(Path.Combine(m_directory.FullName, name));
    if (file.Exists) // delete existing, if any
    {
        try
        {
            file.Delete();
        }
        catch (Exception e)
        {
            // Keep the root cause instead of discarding it.
            throw new IOException("Cannot overwrite: " + file, e);
        }
    }
}
/// <summary>
/// Hook invoked by <see cref="FSIndexOutput"/> from its <c>Dispose(bool)</c> when
/// disposing. The default implementation does nothing; the stale-file
/// bookkeeping it would otherwise perform is left commented out below.
/// </summary>
/// <param name="io">the output that is being disposed</param>
protected virtual void OnIndexOutputClosed(FSIndexOutput io)
{
// LUCENENET specific: No such thing as "stale files" in .NET, since Flush(true) writes everything to disk before
// our FileStream is disposed.
//m_staleFiles.Add(io.name);
}
/// <summary>
/// Calls <c>EnsureOpen()</c> and otherwise does nothing: the sync logic
/// is kept only as commented-out code for reference.
/// </summary>
/// <param name="names">names of the files to sync; not read by this implementation</param>
public override void Sync(ICollection<string> names)
{
EnsureOpen();
// LUCENENET specific: No such thing as "stale files" in .NET, since Flush(true) writes everything to disk before
// our FileStream is disposed. Therefore, there is nothing else to do in this method.
//ISet<string> toSync = new HashSet<string>(names);
//toSync.IntersectWith(m_staleFiles);
//// LUCENENET specific: Fsync breaks concurrency here.
//// Part of a solution suggested by Vincent Van Den Berghe: http://apache.markmail.org/message/hafnuhq2ydhfjmi2
////foreach (var name in toSync)
////{
//// Fsync(name);
////}
//// fsync the directory itself, but only if there was any file fsynced before
//// (otherwise it can happen that the directory does not yet exist)!
//if (toSync.Count > 0)
//{
// IOUtils.Fsync(m_directory.FullName, true);
//}
//m_staleFiles.ExceptWith(toSync);
}
/// <summary>
/// Builds a lock identifier from this directory's canonical path: the string
/// <c>"lucene-"</c> followed by the lower-case hexadecimal form of a 32-bit
/// hash (<c>31 * digest + ch</c> per character) of that path.
/// </summary>
/// <returns>the lock identifier for this directory</returns>
/// <exception cref="Exception">if the canonical path cannot be resolved;
/// wraps the underlying <see cref="IOException"/></exception>
public override string GetLockID()
{
    EnsureOpen();

    string dirName; // name to be hashed
    try
    {
        dirName = m_directory.GetCanonicalPath();
    }
    catch (IOException e)
    {
        throw new Exception(e.ToString(), e);
    }

    int digest = 0;
    // The hash relies on 32-bit wrap-around. Make that explicit so the result
    // is the same (and no OverflowException is raised) even if the assembly
    // is compiled with arithmetic overflow checking enabled.
    unchecked
    {
        foreach (char ch in dirName)
        {
            digest = 31 * digest + ch;
        }
    }

    return "lucene-" + digest.ToString("x", CultureInfo.InvariantCulture);
}
/// <summary>
/// Closes the store to future operations by clearing <c>IsOpen</c>.
/// </summary>
/// <param name="disposing"><c>true</c> to mark the directory closed;
/// when <c>false</c> this method does nothing</param>
protected override void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    IsOpen = false;
}
/// <summary>
/// Gets the underlying filesystem directory. <c>EnsureOpen()</c> is called
/// before the value is returned (presumably throwing once this instance has
/// been disposed - confirm against the base class).
/// </summary>
public virtual DirectoryInfo Directory
{
get
{
EnsureOpen();
return m_directory;
}
}
/// <summary>
/// For debug output: the runtime type name, the directory, and the lock factory.
/// </summary>
/// <returns>a string of the form <c>TypeName@directory lockFactory=factory</c></returns>
public override string ToString()
{
    string typeName = GetType().Name;
    return typeName + "@" + m_directory + " lockFactory=" + LockFactory;
}
/// <summary>
/// This setting has no effect anymore. The value is still validated and
/// stored so that it can be read back.
/// </summary>
/// <exception cref="System.ArgumentException">if the assigned value is zero or negative</exception>
[Obsolete("this is no longer used since Lucene 4.5.")]
public int ReadChunkSize
{
    get { return chunkSize; }
    set
    {
        if (value > 0)
        {
            chunkSize = value;
            return;
        }

        throw new System.ArgumentException("chunkSize must be positive");
    }
}
/// <summary>
/// Buffered output that writes to a file in the owning <see cref="FSDirectory"/>
/// with <see cref="FileStream.Write(byte[], int, int)"/>.
/// </summary>
protected class FSIndexOutput : BufferedIndexOutput
{
    // LUCENENET specific: the 8192-byte CHUNK_SIZE constant of the Java version
    // is not needed, so the base class is constructed with its default settings.

    private readonly FSDirectory parent;  // directory that created this output
    internal readonly string name;        // file name, relative to the parent directory
    private readonly FileStream file;     // stream all flushed bytes are written to
    private volatile bool isOpen;         // remember if the file is open, so that we don't try to close it more than once

    /// <summary>
    /// Opens (or creates) the file <paramref name="name"/> inside the parent's
    /// directory for reading and writing, shared for read/write with other handles.
    /// </summary>
    /// <param name="parent">the directory that owns this output</param>
    /// <param name="name">file name, relative to the parent directory</param>
    public FSIndexOutput(FSDirectory parent, string name)
        : base()
    {
        this.parent = parent;
        this.name = name;

        string fullPath = Path.Combine(parent.m_directory.FullName, name);
        file = new FileStream(fullPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
        isOpen = true;
    }

    /// <summary>
    /// Writes <paramref name="size"/> bytes of <paramref name="b"/>, starting at
    /// <paramref name="offset"/>, to the file in a single call.
    /// </summary>
    protected internal override void FlushBuffer(byte[] b, int offset, int size)
    {
        // LUCENENET specific: FileStream is already optimized to write natively
        // if over the buffer size that is passed through its constructor, so the
        // chunked write loop of the Java version is replaced by one Write().
        file.Write(b, offset, size);
    }

    /// <summary>
    /// Notifies the parent directory, then - if the file is still open - disposes
    /// the base output, flushes the stream to disk and disposes the stream.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (!disposing)
        {
            return;
        }

        parent.OnIndexOutputClosed(this);

        // only close the file if it has not been closed yet
        if (!isOpen)
        {
            return;
        }

        IOException pending = null;
        try
        {
            base.Dispose(disposing);
            // LUCENENET specific - file.Flush(flushToDisk: true) required in .NET for concurrency
            // Part of a solution suggested by Vincent Van Den Berghe: http://apache.markmail.org/message/hafnuhq2ydhfjmi2
            file.Flush(flushToDisk: true);
        }
        catch (IOException ex)
        {
            // Remembered and handed to IOUtils together with the stream below.
            pending = ex;
        }
        finally
        {
            isOpen = false;
            IOUtils.DisposeWhileHandlingException(pending, file);
        }
    }

    /// <summary>
    /// Random-access methods: seeks the base output, then moves the file stream
    /// to <paramref name="pos"/> measured from the beginning of the file.
    /// </summary>
    [Obsolete("(4.1) this method will be removed in Lucene 5.0")]
    public override void Seek(long pos)
    {
        base.Seek(pos);
        file.Seek(pos, SeekOrigin.Begin);
    }

    /// <summary>
    /// Gets the current length of the underlying file stream.
    /// </summary>
    public override long Length
    {
        get
        {
            return file.Length;
        }
    }

    // LUCENENET NOTE: FileStream doesn't have a way to set length
}
// LUCENENET specific: Fsync is pointless in .NET, since we are
// calling FileStream.Flush(true) before the stream is disposed
// which means we never need it at the point in Java where it is called.
//protected virtual void Fsync(string name)
//{
// IOUtils.Fsync(Path.Combine(m_directory.FullName, name), false);
//}
}
}