Permalink
Browse files

* Fixing bug in indexer which pulled in too many packages

* Making the IndexWriter a singleton
  • Loading branch information...
1 parent 5f635f7 commit 5e750724f8321a01ab6a12b6adecfea7be5382c7 @pranavkm pranavkm committed Aug 21, 2012
@@ -32,47 +32,5 @@ public void CamelCaseTokenizer(string term, IEnumerable<string> tokens)
// Assert
Assert.Equal(tokens.OrderBy(p => p), result.OrderBy(p => p));
}
-
- [Fact]
- public void UpdateIndexCreatesIndexDirectoryIfNotPresent()
- {
- // Arrange
- var indexingService = new Mock<LuceneIndexingService>() { CallBase = true };
- indexingService.Setup(s => s.CreateContext()).Returns<DbContext>(null);
- indexingService.Setup(s => s.GetPackages(null, null)).Returns(new List<PackageIndexEntity> { new PackageIndexEntity() });
- indexingService.Setup(s => s.GetIndexCreationTime()).Returns<DateTime?>(null);
- indexingService.Setup(s => s.GetLastWriteTime()).Returns<DateTime?>(null);
-
- indexingService.Setup(s => s.WriteIndex(true, It.IsAny<List<PackageIndexEntity>>())).Verifiable();
- indexingService.Setup(s => s.UpdateLastWriteTime()).Verifiable();
- indexingService.Setup(s => s.EnsureIndexDirectory()).Verifiable();
-
- // Act
- indexingService.Object.UpdateIndex();
-
- // Assert
- indexingService.Verify();
- }
-
- [Fact]
- public void UpdateIndexRecreatesIndexIfOld()
- {
- // Arrange
- var lastWriteTime = DateTime.UtcNow.AddMinutes(-3);
- var indexingService = new Mock<LuceneIndexingService>() { CallBase = true };
- indexingService.Setup(s => s.CreateContext()).Returns<DbContext>(null);
- indexingService.Setup(s => s.GetPackages(null, lastWriteTime)).Returns(new List<PackageIndexEntity>());
- indexingService.Setup(s => s.GetIndexCreationTime()).Returns(DateTime.UtcNow.AddDays(-3).AddMinutes(-1));
- indexingService.Setup(s => s.GetLastWriteTime()).Returns(lastWriteTime);
-
- indexingService.Setup(s => s.ClearLuceneDirectory()).Verifiable();
- indexingService.Setup(s => s.UpdateLastWriteTime()).Verifiable();
-
- // Act
- indexingService.Object.UpdateIndex();
-
- // Assert
- indexingService.Verify();
- }
}
}
@@ -14,26 +14,33 @@ namespace NuGetGallery
{
public class LuceneIndexingService : IIndexingService
{
- private static readonly TimeSpan indexRecreateTime = TimeSpan.FromDays(3);
+ private static readonly object indexWriterLock = new object();
+ private static readonly TimeSpan indexRecreateInterval = TimeSpan.FromDays(3);
private static readonly char[] idSeparators = new[] { '.', '-' };
+ private static IndexWriter indexWriter;
public void UpdateIndex()
{
- DateTime? createdTime = GetIndexCreationTime();
- if (createdTime.HasValue && (DateTime.UtcNow - createdTime > indexRecreateTime))
+ DateTime? lastWriteTime = GetLastWriteTime();
+ bool creatingIndex = lastWriteTime == null;
+
+ EnsureIndexWriter(creatingIndex);
+
+ if (IndexRequiresRefresh())
{
- ClearLuceneDirectory();
+ indexWriter.DeleteAll();
+ indexWriter.Commit();
+
+ // Reset lastWriteTime to null so that the GetPackages call below returns a fresh copy of all the latest / latest successful packages.
+ lastWriteTime = null;
}
- DateTime? lastWriteTime = GetLastWriteTime();
- bool creatingIndex = lastWriteTime == null;
using (var context = CreateContext())
{
var packages = GetPackages(context, lastWriteTime);
- if (packages.Any())
+ if (packages.Count > 0)
{
- EnsureIndexDirectory();
- WriteIndex(creatingIndex, packages);
+ AddPackages(packages);
}
}
UpdateLastWriteTime();
@@ -64,18 +71,7 @@ protected internal virtual List<PackageIndexEntity> GetPackages(DbContext contex
}
}
- protected internal virtual void WriteIndex(bool creatingIndex, List<PackageIndexEntity> packages)
- {
- using (var directory = new LuceneFileSystem(LuceneCommon.IndexDirectory))
- {
- var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);
- var indexWriter = new IndexWriter(directory, analyzer, create: creatingIndex, mfl: IndexWriter.MaxFieldLength.UNLIMITED);
- AddPackages(indexWriter, packages);
- indexWriter.Close();
- }
- }
-
- private static void AddPackages(IndexWriter indexWriter, List<PackageIndexEntity> packages)
+ private static void AddPackages(List<PackageIndexEntity> packages)
{
foreach (var package in packages)
{
@@ -117,37 +113,45 @@ private static void AddPackages(IndexWriter indexWriter, List<PackageIndexEntity
indexWriter.AddDocument(document);
}
+ indexWriter.Commit();
}
- protected internal virtual void EnsureIndexDirectory()
+ protected static void EnsureIndexWriter(bool creatingIndex)
{
- if (!Directory.Exists(LuceneCommon.IndexDirectory))
+ if (indexWriter == null)
{
- Directory.CreateDirectory(LuceneCommon.IndexDirectory);
+ lock (indexWriterLock)
+ {
+ if (indexWriter == null)
+ {
+ EnsureIndexWriterCore(creatingIndex);
+ }
+ }
}
}
- protected internal virtual DateTime? GetIndexCreationTime()
+ private static void EnsureIndexWriterCore(bool creatingIndex)
{
- if (File.Exists(LuceneCommon.IndexMetadataPath))
+ if (!Directory.Exists(LuceneCommon.IndexDirectory))
{
- var text = File.ReadLines(LuceneCommon.IndexMetadataPath).FirstOrDefault();
- DateTime dateTime;
- if (!String.IsNullOrEmpty(text) && DateTime.TryParseExact(text, "R", CultureInfo.InvariantCulture, DateTimeStyles.None, out dateTime))
- {
- return dateTime;
- }
+ Directory.CreateDirectory(LuceneCommon.IndexDirectory);
}
-
- return null;
+
+ var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);
+ var directoryInfo = new DirectoryInfo(LuceneCommon.IndexDirectory);
+ var directory = new Lucene.Net.Store.SimpleFSDirectory(directoryInfo);
+ indexWriter = new IndexWriter(directory, analyzer, create: creatingIndex, mfl: IndexWriter.MaxFieldLength.UNLIMITED);
}
- protected internal virtual void ClearLuceneDirectory()
+ protected internal bool IndexRequiresRefresh()
{
- if (Directory.Exists(LuceneCommon.IndexDirectory))
+ if (File.Exists(LuceneCommon.IndexMetadataPath))
{
- Directory.Delete(LuceneCommon.IndexDirectory, recursive: true);
+ var creationTime = File.GetCreationTimeUtc(LuceneCommon.IndexMetadataPath);
+ return (DateTime.UtcNow - creationTime) > indexRecreateInterval;
}
+ // If we've never created the index, it needs to be refreshed.
+ return true;
}
protected internal virtual DateTime? GetLastWriteTime()
@@ -163,11 +167,8 @@ protected internal virtual void UpdateLastWriteTime()
{
if (!File.Exists(LuceneCommon.IndexMetadataPath))
{
- if (Directory.Exists(LuceneCommon.IndexMetadataPath))
- {
- // If the directory exists, then assume that the index has been created.
- File.WriteAllText(LuceneCommon.IndexMetadataPath, DateTime.UtcNow.ToString("R"));
- }
+ // Create an empty index metadata file; the file's creation time serves as the timestamp recording when the index was created.
+ File.WriteAllBytes(LuceneCommon.IndexMetadataPath, new byte[0]);
}
else
{
@@ -208,7 +209,7 @@ private static IEnumerable<string> CamelCaseTokenize(string term)
// If the remainder is smaller than 2 chars, just return the entire string
i = 0;
}
-
+
yield return term.Substring(i, tokenEnd - i);
tokenEnd = i;
}

0 comments on commit 5e75072

Please sign in to comment.