Permalink
Browse files

* Modifying Lucene to store latest and latest stable packages for

  prerelease filtering
* Fixing bug where using multiple IndexWriters would lock the index
* Improving search perf by adding boosts to fields at index time
* Removing the need to recreate index by updating the index when
  aggregate download counts are updated.
  • Loading branch information...
1 parent 08211b2 commit 3baaa034c17e229002810c7120b447e50c5b81a9 @pranavkm pranavkm committed Jul 14, 2012
@@ -307,7 +307,9 @@ public void TrimsSearchTerm()
{
var fakeIdentity = new Mock<IIdentity>();
var httpContext = new Mock<HttpContextBase>();
- var controller = CreateController(fakeIdentity: fakeIdentity, httpContext: httpContext);
+ var searchService = new Mock<ISearchService>();
+
+ var controller = CreateController(fakeIdentity: fakeIdentity, httpContext: httpContext, searchService: searchService);
var result = controller.ListPackages(" test ") as ViewResult;
@@ -1263,8 +1265,8 @@ public void RedirectsToUploadPageAfterDelete()
private static Mock<ISearchService> CreateSearchService()
{
var searchService = new Mock<ISearchService>();
- searchService.Setup(s => s.Search(It.IsAny<IQueryable<Package>>(), It.IsAny<string>())).Returns((IQueryable<Package> p, string searchTerm) => p);
- searchService.Setup(s => s.SearchWithRelevance(It.IsAny<IQueryable<Package>>(), It.IsAny<string>())).Returns((IQueryable<Package> p, string searchTerm) => p);
+ int total;
+ searchService.Setup(s => s.Search(It.IsAny<IQueryable<Package>>(), It.IsAny<SearchFilter>(), out total)).Returns((IQueryable<Package> p, string searchTerm) => p);
return searchService;
}
@@ -22,7 +22,8 @@ public void V1FeedSearchDoesNotReturnPrereleasePackages()
var configuration = new Mock<IConfiguration>(MockBehavior.Strict);
configuration.Setup(c => c.GetSiteRoot(It.IsAny<bool>())).Returns("https://localhost:8081/");
var searchService = new Mock<ISearchService>(MockBehavior.Strict);
- searchService.Setup(s => s.SearchWithRelevance(It.IsAny<IQueryable<Package>>(), It.IsAny<String>())).Returns<IQueryable<Package>, string>((_, __) => _);
+ int total;
+ searchService.Setup(s => s.Search(It.IsAny<IQueryable<Package>>(), It.IsAny<SearchFilter>(), out total)).Returns<IQueryable<Package>, string>((_, __) => _);
var v1Service = new TestableV1Feed(repo.Object, configuration.Object, searchService.Object);
// Act
@@ -47,7 +48,8 @@ public void V1FeedSearchDoesNotReturnUnlistedPackages()
new Package { PackageRegistration = new PackageRegistration { Id ="baz" }, Version = "2.0", Listed = false, DownloadStatistics = new List<PackageStatistics>() },
}.AsQueryable());
var searchService = new Mock<ISearchService>(MockBehavior.Strict);
- searchService.Setup(s => s.SearchWithRelevance(It.IsAny<IQueryable<Package>>(), It.IsAny<String>())).Returns<IQueryable<Package>, string>((_, __) => _);
+ int total;
+ searchService.Setup(s => s.Search(It.IsAny<IQueryable<Package>>(), It.IsAny<SearchFilter>(), out total)).Returns<IQueryable<Package>, string>((_, __) => _);
var configuration = new Mock<IConfiguration>(MockBehavior.Strict);
configuration.Setup(c => c.GetSiteRoot(It.IsAny<bool>())).Returns("http://test.nuget.org/");
var v1Service = new TestableV1Feed(repo.Object, configuration.Object, searchService.Object);
@@ -75,7 +77,8 @@ public void V2FeedSearchDoesNotReturnPrereleasePackagesIfFlagIsFalse()
new Package { PackageRegistration = packageRegistration, Version = "1.0.1-a", IsPrerelease = true, Listed = true, DownloadStatistics = new List<PackageStatistics>() },
}.AsQueryable());
var searchService = new Mock<ISearchService>(MockBehavior.Strict);
- searchService.Setup(s => s.SearchWithRelevance(It.IsAny<IQueryable<Package>>(), It.IsAny<String>())).Returns<IQueryable<Package>, string>((_, __) => _);
+ int total;
+ searchService.Setup(s => s.Search(It.IsAny<IQueryable<Package>>(), It.IsAny<SearchFilter>(), out total)).Returns<IQueryable<Package>, string>((_, __) => _);
var configuration = new Mock<IConfiguration>(MockBehavior.Strict);
configuration.Setup(c => c.GetSiteRoot(It.IsAny<bool>())).Returns("https://staged.nuget.org/");
var v2Service = new TestableV2Feed(repo.Object, configuration.Object, searchService.Object);
@@ -136,7 +136,6 @@ public virtual ActionResult ListPackages(string q, string sortOrder = null, int
page = 1;
}
-
IQueryable<Package> packageVersions = packageSvc.GetLatestPackageVersions(allowPrerelease: true);
q = (q ?? "").Trim();
@@ -151,20 +150,12 @@ public virtual ActionResult ListPackages(string q, string sortOrder = null, int
int totalHits;
if (!String.IsNullOrEmpty(q))
{
- if (sortOrder.Equals(Constants.RelevanceSortOrder, StringComparison.OrdinalIgnoreCase))
- {
- packageVersions = searchSvc.SearchWithRelevance(packageVersions, q, take: page * Constants.DefaultPackageListPageSize, totalHits: out totalHits);
- if (page == 1 && !packageVersions.Any())
- {
- // In the event the index wasn't updated, we may get an incorrect count.
- totalHits = 0;
- }
- }
- else
+ var searchFilter = GetSearchFilter(q, sortOrder, page);
+ packageVersions = searchSvc.Search(packageVersions, searchFilter, out totalHits);
+ if (page == 1 && !packageVersions.Any())
{
- packageVersions = searchSvc.Search(packageVersions, q)
- .SortBy(GetSortExpression(sortOrder));
- totalHits = packageVersions.Count();
+ // In the event the index wasn't updated, we may get an incorrect count.
+ totalHits = 0;
}
}
else
@@ -186,20 +177,6 @@ public virtual ActionResult ListPackages(string q, string sortOrder = null, int
return View(viewModel);
}
- private static string GetSortExpression(string sortOrder)
- {
- switch (sortOrder)
- {
- case Constants.AlphabeticSortOrder:
- return "PackageRegistration.Id";
- case Constants.RecentSortOrder:
- return "Published desc";
- case Constants.PopularitySortOrder:
- default:
- return "PackageRegistration.DownloadCount desc";
- }
- }
-
// NOTE: Intentionally NOT requiring authentication
public virtual ActionResult ReportAbuse(string id, string version)
{
@@ -543,5 +520,47 @@ protected internal virtual IPackage ReadNuGetPackage(Stream stream)
{
return new ZipPackage(stream);
}
+
+ private SearchFilter GetSearchFilter(string q, string sortOrder, int page)
+ {
+ var searchFilter = new SearchFilter
+ {
+ SearchTerm = q,
+ Skip = (page - 1) * Constants.DefaultPackageListPageSize, // pages are 1-based.
+ Take = Constants.DefaultPackageListPageSize
+ };
+
+ switch (sortOrder)
+ {
+ case Constants.AlphabeticSortOrder:
+ searchFilter.SortProperty = SortProperty.DisplayName;
+ searchFilter.SortDirection = SortDirection.Ascending;
+ break;
+ case Constants.RecentSortOrder:
+ searchFilter.SortProperty = SortProperty.Recent;
+ break;
+ case Constants.PopularitySortOrder:
+ searchFilter.SortProperty = SortProperty.DownloadCount;
+ break;
+ default:
+ searchFilter.SortProperty = SortProperty.Relevance;
+ break;
+ }
+ return searchFilter;
+ }
+
+ private static string GetSortExpression(string sortOrder)
+ {
+ switch (sortOrder)
+ {
+ case Constants.AlphabeticSortOrder:
+ return "PackageRegistration.Id";
+ case Constants.RecentSortOrder:
+ return "Published desc";
+ case Constants.PopularitySortOrder:
+ default:
+ return "PackageRegistration.DownloadCount desc";
+ }
+ }
}
}
@@ -70,8 +70,8 @@ public IQueryable<V1FeedPackage> Search(string searchTerm, string targetFramewor
{
return packages.ToV1FeedPackageQuery(Configuration.GetSiteRoot(UseHttps()));
}
- return SearchService.Search(packages, searchTerm)
- .ToV1FeedPackageQuery(Configuration.GetSiteRoot(UseHttps()));
+ return packages.Search(searchTerm)
+ .ToV1FeedPackageQuery(Configuration.GetSiteRoot(UseHttps()));
}
}
}
@@ -5,10 +5,10 @@
using System.Globalization;
using System.IO;
using System.Linq;
+using System.Threading.Tasks;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
-using Lucene.Net.Search;
namespace NuGetGallery
{
@@ -54,69 +54,92 @@ protected internal virtual DbContext CreateContext()
return new EntitiesContext();
}
- protected internal virtual List<PackageIndexEntity> GetPackages(DbContext context, DateTime? dateTime)
+ protected internal virtual List<PackageIndexEntity> GetPackages(DbContext context, DateTime? lastIndexTime)
{
- if (dateTime == null)
+ string sql = @"SELECT p.[Key], pr.Id, p.Title, p.Description, p.Tags, p.FlattenedAuthors as Authors, pr.DownloadCount,
+ p.IsLatestStable, p.IsLatest, p.Published
+ FROM Packages p JOIN PackageRegistrations pr on p.PackageRegistrationKey = pr.[Key]";
+
+ object[] parameters;
+
+ if (lastIndexTime == null)
{
- // If we're creating the index for the first time, fetch the new packages.
- string sql = @"Select p.[Key], pr.Id, p.Title, p.Description, p.Tags, p.FlattenedAuthors as Authors, pr.DownloadCount, p.[Key] as LatestKey
- from Packages p join PackageRegistrations pr on p.PackageRegistrationKey = pr.[Key]
- where p.IsLatestStable = 1 or (p.IsLatest = 1 and Not exists (Select 1 from Packages iP where iP.PackageRegistrationKey = p.PackageRegistrationKey and iP.IsLatestStable = 1))";
- return context.Database.SqlQuery<PackageIndexEntity>(sql).ToList();
+ // First time creation. Only pull the latest packages.
+ sql += " WHERE ((p.IsLatest = 1) or (p.IsLatestStable = 1)) ";
+ parameters = new object[0];
}
else
{
- string sql = @"Select p.[Key], pr.Id, p.Title, p.Description, p.Tags, p.FlattenedAuthors as Authors, pr.DownloadCount,
- LatestKey = CASE When p.IsLatest = 1 then p.[Key] Else (Select pLatest.[Key] from Packages pLatest where pLatest.PackageRegistrationKey = pr.[Key] and pLatest.IsLatest = 1) End
- from Packages p join PackageRegistrations pr on p.PackageRegistrationKey = pr.[Key]
- where p.LastUpdated > @UpdatedDate";
- return context.Database.SqlQuery<PackageIndexEntity>(sql, new SqlParameter("UpdatedDate", dateTime.Value)).ToList();
+ sql += " WHERE p.LastUpdated > @UpdatedDate";
+ parameters = new[] { new SqlParameter("UpdatedDate", lastIndexTime.Value) };
}
+ return context.Database.SqlQuery<PackageIndexEntity>(sql, parameters).ToList();
}
private static void AddPackages(List<PackageIndexEntity> packages)
{
- foreach (var package in packages)
- {
- if (package.Key != package.LatestKey)
- {
- indexWriter.DeleteDocuments(new TermQuery(new Term("Key", package.Key.ToString(CultureInfo.InvariantCulture))));
- continue;
- }
+ var packagesToDelete = from package in packages
+ where !(package.IsLatest || package.IsLatestStable)
+ select new Term("Key", package.Key.ToString(CultureInfo.InvariantCulture));
+ indexWriter.DeleteDocuments(packagesToDelete.ToArray());
- // If there's an older entry for this package, remove it.
- var document = new Document();
+ // As per http://stackoverflow.com/a/3894582. The IndexWriter is CPU bound, so we can try and write multiple packages in parallel.
+ var packagesToUpdate = from package in packages
+ where package.IsLatest || package.IsLatestStable
+ select package;
+ // The IndexWriter is thread safe and is primarily CPU-bound.
+ Parallel.ForEach(packagesToUpdate, UpdatePackage);
- document.Add(new Field("Key", package.Key.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NO));
- document.Add(new Field("Id-Exact", package.Id.ToLowerInvariant(), Field.Store.NO, Field.Index.NOT_ANALYZED));
+ indexWriter.Commit();
+ }
- document.Add(new Field("Description", package.Description, Field.Store.NO, Field.Index.ANALYZED));
+ private static void UpdatePackage(PackageIndexEntity package)
+ {
+ string key = package.Key.ToString(CultureInfo.InvariantCulture);
+ var document = new Document();
- var tokenizedId = TokenizeId(package.Id);
- foreach (var idToken in tokenizedId)
- {
- document.Add(new Field("Id", idToken, Field.Store.NO, Field.Index.ANALYZED));
- }
+ var field = new Field("Id-Exact", package.Id.ToLowerInvariant(), Field.Store.NO, Field.Index.NOT_ANALYZED);
+ field.SetBoost(2.5f);
+ document.Add(field);
- // If an element does not have a Title, then add all the tokenized Id components as Title.
- // Lucene's StandardTokenizer does not tokenize items of the format a.b.c which does not play well with things like "xunit.net".
- // We will feed it values that are already tokenized.
- var titleTokens = String.IsNullOrEmpty(package.Title) ? tokenizedId : package.Title.Split(idSeparators, StringSplitOptions.RemoveEmptyEntries);
- foreach (var idToken in titleTokens)
- {
- document.Add(new Field("Title", idToken, Field.Store.NO, Field.Index.ANALYZED));
- }
+ field = new Field("Description", package.Description, Field.Store.NO, Field.Index.ANALYZED);
+ field.SetBoost(0.1f);
+ document.Add(field);
- if (!String.IsNullOrEmpty(package.Tags))
- {
- document.Add(new Field("Tags", package.Tags, Field.Store.NO, Field.Index.ANALYZED));
- }
- document.Add(new Field("Author", package.Authors, Field.Store.NO, Field.Index.ANALYZED));
- document.Add(new Field("DownloadCount", package.DownloadCount.ToString(CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
+ var tokenizedId = TokenizeId(package.Id);
+ foreach (var idToken in tokenizedId)
+ {
+ field = new Field("Id", idToken, Field.Store.NO, Field.Index.ANALYZED);
+ field.SetBoost(1.2f);
+ document.Add(field);
+ }
- indexWriter.AddDocument(document);
+ // If an element does not have a Title, then add all the tokenized Id components as Title.
+ // Lucene's StandardTokenizer does not tokenize items of the format a.b.c which does not play well with things like "xunit.net".
+ // We will feed it values that are already tokenized.
+ var titleTokens = String.IsNullOrEmpty(package.Title) ? tokenizedId : package.Title.Split(idSeparators, StringSplitOptions.RemoveEmptyEntries);
+ foreach (var idToken in titleTokens)
+ {
+ document.Add(new Field("Title", idToken, Field.Store.NO, Field.Index.ANALYZED));
}
- indexWriter.Commit();
+
+ if (!String.IsNullOrEmpty(package.Tags))
+ {
+ field = new Field("Tags", package.Tags, Field.Store.NO, Field.Index.ANALYZED);
+ field.SetBoost(0.8f);
+ document.Add(field);
+ }
+ document.Add(new Field("Author", package.Authors, Field.Store.NO, Field.Index.ANALYZED));
+
+ // Fields meant for filtering and sorting
+ document.Add(new Field("Key", key, Field.Store.YES, Field.Index.NO));
+ document.Add(new Field("IsLatestStable", package.IsLatestStable.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
+ document.Add(new Field("PublishedDate", package.Published.Ticks.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
+ document.Add(new Field("DownloadCount", package.DownloadCount.ToString(CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED));
+ string displayName = String.IsNullOrEmpty(package.Title) ? package.Id : package.Title;
+ document.Add(new Field("DisplayName", displayName.ToLower(CultureInfo.CurrentCulture), Field.Store.NO, Field.Index.NOT_ANALYZED));
+
+ indexWriter.UpdateDocument(new Term("Key", key), document);
}
protected static void EnsureIndexWriter(bool creatingIndex)
Oops, something went wrong.

0 comments on commit 3baaa03

Please sign in to comment.