Permalink
Browse files

Prevent duplication of entries in the index

Prior to updating a package, remove corresponding package registration
entries from the index to ensure we can safely add to it.
Fixes bug #593
  • Loading branch information...
1 parent 40e0762 commit 11bf2ee1236a3508abf4b42dea84f2500f976789 @pranavkm pranavkm committed Sep 18, 2012
@@ -54,54 +54,55 @@ public void UpdateIndex()
var packages = GetPackages(_entitiesContext, lastWriteTime);
if (packages.Count > 0)
{
- AddPackages(packages);
+ AddPackages(packages, creatingIndex: lastWriteTime == null);
}
}
UpdateLastWriteTime();
}
protected internal virtual List<PackageIndexEntity> GetPackages(DbContext context, DateTime? lastIndexTime)
{
- string sql = @"SELECT p.[Key], pr.Id, p.Title, p.Description, p.Tags, p.FlattenedAuthors as Authors, pr.DownloadCount,
+ string sql = @"SELECT p.[Key], p.PackageRegistrationKey, pr.Id, p.Title, p.Description, p.Tags, p.FlattenedAuthors as Authors, pr.DownloadCount,
p.IsLatestStable, p.IsLatest, p.Published
- FROM Packages p JOIN PackageRegistrations pr on p.PackageRegistrationKey = pr.[Key]";
+ FROM Packages p JOIN PackageRegistrations pr on p.PackageRegistrationKey = pr.[Key]
+ WHERE ((p.IsLatest = 1) or (p.IsLatestStable = 1)) ";
object[] parameters;
-
if (lastIndexTime == null)
{
- // First time creation. Only pull the latest packages.
- sql += " WHERE ((p.IsLatest = 1) or (p.IsLatestStable = 1)) ";
+ // First time creation. Pull latest packages without filtering
parameters = new object[0];
}
else
{
- sql += " WHERE p.LastUpdated > @UpdatedDate";
+ // Retrieve the Latest and LatestStable version of packages if any package for that registration changed since we last updated the index.
+ // We need to do this because some attributes that we index, such as DownloadCount, are values in the PackageRegistration table that may
+ // be updated independently of the package.
+ sql += " AND Exists (Select 1 from dbo.Packages iP where iP.LastUpdated > @UpdatedDate and iP.PackageRegistrationKey = p.PackageRegistrationKey) ";
parameters = new[] { new SqlParameter("UpdatedDate", lastIndexTime.Value) };
}
return context.Database.SqlQuery<PackageIndexEntity>(sql, parameters).ToList();
}
- private static void AddPackages(List<PackageIndexEntity> packages)
+ private static void AddPackages(List<PackageIndexEntity> packages, bool creatingIndex)
{
- var packagesToDelete = from package in packages
- where !(package.IsLatest || package.IsLatestStable)
- select new Term("Key", package.Key.ToString(CultureInfo.InvariantCulture));
- indexWriter.DeleteDocuments(packagesToDelete.ToArray());
+ if (!creatingIndex)
+ {
+ // If this is not the first time we're creating the index, clear any package registrations for packages we are going to update.
+ var packagesToDelete = from packageRegistrationKey in packages.Select(p => p.PackageRegistrationKey).Distinct()
+ select new Term("PackageRegistrationKey", packageRegistrationKey.ToString(CultureInfo.InvariantCulture));
+ indexWriter.DeleteDocuments(packagesToDelete.ToArray());
+ }
// As per http://stackoverflow.com/a/3894582. The IndexWriter is CPU bound, so we can try and write multiple packages in parallel.
- var packagesToUpdate = from package in packages
- where package.IsLatest || package.IsLatestStable
- select package;
// The IndexWriter is thread safe and is primarily CPU-bound.
- Parallel.ForEach(packagesToUpdate, UpdatePackage);
+ Parallel.ForEach(packages, AddPackage);
indexWriter.Commit();
}
- private static void UpdatePackage(PackageIndexEntity package)
+ private static void AddPackage(PackageIndexEntity package)
{
- string key = package.Key.ToString(CultureInfo.InvariantCulture);
var document = new Document();
var field = new Field("Id-Exact", package.Id.ToLowerInvariant(), Field.Store.NO, Field.Index.NOT_ANALYZED);
@@ -139,15 +140,16 @@ private static void UpdatePackage(PackageIndexEntity package)
document.Add(new Field("Author", package.Authors, Field.Store.NO, Field.Index.ANALYZED));
// Fields meant for filtering and sorting
- document.Add(new Field("Key", key, Field.Store.YES, Field.Index.NO));
+ document.Add(new Field("Key", package.Key.ToString(CultureInfo.InvariantCulture), Field.Store.YES, Field.Index.NO));
+ document.Add(new Field("PackageRegistrationKey", package.PackageRegistrationKey.ToString(CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED));
document.Add(new Field("IsLatest", package.IsLatest.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
document.Add(new Field("IsLatestStable", package.IsLatestStable.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
document.Add(new Field("PublishedDate", package.Published.Ticks.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
document.Add(new Field("DownloadCount", package.DownloadCount.ToString(CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED));
string displayName = String.IsNullOrEmpty(package.Title) ? package.Id : package.Title;
document.Add(new Field("DisplayName", displayName.ToLower(CultureInfo.CurrentCulture), Field.Store.NO, Field.Index.NOT_ANALYZED));
- indexWriter.UpdateDocument(new Term("Key", key), document);
+ indexWriter.AddDocument(document);
}
protected static void EnsureIndexWriter(bool creatingIndex)
@@ -5,6 +5,8 @@ public class PackageIndexEntity
{
public int Key { get; set; }
+ public int PackageRegistrationKey { get; set; }
+
public string Id { get; set; }
public string Title { get; set; }
@@ -112,7 +112,7 @@ public virtual Package FindPackageByIdAndVersion(string id, string version, bool
.Include(p => p.Authors)
.Include(p => p.PackageRegistration)
.Where(p => (p.PackageRegistration.Id == id));
- if (String.IsNullOrEmpty(version) && !allowPrerelease)
+ if (String.IsNullOrEmpty(version) && !allowPrerelease)
{
// If there's a specific version given, don't bother filtering by prerelease. You could be asking for a prerelease package.
packagesQuery = packagesQuery.Where(p => !p.IsPrerelease);
@@ -154,7 +154,7 @@ public IQueryable<Package> GetPackagesForListing(bool includePrerelease)
.Include(x => x.PackageRegistration.Owners)
.Where(p => p.Listed);
- return includePrerelease ? packages.Where(p => p.IsLatest) :
+ return includePrerelease ? packages.Where(p => p.IsLatest) :
packages.Where(p => p.IsLatestStable);
}
@@ -287,16 +287,16 @@ Package CreatePackageFromNuGetPackage(PackageRegistration packageRegistration, I
var supportedFrameworks = GetSupportedFrameworks(nugetPackage).Select(fn => fn.ToShortNameOrNull()).ToArray();
if (!supportedFrameworks.AnySafe(sf => sf == null))
- foreach(var supportedFramework in supportedFrameworks)
- package.SupportedFrameworks.Add(new PackageFramework{ TargetFramework = supportedFramework });
+ foreach (var supportedFramework in supportedFrameworks)
+ package.SupportedFrameworks.Add(new PackageFramework { TargetFramework = supportedFramework });
- foreach(var dependencySet in nugetPackage.DependencySets)
+ foreach (var dependencySet in nugetPackage.DependencySets)
{
if (dependencySet.Dependencies.Count == 0)
package.Dependencies.Add(new PackageDependency
{
- Id = null,
- VersionSpec = null,
+ Id = null,
+ VersionSpec = null,
TargetFramework = dependencySet.TargetFramework.ToShortNameOrNull()
});
else
@@ -358,12 +358,12 @@ static void ValidateNuGetPackage(IPackage nugetPackage)
{
if (dependency.Id != null && dependency.Id.Length > 128)
{
- throw new EntityException(Strings.NuGetPackagePropertyTooLong, "Dependency.Id", "128");
+ throw new EntityException(Strings.NuGetPackagePropertyTooLong, "Dependency.Id", "128");
}
if (dependency.VersionSpec != null && dependency.VersionSpec.ToString().Length > 256)
{
- throw new EntityException(Strings.NuGetPackagePropertyTooLong, "Dependency.VersionSpec", "256");
+ throw new EntityException(Strings.NuGetPackagePropertyTooLong, "Dependency.VersionSpec", "256");
}
}
@@ -378,12 +378,12 @@ private static void UpdateIsLatest(PackageRegistration packageRegistration)
return;
}
- // TODO: improve setting the latest bit; this is horrible. Trigger maybe?
- // NOTE: EF is suprisingly smart about doing this. It doesn't issue queries for the vast majority of packages that did not have either flags changed.
- foreach (var pv in packageRegistration.Packages)
+ // TODO: improve setting the latest bit; this is horrible. Trigger maybe?
+ foreach (var pv in packageRegistration.Packages.Where(p => p.IsLatest || p.IsLatestStable))
{
pv.IsLatest = false;
pv.IsLatestStable = false;
+ pv.LastUpdated = DateTime.UtcNow;
}
// If the last listed package was just unlisted, then we won't find another one

0 comments on commit 11bf2ee

Please sign in to comment.