Skip to content
This repository
Browse code

Changing default sort order to relevance

  • Loading branch information...
commit fc0fcf44535566eb8015915aca41f45cdf7da359 1 parent 01da197
Pranav K authored June 20, 2012
27  Facts/Infrastructure/LuceneIndexingServiceFacts.cs
... ...
@@ -1,6 +1,7 @@
1 1
 using System;
2 2
 using System.Collections.Generic;
3 3
 using System.Data.Entity;
  4
+using System.Linq;
4 5
 using Moq;
5 6
 using Xunit;
6 7
 using Xunit.Extensions;
@@ -10,24 +11,26 @@ namespace NuGetGallery.Infrastructure
10 11
     public class LuceneIndexingServiceFacts
11 12
     {
12 13
         [Theory]
13  
-        [InlineData(new object[] { "NHibernate", new string[0] })]
14  
-        [InlineData(new object[] { "NUnit", new string[0] })]
15  
-        [InlineData(new object[] { "SisoDb", new[] { "Siso", "Db" } })]
16  
-        [InlineData(new object[] { "EntityFramework", new[] { "Entity", "Framework" } })]
17  
-        [InlineData(new object[] { "Sys-netFX", new[] { "Sys", "net", "FX" } })]
18  
-        [InlineData(new object[] { "xUnit", new string[0] })]
19  
-        [InlineData(new object[] { "jQueryUI", new[] { "jQuery", "UI" } })]
20  
-        [InlineData(new object[] { "jQuery-UI", new[] { "jQuery", "UI" } })]
21  
-        [InlineData(new object[] { "NuGetPowerTools", new[] { "NuGet", "Power", "Tools" } })]
22  
-        [InlineData(new object[] { "microsoft-web-helpers", new[] { "microsoft", "web", "helpers" } })]
23  
-        [InlineData(new object[] { "EntityFramework.sample", new[] { "Entity", "Framework", "sample" } })]
  14
+        [InlineData("NHibernate", new string[0])]
  15
+        [InlineData("NUnit", new string[0])]
  16
+        [InlineData("EntityFramework", new[] { "Framework", "Entity" })]
  17
+        [InlineData("Sys-netFX", new[] { "Sys", "netFX" })]
  18
+        [InlineData("xUnit", new string[0])]
  19
+        [InlineData("jQueryUI", new string[0])]
  20
+        [InlineData("jQuery-UI", new[] { "jQuery", "UI" })]
  21
+        [InlineData("NuGetPowerTools", new[] { "NuGet", "Power", "Tools" } )]
  22
+        [InlineData("microsoft-web-helpers", new[] { "microsoft", "web", "helpers" } )]
  23
+        [InlineData("EntityFramework.sample", new[] { "EntityFramework", "sample", "Framework", "Entity" })]
  24
+        [InlineData("SignalR.MicroSliver", new[] { "SignalR", "MicroSliver", "Micro", "Sliver" })]
  25
+        [InlineData("ABCMicroFramework", new[] { "ABC", "Micro", "Framework" })]
  26
+        [InlineData("SignalR.Hosting.AspNet", new[] { "SignalR", "Hosting", "AspNet", "Asp", "Net"})] 
24 27
         public void CamelCaseTokenizer(string term, IEnumerable<string> tokens)
25 28
         {
26 29
             // Act
27 30
             var result = LuceneIndexingService.TokenizeId(term);
28 31
 
29 32
             // Assert
30  
-            Assert.Equal(tokens, result);
  33
+            Assert.Equal(tokens.OrderBy(p => p), result.OrderBy(p => p));
31 34
         }
32 35
 
33 36
         [Fact]
10  Website/Controllers/PackagesController.cs
@@ -129,17 +129,25 @@ public virtual ActionResult DisplayPackage(string id, string version)
129 129
             return View(model);
130 130
         }
131 131
 
132  
-        public virtual ActionResult ListPackages(string q, string sortOrder = Constants.PopularitySortOrder, int page = 1)
  132
+        public virtual ActionResult ListPackages(string q, string sortOrder = null, int page = 1)
133 133
         {
134 134
             if (page < 1)
135 135
             {
136 136
                 page = 1;
137 137
             }
138 138
 
  139
+
139 140
             IQueryable<Package> packageVersions = packageSvc.GetLatestPackageVersions(allowPrerelease: true);
140 141
 
141 142
             q = (q ?? "").Trim();
142 143
 
  144
+            if (String.IsNullOrEmpty(sortOrder))
  145
+            {
  146
+                // Determine the default sort order. If no search term is specified, then sort by popularity.
  147
+                // If we are searching for something, sort by relevance.
  148
+                sortOrder = q.IsEmpty() ? Constants.PopularitySortOrder : Constants.RelevanceSortOrder;
  149
+            }
  150
+
143 151
             if (GetIdentity().IsAuthenticated)
144 152
             {
145 153
                 // Only show listed packages. For unlisted packages, only show them if the owner is viewing it.
39  Website/Infrastructure/Lucene/LuceneIndexingService.cs
@@ -171,7 +171,16 @@ protected internal virtual void UpdateLastWriteTime()
171 171
 
172 172
         internal static IEnumerable<string> TokenizeId(string term)
173 173
         {
174  
-            var result = CamelCaseTokenize(term).SelectMany(s => s.Split(idSeparators, StringSplitOptions.RemoveEmptyEntries)).ToList();
  174
+
  175
+            // First tokenize the term by id-separators, e.g. tokenize SignalR.EventStream as SignalR and EventStream
  176
+            var tokens = term.Split(idSeparators, StringSplitOptions.RemoveEmptyEntries);
  177
+
  178
+            // For each token, further attempt to tokenize camel-case values, e.g. EventStream -> Event, Stream.
  179
+            // Skip the exact term since we index it independently
  180
+            var result = tokens.Concat(tokens.SelectMany(CamelCaseTokenize))
  181
+                               .Distinct(StringComparer.OrdinalIgnoreCase)
  182
+                               .Where(t => !term.Equals(t))
  183
+                               .ToList();
175 184
             if (result.Count == 1)
176 185
             {
177 186
                 return Enumerable.Empty<string>();
@@ -181,25 +190,31 @@ internal static IEnumerable<string> TokenizeId(string term)
181 190
 
182 191
         private static IEnumerable<string> CamelCaseTokenize(string term)
183 192
         {
184  
-            if (term.Length < 2)
  193
+            const int MinTokenLength = 3;
  194
+            if (term.Length < MinTokenLength)
185 195
             {
186 196
                 yield break;
187 197
             }
188 198
 
189  
-            int tokenStart = 0;
190  
-            for (int i = 1; i < term.Length; i++)
  199
+            int tokenEnd = term.Length;
  200
+            for (int i = term.Length - 1; i > 0; i--)
191 201
             {
192  
-                if (Char.IsUpper(term[i]) && (i - tokenStart > 2))
  202
+                // If the remainder is shorter than MinTokenLength chars, or we are at an uppercase char that starts a token at least MinTokenLength chars long, tokenize it.
  203
+                if (i < MinTokenLength || (Char.IsUpper(term[i]) && (tokenEnd - i >= MinTokenLength)))
193 204
                 {
194  
-                    yield return term.Substring(tokenStart, i - tokenStart);
195  
-                    tokenStart = i;
  205
+                    if (i < MinTokenLength)
  206
+                    {
  207
+                        // If the remainder is shorter than MinTokenLength chars, extend the current token back to the start of the term
  208
+                        i = 0;
  209
+                    }
  210
+                        
  211
+                    yield return term.Substring(i, tokenEnd - i);
  212
+                    tokenEnd = i;
196 213
                 }
197 214
             }
198  
-            if (term.Length - tokenStart < 2)
199  
-            {
200  
-                yield break;
201  
-            }
202  
-            yield return term.Substring(tokenStart);
  215
+
  216
+            // Finally return the term in its entirety
  217
+            yield return term;
203 218
         }
204 219
     }
205 220
 }
19  Website/Infrastructure/Lucene/LuceneSearchService.cs
@@ -4,9 +4,9 @@
4 4
 using System.IO;
5 5
 using System.Linq;
6 6
 using Lucene.Net.Analysis.Standard;
  7
+using Lucene.Net.Index;
7 8
 using Lucene.Net.QueryParsers;
8 9
 using Lucene.Net.Search;
9  
-using Lucene.Net.Index;
10 10
 
11 11
 namespace NuGetGallery
12 12
 {
@@ -86,25 +86,28 @@ private static IEnumerable<int> SearchCore(string searchTerm)
86 86
 
87 87
         private static Query ParseQuery(string searchTerm)
88 88
         {
89  
-            var fields = new Dictionary<string, float> { { "Id", 1.2f }, { "Title", 1.0f }, { "Tags", 1.0f}, { "Description", 0.8f }, { "Author", 0.6f } };
  89
+            var fields = new Dictionary<string, float> { { "Id", 1.2f }, { "Title", 1.0f }, { "Tags", 0.8f }, { "Description", 0.3f }, 
  90
+                                                         { "Author", 1.0f } };
90 91
             var analyzer = new StandardAnalyzer(LuceneCommon.LuceneVersion);
91 92
             searchTerm = QueryParser.Escape(searchTerm).ToLowerInvariant();
92 93
 
93 94
             var queryParser = new MultiFieldQueryParser(LuceneCommon.LuceneVersion, fields.Keys.ToArray(), analyzer, fields);
94 95
 
95 96
             var conjuctionQuery = new BooleanQuery();
96  
-            conjuctionQuery.SetBoost(1.5f);
  97
+            conjuctionQuery.SetBoost(1.2f);
97 98
             var disjunctionQuery = new BooleanQuery();
  99
+            disjunctionQuery.SetBoost(0.3f);
98 100
             var wildCardQuery = new BooleanQuery();
99  
-            wildCardQuery.SetBoost(0.7f);
  101
+            wildCardQuery.SetBoost(0.5f);
100 102
             var exactIdQuery = new TermQuery(new Term("Id-Exact", searchTerm));
101 103
             exactIdQuery.SetBoost(2.5f);
102  
-            
103  
-            foreach(var term in searchTerm.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
  104
+            var wildCardIdQuery = new WildcardQuery(new Term("Id-Exact", "*" + searchTerm + "*"));
  105
+
  106
+            foreach (var term in searchTerm.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
104 107
             {
105 108
                 conjuctionQuery.Add(queryParser.Parse(term), BooleanClause.Occur.MUST);
106 109
                 disjunctionQuery.Add(queryParser.Parse(term), BooleanClause.Occur.SHOULD);
107  
-                
  110
+
108 111
                 foreach (var field in fields)
109 112
                 {
110 113
                     var wildCardTermQuery = new WildcardQuery(new Term(field.Key, term + "*"));
@@ -113,7 +116,7 @@ private static Query ParseQuery(string searchTerm)
113 116
                 }
114 117
             }
115 118
 
116  
-            return conjuctionQuery.Combine(new Query[] { exactIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery });
  119
+            return conjuctionQuery.Combine(new Query[] { exactIdQuery, wildCardIdQuery, conjuctionQuery, disjunctionQuery, wildCardQuery });
117 120
         }
118 121
     }
119 122
 }
5  Website/Views/Packages/ListPackages.cshtml
@@ -22,11 +22,14 @@
22 22
     <fieldset class="form search">
23 23
         <legend>Sort Order</legend>
24 24
         <input type="hidden" name="q" value="@Model.SearchTerm" />
  25
+
25 26
         <div class="form-field">
26 27
             <label for="sortOrder">Sort By</label>
27 28
             <select name="sortOrder" id="sortOrder">
  29
+                @if (!Model.SearchTerm.IsEmpty()) {
  30
+                    @ViewHelpers.Option(Constants.RelevanceSortOrder, "Relevance", Model.SortOrder)
  31
+                }
28 32
                 @ViewHelpers.Option(Constants.PopularitySortOrder, "Popularity", Model.SortOrder)
29  
-                @ViewHelpers.Option(Constants.RelevanceSortOrder, "Relevance", Model.SortOrder)
30 33
                 @ViewHelpers.Option(Constants.AlphabeticSortOrder, "A-Z", Model.SortOrder)
31 34
                 @ViewHelpers.Option(Constants.RecentSortOrder, "Recent", Model.SortOrder)
32 35
             </select>

0 notes on commit fc0fcf4

Please sign in to comment.
Something went wrong with that request. Please try again.