Skip to content
This repository has been archived by the owner on May 27, 2020. It is now read-only.

Commit

Permalink
Fix bug in searches with both sorting and relevance.
Browse files Browse the repository at this point in the history
  • Loading branch information
adelapena committed Aug 22, 2015
1 parent 653073f commit 465cbcc
Show file tree
Hide file tree
Showing 13 changed files with 71 additions and 43 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Changelog

## 2.1.8.4-SNAPSHOT ()
* Fix bug in searches with both sorting and relevance

## 2.1.8.3 (20 August 2015)
* Fix analyzer selection in maps (issue#18)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;

import java.util.List;

/**
* Class representing a Lucene index search. It is formed by an optional querying {@link Condition} and an optional
* filtering {@link Condition}. It can be translated to a Lucene {@link Query} using a {@link Schema}.
Expand Down Expand Up @@ -127,7 +129,7 @@ public boolean refresh() {
* @param schema A {@link Schema}.
* @return The Lucene {@link SortField}s represented by this using {@code schema}.
*/
public SortField[] sortFields(Schema schema) {
public List<SortField> sortFields(Schema schema) {
    // No sort specified means no Lucene sort fields at all.
    if (sort == null) {
        return null;
    }
    return sort.sortFields(schema);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ public List<SortField> getSortFields() {
* @param schema The {@link Schema} to be used.
* @return the Lucene {@link Sort} representing this {@link Sort}.
*/
public org.apache.lucene.search.SortField[] sortFields(Schema schema) {
org.apache.lucene.search.SortField[] fields = new org.apache.lucene.search.SortField[sortFields.size()];
for (int i = 0; i < sortFields.size(); i++) {
fields[i] = sortFields.get(i).sortField(schema);
public List<org.apache.lucene.search.SortField> sortFields(Schema schema) {
    // Translate each of our own SortField wrappers into its Lucene counterpart,
    // preserving order and pre-sizing the result to avoid resizes.
    int count = sortFields.size();
    List<org.apache.lucene.search.SortField> luceneSortFields = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        luceneSortFields.add(sortFields.get(i).sortField(schema));
    }
    return luceneSortFields;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ public int compare(CellName o1, CellName o2) {
*
* @return A Lucene {@link SortField} list for sorting documents/rows according to the column family name.
*/
public SortField[] sortFields() {
return new SortField[]{new SortField(FIELD_NAME, new FieldComparatorSource() {
public List<SortField> sortFields() {
return Collections.singletonList(new SortField(FIELD_NAME, new FieldComparatorSource() {
@Override
public FieldComparator<?> newComparator(String field, int hits, int sort, boolean reversed)
throws IOException {
Expand All @@ -331,7 +331,7 @@ public int compareValues(BytesRef val1, BytesRef val2) {
}
};
}
})};
}));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ public Term term(DecoratedKey partitionKey) {
*
* @return The Lucene {@link SortField}s to get {@link Document}s in the same order that is used in Cassandra.
*/
public abstract SortField[] sortFields();
public abstract List<SortField> sortFields();

/**
* Returns a {@link CellName} for the indexed column in the specified column family.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@

import java.nio.ByteBuffer;
import java.util.Comparator;
import java.util.List;

/**
* {@link RowMapper} for skinny rows.
Expand Down Expand Up @@ -88,7 +89,7 @@ public Document document(DecoratedKey partitionKey, Columns columns) {

/** {@inheritDoc} */
@Override
public SortField[] sortFields() {
public List<SortField> sortFields() {
    // Skinny rows (per the class doc) have no clustering key, so row ordering
    // is determined entirely by the partitioner token sort fields.
    return tokenMapper.sortFields();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.apache.lucene.search.TermQuery;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
Expand Down Expand Up @@ -112,10 +113,11 @@ public Document document(DecoratedKey partitionKey, CellName clusteringKey, Colu

/** {@inheritDoc} */
@Override
public SortField[] sortFields() {
SortField[] partitionKeySort = tokenMapper.sortFields();
SortField[] clusteringKeySort = clusteringKeyMapper.sortFields();
return ArrayUtils.addAll(partitionKeySort, clusteringKeySort);
public List<SortField> sortFields() {
    // Wide rows sort first by partition token, then by clustering key,
    // mirroring Cassandra's on-disk order. Seed the list via the copy
    // constructor instead of an explicit addAll for the first mapper.
    List<SortField> fields = new ArrayList<>(tokenMapper.sortFields());
    fields.addAll(clusteringKeyMapper.sortFields());
    return fields;
}

/** {@inheritDoc} */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ public abstract class RowService {
final RowMapper rowMapper;
final CFMetaData metadata;
final LuceneIndex luceneIndex;
final List<SortField> keySortFields;

protected final Schema schema;

Expand All @@ -91,12 +92,13 @@ public abstract class RowService {
protected RowService(ColumnFamilyStore baseCfs, ColumnDefinition columnDefinition) throws IOException {

this.baseCfs = baseCfs;
this.metadata = baseCfs.metadata;
metadata = baseCfs.metadata;

IndexConfig config = new IndexConfig(metadata, columnDefinition);

this.schema = config.getSchema();
this.rowMapper = RowMapper.build(metadata, columnDefinition, schema);
schema = config.getSchema();
rowMapper = RowMapper.build(metadata, columnDefinition, schema);
keySortFields = rowMapper.sortFields();

this.luceneIndex = new LuceneIndex(columnDefinition.ksName,
columnDefinition.cfName,
Expand Down Expand Up @@ -248,6 +250,7 @@ public final List<Row> search(Search search,
// Setup search arguments
Query query = query(search, dataRange);
Sort sort = sort(search);
int scorePosition = scorePosition(search);
ScoreDoc last = after(searcher, after, query, sort);
int page = Math.min(limit, MAX_PAGE_SIZE);
boolean maybeMore;
Expand All @@ -269,7 +272,7 @@ public final List<Row> search(Search search,

// Collect rows from Cassandra
collectTime.start();
for (Row row : rows(searchResults, timestamp, search.usesRelevance())) {
for (Row row : rows(searchResults, timestamp, scorePosition)) {
if (accepted(row, expressions)) {
rows.add(row);
numRows++;
Expand Down Expand Up @@ -337,12 +340,22 @@ public Query query(Search search, DataRange dataRange) {
* @return A {@link Sort} for the specified {@link Search}.
*/
/**
 * Returns the Lucene {@link Sort} for the specified {@link Search}: the
 * user-requested sort fields (if any), then the score field when relevance
 * is used, and always the row key sort fields as the final tiebreaker.
 *
 * NOTE(review): the scraped diff interleaved removed and added lines here;
 * this is the reconstructed post-commit version.
 *
 * @param search A {@link Search}.
 * @return A {@link Sort} for the specified {@link Search}.
 */
private Sort sort(Search search) {
    List<SortField> sortFields = new ArrayList<>();
    if (search.usesSorting()) {
        sortFields.addAll(search.sortFields(schema));
    }
    if (search.usesRelevance()) {
        // Score participates in ordering only when relevance is requested.
        sortFields.add(FIELD_SCORE);
    }
    // Key-based sorting always applies last so results are totally ordered.
    sortFields.addAll(keySortFields);
    return new Sort(sortFields.toArray(new SortField[sortFields.size()]));
}

/**
 * Returns the position of the score field inside the composed sort-field
 * list built by {@code sort(Search)}: after the user sort fields when
 * sorting is used, first otherwise, or -1 when relevance is not used.
 *
 * NOTE(review): a stray removed diff line from the old sort() body was
 * interleaved into this method in the scrape; it is dropped here.
 *
 * @param search A {@link Search}.
 * @return The score field position, or -1 if the search has no relevance.
 */
private int scorePosition(Search search) {
    if (search.usesRelevance()) {
        return search.usesSorting() ? search.sortFields(schema).size() : 0;
    } else {
        return -1;
    }
}

Expand Down Expand Up @@ -459,12 +472,12 @@ private boolean accepted(Column<?> column, AbstractType<?> validator, Operator o
* Returns the {@link Row}s identified by the specified {@link Document}s, using the specified time stamp to ignore
* deleted columns. The {@link Row}s are retrieved from the storage engine, so it involves IO operations.
*
* @param searchResults The {@link SearchResult}s
* @param timestamp The time stamp to ignore deleted columns.
* @param relevance If the search uses relevance.
* @param results The {@link SearchResult}s
* @param timestamp The time stamp to ignore deleted columns.
* @param scorePosition The position where score column is placed.
* @return The {@link Row}s identified by the specified {@link Document}s
*/
protected abstract List<Row> rows(List<SearchResult> searchResults, long timestamp, boolean relevance);
protected abstract List<Row> rows(List<SearchResult> results, long timestamp, int scorePosition);

/**
* Returns a {@link ColumnFamily} composed by the non expired {@link Cell}s of the specified {@link ColumnFamily}.
Expand All @@ -489,15 +502,18 @@ protected ColumnFamily cleanExpired(ColumnFamily columnFamily, long timestamp) {
* @param row A {@link Row}.
* @param timestamp The score column timestamp.
* @param scoreDoc The score column value.
* @param scorePosition The position where score column is placed.
* @return The {@link Row} with the score.
*/
protected Row addScoreColumn(Row row, long timestamp, ScoreDoc scoreDoc) {
protected Row addScoreColumn(Row row, long timestamp, ScoreDoc scoreDoc, int scorePosition) {

ColumnFamily cf = row.cf;
CellName cellName = rowMapper.makeCellName(cf);
Float score = Float.parseFloat(((FieldDoc) scoreDoc).fields[0].toString());
ByteBuffer cellValue = UTF8Type.instance.decompose(score.toString());
FieldDoc fieldDoc = (FieldDoc) scoreDoc;
Float score = Float.parseFloat(fieldDoc.fields[scorePosition].toString());

ColumnFamily dcf = ArrayBackedSortedColumns.factory.create(baseCfs.metadata);
ByteBuffer cellValue = UTF8Type.instance.decompose(score.toString());
dcf.addColumn(cellName, cellValue, timestamp);
dcf.addAll(row.cf);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public Map<Term, Document> documents(DecoratedKey partitionKey, ColumnFamily col

/** {@inheritDoc} */
@Override
protected List<Row> rows(List<SearchResult> searchResults, long timestamp, boolean relevance) {
protected List<Row> rows(List<SearchResult> searchResults, long timestamp, int scorePosition) {
List<Row> rows = new ArrayList<>(searchResults.size());
for (SearchResult searchResult : searchResults) {

Expand All @@ -126,9 +126,9 @@ protected List<Row> rows(List<SearchResult> searchResults, long timestamp, boole
Row row = new Row(partitionKey, columnFamily);

// Return decorated row
if (relevance) {
if (scorePosition >= 0) {
ScoreDoc scoreDoc = searchResult.getScoreDoc();
row = addScoreColumn(row, timestamp, scoreDoc);
row = addScoreColumn(row, timestamp, scoreDoc, scorePosition);
}
rows.add(row);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ public Map<Term, Document> documents(DecoratedKey partitionKey, ColumnFamily col
* The {@link Row} is a logical one.
*/
@Override
protected List<Row> rows(List<SearchResult> searchResults, long timestamp, boolean relevance) {
protected List<Row> rows(List<SearchResult> searchResults, long timestamp, int scorePosition) {

// Group key queries by partition keys
Map<String, ScoreDoc> scoresByClusteringKey = new HashMap<>(searchResults.size());
Expand Down Expand Up @@ -185,10 +185,10 @@ protected List<Row> rows(List<SearchResult> searchResults, long timestamp, boole
CellName clusteringKey = entry1.getKey();
ColumnFamily columnFamily = entry1.getValue();
Row row = new Row(partitionKey, columnFamily);
if (relevance) {
if (scorePosition >= 0) {
String rowHash = rowMapper.hash(partitionKey, clusteringKey);
ScoreDoc scoreDoc = scoresByClusteringKey.get(rowHash);
row = addScoreColumn(row, timestamp, scoreDoc);
row = addScoreColumn(row, timestamp, scoreDoc, scorePosition);
}
rows.add(row);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.search.SortField;

import java.util.Comparator;
import java.util.List;

/**
* Class for several row partitioning {@link Token} mappings between Cassandra and Lucene.
Expand Down Expand Up @@ -111,11 +112,11 @@ public boolean isMinimum(Token token) {
protected abstract Query makeQuery(Token lower, Token upper, boolean includeLower, boolean includeUpper);

/**
* Returns a Lucene {@link SortField} array for sorting documents/rows according to the current partitioner.
* Returns a Lucene {@link SortField} list for sorting documents/rows according to the current partitioner.
*
* @return A Lucene {@link SortField} array for sorting documents/rows according to the current partitioner.
* @return A Lucene {@link SortField} list for sorting documents/rows according to the current partitioner.
*/
public abstract SortField[] sortFields();
public abstract List<SortField> sortFields();

/**
* Returns {@code true} if the specified lower row position kind must be included in the filtered range, {@code
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;

/**
* {@link TokenMapper} to be used when any {@link org.apache.cassandra.dht.IPartitioner} when there is not a more
Expand Down Expand Up @@ -84,8 +86,8 @@ public Query query(Token token) {

/** {@inheritDoc} */
@Override
public SortField[] sortFields() {
return new SortField[]{new SortField(FIELD_NAME, new FieldComparatorSource() {
public List<SortField> sortFields() {
return Collections.singletonList(new SortField(FIELD_NAME, new FieldComparatorSource() {
@Override
public FieldComparator<?> newComparator(String field, int hits, int sort, boolean reversed)
throws IOException {
Expand All @@ -96,7 +98,7 @@ public int compareValues(BytesRef val1, BytesRef val2) {
}
};
}
})};
}));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;

import java.util.Collections;
import java.util.List;

/**
* {@link PartitionKeyMapper} to be used when {@link org.apache.cassandra.dht.Murmur3Partitioner} is used. It indexes
* the token long value as a Lucene long field.
Expand Down Expand Up @@ -92,8 +95,8 @@ protected Query makeQuery(Token lower, Token upper, boolean includeLower, boolea

/** {@inheritDoc} */
@Override
public SortField[] sortFields() {
return new SortField[]{new SortField(FIELD_NAME, SortField.Type.LONG)};
public List<SortField> sortFields() {
    // Murmur3 tokens are indexed as longs, so a single long-typed
    // sort field over the token field gives partitioner order.
    SortField tokenSortField = new SortField(FIELD_NAME, SortField.Type.LONG);
    return Collections.singletonList(tokenSortField);
}

}

0 comments on commit 465cbcc

Please sign in to comment.