Skip to content
This repository has been archived by the owner on May 27, 2020. It is now read-only.

Commit

Permalink
Fix bug in searches with both sorting and relevance.
Browse files Browse the repository at this point in the history
  • Loading branch information
adelapena committed Aug 22, 2015
1 parent 653073f commit 465cbcc
Show file tree
Hide file tree
Showing 13 changed files with 71 additions and 43 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Changelog

## 2.1.8.4-SNAPSHOT ()
* Fix bug in searches with both sorting and relevance

## 2.1.8.3 (20 August 2015)
* Fix analyzer selection in maps (issue#18)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;

import java.util.List;

/**
* Class representing a Lucene index search. It is formed by an optional querying {@link Condition} and an optional
* filtering {@link Condition}. It can be translated to a Lucene {@link Query} using a {@link Schema}.
Expand Down Expand Up @@ -127,7 +129,7 @@ public boolean refresh() {
* @param schema A {@link Schema}.
* @return The Lucene {@link SortField}s represented by this using {@code schema}.
*/
public SortField[] sortFields(Schema schema) {
public List<SortField> sortFields(Schema schema) {
    // No sort specified means no Lucene sort fields at all.
    if (sort == null) {
        return null;
    }
    return sort.sortFields(schema);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ public List<SortField> getSortFields() {
* @param schema The {@link Schema} to be used.
* @return the Lucene {@link Sort} representing this {@link Sort}.
*/
public org.apache.lucene.search.SortField[] sortFields(Schema schema) {
org.apache.lucene.search.SortField[] fields = new org.apache.lucene.search.SortField[sortFields.size()];
for (int i = 0; i < sortFields.size(); i++) {
fields[i] = sortFields.get(i).sortField(schema);
public List<org.apache.lucene.search.SortField> sortFields(Schema schema) {
    // Translate each of our own SortField wrappers into its Lucene counterpart,
    // preserving order and pre-sizing the result to avoid resizes.
    int count = sortFields.size();
    List<org.apache.lucene.search.SortField> luceneSortFields = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        luceneSortFields.add(sortFields.get(i).sortField(schema));
    }
    return luceneSortFields;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ public int compare(CellName o1, CellName o2) {
*
* @return A Lucene {@link SortField} list for sorting documents/rows according to the column family name.
*/
public SortField[] sortFields() {
return new SortField[]{new SortField(FIELD_NAME, new FieldComparatorSource() {
public List<SortField> sortFields() {
return Collections.singletonList(new SortField(FIELD_NAME, new FieldComparatorSource() {
@Override
public FieldComparator<?> newComparator(String field, int hits, int sort, boolean reversed)
throws IOException {
Expand All @@ -331,7 +331,7 @@ public int compareValues(BytesRef val1, BytesRef val2) {
}
};
}
})};
}));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ public Term term(DecoratedKey partitionKey) {
*
* @return The Lucene {@link SortField}s to get {@link Document}s in the same order that is used in Cassandra.
*/
public abstract SortField[] sortFields();
public abstract List<SortField> sortFields();

/**
* Returns a {@link CellName} for the indexed column in the specified column family.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@

import java.nio.ByteBuffer;
import java.util.Comparator;
import java.util.List;

/**
* {@link RowMapper} for skinny rows.
Expand Down Expand Up @@ -88,7 +89,7 @@ public Document document(DecoratedKey partitionKey, Columns columns) {

/** {@inheritDoc} */
@Override
public SortField[] sortFields() {
public List<SortField> sortFields() {
    // Skinny rows (per the class doc) have no clustering key, so row ordering
    // is determined entirely by the partitioner token sort fields.
    return tokenMapper.sortFields();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.apache.lucene.search.TermQuery;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
Expand Down Expand Up @@ -112,10 +113,11 @@ public Document document(DecoratedKey partitionKey, CellName clusteringKey, Colu

/** {@inheritDoc} */
@Override
public SortField[] sortFields() {
SortField[] partitionKeySort = tokenMapper.sortFields();
SortField[] clusteringKeySort = clusteringKeyMapper.sortFields();
return ArrayUtils.addAll(partitionKeySort, clusteringKeySort);
public List<SortField> sortFields() {
    // Wide rows sort first by partition token, then by clustering key,
    // mirroring Cassandra's on-disk order. Seed the list via the copy
    // constructor instead of an explicit addAll for the first mapper.
    List<SortField> fields = new ArrayList<>(tokenMapper.sortFields());
    fields.addAll(clusteringKeyMapper.sortFields());
    return fields;
}

/** {@inheritDoc} */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ public abstract class RowService {
final RowMapper rowMapper;
final CFMetaData metadata;
final LuceneIndex luceneIndex;
final List<SortField> keySortFields;

protected final Schema schema;

Expand All @@ -91,12 +92,13 @@ public abstract class RowService {
protected RowService(ColumnFamilyStore baseCfs, ColumnDefinition columnDefinition) throws IOException {

this.baseCfs = baseCfs;
this.metadata = baseCfs.metadata;
metadata = baseCfs.metadata;

IndexConfig config = new IndexConfig(metadata, columnDefinition);

this.schema = config.getSchema();
this.rowMapper = RowMapper.build(metadata, columnDefinition, schema);
schema = config.getSchema();
rowMapper = RowMapper.build(metadata, columnDefinition, schema);
keySortFields = rowMapper.sortFields();

this.luceneIndex = new LuceneIndex(columnDefinition.ksName,
columnDefinition.cfName,
Expand Down Expand Up @@ -248,6 +250,7 @@ public final List<Row> search(Search search,
// Setup search arguments
Query query = query(search, dataRange);
Sort sort = sort(search);
int scorePosition = scorePosition(search);
ScoreDoc last = after(searcher, after, query, sort);
int page = Math.min(limit, MAX_PAGE_SIZE);
boolean maybeMore;
Expand All @@ -269,7 +272,7 @@ public final List<Row> search(Search search,

// Collect rows from Cassandra
collectTime.start();
for (Row row : rows(searchResults, timestamp, search.usesRelevance())) {
for (Row row : rows(searchResults, timestamp, scorePosition)) {
if (accepted(row, expressions)) {
rows.add(row);
numRows++;
Expand Down Expand Up @@ -337,12 +340,22 @@ public Query query(Search search, DataRange dataRange) {
* @return A {@link Sort} for the specified {@link Search}.
*/
/**
 * Returns the Lucene {@link Sort} for the specified {@link Search}: the
 * user-requested sort fields (if any), then the score field when relevance
 * is used, and always the row key sort fields as the final tiebreaker.
 *
 * NOTE(review): the scraped diff interleaved removed and added lines here;
 * this is the reconstructed post-commit version.
 *
 * @param search A {@link Search}.
 * @return A {@link Sort} for the specified {@link Search}.
 */
private Sort sort(Search search) {
    List<SortField> sortFields = new ArrayList<>();
    if (search.usesSorting()) {
        sortFields.addAll(search.sortFields(schema));
    }
    if (search.usesRelevance()) {
        // Score participates in ordering only when relevance is requested.
        sortFields.add(FIELD_SCORE);
    }
    // Key-based sorting always applies last so results are totally ordered.
    sortFields.addAll(keySortFields);
    return new Sort(sortFields.toArray(new SortField[sortFields.size()]));
}

/**
 * Returns the position of the score field inside the composed sort-field
 * list built by {@code sort(Search)}: after the user sort fields when
 * sorting is used, first otherwise, or -1 when relevance is not used.
 *
 * NOTE(review): a stray removed diff line from the old sort() body was
 * interleaved into this method in the scrape; it is dropped here.
 *
 * @param search A {@link Search}.
 * @return The score field position, or -1 if the search has no relevance.
 */
private int scorePosition(Search search) {
    if (search.usesRelevance()) {
        return search.usesSorting() ? search.sortFields(schema).size() : 0;
    } else {
        return -1;
    }
}

Expand Down Expand Up @@ -459,12 +472,12 @@ private boolean accepted(Column<?> column, AbstractType<?> validator, Operator o
* Returns the {@link Row}s identified by the specified {@link Document}s, using the specified time stamp to ignore
* deleted columns. The {@link Row}s are retrieved from the storage engine, so it involves IO operations.
*
* @param searchResults The {@link SearchResult}s
* @param timestamp The time stamp to ignore deleted columns.
* @param relevance If the search uses relevance.
* @param results The {@link SearchResult}s
* @param timestamp The time stamp to ignore deleted columns.
* @param scorePosition The position where score column is placed.
* @return The {@link Row}s identified by the specified {@link Document}s
*/
protected abstract List<Row> rows(List<SearchResult> searchResults, long timestamp, boolean relevance);
protected abstract List<Row> rows(List<SearchResult> results, long timestamp, int scorePosition);

/**
* Returns a {@link ColumnFamily} composed by the non expired {@link Cell}s of the specified {@link ColumnFamily}.
Expand All @@ -489,15 +502,18 @@ protected ColumnFamily cleanExpired(ColumnFamily columnFamily, long timestamp) {
* @param row A {@link Row}.
* @param timestamp The score column timestamp.
* @param scoreDoc The score column value.
* @param scorePosition The position where score column is placed.
* @return The {@link Row} with the score.
*/
protected Row addScoreColumn(Row row, long timestamp, ScoreDoc scoreDoc) {
protected Row addScoreColumn(Row row, long timestamp, ScoreDoc scoreDoc, int scorePosition) {

ColumnFamily cf = row.cf;
CellName cellName = rowMapper.makeCellName(cf);
Float score = Float.parseFloat(((FieldDoc) scoreDoc).fields[0].toString());
ByteBuffer cellValue = UTF8Type.instance.decompose(score.toString());
FieldDoc fieldDoc = (FieldDoc) scoreDoc;
Float score = Float.parseFloat(fieldDoc.fields[scorePosition].toString());

ColumnFamily dcf = ArrayBackedSortedColumns.factory.create(baseCfs.metadata);
ByteBuffer cellValue = UTF8Type.instance.decompose(score.toString());
dcf.addColumn(cellName, cellValue, timestamp);
dcf.addAll(row.cf);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public Map<Term, Document> documents(DecoratedKey partitionKey, ColumnFamily col

/** {@inheritDoc} */
@Override
protected List<Row> rows(List<SearchResult> searchResults, long timestamp, boolean relevance) {
protected List<Row> rows(List<SearchResult> searchResults, long timestamp, int scorePosition) {
List<Row> rows = new ArrayList<>(searchResults.size());
for (SearchResult searchResult : searchResults) {

Expand All @@ -126,9 +126,9 @@ protected List<Row> rows(List<SearchResult> searchResults, long timestamp, boole
Row row = new Row(partitionKey, columnFamily);

// Return decorated row
if (relevance) {
if (scorePosition >= 0) {
ScoreDoc scoreDoc = searchResult.getScoreDoc();
row = addScoreColumn(row, timestamp, scoreDoc);
row = addScoreColumn(row, timestamp, scoreDoc, scorePosition);
}
rows.add(row);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ public Map<Term, Document> documents(DecoratedKey partitionKey, ColumnFamily col
* The {@link Row} is a logical one.
*/
@Override
protected List<Row> rows(List<SearchResult> searchResults, long timestamp, boolean relevance) {
protected List<Row> rows(List<SearchResult> searchResults, long timestamp, int scorePosition) {

// Group key queries by partition keys
Map<String, ScoreDoc> scoresByClusteringKey = new HashMap<>(searchResults.size());
Expand Down Expand Up @@ -185,10 +185,10 @@ protected List<Row> rows(List<SearchResult> searchResults, long timestamp, boole
CellName clusteringKey = entry1.getKey();
ColumnFamily columnFamily = entry1.getValue();
Row row = new Row(partitionKey, columnFamily);
if (relevance) {
if (scorePosition >= 0) {
String rowHash = rowMapper.hash(partitionKey, clusteringKey);
ScoreDoc scoreDoc = scoresByClusteringKey.get(rowHash);
row = addScoreColumn(row, timestamp, scoreDoc);
row = addScoreColumn(row, timestamp, scoreDoc, scorePosition);
}
rows.add(row);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.search.SortField;

import java.util.Comparator;
import java.util.List;

/**
* Class for several row partitioning {@link Token} mappings between Cassandra and Lucene.
Expand Down Expand Up @@ -111,11 +112,11 @@ public boolean isMinimum(Token token) {
protected abstract Query makeQuery(Token lower, Token upper, boolean includeLower, boolean includeUpper);

/**
* Returns a Lucene {@link SortField} array for sorting documents/rows according to the current partitioner.
* Returns a Lucene {@link SortField} list for sorting documents/rows according to the current partitioner.
*
* @return A Lucene {@link SortField} array for sorting documents/rows according to the current partitioner.
* @return A Lucene {@link SortField} list for sorting documents/rows according to the current partitioner.
*/
public abstract SortField[] sortFields();
public abstract List<SortField> sortFields();

/**
* Returns {@code true} if the specified lower row position kind must be included in the filtered range, {@code
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;

/**
* {@link TokenMapper} to be used when any {@link org.apache.cassandra.dht.IPartitioner} when there is not a more
Expand Down Expand Up @@ -84,8 +86,8 @@ public Query query(Token token) {

/** {@inheritDoc} */
@Override
public SortField[] sortFields() {
return new SortField[]{new SortField(FIELD_NAME, new FieldComparatorSource() {
public List<SortField> sortFields() {
return Collections.singletonList(new SortField(FIELD_NAME, new FieldComparatorSource() {
@Override
public FieldComparator<?> newComparator(String field, int hits, int sort, boolean reversed)
throws IOException {
Expand All @@ -96,7 +98,7 @@ public int compareValues(BytesRef val1, BytesRef val2) {
}
};
}
})};
}));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;

import java.util.Collections;
import java.util.List;

/**
* {@link PartitionKeyMapper} to be used when {@link org.apache.cassandra.dht.Murmur3Partitioner} is used. It indexes
* the token long value as a Lucene long field.
Expand Down Expand Up @@ -92,8 +95,8 @@ protected Query makeQuery(Token lower, Token upper, boolean includeLower, boolea

/** {@inheritDoc} */
@Override
public SortField[] sortFields() {
return new SortField[]{new SortField(FIELD_NAME, SortField.Type.LONG)};
public List<SortField> sortFields() {
    // Murmur3 tokens are indexed as longs, so a single long-typed
    // sort field over the token field gives partitioner order.
    SortField tokenSortField = new SortField(FIELD_NAME, SortField.Type.LONG);
    return Collections.singletonList(tokenSortField);
}

}

0 comments on commit 465cbcc

Please sign in to comment.