Skip to content

Commit

Permalink
Range Filtering, Deleting and Counting Improvements
Browse files Browse the repository at this point in the history
There was an error in how the backend determines wether or not a range query can be evaluated efficiently by Cassandra. It was allowing attempts of efficent filtering on non-clustering columns.

Had some errors in how I was handling deleting and counting methods. I thought the queries were build at that point in code execution. I was mistaken but now the queries are build correctly and the results are as expected.
  • Loading branch information
Seth Denner committed May 6, 2015
1 parent f8ea55b commit 45538f4
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 31 deletions.
56 changes: 38 additions & 18 deletions djangocassandra/db/backends/cassandra/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ def __init__(
else:
self.cassandra_meta = None

self.pk_column = self.meta.pk.column
self.pk_column = (
self.meta.pk.db_column
if self.meta.pk.db_column
else self.meta.pk.column
)
self.column_family = self.meta.db_table
self.columns = self.meta.fields
self.where = None
Expand All @@ -91,12 +95,12 @@ def __init__(
]

if hasattr(self.cassandra_meta, 'clustering_keys'):
self.clustering_keys = (
self.clustering_columns = (
self.query.model.CassandraMeta.clustering_keys
)

else:
self.clustering_keys = []
self.clustering_columns = []

self.cql_query = self.column_family_class.objects.values_list(
*self.column_names
Expand All @@ -106,7 +110,7 @@ def __init__(
def filterable_columns(self):
return itertools.chain(
[self.pk_column],
self.clustering_keys,
self.clustering_columns,
self.indexed_columns
)

Expand All @@ -122,7 +126,7 @@ def _get_rows_by_indexed_column(self, range_predicates):
predicates_by_column[self.pk_column]
)

for column in self.clustering_keys:
for column in self.clustering_columns:
if column in predicates_by_column:
sorted_predicates.append(
predicates_by_column[column]
Expand Down Expand Up @@ -180,20 +184,19 @@ def filter_range(query, predicate):
end_op: predicate.end
})

query = self.cql_query
for predicate in sorted_predicates:
query = filter_range(
query,
self.cql_query = filter_range(
self.cql_query,
predicate
)

for predicate in indexed_predicates:
query = filter_range(
query,
self.cql_query = filter_range(
self.cql_query,
predicate
)

return query
return self.cql_query

def get_row_range(self, range_predicates):
'''
Expand Down Expand Up @@ -242,13 +245,30 @@ def count(
self,
limit=None
):
return self.cql_query.count()
return len(
self.root_predicate.get_matching_rows(self)
)


def delete(
self,
columns=set()
):
return self.cql_query.delete()
if self.root_predicate.can_evaluate_efficiently(
self.pk_column,
self.clustering_columns,
self.indexed_columns
):
self.root_predicate.get_matching_rows(self)
for r in self.cql_query:
r.delete()

else:
rows = self.root_predicate.get_matching_rows(self)
for row in rows:
self.column_family_class.get(**{
self.pk_column: row[self.pk_column]
}).delete()

def order_by(
self,
Expand Down Expand Up @@ -291,14 +311,14 @@ def order_by(
)

if (
not partition_key_filtered or
field_name not in self.clustering_keys or
self.inefficient_ordering
partition_key_filtered
and field_name in self.clustering_columns
and not self.inefficient_ordering
):
self.add_inefficient_order_by(order_string)
self.ordering.append(order_string)

else:
self.ordering.append(order_string)
self.add_inefficient_order_by(order_string)

@safe_call
def add_inefficient_order_by(self, ordering):
Expand Down
56 changes: 43 additions & 13 deletions djangocassandra/db/backends/cassandra/predicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,21 @@ def _is_exact(self):
self.end_inclusive
)

def can_evaluate_efficiently(self, pk_column, indexed_columns):
return (
self.column == pk_column or
self.column in indexed_columns
)
def can_evaluate_efficiently(
self,
pk_column,
clustering_columns,
indexed_columns
):
if self._is_exact():
return (
self.column == pk_column or
self.column in indexed_columns or
self.column in clustering_columns
)

else:
return self.column in clustering_columns

def incorporate_range_op(self, column, op, value, parent_compound_op):
if column != self.column:
Expand Down Expand Up @@ -213,7 +223,12 @@ def __init__(self, column, op, value=None):
def __repr__(self):
return '(OP: ' + self.op + ':' + unicode(self.value) + ')'

def can_evaluate_efficiently(self, pk_column, indexed_columns):
def can_evaluate_efficiently(
self,
pk_column,
clustering_columns,
indexed_columns
):
return False

def row_matches(self, row):
Expand Down Expand Up @@ -275,18 +290,31 @@ def __repr__(self):
s += ')'
return s

def can_evaluate_efficiently(self, pk_column, indexed_columns):
def can_evaluate_efficiently(
self,
pk_column,
clustering_columns,
indexed_columns
):
if self.negated:
return False
if self.op == COMPOUND_OP_AND:
for child in self.children:
if child.can_evaluate_efficiently(pk_column, indexed_columns):
if child.can_evaluate_efficiently(
pk_column,
clustering_columns,
indexed_columns
):
return True
else:
return False
elif self.op == COMPOUND_OP_OR:
for child in self.children:
if not child.can_evaluate_efficiently(pk_column, indexed_columns):
if not child.can_evaluate_efficiently(
pk_column,
clustering_columns,
indexed_columns
):
return False
else:
return True
Expand Down Expand Up @@ -378,16 +406,18 @@ def get_matching_rows(self, query):
# of rows so we only have to run the inefficient query predicates
# over this smaller number of rows.
if self.can_evaluate_efficiently(
pk_column,
query.filterable_columns
pk_column,
query.clustering_columns,
query.indexed_columns
):
range_predicates = []
inefficient_predicates = []
result = None
for predicate in self.children:
if predicate.can_evaluate_efficiently(
pk_column,
query.filterable_columns
pk_column,
query.clustering_columns,
query.filterable_columns
):
range_predicates.append(predicate)

Expand Down
13 changes: 13 additions & 0 deletions djangocassandra/db/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,16 @@ def get_internal_type(self):

def get_auto_value(self):
return uuid.uuid4()

def value_to_string(self, value):
if isinstance(value, basestring):
return value

try:
return value.hex
except (TypeError, ValueError):
raise exceptions.ValidationError(
self.error_messages['invalid'],
code='invalid',
params={'value': value},
)

0 comments on commit 45538f4

Please sign in to comment.