From 4789bc5f3ea5e4728373d0621c48ccb47bc3979f Mon Sep 17 00:00:00 2001 From: Yuce Tekol Date: Fri, 29 Sep 2017 16:22:08 +0300 Subject: [PATCH] Added range fields --- README.md | 3 +- integration_tests/test_client_it.py | 14 ++-- pilosa/orm.py | 107 +++++++++++++++++++++++++--- tests/test_orm.py | 56 ++++++++++++--- 4 files changed, 153 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index b394323..b50a09c 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,9 @@ Python client for Pilosa high performance distributed bitmap index. * **Next**: * Added support for creating range encoded frames. - * Added `SetFieldValue`, `Sum` and `Xor` calls. + * Added `Xor` call. * Added support for excluding bits or attributes from bitmap calls. In order to exclude bits, call `setExcludeBits(true)` in your `QueryOptions.Builder`. In order to exclude attributes, call `setExcludeAttributes(true)`. + * Added range field operations. * Customizable CSV timestamp format (Contributed by @lachlanorr). * **Deprecation** Row and column labels are deprecated, and will be removed in a future release of this library. Do not use `column_label` field when creating `Index` objects and do not use `row_label` field when creating `Frame` objects for new code. See: https://github.com/pilosa/pilosa/issues/752 for more info. diff --git a/integration_tests/test_client_it.py b/integration_tests/test_client_it.py index 6c4212b..287fd8f 100644 --- a/integration_tests/test_client_it.py +++ b/integration_tests/test_client_it.py @@ -41,7 +41,7 @@ from pilosa.client import Client, URI, Cluster from pilosa.exceptions import PilosaError -from pilosa.orm import Index, TimeQuantum, Schema, RangeField +from pilosa.orm import Index, TimeQuantum, Schema, IntField from pilosa.imports import csv_bit_reader SERVER_ADDRESS = ":10101" @@ -256,18 +256,22 @@ def test_failover_fail(self): def test_range_frame(self): client = self.get_client() - frame = self.col_db.frame("rangeframe", fields=[RangeField.int("foo", 10, 20)]) + frame = self.col_db.frame("rangeframe", fields=[IntField.int("foo", 10, 20)]) client.ensure_frame(frame) client.query(self.col_db.batch_query( frame.setbit(1, 10), frame.setbit(1, 100), - frame.set_field_value(10, "foo", 11), - frame.set_field_value(100, "foo", 15) + frame.field("foo").set_value(10, 11), + frame.field("foo").set_value(100, 15), )) - response = client.query(frame.sum(frame.bitmap(1), "foo")) + response = client.query(frame.field("foo").sum(frame.bitmap(1))) self.assertEquals(26, response.result.sum) self.assertEquals(2, response.result.count) + response = client.query(frame.field("foo").lt(15)) + self.assertEquals(1, len(response.results)) + self.assertEquals(10, response.result.bitmap.bits[0]) + def test_exclude_attrs_bits(self): client = self.get_client() client.query(self.col_db.batch_query( diff --git a/pilosa/orm.py b/pilosa/orm.py index 93398ac..3eb6d66 100644 --- a/pilosa/orm.py +++ b/pilosa/orm.py @@ -36,7 +36,7 @@ from .exceptions import PilosaError, ValidationError from .validator import validate_index_name, validate_frame_name, validate_label -__all__ = ("TimeQuantum", "CacheType", "Schema", "Index", "PQLQuery", "PQLBatchQuery", "RangeField") +__all__ = ("TimeQuantum", "CacheType", "Schema", "Index", "PQLQuery", "PQLBatchQuery", "IntField") _TIME_FORMAT = "%Y-%m-%dT%H:%M" @@ -214,7 +214,7 @@ def frame(self, name, row_label="rowID", time_quantum=TimeQuantum.NONE, :param bool inverse_enabled: :param pilosa.CacheType cache_type: ``CacheType.DEFAULT``, ``CacheType.LRU`` or ``CacheType.RANKED`` :param int cache_size: Values greater than 0 sets the cache size. Otherwise uses the default cache size - :param list(RangeField) fields: List of ``RangeField`` objects. E.g.: ``[RangeField.int("rate", 0, 100)]`` + :param list(IntField) fields: List of ``IntField`` objects. E.g.: ``[IntField.int("rate", 0, 100)]`` :return: Pilosa frame :rtype: pilosa.Frame @@ -362,6 +362,7 @@ def __init__(self, index, name, row_label, time_quantum, inverse_enabled, self.row_label = row_label self.column_label = index.column_label self.fields = fields + self.range_fields = {} def __eq__(self, other): if id(self) == id(other): @@ -540,14 +541,19 @@ def set_row_attrs(self, row_id, attrs): (self.row_label, row_id, self.name, attrs_str), self.index) - def set_field_value(self, column_id, field, value): - return PQLQuery("SetFieldValue(frame='%s', %s=%d, %s=%d)" % \ - (self.name, self.column_label, column_id, field, value), - self.index) + def field(self, name): + """Returns a _RangeField object with the given name. - def sum(self, bitmap, field): - qry = "Sum(%s, frame='%s', field='%s')" % (bitmap.serialize(), self.name, field) - return PQLQuery(qry, self.index) + :param name: field name + :return: _RangeField object + :rtype: _RangeField + """ + field = self.range_fields.get(name) + if not field: + validate_label(name) + field = _RangeField(self, name) + self.range_fields[name] = field + return field def _get_options_string(self): data = {"rowLabel": self.row_label} @@ -600,7 +606,7 @@ def serialize(self): return u''.join(q.serialize() for q in self.queries) -class RangeField: +class IntField: def __init__(self, attrs): self.attrs = attrs @@ -616,3 +622,84 @@ def int(cls, name, min=0, max=100): "min": min, "max": max }) + + +class _RangeField: + + def __init__(self, frame, name): + self.frame_name = frame.name + self.name = name + self.index = frame.index + + def lt(self, n): + """Creates a Range query with less than (<) condition. + + :param n: The value to compare + :return: a PQL query + :rtype: PQLQuery + """ + return self._binary_operation("<", n) + + def lte(self, n): + """Creates a Range query with less than or equal (<=) condition. + + :param n: The value to compare + :return: a PQL query + :rtype: PQLQuery + """ + return self._binary_operation("<=", n) + + def gt(self, n): + """Creates a Range query with greater than (>) condition. + + :param n: The value to compare + :return: a PQL query + :rtype: PQLQuery + """ + return self._binary_operation(">", n) + + def gte(self, n): + """Creates a Range query with greater than or equal (>=) condition. + + :param n: The value to compare + :return: a PQL query + :rtype: PQLQuery + """ + return self._binary_operation(">=", n) + + def between(self, a, b): + """Creates a Range query with between (><) condition. + + :param a: Closed range start + :param b: Closed range end + :return: a PQL query + :rtype: PQLQuery + """ + q = u"Range(frame='%s', %s >< [%d,%d])" % (self.frame_name, self.name, a, b) + return PQLQuery(q, self.index) + + def sum(self, bitmap): + """Creates a Sum query. + + :param bitmap: The bitmap query to use. + :return: a PQL query + :rtype: PQLQuery + """ + q = u"Sum(%s, frame='%s', field='%s')" % (bitmap.serialize(), self.frame_name, self.name) + return PQLQuery(q, self.index) + + def set_value(self, column_id, value): + """Creates a SetFieldValue query. + + :param column_id: column ID + :param value: the value to assign to the field + :return: a PQL query + :rtype: PQLQuery + """ + q = u"SetFieldValue(frame='%s', %s=%d, %s=%d)" % \ + (self.frame_name, self.index.column_label, column_id, self.name, value) + return PQLQuery(q, self.index) + + def _binary_operation(self, op, n): + q = u"Range(frame='%s', %s %s %d)" % (self.frame_name, self.name, op, n) + return PQLQuery(q, self.index) diff --git a/tests/test_orm.py b/tests/test_orm.py index f12ee0e..15a6dc4 100644 --- a/tests/test_orm.py +++ b/tests/test_orm.py @@ -34,7 +34,7 @@ import unittest from datetime import datetime -from pilosa import PilosaError, Index, TimeQuantum, CacheType, RangeField, ValidationError +from pilosa import PilosaError, Index, TimeQuantum, CacheType, IntField, ValidationError from pilosa.orm import Schema schema = Schema() @@ -238,7 +238,6 @@ def test_other_class_not_equals(self): schema = Schema() self.assertNotEqual(sampleFrame, schema) - def test_bitmap(self): qry1 = sampleFrame.bitmap(5) self.assertEquals( @@ -332,17 +331,52 @@ def test_set_row_attributes(self): "SetRowAttrs(project=5, frame='collaboration', active=true, quote=\"\\\"Don't worry, be happy\\\"\")", q.serialize()) - def test_set_field_value(self): - q = collabFrame.set_field_value(50, "foo", 15) + def test_field(self): + # only a single instance of a field should exist + field1 = sampleFrame.field("the-field") + field2 = sampleFrame.field("the-field") + self.assertTrue(id(field1) == id(field2)) + + def test_field_lt(self): + q = sampleFrame.field("foo").lt(10) self.assertEquals( - "SetFieldValue(frame='collaboration', user=50, foo=15)", + "Range(frame='sample-frame', foo < 10)", q.serialize()) - def test_sum(self): - b = collabFrame.bitmap(42) - q = sampleFrame.sum(b, "foo") + def test_field_lte(self): + q = sampleFrame.field("foo").lte(10) + self.assertEquals( + "Range(frame='sample-frame', foo <= 10)", + q.serialize()) + + def test_field_gt(self): + q = sampleFrame.field("foo").gt(10) + self.assertEquals( + "Range(frame='sample-frame', foo > 10)", + q.serialize()) + + def test_field_gte(self): + q = sampleFrame.field("foo").gte(10) + self.assertEquals( + "Range(frame='sample-frame', foo >= 10)", + q.serialize()) + + def test_field_between(self): + q = sampleFrame.field("foo").between(10, 20) + self.assertEquals( + "Range(frame='sample-frame', foo >< [10,20])", + q.serialize()) + + def test_field_set_value(self): + q = sampleFrame.field("foo").set_value(10, 20) + self.assertEquals( + "SetFieldValue(frame='sample-frame', columnID=10, foo=20)", + q.serialize()) + + def test_field_sum(self): + q = sampleFrame.field("foo").sum(sampleFrame.bitmap(10)) self.assertEquals( - "Sum(Bitmap(project=42, frame='collaboration'), frame='sample-frame', field='foo')", + "Sum(Bitmap(rowID=10, frame='sample-frame'), frame='sample-frame', field='foo')", q.serialize()) def test_get_options_string(self): @@ -351,7 +385,7 @@ def test_get_options_string(self): inverse_enabled=True, cache_type=CacheType.RANKED, cache_size=1000, - fields=[RangeField.int("foo"), RangeField.int("bar", min=-1, max=1)]) + fields=[IntField.int("foo"), IntField.int("bar", min=-1, max=1)]) target = '{"options": {"cacheSize": 1000, "cacheType": "ranked", "fields": [{"max": 100, "min": 0, "name": "foo", "type": "int"}, {"max": 1, "min": -1, "name": "bar", "type": "int"}], "inverseEnabled": true, "rangeEnabled": true, "rowLabel": "rowID", "timeQuantum": "DH"}}' self.assertEquals(target, frame._get_options_string()) @@ -383,4 +417,4 @@ def test_equality(self): class RangeFieldTestCase(unittest.TestCase): def test_min_greater_equals_max_fails(self): - self.assertRaises(ValidationError, RangeField.int, "foo", min=10, max=9) \ No newline at end of file + self.assertRaises(ValidationError, IntField.int, "foo", min=10, max=9) \ No newline at end of file