diff --git a/doc/index.rst b/doc/index.rst index 33f4e66..0157b49 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -18,7 +18,7 @@ Python client for `Pilosa `_ high performance distribute Requirements ------------ -- Python 2.6 and higher or Python 3.3 and higher +- Python 2.7 and higher or Python 3.4 and higher. Install ------- diff --git a/docs/data-model-queries.md b/docs/data-model-queries.md index 0c7decd..70ebb92 100644 --- a/docs/data-model-queries.md +++ b/docs/data-model-queries.md @@ -109,6 +109,8 @@ Index: * `count(self, row)` * `set_column_attrs(self, column_id, attrs)` * `xor(self, *rows)` +* `not_(self, row)` +* `options(self, row_query, column_attrs=False, exclude_columns=False, exclude_row_attrs=False, shards=None)` Field: @@ -127,3 +129,5 @@ Field: * `min(self, row=None)` * `max(self, row=None)` * `setvalue(self, column_id, value)` +* `store(self, row_query, row)` +* `clear_row(self, row)` diff --git a/docs/imports.md b/docs/imports.md index a2b51e9..f6b881e 100644 --- a/docs/imports.md +++ b/docs/imports.md @@ -1,6 +1,6 @@ # Importing Data -If you have large amounts of data, it is more efficient to import it to Pilosa instead of several `Set` queries. +If you have large amounts of data, it is more efficient to import it to Pilosa instead of several `Set` or `Clear` queries. `pilosa.imports` module defines several format functions. Depending on the data, the following format is expected: * `row_id_column_id`: `ROW_ID,COLUMN_ID` @@ -17,10 +17,10 @@ ROW_ID,COLUMN_ID,TIMESTAMP Note that, each line corresponds to a single bit and the lines end with a new line (`\n` or `\r\n`). The target index and field must have been created before hand. 
-Here's some sample code that uses `row_id_column_id` formatter: +Here's some sample code that uses `csv_row_id_column_id` formatter: ```python import pilosa -from pilosa.imports import csv_column_reader, row_id_column_id +from pilosa.imports import csv_column_reader, csv_row_id_column_id try: # python 2.7 and 3 @@ -35,7 +35,7 @@ text = u""" 3,41,683793385 10,10485760,683793385 """ -reader = csv_column_reader(StringIO(text), row_id_column_id) +reader = csv_column_reader(StringIO(text), csv_row_id_column_id) client = pilosa.Client() schema = client.schema() index = schema.index("sample-index") @@ -43,3 +43,13 @@ field = index.field("sample-field", time_quantum=pilosa.TimeQuantum.YEAR_MONTH_D client.sync_schema(schema) client.import_field(field, reader) ``` + +`client.import_field` function imports `Set` bits by default. If you want to import `Clear` bits instead, pass `clear=True`: +```python +client.import_field(field, reader, clear=True) +``` + +Pilosa supports a fast way of importing bits for row ID/Column ID data by transferring bits from the client to the server by packing bits into a roaring bitmap. You can enable that by passing `fast_import=True`: +```python +client.import_field(field, reader, fast_import=True) +``` diff --git a/docs/server-interaction.md b/docs/server-interaction.md index b67b8f2..882a5d6 100644 --- a/docs/server-interaction.md +++ b/docs/server-interaction.md @@ -152,7 +152,7 @@ changed = result.changed ## SSL/TLS -Make sure the Pilosa server runs on a TLS address. [How To Set Up a Secure Cluster](https://www.pilosa.com/docs/latest/tutorials/#how-to-set-up-a-secure-cluster) tutorial explains how to do that. +Make sure the Pilosa server runs on a TLS address. [How To Set Up a Secure Cluster](https://www.pilosa.com/docs/latest/tutorials/#setting-up-a-secure-cluster) tutorial explains how to do that. 
In order to enable TLS support on the client side, the scheme of the address should be explicitly specified as `https`, e.g.: `https://01.pilosa.local:10501` diff --git a/pilosa/client.py b/pilosa/client.py index 562430d..10dcc7c 100644 --- a/pilosa/client.py +++ b/pilosa/client.py @@ -70,10 +70,16 @@ class Client(object): # Create a Client instance client = pilosa.Client() + # Load the schema from the Pilosa server + schema = client.schema() + # Create an Index instance - index = pilosa.Index("repository") + index = schema.index("repository") + # Create a Field instance stargazer = index.field("stargazer") + + # Execute a query response = client.query(stargazer.row(5)) # Act on the result @@ -88,6 +94,21 @@ class Client(object): def __init__(self, cluster_or_uri=None, connect_timeout=30000, socket_timeout=300000, pool_size_per_route=10, pool_size_total=100, retry_count=3, tls_skip_verify=False, tls_ca_certificate_path=""): + """Creates a Client. + + :param object cluster_or_uri: A ``pilosa.Cluster`` or ``pilosa.URI`` instance + :param int connect_timeout: The maximum amount of time in milliseconds to wait for a connection attempt to a server + to succeed + :param int socket_timeout: The maximum amount of time in milliseconds to wait between consecutive + read operations for a response from the server + :param int pool_size_per_route: Number of connections in the pool per server + :param int pool_size_total: Total number of connections in the pool + :param int retry_count: Number of connection trials + :param bool tls_skip_verify: Do not verify the TLS certificate of the server (Not recommended for production) + :param str tls_ca_certificate_path: Server's TLS certificate (Useful when using self-signed certificates) + + * See `Pilosa Python Client/Server Interaction `_. 
+ """ if cluster_or_uri is None: self.cluster = Cluster(URI()) elif isinstance(cluster_or_uri, Cluster): @@ -114,7 +135,7 @@ def __init__(self, cluster_or_uri=None, connect_timeout=30000, socket_timeout=30 def query(self, query, column_attrs=False, exclude_columns=False, exclude_attrs=False, shards=None): """Runs the given query against the server with the given options. - + :param pilosa.PqlQuery query: a PqlQuery object with a non-null index :param bool column_attrs: Enables returning column data from row queries :param bool exclude_columns: Disables returning columns from row queries @@ -149,7 +170,7 @@ def query(self, query, column_attrs=False, exclude_columns=False, exclude_attrs= def create_index(self, index): """Creates an index on the server using the given Index object. - + :param pilosa.Index index: :raises pilosa.IndexExistsError: if there already is a index with the given name """ @@ -164,7 +185,7 @@ def create_index(self, index): def delete_index(self, index): """Deletes the given index on the server. - + :param pilosa.Index index: :raises pilosa.PilosaError: if the index does not exist """ @@ -173,7 +194,7 @@ def delete_index(self, index): def create_field(self, field): """Creates a field on the server using the given Field object. - + :param pilosa.Field field: :raises pilosa.FieldExistsError: if there already is a field with the given name """ @@ -189,7 +210,7 @@ def create_field(self, field): def delete_field(self, field): """Deletes the given field on the server. - + :param pilosa.Field field: :raises pilosa.PilosaError: if the field does not exist """ @@ -198,7 +219,7 @@ def delete_field(self, field): def ensure_index(self, index): """Creates an index on the server if it does not exist. - + :param pilosa.Index index: """ try: @@ -208,7 +229,7 @@ def ensure_index(self, index): def ensure_field(self, field): """Creates a field on the server if it does not exist. 
- + :param pilosa.Field field: """ try: @@ -221,6 +242,11 @@ def _read_schema(self): return json.loads(response.data.decode('utf-8')).get("indexes") or [] def schema(self): + """Loads the schema from the server. + + :return: a Schema instance. + :rtype: pilosa.Schema + """ schema = Schema() for index_info in self._read_schema(): index = schema.index(index_info["name"]) @@ -233,6 +259,12 @@ def schema(self): return schema def sync_schema(self, schema): + """Syncs the given schema with the server. + + Loads new indexes/fields from the server and creates indexes/fields not existing on the server. Does not delete remote indexes/fields. + + :param pilosa.Schema schema: Local schema to be synced + """ server_schema = self.schema() # find out local - remote schema @@ -259,9 +291,10 @@ def sync_schema(self, schema): def import_field(self, field, bit_reader, batch_size=100000, fast_import=False, clear=False): """Imports a field using the given bit reader - :param field: - :param bit_reader: - :param batch_size: + :param pilosa.Field field: The field to import into + :param object bit_reader: An iterator that returns a bit on each call + :param int batch_size: Number of bits to read from the bit reader before posting them to the server + :param bool fast_import: Enables fast import for data with columnID/rowID bits :param clear: clear bits instead of setting them """ for shard, columns in batch_columns(bit_reader, batch_size): @@ -272,11 +305,11 @@ def http_request(self, method, path, data=None, headers=None): NOTE: This function is experimental and may be removed in later revisions. 
- :param method: HTTP method - :param path: Request path - :param data: Request body + :param str method: HTTP method + :param str path: Request path + :param bytes data: Request body :param headers: Request headers - :return HTTP response: + :return: HTTP response """ return self.__http_request(method, path, data=data, headers=headers) @@ -447,6 +480,8 @@ class URI: :param str scheme: is the scheme of the Pilosa Server, such as ``http`` or ``https`` :param str host: is the hostname or IP address of the Pilosa server. IPv6 addresses should be enclosed in brackets, e.g., ``[fe00::0]``. :param int port: is the port of the Pilosa server + + * See `Pilosa Python Client/Server Interaction `_. """ __PATTERN = re.compile("^(([+a-z]+):\\/\\/)?([0-9a-z.-]+|\\[[:0-9a-fA-F]+\\])?(:([0-9]+))?$") @@ -458,7 +493,7 @@ def __init__(self, scheme="http", host="localhost", port=10101): @classmethod def address(cls, address): """ Creates a URI from an address. - + :param str address: of the form ``${SCHEME}://${HOST}:{$PORT}`` :return: a Pilosa URI :type: pilosa.URI @@ -509,7 +544,7 @@ def __eq__(self, other): class Cluster: """Contains hosts in a Pilosa cluster. - + :param hosts: URIs of hosts. Leaving out hosts creates the default cluster """ @@ -521,7 +556,7 @@ def __init__(self, *hosts): def add_host(self, uri): """Makes a host available. - + :param pilosa.URI uri: """ with self.__lock: @@ -535,7 +570,7 @@ def add_host(self, uri): def remove_host(self, uri): """Makes a host unavailable. - + :param pilosa.URI uri: """ with self.__lock: @@ -545,9 +580,9 @@ def remove_host(self, uri): def get_host(self): """Returns the next host in the cluster. - + :return: next host - :rtype: pilosa.URI + :rtype: pilosa.URI """ for host, ok in self.hosts: if not ok: diff --git a/pilosa/orm.py b/pilosa/orm.py index aa40258..602be92 100644 --- a/pilosa/orm.py +++ b/pilosa/orm.py @@ -47,9 +47,9 @@ class TimeQuantum: - """Valid time quantum values for fields having support for that. 
+ """Valid time quantum values. - * See: `Data Model `_ + * See: `Data Model/Time Quantum `_ """ NONE = None @@ -89,9 +89,16 @@ def __eq__(self, other): class CacheType: + """Cache type for set and mutex fields. + * See: `Data Model/Ranked `_ + """ + + #: Use the default cache type for the server DEFAULT = None + #: The LRU cache maintains the most recently accessed Rows. See: `Data Model/LRU `_ LRU = None + #: Ranked Fields maintain a sorted cache of column counts by Row ID. `Data Model/Ranked `_ RANKED = None def __init__(self, value): @@ -241,6 +248,8 @@ def raw_query(self, query): Note that the query is not validated before sending to the server. + Raw queries may be less efficient than the corresponding ORM query, since they are only sent to the coordinator node. + :param str query: :return: Pilosa query :rtype: pilosa.PQLQuery @@ -253,6 +262,10 @@ def raw_query(self, query): def batch_query(self, *queries): """Creates a batch query. + Using batch queries is more efficient than sending each query individually. + + If you are sending a large amount of ``Set`` or ``Clear`` queries, it is more efficient to import them instead of using a batch query. 
+ :param pilosa.PQLQuery queries: the queries in the batch :return: Pilosa batch query :rtype: pilosa.PQLBatchQuery @@ -269,6 +282,8 @@ def union(self, *rows): :param pilosa.PQLQuery rows: 0 or more row queries to union :return: Pilosa row query :rtype: pilosa.PQLQuery + + * See `Query Language/Union `_ """ return self._row_op("Union", rows) @@ -281,6 +296,8 @@ def intersect(self, *rows): :return: Pilosa row query :rtype: pilosa.PQLQuery :raise PilosaError: if the number of rows is less than 1 + + * See `Query Language/Intersect `_ """ if len(rows) < 1: raise PilosaError("Number of row queries should be greater than or equal to 1") @@ -296,24 +313,40 @@ def difference(self, *rows): :return: Pilosa row query :rtype: pilosa.PQLQuery :raise PilosaError: if the number of rows is less than 1 + + * See `Query Language/Difference `_ """ if len(rows) < 1: raise PilosaError("Number of row queries should be greater than or equal to 1") return self._row_op("Difference", rows) def xor(self, *rows): - """Creates a ``Xor`` query. + """Creates an ``Xor`` query. + + ``Xor`` performs a logical XOR on the results of each ROW_CALL query passed to it. :param pilosa.PQLQuery rows: 2 or more row queries to xor :return: Pilosa row query :rtype: pilosa.PQLQuery :raise PilosaError: if the number of rows is less than 2 + + * See `Query Language/Xor `_ """ if len(rows) < 2: raise PilosaError("Number of row queries should be greater than or equal to 2") return self._row_op("Xor", rows) def not_(self, row): + """Creates a ``Not`` query. + + ``Not`` returns the inverse of all of the bits from the ROW_CALL argument. The ``Not`` query requires that ``track_existence`` has been enabled on the Index (the default). 
+ :param pilosa.PQLQuery row: a row query + :return: Pilosa row query + :rtype: pilosa.PQLQuery + + * See `Query Language/Not `_ + """ return PQLQuery(u"Not(%s)" % row.serialize().query, self) def count(self, row): @@ -324,11 +357,13 @@ def count(self, row): :param pilosa.PQLQuery row: the row query :return: Pilosa query :rtype: pilosa.PQLQuery + + * See `Query Language/Count `_ """ return PQLQuery(u"Count(%s)" % row.serialize().query, self) def set_column_attrs(self, col, attrs): - """Creates a SetColumnAttrs query. + """Creates a ``SetColumnAttrs`` query. ``SetColumnAttrs`` associates arbitrary key/value pairs with a column in an index. @@ -343,6 +378,8 @@ def set_column_attrs(self, col, attrs): :param dict attrs: column attributes :return: Pilosa query :rtype: pilosa.PQLQuery + + * See `Query Language/SetColumnAttrs `_ """ col_str = idkey_as_str(col) attrs_str = _create_attributes_str(attrs) @@ -352,6 +389,24 @@ def set_column_attrs(self, col, attrs): return q def options(self, row_query, column_attrs=False, exclude_columns=False, exclude_row_attrs=False, shards=None): + """Creates an ``Options`` query. + + Modifies the given query as follows: + * ``columnAttrs``: Include column attributes in the result (Default: false). + * ``excludeColumns``: Exclude column IDs from the result (Default: false). + * ``excludeRowAttrs``: Exclude row attributes from the result (Default: false). + * ``shards``: Run the query using only the data from the given shards. By default, the entire data set (i.e. data from all shards) is used. + + :param bool column_attrs: Include column attributes in the result (Default: ``False``). + :param bool exclude_columns: Exclude column IDs from the result (Default: ``False``). + :param bool exclude_row_attrs: Exclude row attributes from the result (Default: ``False``). + :param list shards: Run the query using only the data from the given shards. By default, the entire data set (i.e. data from all shards) is used. 
+ :return: Pilosa query + :rtype: pilosa.PQLQuery + + * See `Query Language/Options `_ + """ + make_bool = lambda b: "true" if b else "false" serialized_options = u"columnAttrs=%s,excludeColumns=%s,excludeRowAttrs=%s" % \ (make_bool(column_attrs), make_bool(exclude_columns), make_bool(exclude_row_attrs)) @@ -444,21 +499,25 @@ def row(self, row_idkey): :param int row_idkey: :return: Pilosa row query :rtype: pilosa.PQLQuery + + * See `Query Language/Row `_ """ row_str = idkey_as_str(row_idkey) fmt = u"Row(%s=%s)" return PQLQuery(fmt % (self.name, row_str), self.index) def set(self, row, col, timestamp=None): - """Creates a SetBit query. + """Creates a Set query. - ``SetBit`` assigns a value of 1 to a bit in the binary matrix, thus associating the given row in the given field with the given column. + ``Set`` assigns a value of 1 to a bit in the binary matrix, thus associating the given row in the given field with the given column. :param int row: :param int col: :param pilosa.TimeStamp timestamp: :return: Pilosa query :rtype: pilosa.PQLQuery + + * See `Query Language/Set `_ """ row_str = idkey_as_str(row) col_str = idkey_as_str(col) @@ -467,14 +526,16 @@ def set(self, row, col, timestamp=None): return PQLQuery(fmt % (col_str, self.name, row_str, ts), self.index) def clear(self, row, col): - """Creates a ClearBit query. + """Creates a Clear query. - ``ClearBit`` assigns a value of 0 to a bit in the binary matrix, thus disassociating the given row in the given field from the given column. + ``Clear`` assigns a value of 0 to a bit in the binary matrix, thus disassociating the given row in the given field from the given column. 
:param int row: :param int col: :return: Pilosa query :rtype: pilosa.PQLQuery + + * See `Query Language/Clear `_ """ row_str = idkey_as_str(row) col_str = idkey_as_str(col) @@ -492,6 +553,8 @@ def topn(self, n, row=None, name="", *values): :param pilosa.PQLQuery row: a PQL Row query :param str name: only return rows which have the attribute specified by attribute name :param object values: filter values to be matched against the attribute name + + * See `Query Language/TopN `_ """ parts = [self.name] if row: @@ -514,6 +577,8 @@ def range(self, row, start, end): :param int row: :param datetime.datetime start: start timestamp :param datetime.datetime end: end timestamp + + * See `Query Language/Range `_ """ row_str = idkey_as_str(row) start_str = start.strftime(_TIME_FORMAT) @@ -538,6 +603,8 @@ def set_row_attrs(self, row, attrs): :param dict attrs: row attributes :return: Pilosa query :rtype: pilosa.PQLQuery + + * See `Query Language/SetRowAttrs `_ """ row_str = idkey_as_str(row) attrs_str = _create_attributes_str(attrs) @@ -547,18 +614,30 @@ def set_row_attrs(self, row, attrs): def store(self, row_query, row): """Creates a Store query. - Store writes the result of the row query to the specified row. If the row already exists, it will be replaced. The destination field must be of field type set. + ``Store`` writes the result of the row query to the specified row. If the row already exists, it will be replaced. The destination field must be of field type set. :param row_query: - :param row: + :param row: ID or key of the target row :return: Pilosa query :rtype: pilosa.PQLQuery + + * See `Query Language/Store `_ """ row_str = idkey_as_str(row) fmt = u"Store(%s,%s=%s)" return PQLQuery(fmt % (row_query.serialize().query, self.name, row_str), self.index) def clear_row(self, row): + """Creates a ClearRow query. + + ``ClearRow`` sets all bits to 0 in a given row of the binary matrix, thus disassociating the given row in the given field from all columns. 
+ + :param row: ID or key of the target row + :return: Pilosa query + :rtype: pilosa.PQLQuery + + * See `Query Language/ClearRow `_ + """ row_str = idkey_as_str(row) fmt = u"ClearRow(%s=%s)" return PQLQuery(fmt % (self.name, row_str), self.index) @@ -569,6 +648,8 @@ def lt(self, n): :param n: The value to compare :return: a PQL query :rtype: PQLQuery + + * See `Query Language/Range `_ """ return self._binary_operation("<", n) @@ -578,6 +659,8 @@ def lte(self, n): :param n: The value to compare :return: a PQL query :rtype: PQLQuery + + * See `Query Language/Range `_ """ return self._binary_operation("<=", n) @@ -587,6 +670,8 @@ def gt(self, n): :param n: The value to compare :return: a PQL query :rtype: PQLQuery + + * See `Query Language/Range `_ """ return self._binary_operation(">", n) @@ -596,6 +681,8 @@ def gte(self, n): :param n: The value to compare :return: a PQL query :rtype: PQLQuery + + * See `Query Language/Range `_ """ return self._binary_operation(">=", n) @@ -605,6 +692,8 @@ def equals(self, n): :param n: The value to compare :return: a PQL query :rtype: PQLQuery + + * See `Query Language/Range `_ """ return self._binary_operation("==", n) @@ -614,6 +703,8 @@ def not_equals(self, n): :param n: The value to compare :return: a PQL query :rtype: PQLQuery + + * See `Query Language/Range `_ """ return self._binary_operation("!=", n) @@ -622,6 +713,8 @@ def not_null(self): :return: a PQL query :rtype: PQLQuery + + * See `Query Language/Range `_ """ q = u"Range(%s != null)" % self.name return PQLQuery(q, self.index) @@ -633,6 +726,8 @@ def between(self, a, b): :param b: Closed range end :return: a PQL query :rtype: PQLQuery + + * See `Query Language/Range `_ """ q = u"Range(%s >< [%d,%d])" % (self.name, a, b) return PQLQuery(q, self.index) @@ -643,6 +738,8 @@ def sum(self, row=None): :param row: The row query to use. 
:return: a PQL query :rtype: PQLQuery + + * See `Query Language/Sum `_ """ return self._value_query("Sum", row) @@ -652,6 +749,8 @@ def min(self, row=None): :param row: The row query to use. :return: a PQL query :rtype: PQLQuery + + * See `Query Language/Min `_ """ return self._value_query("Min", row) @@ -661,6 +760,8 @@ def max(self, row=None): :param row: The row query to use. :return: a PQL query :rtype: PQLQuery + + * See `Query Language/Max `_ """ return self._value_query("Max", row) @@ -671,6 +772,8 @@ def setvalue(self, col, value): :param value: the value to assign to the field :return: a PQL query :rtype: PQLQuery + + * See `Query Language/SetValue `_ """ col_str = idkey_as_str(col) q = u"Set(%s,%s=%d)" % (col_str, self.name, value)