From 69567bb64676319af6d877b779495b0dd2000526 Mon Sep 17 00:00:00 2001 From: dollylipare Date: Wed, 20 Jan 2021 13:35:37 +0000 Subject: [PATCH 01/11] ibis_cloud_spanner folder added --- .../ibis/ibis_cloud_spanner/__init__.py | 2 + third_party/ibis/ibis_cloud_spanner/api.py | 93 ++++ third_party/ibis/ibis_cloud_spanner/client.py | 439 ++++++++++++++++++ .../ibis/ibis_cloud_spanner/compiler.py | 141 ++++++ .../ibis/ibis_cloud_spanner/dataset.py | 149 ++++++ .../ibis/ibis_cloud_spanner/datatypes.py | 98 ++++ third_party/ibis/ibis_cloud_spanner/table.py | 353 ++++++++++++++ .../ibis/ibis_cloud_spanner/to_pandas.py | 82 ++++ 8 files changed, 1357 insertions(+) create mode 100644 third_party/ibis/ibis_cloud_spanner/__init__.py create mode 100644 third_party/ibis/ibis_cloud_spanner/api.py create mode 100644 third_party/ibis/ibis_cloud_spanner/client.py create mode 100644 third_party/ibis/ibis_cloud_spanner/compiler.py create mode 100644 third_party/ibis/ibis_cloud_spanner/dataset.py create mode 100644 third_party/ibis/ibis_cloud_spanner/datatypes.py create mode 100644 third_party/ibis/ibis_cloud_spanner/table.py create mode 100644 third_party/ibis/ibis_cloud_spanner/to_pandas.py diff --git a/third_party/ibis/ibis_cloud_spanner/__init__.py b/third_party/ibis/ibis_cloud_spanner/__init__.py new file mode 100644 index 000000000..139597f9c --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/__init__.py @@ -0,0 +1,2 @@ + + diff --git a/third_party/ibis/ibis_cloud_spanner/api.py b/third_party/ibis/ibis_cloud_spanner/api.py new file mode 100644 index 000000000..199998d41 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/api.py @@ -0,0 +1,93 @@ +# Copyright 2021 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + + +"""CloudScanner public API.""" + + +from third_party.ibis.ibis_cloud_spanner.client import CloudSpannerClient +from third_party.ibis.ibis_cloud_spanner.compiler import dialect +from ibis.config import options +from typing import Optional + +import google.cloud.spanner # noqa: F401, fail early if spanner is missing +import ibis.common.exceptions as com +import pydata_google_auth + + + + + +__all__ = ('compile', 'connect', 'verify', '' + '' + '' + '' + '' + '') + + +def compile(expr, params=None): + """Compile an expression for Cloud Spanner. + + Returns + ------- + compiled : str + + See Also + -------- + ibis.expr.types.Expr.compile + + """ + from third_party.ibis.ibis_cloud_spanner.compiler import to_sql + + return to_sql(expr, dialect.make_context(params=params)) + + +def verify(expr, params=None): + """Check if an expression can be compiled using Cloud Spanner.""" + try: + compile(expr, params=params) + return True + except com.TranslationError: + return False + + + + +def connect( + instance_id: Optional[str] = None, + database_id: Optional[str] = None, + +) -> CloudSpannerClient: + """Create a CloudSpannerClient for use with Ibis. + + Parameters + ---------- + instance_id : str + A Cloud Spanner Instance id. + database_id : str + A database id that lives inside of the Cloud Spanner Instance indicated by + `instance_id`. 
+ + Returns + ------- + CloudSpannerClient + + """ + + return CloudSpannerClient( + instance_id=instance_id, database_id=database_id + ) + diff --git a/third_party/ibis/ibis_cloud_spanner/client.py b/third_party/ibis/ibis_cloud_spanner/client.py new file mode 100644 index 000000000..9ab05bd70 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/client.py @@ -0,0 +1,439 @@ +# Copyright 2021 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cloud Spanner ibis client implementation.""" + +import datetime +from collections import OrderedDict +from typing import Optional, Tuple + +import google.cloud.spanner as cs +from google.cloud import spanner +import pandas as pd +import regex as re +from google.api_core.exceptions import NotFound +from multipledispatch import Dispatcher +from pkg_resources import parse_version + +import ibis +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.lineage as lin +import ibis.expr.operations as ops +import ibis.expr.schema as sch +import ibis.expr.types as ir +from third_party.ibis.ibis_cloud_spanner import compiler as comp +from third_party.ibis.ibis_cloud_spanner.datatypes import ibis_type_to_cloud_spanner_type +from ibis.client import Database, Query, SQLClient +from third_party.ibis.ibis_cloud_spanner import dataset as dataset_class +from third_party.ibis.ibis_cloud_spanner import table + +from google.cloud import spanner +from google.cloud import spanner +from pandas import 
DataFrame +from third_party.ibis.ibis_cloud_spanner.to_pandas import pandas_df + + +def parse_instance_and_dataset( + instance: str, dataset: Optional[str] = None +) -> Tuple[str, str, Optional[str]]: + try: + data_instance, dataset = dataset.split('.') + except (ValueError, AttributeError): + billing_instance = data_instance = instance + else: + billing_instance = instance + + return data_instance, billing_instance, dataset + +class CloudSpannerTable(ops.DatabaseTable): + pass + +def _find_scalar_parameter(expr): + """Find all :class:`~ibis.expr.types.ScalarParameter` instances. + + Parameters + ---------- + expr : ibis.expr.types.Expr + + Returns + ------- + Tuple[bool, object] + The operation and the parent expresssion's resolved name. + + """ + op = expr.op() + + if isinstance(op, ops.ScalarParameter): + result = op, expr.get_name() + else: + result = None + return lin.proceed, result + +def convert_to_cs_type(dtype): + if (dtype == 'FLOAT64'): + return spanner.param_types.FLOAT64 + elif (dtype == 'INT64'): + return spanner.param_types.INT64 + elif (dtype == 'DATE'): + return spanner.param_types.DATE + elif (dtype == 'TIMESTAMP'): + return spanner.param_types.TIMESTAMP + elif (dtype == 'NUMERIC'): + return spanner.param_types.NUMERIC + elif (dtype == 'INT64'): + return spanner.param_types.INT64 + else: + return spanner.param_types.STRING + +cloud_spanner_param = Dispatcher('cloud_spanner_param') + + +@cloud_spanner_param.register(ir.ArrayValue, list) +def cs_param_array(param, value): + param_type = param.type() + assert isinstance(param_type, dt.Array), str(param_type) + + try: + spanner_type = ibis_type_to_cloud_spanner_type(param_type.value_type) + except NotImplementedError: + raise com.UnsupportedBackendType(param_type) + else: + if isinstance(param_type.value_type, dt.Struct): + raise TypeError('ARRAY> is not supported in Cloud Spanner') + elif isinstance(param_type.value_type, dt.Array): + raise TypeError('ARRAY> is not supported in Cloud Spanner') + 
else: + query_value = value + + params={param.get_name(): query_value}, + param_types={param.get_name(): convert_to_cs_type(spanner_type)} + final_dict={ + 'params':params, + 'param_types':param_types + } + + return final_dict + + +@cloud_spanner_param.register( + ir.TimestampScalar, (str, datetime.datetime, datetime.date) +) +def cs_param_timestamp(param, value): + assert isinstance(param.type(), dt.Timestamp), str(param.type()) + + timestamp_value = pd.Timestamp(value, tz='UTC').to_pydatetime() + params={param.get_name(): timestamp_value}, + param_types={param.get_name(): spanner.param_types.TIMESTAMP} + final_dict={ + 'params':params[0], + 'param_types':param_types + + } + return final_dict + + +@cloud_spanner_param.register(ir.StringScalar, str) +def cs_param_string(param, value): + params={param.get_name(): value}, + param_types={param.get_name(): spanner.param_types.STRING} + final_dict={ + 'params':params[0], + 'param_types':param_types + + } + return final_dict + + + +@cloud_spanner_param.register(ir.IntegerScalar, int) +def cs_param_integer(param, value): + params={param.get_name(): value}, + param_types={param.get_name(): spanner.param_types.INT64} + final_dict={ + 'params':params[0], + 'param_types':param_types + + } + return final_dict + + + +@cloud_spanner_param.register(ir.FloatingScalar, float) +def cs_param_double(param, value): + params={param.get_name(): value}, + param_types={param.get_name(): spanner.param_types.FLOAT64} + final_dict={ + 'params':params[0], + 'param_types':param_types + + } + return final_dict + + +@cloud_spanner_param.register(ir.BooleanScalar, bool) +def cs_param_boolean(param, value): + params={param.get_name(): value}, + param_types={param.get_name(): spanner.param_types.BOOL} + final_dict={ + 'params':params[0], + 'param_types':param_types + + } + return final_dict + + + + +@cloud_spanner_param.register(ir.DateScalar, str) +def cs_param_date_string(param, value): + params={param.get_name(): 
pd.Timestamp(value).to_pydatetime().date()}, + param_types={param.get_name(): spanner.param_types.DATE} + final_dict={ + 'params':params[0], + 'param_types':param_types + + } + return final_dict + + +@cloud_spanner_param.register(ir.DateScalar, datetime.datetime) +def cs_param_date_datetime(param, value): + params={param.get_name(): value.date()}, + param_types={param.get_name(): spanner.param_types.DATE} + final_dict={ + 'params':params[0], + 'param_types':param_types + + } + return final_dict + + +@cloud_spanner_param.register(ir.DateScalar, datetime.date) +def cs_param_date(param, value): + params={param.get_name(): value}, + param_types={param.get_name(): spanner.param_types.DATE} + final_dict={ + 'params':params[0], + 'param_types':param_types + + } + return final_dict + + +class CloudSpannerQuery(Query): + def __init__(self, client, ddl, query_parameters=None): + super().__init__(client, ddl) + + # self.expr comes from the parent class + query_parameter_names = dict( + lin.traverse(_find_scalar_parameter, self.expr) + ) + + self.query_parameters = [ + cloud_spanner_param( + param.to_expr().name(query_parameter_names[param]), value + ) + for param, value in (query_parameters or {}).items() + ] + + + def execute(self): + dataframe_output = self.client._execute(self.compiled_sql,results=True,query_parameters=self.query_parameters) + + return dataframe_output + +class CloudSpannerDatabase(Database): + """A Cloud scanner dataset.""" + + +class CloudSpannerClient(SQLClient): + """An ibis CloudSpanner client implementation.""" + + query_class = CloudSpannerQuery + database_class = CloudSpannerDatabase + table_class = CloudSpannerTable + dialect = comp.CloudSpannerDialect + + def __init__(self, instance_id, database_id=None, credentials=None): + """Construct a CloudSpannerClient. 
+ + Parameters + ---------- + instance_id : str + A instance name + database_id : Optional[str] + A ``.`` string or just a dataset name + + + """ + self.spanner_client = spanner.Client() + self.instance = self.spanner_client.instance(instance_id) + self.database_name = self.instance.database(database_id) + ( + self.data_instance, + self.billing_instance, + self.dataset, + ) = parse_instance_and_dataset(instance_id, database_id) + self.client = cs.Client() + + + + def _parse_instance_and_dataset(self, dataset): + if not dataset and not self.dataset: + raise ValueError("Unable to determine Cloud Spanner dataset.") + instance, _, dataset = parse_instance_and_dataset( + self.billing_instance, + dataset or '{}.{}'.format(self.data_instance, self.dataset), + ) + return instance, dataset + + def get_data_using_query(self, query, results=False): + return self._execute(query, results=results) + + @property + def instance_id(self): + return self.data_instance + + @property + def dataset_id(self): + return self.dataset + + def table(self,name,database=None): + t = super().table(name, database=database) + name = t.op().name + instance = self.instance_id + dataset = self.dataset_id + dataset_ref = dataset_class.DatasetReference(instance,dataset) + table_ref = dataset_ref.table(name) + cs_table = table.Table(table_ref) + return t + + def _build_ast(self, expr, context): + result = comp.build_ast(expr, context) + return result + + def _get_query(self, dml, **kwargs): + return self.query_class(self, dml, query_parameters=dml.context.params) + + def _fully_qualified_name(self, name, database): + instance, dataset = self._parse_instance_and_dataset(database) + return "{}".format(name) + + def _get_table_schema(self, qualified_name): + table = qualified_name + dataset = self.dataset_id + assert dataset is not None, "dataset is None" + return self.get_schema(table, database=dataset) + + @property + def current_database(self): + return self.database(self.dataset) + + def 
list_databases(self, like=None): + databases=self.instance.list_databases() + list_db=[] + for row in databases: + list_db.append((row.name).rsplit('/', 1)[1]) + return list_db + + + + def list_tables(self,like=None,database=None): + if database is None: + db_value = self.dataset_id + else: + db_value = database + db = self.instance.database(db_value) + tables=[] + with db.snapshot() as snapshot: + query="SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES where SPANNER_STATE = 'COMMITTED' " + results = snapshot.execute_sql(query) + for row in results: + tables.append(row[0]) + + if like: + tables = [ + table_name + for table_name in tables + if re.match(like, table_name) is not None + ] + return tables + + def exists_table(self, name, database=None): + if database is None: + db_value = self.dataset_id + else: + db_value = database + db = self.instance.database(db_value) + with db.snapshot() as snapshot: + query = "SELECT EXISTS(SELECT * FROM INFORMATION_SCHEMA.TABLES where TABLE_NAME = '{}' )".format(name) + output = snapshot.execute_sql(query) + result = '' + for row in output: + result = row[0] + return result + + + def get_schema(self, name, database=None): + if database is None: + database = self.dataset_id + instance, dataset = self._parse_instance_and_dataset(database) + dataset_ref = dataset_class.DatasetReference(instance,dataset) + table_ref = dataset_ref.table(name) + cs_table = table.Table(table_ref).schema + return (ibis.schema(cs_table)) + + + def _execute(self, stmt, results=True, query_parameters=None): + + from google.cloud import spanner + spanner_client = spanner.Client() + instance_id = self.instance_id + instance = spanner_client.instance(instance_id) + database_id = self.dataset_id + database_1 = instance.database(database_id) + + with database_1.snapshot() as snapshot: + data_qry = pandas_df.to_pandas(snapshot,stmt,query_parameters) + return data_qry + + + + def database(self, name=None): + if name is None and self.dataset is None: + raise 
ValueError( + "Unable to determine Cloud Scanner dataset. Call " + "client.database('my_dataset') or set_database('my_dataset') " + "to assign your client a dataset." + ) + return self.database_class(name or self.dataset, self) + + def set_database(self, name): + self.data_instance, self.dataset = self._parse_instance_and_dataset(name) + + def dataset(self,database): + instance = spanner_client.instance(self.data_instance) + database = instance.database(database) + + + def exists_database(self, name): + return self.instance.database(name).exists() + + + + + + + diff --git a/third_party/ibis/ibis_cloud_spanner/compiler.py b/third_party/ibis/ibis_cloud_spanner/compiler.py new file mode 100644 index 000000000..6aef76dc7 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/compiler.py @@ -0,0 +1,141 @@ +# Copyright 2021 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +from functools import partial + +import numpy as np +import regex as re +import toolz +from multipledispatch import Dispatcher + +import ibis +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt +import ibis.expr.lineage as lin +import ibis.expr.operations as ops +import ibis.expr.types as ir +import ibis.sql.compiler as comp +from third_party.ibis.ibis_cloud_spanner.datatypes import ibis_type_to_cloud_spanner_type +from ibis.impala import compiler as impala_compiler +from ibis.impala.compiler import ( + ImpalaSelect, + ImpalaTableSetFormatter, + _reduction, + fixed_arity, + unary, +) + + +class CloudSpannerSelectBuilder(comp.SelectBuilder): + @property + def _select_class(self): + return CloudSpannerSelect + + + +class CloudSpannerUnion(comp.Union): + @staticmethod + def keyword(distinct): + return 'UNION DISTINCT' if distinct else 'UNION ALL' + + + +class CloudSpannerQueryBuilder(comp.QueryBuilder): + + select_builder = CloudSpannerSelectBuilder + union_class = CloudSpannerUnion + + +def build_ast(expr, context): + builder = CloudSpannerQueryBuilder(expr, context=context) + return builder.get_result() + + +def to_sql(expr, context): + query_ast = build_ast(expr, context) + compiled = query_ast.compile() + return compiled + + +class CloudSpannerContext(comp.QueryContext): + def _to_sql(self, expr, ctx): + return to_sql(expr, context=ctx) + + + +_operation_registry = impala_compiler._operation_registry.copy() + + +_invalid_operations = { + ops.Translate, + ops.FindInSet, + ops.Capitalize, + ops.DateDiff, + ops.TimestampDiff, +} + +_operation_registry = { + k: v + for k, v in _operation_registry.items() + if k not in _invalid_operations +} + + +class CloudSpannerExprTranslator(impala_compiler.ImpalaExprTranslator): + _registry = _operation_registry + _rewrites = impala_compiler.ImpalaExprTranslator._rewrites.copy() + + context_class = CloudSpannerContext + + def _trans_param(self, expr): + op = expr.op() + if op not in 
self.context.params: + raise KeyError(op) + return '@{}'.format(expr.get_name()) + + +compiles = CloudSpannerExprTranslator.compiles +rewrites = CloudSpannerExprTranslator.rewrites + + + + + +class CloudSpannerTableSetFormatter(ImpalaTableSetFormatter): + def _quote_identifier(self, name): + if re.match(r'^[A-Za-z][A-Za-z_0-9]*$', name): + return name + return '`{}`'.format(name) + + +class CloudSpannerSelect(ImpalaSelect): + + translator = CloudSpannerExprTranslator + + @property + def table_set_formatter(self): + return CloudSpannerTableSetFormatter + + + + + +class CloudSpannerDialect(impala_compiler.ImpalaDialect): + translator = CloudSpannerExprTranslator + + +dialect = CloudSpannerDialect + + diff --git a/third_party/ibis/ibis_cloud_spanner/dataset.py b/third_party/ibis/ibis_cloud_spanner/dataset.py new file mode 100644 index 000000000..34be92610 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/dataset.py @@ -0,0 +1,149 @@ +# Copyright 2021 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from third_party.ibis.ibis_cloud_spanner import table + +import six +import copy +import google.cloud._helpers +import ibis +import third_party.ibis.ibis_cloud_spanner + + + +def _get_table_reference(self, table_id): + """Constructs a TableReference. + Args: + table_id (str): The ID of the table. + Returns: + cloud_spanner.table.TableReference: + A table reference for a table in this dataset. 
+ """ + return table.TableReference(self, table_id) + +class DatasetReference(object): + """DatasetReferences are pointers to datasets. + + Args: + instance_id (str): The ID of the instance + dataset_id (str): The ID of the dataset + Raises: + ValueError: If either argument is not of type ``str``. + """ + + def __init__(self, instance_id, dataset_id): + '''if not isinstance(instance_id, six.string_types): + raise ValueError("Pass a string for instance_id") + if not isinstance(dataset_id, six.string_types): + raise ValueError("Pass a string for dataset_id")''' + self._instance_id = instance_id + self._dataset_id = dataset_id + + table = _get_table_reference + + @property + def instance_id(self): + """str: Project ID of the dataset.""" + return self._instance_id + + @property + def dataset_id(self): + """str: Dataset ID.""" + return self._dataset_id + + + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a dataset reference given its API representation + Args: + resource (Dict[str, str]): + Dataset reference resource representation returned from the API + Returns: + cloud_spanner.dataset.DatasetReference: + Dataset reference parsed from ``resource``. + """ + instance_id = resource["instanceId"] + dataset_id = resource["datasetId"] + return cls(instance_id, dataset_id) + + @classmethod + def from_string(cls, dataset_id, default_instance_id=None): + """Construct a dataset reference from dataset ID string. + Args: + dataset_id (str): + A dataset ID in standard SQL format. If ``instance_id`` + is not specified, this must include both the instance ID and + the dataset ID, separated by ``.``. + default_instance_id (Optional[str]): + The instance ID to use when ``dataset_id`` does not include a + instance ID. + Returns: + DatasetReference: + Dataset reference parsed from ``dataset_id``. 
+ + """ + output_dataset_id = dataset_id + output_instance_id = default_instance_id + parts = dataset_id.split(".") + parts = [part for part in parts if part] + + + if len(parts) == 1 and not default_instance_id: + raise ValueError( + "When instance is not set, dataset_id must be a " + "fully-qualified dataset ID in standard SQL format, " + 'e.g., "instance_id.dataset_id" got {}'.format(dataset_id) + ) + elif len(parts) == 2: + output_instance_id, output_dataset_id = parts + elif len(parts) > 2: + raise ValueError( + "Too many parts in dataset_id. Expected a fully-qualified " + "dataset ID in standard SQL format. e.g. " + '"instance _id.dataset_id", got {}'.format(dataset_id) + ) + + return cls(output_instance_id, output_dataset_id) + + def to_api_repr(self): + """Construct the API resource representation of this dataset reference + Returns: + Dict[str, str]: dataset reference represented as an API resource + """ + return {"instanceId": self._instance_id, "datasetId": self._dataset_id} + + def _key(self): + """A tuple key that uniquely describes this field. + Used to compute this instance's hashcode and evaluate equality. + Returns: + Tuple[str]: The contents of this :class:`.DatasetReference`. + """ + return (self._instance_id, self._dataset_id) + + def __eq__(self, other): + if not isinstance(other, DatasetReference): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + return "DatasetReference{}".format(self._key()) + diff --git a/third_party/ibis/ibis_cloud_spanner/datatypes.py b/third_party/ibis/ibis_cloud_spanner/datatypes.py new file mode 100644 index 000000000..b69313a83 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/datatypes.py @@ -0,0 +1,98 @@ +# Copyright 2021 Google Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from multipledispatch import Dispatcher + +import ibis.expr.datatypes as dt + + +class TypeTranslationContext: + """A tag class to allow alteration of the way a particular type is + translated.""" + + __slots__ = () + + +ibis_type_to_cloud_spanner_type = Dispatcher('ibis_type_to_cloud_spanner_type') + + +@ibis_type_to_cloud_spanner_type.register(str) +def trans_string_default(datatype): + return ibis_type_to_cloud_spanner_type(dt.dtype(datatype)) + + +@ibis_type_to_cloud_spanner_type.register(dt.DataType) +def trans_default(t): + return ibis_type_to_cloud_spanner_type(t, TypeTranslationContext()) + + +@ibis_type_to_cloud_spanner_type.register(str, TypeTranslationContext) +def trans_string_context(datatype, context): + return ibis_type_to_cloud_spanner_type(dt.dtype(datatype), context) + + +@ibis_type_to_cloud_spanner_type.register(dt.Floating, TypeTranslationContext) +def trans_float64(t, context): + return 'FLOAT64' + + +@ibis_type_to_cloud_spanner_type.register(dt.Integer, TypeTranslationContext) +def trans_integer(t, context): + return 'INT64' + + +@ibis_type_to_cloud_spanner_type.register(dt.Array, TypeTranslationContext) +def trans_array(t, context): + return 'ARRAY<{}>'.format( + ibis_type_to_cloud_spanner_type(t.value_type, context) + ) + + + +@ibis_type_to_cloud_spanner_type.register(dt.Date, TypeTranslationContext) +def trans_date(t, context): + return 'DATE' + + 
+@ibis_type_to_cloud_spanner_type.register(dt.Timestamp, TypeTranslationContext) +def trans_timestamp(t, context): + return 'TIMESTAMP' + + +@ibis_type_to_cloud_spanner_type.register(dt.DataType, TypeTranslationContext) +def trans_type(t, context): + return str(t).upper() + +@ibis_type_to_cloud_spanner_type.register( + dt.UInt64, TypeTranslationContext +) +def trans_lossy_integer(t, context): + raise TypeError( + 'Conversion from uint64 to Cloud Spanner integer type (int64) is lossy' + ) + + + +@ibis_type_to_cloud_spanner_type.register(dt.Decimal, TypeTranslationContext) +def trans_numeric(t, context): + if (t.precision, t.scale) != (38, 9): + raise TypeError( + 'Cloud Spanner only supports decimal types with precision of 38 and ' + 'scale of 9' + ) + return 'NUMERIC' + + + + diff --git a/third_party/ibis/ibis_cloud_spanner/table.py b/third_party/ibis/ibis_cloud_spanner/table.py new file mode 100644 index 000000000..580886a55 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/table.py @@ -0,0 +1,353 @@ +# Copyright 2021 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from google.api_core.page_iterator import HTTPIterator +from third_party.ibis.ibis_cloud_spanner import dataset as dataset_class + +import copy +import datetime +import functools +import logging +import operator +import pytz +import warnings +import six +import google.api_core.exceptions +import google.cloud._helpers + +try: + import pandas +except ImportError: # pragma: NO COVER + pandas = None + +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + + + +code_to_spanner_dtype_dict = { + 1 : 'bool', + 2 : 'int64', + 3 : 'float64', + 4 : 'timestamp', + 5 : 'date', + 6 : 'string', + 7 : 'binary', + 8 : 'array', + 10 : 'decimal' +} + +def _parse_3_part_id(full_id, default_instance=None, property_name="table_id"): + output_instance_id = default_instance + output_dataset_id = None + output_resource_id = None + parts = full_id.split(".") + parts = [part for part in parts if part] + if len(parts) != 2 and len(parts) != 3: + raise ValueError( + "{property_name} must be a fully-qualified ID in " + 'standard SQL format, e.g., "instance.dataset.{property_name}", ' + "got {}".format(full_id, property_name=property_name) + ) + if len(parts) == 2 and not default_instance: + raise ValueError( + "When default_instance is not set, {property_name} must be a " + "fully-qualified ID in standard SQL format, " + 'e.g., "instance.dataset_id.{property_name}", got {}'.format( + full_id, property_name=property_name + ) + ) + if len(parts) == 2: + output_dataset_id, output_resource_id = parts + else: + output_instance_id, output_dataset_id, output_resource_id = parts + return output_instance_id, output_dataset_id, output_resource_id + + +class TableReference(object): + """TableReferences are pointers to tables. 
+ + Args: + dataset_ref (cloud_spanner.dataset.DatasetReference): + A pointer to the dataset + table_id (str): The ID of the table + """ + + def __init__(self, dataset_ref, table_id): + self._instance = dataset_ref.instance_id + self._dataset_id = dataset_ref.dataset_id + self._table_id = table_id + + @property + def instance(self): + """str: instance bound to the table""" + return self._instance + + @property + def dataset_id(self): + """str: ID of dataset containing the table.""" + return self._dataset_id + + @property + def table_id(self): + """str: The table ID.""" + return self._table_id + + + # we use this in Tableclass.py so keep it + @classmethod + def from_string(cls, table_id, default_instance=None): + """Construct a table reference from table ID string. + Args: + table_id (str): + A table ID in standard SQL format. If ``instance`` + is not specified, this must included a instance ID, dataset + ID, and table ID, each separated by ``.``. + default_instance (Optional[str]): + The instance ID to use when ``table_id`` does not + include a instance ID. + Returns: + TableReference: Table reference parsed from ``table_id``. + Examples: + >>> TableReference.from_string('my-instance.mydataset.mytable') + TableRef...(DatasetRef...('my-instance', 'mydataset'), 'mytable') + Raises: + ValueError: + If ``table_id`` is not a fully-qualified table ID in + standard SQL format. + """ + + + + ( + output_instance_id, + output_dataset_id, + output_table_id, + ) = _parse_3_part_id( + table_id, default_instance=default_instance, property_name="table_id" + ) + + return cls( + dataset_class.DatasetReference(output_instance_id, output_dataset_id), output_table_id + ) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a table reference given its API representation + Args: + resource (Dict[str, object]): + Table reference representation returned from the API + Returns: + cloud_spanner.table.TableReference: + Table reference parsed from ``resource``. 
+ """ + + + instance = resource["instanceId"] + dataset_id = resource["datasetId"] + table_id = resource["tableId"] + return cls(dataset_class.DatasetReference(instance, dataset_id), table_id) + + def to_api_repr(self): + """Construct the API resource representation of this table reference. + Returns: + Dict[str, object]: Table reference represented as an API resource + """ + return { + "instanceId": self._instance, + "datasetId": self._dataset_id, + "tableId": self._table_id, + } + + + + def __eq__(self, other): + if not isinstance(other, TableReference): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + + dataset_ref = dataset_class.DatasetReference(self._instance, self._dataset_id) + return "TableReference({}, '{}')".format(repr(dataset_ref), self._table_id) + + + +def _table_arg_to_table_ref(value, default_instance=None): + """Helper to convert a string or Table to TableReference. + This function keeps TableReference and other kinds of objects unchanged. + """ + if isinstance(value, six.string_types): + value = TableReference.from_string(value, default_instance=default_instance) + + return value + +class Table(object): + """Tables represent a set of rows whose values correspond to a schema. + + Args: + table_ref (Union[cloud_spanner.table.TableReference, str]): + A pointer to a table. If ``table_ref`` is a string, it must + included a instance ID, dataset ID, and table ID, each separated + by ``.``. + schema (Optional): + The table's schema. If any item is a mapping, its content must be + compatible with + """ + + + def __init__(self, table_ref, schema=None): + table_ref = _table_arg_to_table_ref(table_ref) + self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}} + # Let the @property do validation. 
+ if schema is not None: + self.schema = schema + + @property + def instance(self): + """str: instance bound to the table.""" + return self._properties["tableReference"]["instanceId"] + + @property + def dataset_id(self): + """str: ID of dataset containing the table.""" + return self._properties["tableReference"]["datasetId"] + + @property + def table_id(self): + """str: ID of the table.""" + return self._properties["tableReference"]["tableId"] + + + @property + def path(self): + """str: URL path for the table's APIs.""" + return "/instances/%s/datasets/%s/tables/%s" % ( + self.instance, + self.dataset_id, + self.table_id, + ) + + + + @property + def schema(self): + + records_list=[] + from google.cloud import spanner + spanner_client = spanner.Client() + instance_id = self.instance + instance = spanner_client.instance(instance_id) + db_id = self.dataset_id + database = instance.database(db_id) + with database.snapshot() as snapshot: + query="select * from {} limit 1".format(self.table_id) + results=snapshot.execute_sql(query) + for row in results: + records_list.append(row) + + schema_list=list(results.fields) + + final=[] + + for item in schema_list: + field_name = item.name + if(item.type_.code == 8): + field_type = 'array<{}>'.format(code_to_spanner_dtype_dict[item.type.array_element_type.code]) + else : + field_type = code_to_spanner_dtype_dict[item.type_.code] + final_item = ( field_name , field_type ) + + final.append(final_item) + + + return final + + @schema.setter + def schema(self, value): + if value is None: + self._properties["schema"] = None + else: + value = _to_schema_fields(value) + self._properties["schema"] = {"fields": _build_schema_resource(value)} + + + @classmethod + def from_string(cls, full_table_id): + """Construct a table from fully-qualified table ID. + Args: + full_table_id (str): + A fully-qualified table ID in standard SQL format. Must + included a instance ID, dataset ID, and table ID, each + separated by ``.``. 
+ Returns: + Table: Table parsed from ``full_table_id``. + + """ + return cls(TableReference.from_string(full_table_id)) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a table given its API representation + Args: + resource (Dict[str, object]): + Table resource representation from the API + Returns: + cloud_spanner.table.Table: Table parsed from ``resource``. + + """ + + + if ( + "tableReference" not in resource + or "tableId" not in resource["tableReference"] + ): + raise KeyError( + "Resource lacks required identity information:" + '["tableReference"]["tableId"]' + ) + instance_id = resource["tableReference"]["instanceId"] + table_id = resource["tableReference"]["tableId"] + dataset_id = resource["tableReference"]["datasetId"] + dataset_ref = dataset_class.DatasetReference(instance_id, dataset_id) + + table = cls(dataset_ref.table(table_id)) + table._properties = resource + + return table + + def to_api_repr(self): + """Constructs the API resource of this table + Returns: + Dict[str, object]: Table represented as an API resource + """ + return copy.deepcopy(self._properties) + + + + def __repr__(self): + d_ref = dataset_class.DatasetReference(self.instance, self.dataset_id) + return "Table({})".format(TableReference(d_ref, self.table_id)) + + + diff --git a/third_party/ibis/ibis_cloud_spanner/to_pandas.py b/third_party/ibis/ibis_cloud_spanner/to_pandas.py new file mode 100644 index 000000000..d5554224e --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/to_pandas.py @@ -0,0 +1,82 @@ +# Copyright 2021 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pandas import DataFrame
# NOTE: the original ``from google.cloud import spanner`` import was removed
# because ``spanner`` is never referenced anywhere in this module.

# Map google.cloud.spanner TypeCode integer values to Spanner type names.
code_to_spanner_dtype_dict = {
    1: 'BOOL',
    2: 'INT64',
    3: 'FLOAT64',
    4: 'TIMESTAMP',
    5: 'DATE',
    6: 'STRING',
    7: 'BYTES',
    8: 'ARRAY',
    10: 'NUMERIC',
}


class pandas_df():
    """Helper that materializes a Cloud Spanner query result as a DataFrame."""

    @staticmethod
    def to_pandas(snapshot, sql, query_parameters):
        """Run ``sql`` on ``snapshot`` and return the result as a DataFrame.

        Args:
            snapshot: A Cloud Spanner snapshot (anything exposing
                ``execute_sql``).
            sql (str): The query to execute.
            query_parameters: Falsy for no parameters; otherwise an iterable
                of dicts, each carrying ``'params'`` and ``'param_types'``
                mappings that are merged and forwarded to ``execute_sql``.

        Returns:
            pandas.DataFrame: Query results, with column dtypes converted
            from the Spanner types where a mapping is known.
        """
        if query_parameters:
            # Merge all parameter dicts into a single params / param_types
            # pair, as execute_sql expects.
            params = {}
            param_types = {}
            for item in query_parameters:
                params.update(item['params'])
                param_types.update(item['param_types'])

            data_qry = snapshot.execute_sql(
                sql, params=params, param_types=param_types
            )
        else:
            data_qry = snapshot.execute_sql(sql)

        # Consume the row stream first; result metadata (``fields``) is
        # available once the stream has been read.
        data = list(data_qry)

        # Spanner type name per result column, in result order.
        columns_dict = {
            item.name: code_to_spanner_dtype_dict[item.type_.code]
            for item in data_qry.fields
        }

        # Creating pandas dataframe from data and the column list
        df = DataFrame(data, columns=list(columns_dict))

        # Mapping dictionary to map every spanner datatype to a pandas
        # compatible datatype
        mapping_dict = {
            'INT64': 'int64',
            'STRING': 'object',
            'BOOL': 'bool',
            'BYTES': 'object',
            'ARRAY': 'object',
            'DATE': 'datetime64[ns, UTC]',
            'FLOAT64': 'float64',
            'NUMERIC': 'object',
            'TIMESTAMP': 'datetime64[ns, UTC]',
        }
        for column, spanner_type in columns_dict.items():
            try:
                df[column] = df[column].astype(mapping_dict[spanner_type])
            except KeyError:
                # Best-effort: leave the column's dtype untouched when the
                # Spanner type has no pandas mapping.
                print("Spanner Datatype not present in datatype mapping dictionary")

        return df
# From f4279b4e3e90c0b2336ec702546aa9376fd75012 Mon Sep 17 00:00:00 2001
# From: dollylipare
# Date:
Wed, 10 Feb 2021 10:54:41 +0000 Subject: [PATCH 02/11] Added tests folder for ibis_cloud_spanner connector --- third_party/ibis/ibis_cloud_spanner.zip | Bin 0 -> 38490 bytes third_party/ibis/ibis_cloud_spanner/table.py | 7 +- .../ibis/ibis_cloud_spanner/tests/__init__.py | 1 + .../ibis/ibis_cloud_spanner/tests/conftest.py | 69 ++ .../ibis/ibis_cloud_spanner/tests/schema.sql | 93 +++ .../ibis_cloud_spanner/tests/test_client.py | 512 +++++++++++++ .../ibis_cloud_spanner/tests/test_compiler.py | 681 ++++++++++++++++++ .../tests/test_datatypes.py | 62 ++ .../ibis/ibis_cloud_spanner/to_pandas.py | 2 +- 9 files changed, 1423 insertions(+), 4 deletions(-) create mode 100644 third_party/ibis/ibis_cloud_spanner.zip create mode 100644 third_party/ibis/ibis_cloud_spanner/tests/__init__.py create mode 100644 third_party/ibis/ibis_cloud_spanner/tests/conftest.py create mode 100644 third_party/ibis/ibis_cloud_spanner/tests/schema.sql create mode 100644 third_party/ibis/ibis_cloud_spanner/tests/test_client.py create mode 100644 third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py create mode 100644 third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py diff --git a/third_party/ibis/ibis_cloud_spanner.zip b/third_party/ibis/ibis_cloud_spanner.zip new file mode 100644 index 0000000000000000000000000000000000000000..49b9b3dba70e93aa2694d746a54200f52fa1d2c2 GIT binary patch literal 38490 zcmb4q1FSAQyXCuW+dA8}ZQHi3vu&Pj+qUhqZQHiZ`R<*`+}z1b{`s41pqN;gfv)b zATeFYaq7#kW}nCk2QPvCln|0gi&Kl6eF z{0A@)5B+$?Kdp{`GTXlf*4MYRvvkqdr!#i&aIvtrqh)5N`)AnyT*!ZnceIM}e=fv% z|LDjF5QriFGt3NhHUcZ*j}(sQ-ts{ z$C(ZmAzJ=!TJB-`&Y3Rx4wg)c_2x0`dHg<D#TY&oDom_4mEJFF~3v@yhPDax4Pl&tN#)I3C=lQNBJ3$O6Ho8 zG*I8t_`Wwgp2~dbd5P9ejtn`xdFcIi)glx6$#eIuyYKc+OaYCgfS(1tFo0)*n&dsA zT2iYNV=q}oU@sP-(<*-;G?w-VGznFzhpW*`$`?B0spR&Cl)2JO!m+#NI=lBB<|Pf< zVKUJzE>BNO=i4@S%IAj9KcsS78>DSjUroum+kTMe+f});hiso++coP$RyXf#T|(O0 zy1U!zC#~1Eu)DV|UmVk>gl!@Nzy!c%0WSW?{y=~ukDFy_vnRf!J_Q8jMG{nHrw~tQ 
zRTHoZ2GfQ`SzOOU$_3j8<%(s)3la}cu~!7XakRIJlUU}40C=nR6v`Ef1qwYO_Jjc$ zS}_wQY`V*Q*z8?cbEy)gqv(W-{N#LVPUj%!?%4bJH=~jXiKWd^sg+;Mkv_FAaXdpj zwWjY8#OfH%)T+!EBHVMS3T9H3O@W~pUXU3sq~(?n5gtcfyc>?Qp0f8=eC&GDmsK6XHPuxKQ-aS7dV*dSudR{u_vKGG#|Vsp37+V;aFQ@bzp7o`) z?`E02x-+S8pUMxW3TQ+UM%RMzp}kh6QS8#%;){bU7bzk}W%s3$HymTZ%wz=ktwxQU zIimV5ytbsjQhrdmbt|{dB$#<#I-g7IxNJTmd=&vkU<#02OFV5$;+c_zN$`7{)qwdW zyDboFeIeQdQ(Ro`hO%|AU#_VI58DMB*4uVKUi$t7&JoH4ABUBr_Tb~?Fwmo&8=`_? zAe3b`EE3d}%KJhk(p%SdTtF1Kpo-KyR%haZPRj!F{siHBU;&Dz1>ZKo0dM*|7cx?1 z{k)*})`m&TM$^dXC9M2ab8DY{(Z>9jD`hvuzNj7EI<=5KsKRUolhf(!Qt64XAop5_#%5lbo}df~SfzIZO&1ck=!N^5*jyedciu9xGeeE`H6gS!I~`FzR=K4S53>3XztBy z&)m|<_E7B2$>e}j4qtV^zaZ_)pSFG|E+S|YDsYG4j1tVS`B79bsF8V8O$%B$G}c#t z)^Z*3|A+qnmpI)P)3{Xpb1MA9Er9V~i<6;)<-Z|I(JH<62W+Thq?&FvPGeD3g@hw zuW?SKlbH#IIn;1SdB^O{i z4m}${EP*Y^5Qt4Di-6eAKmG+cBT%YARiZD=( zRTMr^=?qigwA~^iZX+tNFV)7r6y$xMmoll~U{&ipG*sq7quTC@3V zvm%G0p8plW|E14+Uf1TM>bFtCi($9nLX-mC(m)U(YdKW0JAqC>`^SjV<4{Rdwt}jv zdkJD{pN87$BSg~Ui?(`TUDegz=N!=~b{uYztbB#s#a7J$n#wex4c%d&+FM)BeG^Zq zWl|-Zz9F!h)DMdCNt{rpH+JjD^%l~7Aa8;9@)?Qm_8aKGi$aUMb%rAv0071a0I>XP zQ82c#G_`a2H*{f;OWPht-P8HtmN+uOR5RXmBxhC5=3VG5VPe>|E8djTak&IHA|_jM zo^gmdK@x$UkJ*JLRA#Puqv#aea?WTD_)HCWovuT4uE3sl_T<~YXG)sjeO~mVg97m} zf9EATeJ`#+$%_CRipP06-TC7>m05jwgc%rxGdq+R6$S>8%kMgw}j{hYWRnjPd zk};h!#%BCh1+Ta!qaw;Dqi_kute_^dgJ$^G1*K?0vFG}vg8TqhkSEvgOxBUJbY}A1 za_+<@-eY{vRF{%-uc>YodCr9=pW_~b=_pR&8I1GBrh8v!y5G)uzHy#Wty=xNh$a4iFS za&_$;B|o*T_v|xzZ|j+nW_hoCq3=Pex&Pvcaja=X?8{1CNQC&|8Ng{xmm}^e?jDy>0Sxi zez_zNtnJq;*v1jOk?n-+7GG67*M;}AU%L^$VSHwKssZh;Y`Uhth z@5Q_51^*&nqdfls>GkVW?AUDV71pXNWgM!Vdss`J6ZIDg)Lu(wg}2fspGJDmBPpDX?Nc>s)Ovb% z$`iES`fi^h<#5afBJ*eyf~=}Go+}ww;6a|pR%m*^lRqBKx>S#AYpdx}Pw076U#|P^f3-}pe?6Xmu%n-y{sslN*Tt!X z!W0{kI*79F(kTV68Xm71T6V0q`uj8<9XaI;D5`bduznB|g&Nci(#Q7wLvgR3x0-c6 zuH9F#aX%s@e>Ub`6q+xYb4htI9t6uzFa5d`62aSC>P^A=*g1P%u3KE`cI1ktP>br* z4$vmwOi_ONoP;nfwWW95KJ>NgfB5ZMeHG!Rcc5HX%CXKdU>Y*=M7~lHqiky+$W|XgAYXa$ut+hvr~A!{2}tdz^i~?rpu$xnrY4(ZzV9#`_^tspFxrUAN_N 
zze@U=xUQxt!A5oL1vm7F&ijYv;a?(AVR2k1dNf z8#xlYe_$vC?;8nj{k|{CIb8$a`KrgO*>}l_W(alXfK+6L?wP zb`?KR*FT#C%PaFvX<&x*fjGmd#BL4=K4c+db_~PCkse z`%u9yQT`-wC-=(X{tFse{RRF*Jo*3ha_WACuV4!m4!dJjwPX{h_AKHGL8m2xK8B<$ zpZkIC^>==OKiA@tFM?3+k41b(@DWrLP(UT3sM)jBE2!j&&mHAYVRyz4m!N$Uxh~Hm zK?sPE3GnSAdMr%c9(PH$#e~3ix~d7TTui8&#)RDLNHuK|J1|&`4SeN%q+YCbJ<0(m z$J|Nj&{EY)^@5tachY@|&`I|4WS?6~e~+G*b`x%}7@B?yNlp$ypa(*?r%$0vrqgHZ zJ7vwt*=y0RCVTN8sf07`V{n^};4JViq0Ia1B9uF!Sb9kA9UblpI#LhGe=b8Pu7Z470mpo8rw>Z zUx(9V!Q+Re2|AV~{|3dCnVZM^DU*9QX|p-|Zj3P3ro|G~8w!(E+!!E{g7+r?fnm~@ zbCG1@Cn<@ilEMl8n>;xyE5IE561WAGv-sYcqKl~ZEMSXNPdjKp*e)@k z!;!BLzu!7X4`d?}{z83R4Y?h8yTqRzbstW(smp~{8K;u?qzo2!v}TxPH&z5=kUN*+ z{rqa47&2wwm7^lIYd(xKmvsGZrT#Yvk!)&(E6$=`_t4JZ^dFIR^oJ3_d#y|ZSX%@! z$S(2$SBZoNF8w|(Pn7Rqj2h-B^PZ-Kv;glg$e80V%=#n}{UL=C$bPPZM>E{wh&i_! zYLr0KxqAWT%dm6kI=CPILAuFwlZ!;D5X1+ej1<1sFp%Mjl(!rRA2M7?K^{)Rho# z&#X%8KSX`GHs|H9*q4Gn#+Gle!tLAlXoqm(L;h$a2aJVC^;zmDm)c&?E9ng_e#A*Jh z5G3ND6^Te-8&#W;2#9z8&L>EB_q^8`hu);D%%Yq*kFc>ei!nk4%*o1cmrsAgjB@5q zM2=U)aHR$s2{+V8|Hgji4@sCwbJJ84(9nX0O?fen&d6^+*zB3*$>8ctNp8~;+gsx) z8t%UWa6HLVjwuFQ6hXMub*R$9Gr_!LOg2&TL0)+|rUxpyMi0MMIoW`uOz1OBybQKk zGL5|U+GAxKeQvW1nJ>Y9_W+p9*L*WLC#ID;RM=h`os%$=r#Rhm67m~7{B`p{%WUvX zY8whKF&FhrYU@+is1>*eV;WKFwhdFv$@eVllrLF$^ofsLL2ti1_|Zl>C5~L>Xjw$m zn7-{&QIGN}{@A>E6`d>m`0pzzAhxJ9oz5yL61K2J3FRmITO_kAjLh1TDo*v&UzrBB z8xylHz1<9%{VhHnr@-N!!TlvJR^%d96P{TKrlMQIkI zxf*NmPA>d#v(<^Z0sN?AbAIIY0=vhEDQma5PB(z+(6U;KAd4hKIIesOtiX>E8X1V6 zRQjmAcTTJ+`LQdM^mEXtaXAurpDr~->H|U?OuQRA1b-eoG<_DU+BRj7NL6-j7$Jnl zNEi0b^e5hwj*-M2K8jGTh~Z#Om-i4&2HDNj9Xh#$`)G_9VvlGAFw-Q*1lA^6ksqJb z5~1uVg|I~V8lD8L@+Uj76(_BhrK_Yr?)k~;Fa9K)NJU}dYnA$I6@(N*eZP4f2d2^J z%1UOJ-ZP{*P~afPK%)&@bl) z_Cf-pNa3U78VdWV+k&%vt$nF^YR`)%uLi~I$nUV7KPA` ze|Z|LOvr5nl53!MWNkjiVd=`cYM7?!3d~Wwcdtn?l;DqX?1{MIN+cQWcsxN;viN)} zKijyPZqOJB>+&q{7 zb&oVFR4PB>rnKEM)J!<;B<$qipoSn{aHM0k>y0NTiz}xfbEJ7z=tJIMTOtIb2;#s@ zVh2`(7s+;K8+PKMQn=HKaJ98KwLJE4zR~+Y=$-r)GHTlx3{f?U5^2UGSC>J{&=CaC ztFc=Ken<`6?3$bgpjSWds!E+?KF)B-4cg+${E1MX{!U 
zbXZX{rZ7R>rs9i>Bee-T1|t^z6PGV+hTK}tC5V>X81PIU%kx3I9rP^AlC(RDc7z?5&~!5QJYIz4jWqSV?wNE1KiLn#L8uCQ0FY18 z&vjJ|-Ij(H4OVdoHG4|c{=gLJ!TR{U?1AUq`u+G+$o$h$L379J_54%_Fq|eUUpE5u zwnlfe0u5cYMrCoxe8OFdqqpPR@zh1}{QS}Zc(F40zH(D^5(PsMp(4XZqT-E_rvZ*) z+_9EAIuvle9~wMaQH(wUz&zuZd|)l5BH=?W{QR*og;u`UZ2#6-)=o54QxrE7OdiZ# zHneL(t`p8tidXO1o3Y@il;9l!Nv~;e+-DPiT6KhXPb~oA%!uF{uII);P;NFi_ypdN za1Q%Vc2$%C?l1JeUR-c8VdxAvb(sZ}yEEk^K1r*@+#E3?wqIBrbh|C;UQn|;*d9<^ zcd0Soz{>tNC7qJHoiw+)*f%PkBmUEUa2~-d=~#J^ zq2KXTh`(|11*2a-!*R>omONVJV{qd^7LuKI{5>(-Ds*i#>8gZ`erFJh$wIu)wig#N zAu1&zhZEs!{M~n^blLO$V}Q=qzrxTLI@gK4yE6=YllGdJowlbhc*pJZ8wd~f7myWsais<{##rb3E8SbxCKg|35>x(`ozlTLWhJ9sRE}hSy{c2PA4?CA|?15@GgYhU*-= z#ma~u*eF5!aCJz5Si;vYwN`g1#O;LCMF@n2{(kMmoQ}Dyar(R0H;zcETll++0=q!1 zU}SD@(isnC690j|zK@6Y2Mqo7EsdY-69LJTSCn#_JC403MR3N;Hz6W&)ELFtJ9I&r zHb!61BlC+;tL&J*?$w9JH*jc0<1+yDHPajCPj4J>v-G>5!nM7Sl4DoH5d!!udmNFy zK)k?Yb}GY9RlwVTpYGUX$vhTorD?McS#hz->zO8P;`HT&j0w}Nw6K2QF-eb%5aQ9t zsogX|B?}&C^SVu&4pD|@X0?@Z5DAHc34tG1jWvwm_pJP&O6{Ma8Os)hS{L1O^0sb6 z{$sz$r*?yQr%Ef;j7@MM^~;o13?FH~hHbYltBHEe+cC=47Ms(xT$~wt z{eR7K41dF#yenAB7|0*`k2p}WoOtu>kLnF>CeCz;o9K?M&$qHBVLy%h#le!Ih^P6s z&ooO6WPlYhTo=I<(h{B}Y!XMppaaF|xPyfj%7@{-${J2*FK>&}JR&VRw70nDYurn+ zVZwI;0w8cr059vkTK5cfh5+q`^c3Ou8D67xoEOj8x53&3 zd&vYRD`%Acdr`Q#d-<}K0Q~Chi7g@%ac!x?@!d?uWI5uLp22JpsxiO+!;t1g+l3;> z1purC{=ckwV|!Z%OB++Ce)VuQNuFrQHPvKkzAls5 zle5s2-m!4T_%*j|YyR%Dv!P2PFe}MoE4p{(MX-JCyYq2|LZ=H+y+w^7jQTJN+kcc7wJQ2;W~sGvVCumt}f;9t;jivGA}AU3yPq+aYB1EIT~2$QShp@zc@lN`Xz z*QOh*Zm-9Djj>l6s*Nw1FSWb|d=~H+1c5VuFSu|Fx%CxxGsU~?@rMw+ok2E4cX)=~ z8gcfpVjFoc$=dKu*&DTY-$DP>YwvkRZ4KLnSqE^tC@L#*mL z$TkFBBI=N@QgFkrpj_IyA?V0AkX-7u$u)eeemVFg@b=*cUr;_HdSTWmUm`tIct!C} z$25>{Lz%*NF5){rc7~oh4xgYiRV{m$O~wLoHl}XL%!sc;8?hzm4A>jW0;+qY_n8Nt8(y)@}L-~>5 z0e9`~p=#=CmRQug;|)bf0FVgMB_5?_IZ^f3F;j=U7YytIk278VH5vwaZ|JXMN*m}6 zy4^Xj4RzyF+K#dPu87(DhZo`CWQyDHlhJc#WG}|O@nse-mvEFEs9{`>aI_Ti-qOD- zwL6xu7UdAmziYGx>5%!7^qI+v|DzAb^SLp2=wx?8eYNuO3R`d)=w9#{TaZsaE*EGt 
zY&8t(9xTusiIBjMfOItVl$IanbMPWH`$9eIVy1sGwm$Yi<}LNh!xMPYJl7z%i2M^+ za2NiW^4T)bJDKp7=9$YYh)|ydlJAQ7vJv!6RHJaFo>ileb$5xrpUP+J)AKnsJwt8( z!oGlCVn`LK;;7yBNP0FpO8ZG`n~o*}SFBY7qsntyuO^+dGlM#*0xl(}mKrS)1+^p? zX31%as{heHTrS1>O^MGoWipV~ZCsJeFW;m@)~jHA!efK2J^Zk33>$2tLQUkS%XP@4 zou~EE6v;y6-n)I*UA^O?#?F6OrGZMh;h(@mvZY8~zYF0IV&^bod()sSu|t<(a@rV7 z#RJ9CTh!hGtc4AjovI^HDEl^?LZMyCac{w`VA_Y;-mOuy4Zhiuk|n2lgTc_`-_*(R z;=iO91AFTY-5S-K-0!=49j2LKR&N7JzWr4MEf}=ZPHi0%VUCpLXF4N)uivAWQ-%a; znTl1Q~6AgfxopT<^>(30~Tv>$=(H3uc>8@x{(eus178GlHDU$I?}7 zyqU7oWui?pe4MhDNWqS?A+~pF|mgO=&qS|v> z>lqy}RE6R(?gJoWalpUM(Y^ju1sEGVGTdxzqHA@tc<3`i~n>G*37Kh8$Ix8h7A2T&w^rEjD@fR7C z)Tz#2Q9ZWLojx2;V=ZSPTyk4m4=!nsXvn-Q(3b|`{Yaa-V#bR4JrXFP2B_s+s&834Af?BwikjAViBTU2 znbjAA?evwigVsOm#gF_TnU?(vPu7y^>g@(}1P@Ih9c7)&| zQGn|@!J&}^4z{WaPD*PrfKZ?fvi)yI2VHX~XHr_WY}Q0TMKr0y*@5hp0qiWVP#Z?2 zn&|0~w=DAE97LAo!a~RyARJ|e{zM=ZMnFnCr(czKF50$l=ox&VrEHZDQ52402}s#k z)l1b;=%&-J~|HLp?LSucZ9NIWm*_PjAn9PO#ml@ zngp^MFl`W}w9h9S&gFK+JAN}--W?LAHHX=I2K>EIlR^C%w3D!tT~UU6;JE|0TKPI- zr4DtvDGE#YkpEkK$5i#WkPSYA&9cZe9Miz z6MX1hc&CkTKkDAlCzFAi26BI-WkUibaB$fyP<*sBW&J1-`pvK__AfewAfq_~x^ofs zUS6=o`3PSWV}LgGdF8rW2s*ThbLnA+0wZ~ZhHE8oxC(TM@nmIl2c)cE*7O6ZWauGh ziJ*lW_0}l-36FTZb4l0w@pt%mX-E4F)Nxtv2euPIayi&(4Q8I6Ek4c#wuGoaa!VGI)rZ|YHpBiH*p{n!$z6SBkM0j4?T@fjSU{yKu1iq zx2W8jxZIk~FPAf=3f^&Gyd*IH1_NpeN*qTDXWn~x0gWz71ZAJ846t}|B=+_bx@6< zA)N)=+nOQ3eb}Wbz@zcHD6X?8aeVTyZ&kKL?$Yo0eB8q*R9GyNh!-+&HsAK(u|qSF znZ42c^r0vJH{~E9@?+C{vXO^X(};XrM*$J1f6um4Dqipzn}D~C8BaPlFLV010^zd^ z7i0V+C;XM&t+^OGTyXm~tvt@+jv$vly~Z;8hK4xO1@o58Z54e9X<8T>(YXEsz+J;F z6;D**Ps((Yde#tu>5d=TKFX}Wu(BlVd`Ut%N_rwPCWM*_^dApBh$}#T{wf&- zQ64R?cN5OMtD^h!mBiulK*84)TLg*3KIc&CbeS)5ScC{HIWBO&^aRHgfT*sWK?+@G zG#`}iNE!}h;9f7G=Whg-)cg(y(YiYtz52mPG9B|F5o1l452mD`NLp-yAaq{Wth+&! 
zT3{KfK~`W5GN?3~(&o34Yky;hnERw849+>0EjGrzYa+kY`k$?-AXyTKo7bHEHub?EVCAB~6RKIxbP+JSu0-6b}MDQr#)(l~P;WQxVfLg-wqD=|vebzlq4zd;PSL&Dm~~E_ z7V)(@WWAH%0N zY>rb}rl(~{n&7w5Z(ribnE)@~MO*tc(&5mr!V6LKQGuh=F z%Y+m5o!Qryb!0kEq!M3_bI8vIp-{Xa6QMeOGq7oQvZl^n9=uI(&KFz*4-7Hf7P8Z|r{ zRW42Bi#jxemh6g^8743{kQ2|QglP+YOsm(LiGa)=ff?Cz5v^4{zO~9dL$84tcYBnk z8lf0ZcC2EV0TQv;zPS)l-u~-o-KU3w$6_k8n5zzKVnj%qG#Gw@&TUGyt8`|OLAbdh z`5KXT)|4!yN28ktI2A*-hv+5xkmvKey1G|5uQTYDaj|TgK0Y*8n#@e}w7h-LhK!Rs zih1mi5-Baz;;CqM1m%SAQf&t6i84P2>AXo{M6|bxSV0jSLRmR+n($Pwlw8?4y@)|> zdU9$2qzPO>s|E(`J2j+Zbx)wX0cHIRBN=1KV~Uao=!XkN-j1oo7UW^fIDQCJtOj#s zHd3k+o3PV!l5Lq`6=b6kU8__Gq+VYeMlWa(kEVvs_M#qMY*=Hd>O+&D(F3FqSEi$e zt3WY!ZAk;9i79k{9=D?-VPbN+c$jAa_nK)+C`={S8FyJ%T9&Gr=oSNFWadPiSnYyQ3McA{Ik-X;6$41bkNh!j6q>Bnc5uiDueH{98D{K$$zJAs1-EH0eH zcc4gIg)m1f?_&v3*h#+{qK*?f zDz930ZfzzZ5cSmPQgTifdFC6CO$Ud3#n#G#BE*sL85`rShh;)={_|{lGVwE{F@77- z+W|X-9wk!^*OR-`Nd`5_Du`DVoBh+$V?}d|iwg#`>$jskBHtcLEZZW~#%Jut4N5%i zXX3AeQ=*W7X@y4*Iwj1BxUooqcCC7hkm>u00gWoQHprA}DG(kl1@s6}zcngB067E^ zxvUq;kqqd3X(<`Ay;W1vtt?YK5>h(HAu_pK`0k;Im7SH;<5(52pyFDxVD)M|hmWV* zG0otmbGgj)RvWUI=e>y3=>qeCQJ*kJDki}RJ;@rqp)*`2M!g3?PtVoI9wQ10ag@jc z?aV=zyU_!kjar@M9<3Af;HJ^V>gJ^lGQ%zYzGw#?4k86FI%v#^xPT}jSO7o_XsC}t z?4~9Yw<0mbNiLGC?<7%TkH8wZG5Y(1X{6a$w^nq zyHo>ayNlhU+dRv8FEd)K+l{)NH8Xf6P+A^MP_b)gS9oH}gznok`9jqM?tW_r1N725 z)74`*ka_IPn!@LkoYlJGdad9?gESNvbVfc>(~H=)t<(nGK@Y+euq1nx@_qp~E5!Pa zh9V~yXCTc{=?ql$5qgW@mT@MOggE!^kupoat@wH#7 zy&lfEil=-xni2x>KP^t(uJ^WyVRvTKMnmZyE{us9cG!&Ehnebupve@iMhrA9*!SHTy6B38i^ z&KXJuXdRA+_#z*3hxEWWL7w6EJx;u!Poq?vPX;G1U5FDOy6y#`7xw>o78AwV>gIT83`}c3oz4T)uWRN+mYgC@!ORYBn2~DC=>|%guUVaj2;nm18bj|+g-smA z&DZqOIFU>^vT;0{o~RQ(>b=&TUwUc(^t#V?P&t~u6+;p7kH>5%C{fspL^Jtg$UedY z#9jS3TC)xYqdpNNH_UpCMkCY^zUP$R>Fs!MHfY1O_H6o{VC8InL(k3(xdk)fmpj?e zd5cML$L|mwf}q#Ro@KatpwCCcytd?acuz|ovmBbm!6n3q@lnCufXP4=CR#=e9iZG`v&695`9ur!xMI$or$Qh z7s1f(iQUd_nD^T@u4HpaY>CY|Zx?MNqgCeHE9dGLmKDCQHnAA{=FUOgK(1S);|9zZ zBg>BSm6S%YfEs!Lq zJLC#?1-@O;oC@Oa3PkEH2h+yHQ;+ 
z;{h7gNXeEG!P_@`eSbCeN5F@G1va#*_68etH}HrsxY6$KW-X?N9hN7yupe_TTD8<64+5{Ydx;70|oaNV@FaG$kI|ztR`yw+NxnDopHohV}lpxjo zDNTrRp(sDi5Vn5uC|Sz<8ZraWD0=mmpz_x;)m#Hg_LcZAtZa7lf_8Z~N}IC-yf#dW z(^C}ir<iFr>vQi_tZte-|bI8l&CT6|m(w3!tE^Q@>^)-1$O{&Ma9h7&Q49AMs z*RK_&dT;{XZVcLil$Yfu8!mlp--9&QbYsE%rE}rr09eCaM(c<>Es;?LQ_Z zM{+Zyi-#+m&#=h|RW(sa z%Ctz;Yyt5X9^<2|Et+Ht>x?G+!SN(UXjefB^umMZnJ9{7S@lt?ne1_axik}ExFYp_ zbKn5AA){HN`;#NNtJ>4N+pCF_h3&z`s6{;M6JzzWxYe7@62=zowPGck=aj~H!E&xNY$vPWA^qUy)XFHSoE9KGWny5>!(?>UKuup5X3vzLR)0EthEN`fYvwa=LMrhCF7R%(oc^)W5eh2hhJw!=}cv$<2$us zE#7U?{xiSHIjj2s@Yt3-vg>vL<)u%!LGArzZ(-K#CJ=k@T6^mu(u3fvgA-W9RjOp0 zH&3yW!bwbtF!t%6$5BWbIv$;18^CEi9jnbD#%H242zgx|4SaWGWxCj<#9*zm8q5(X zKs+C+h@@vzh;!@xK{2u6@#x`-_4eF6();J2`YN5Y(&!jN2FZz=%VqHSIoM57Rb<^~ zy$nrzec|*>0KNOyj<4jGk>%n>#FF8lVo_a+dQF6IPi}tH{9`n~@ZDUX&dl?KhYGa| zbQoXw3tf!eALIT|V?6%@eAtm$8CaK33DIveB7xqvIDXaecfZ4b+^f8Y50~Suw|UMs zxn>EjNt&6#P*Xva>w<0@9!o-@#CudmJUsM0M?$ioe#o4|0_7T)-$^l+^Yua=kI*x% zbBqjJIHC}-r*-hzKKTnoP8aW-HV)F7c7D$%4qjQ$3+G0o7^P#p&Gr+VDsl17fzjC8 zf}Pse-(cBXBl`$Hc){P`TS-Z!OVfLFVvATZ3ua=2SkWh`-InBz*2aC=WbPcBhFaIQ zuwFYYFWlhsDwS!;c%jcJz= zggo=X8}H_K6j~YZXpt0fxGY)mFXt#->t&quFmltSgwd$On8#d-Tzr5tgs-Hk9W2Qz z3x6mfucRRx~r|s$;3QCnoPIu0X(;*7?Lj3$M&&V!HUAr_ya3n?{xD zeiIxx3B-|Imaz^(C*dXJm=DqO(J;ipFGOivkd}VA6?*7CST`r&K5b5k$xTp;QAgJ3 z8akDh37Z)Q%b#7Cx7UM2Kj4KZ;^uc}U??Jx0-$7IC=d{Rj2F%<>lPhIz(?x@^adq? zGGUy~AxH0h|A^s9s2;Z(i_z9ke41iCz zadwG1M^^0lul#jZ^879Rn;X0wiKGBKW7EX-o5fpu_QUx@qac~cHttwbVldA6v<&t! 
zl;tz3jO5&*PvJ;k1Ho#p?-nq*7$3OTciXZTkUy(H@EG(reosOr%-Z6&!|&3fpHRRg z)ZEA`!GHgC5|X)aXNm*>s8Rs{%>Vk=iHo6;jp@JRNoq~l*)(K!!mM87Ew~3Ig^k z>@ZE*AFQEN)M?!>?*;i+Sf79#CmkZ;j`~uqM;r^fBuCcy`Sp24ZO_P!*JD>}U7qyq zN{5ymHLc{kJLOB5`*uj}P8?~3gos;ljmRXVG$4g>nxLie%7`nt)DcS?>KM_-tvk;q{9|7Wt z!%3KktbB}|%41Qdh%C~R-D%P`=Nuh>Zj9IL36T#KE4^=QkHw@yQLmAgO+f+L(v24l zK&lmioxNWdmVy}se$WGQJ;+9Bzgp4N?;LCNEWO(;_sRL1i?!8qP`T9ifUT=cY}jJO zIfS%@4dy@Gax-SajRwqS2my^v2QId)ns3-5#6Hyqg+_&j%6%o_vQU{!Y!riKxU1ZB zv*KJQTy>@FOlIL0d*J?L{CNABlN&)cA~NS{@F`f#-FtGU8Verlq;8 z=sO!%`_;E-v9tK0I&UxS^C?ydLjjvLU&=$#^Y@7tAkxRtx0n3b!YPoE-`2s)`k`@ILg=I!GL ze;zG%om%70?~Z z_#tNR5q35ozaEQ`@9Qw9{-VAHuL`TFc}0u4hLg8zBfnY$OItZnUz_>4tM4}(c!+LV z8|(X6kk9%*H}Pf&x;Q*}6ONv_Aaa8m)`On}C6mSaK`X`nn@%~za=I(g_mdXxJa?=I z4HeV{HlX-8@HA2@nVSGzfe(#ma6!K~-JyKxyXGixW~|||8@Mz~H37-@bf(9ao*SCVI(F`SJ?B6QZRv!S0wW)!ry#p`zUgX!S-`8_s6)wLEC6AzGU=ZppvJ{fWX;e6U{2xb zfqJ>=_y-4_Ul^mJ9Bkn+br?&niW4^ry5khLR*y4@Cp|#NCXc-S9OK?3x3G3-zS1jj zubo5BpK#lMdxP7fuCXgoT9T~J@Em1M?9JpK-V%ymr9N2FnedVXa_gbf);Aei3}MUi zS|`LGI=yY6qpe_>aIxu~EqDOo^#B8SRE`Rw7C_QJ@4YHD!z(@?H8Y z2?CHB;7C|c@a$(vpl~=tWYcv|z0@CWrBQ53RQh8Lc`8rkjgeG}Wa@(k_=?WZ#1G=N9wF?fEERJoKrBZqlgdo^|AZ&>=+DCiB zmaf`CK?nwC-CUk#H)F*q1H9%W;^Lw}evCa|2JnGTMUcc}^N5j_f`6hUEw|S%5kG>C zp6y;`*I$OXKO`Q`s63V+le#ubfdk?=fvP+u7o} zO^XI=;b_w*8?EiA49}8GN`d^G$DYThwG6AF04-{*75C~`G&`=tv6_cPGJqx*;jnaD zZ5^G>77jI=#&c#tB$x<)?d7YS`KTy5Y17#c_~`ROv!Meft^+Fq3HoFyQ)edXS3+Rm zK%tv>Qii%Xfxdu&13r0piU@&%!P#xhct%MBh$MJC4W2re4G{GrGLU>ZNQo7m3fl)q z|Ne&ueey1E8!Wule~AI z0=VTbed;?-rFu}$z>YNwvJbzrsViv^ID>oFoz{3-R0Tb{dmJ0aK@rqKW}>XzH`$mGuC~N7s=$NF%jG zxhlp3nAV8sNNo0bL_v9B15uhwU4!~4tDO_xIJT=q1Y!Ba!}5LL*A2D<@u^=4bXSBb z*~Aq<*>f{sp+icw{9s8p1N)=H~911z$#r~cMZAJ?OE!6>XqPYOr!$@d(*>_dc zPvCt3bVtN~IEwX0Yu{Y2XLl^o-x1>U=cet>rldM7R%IF~_fH(d_sAIe4!L%b8RQTq$Zq6>dE0(7BT63ecGZxfMo4F1>!Du$+s3J2aw>7hSknv5`0V2t4 z3TCFe3d2;#vnT?g+;!&So#yWREKUuEO>}N94|&Pv$aSD|?C&hnjeOwb%4uEI4Dc`f zgF0UC_IA4ir?2%J{ihHdoRkxDBzkIViX~i}G#PBa^~w**O2%8RT7Ex@bSP;|(GYSL 
zxmQq`kOn2b2mxyAr-Hl-!?qRue#aj0MC1#8BzF2Gtp?$G!*u~ulKJKxG+>eMpQ2rH z5!jI;-5_}5$IC^ezI;_U^En-BnB(#AACme)gFV7~IqcouD3eftrioD^Fn#fki>!F& zXR`}OqQiYGpYfh-gQ+v{jTVZxe^DP8M)vg1>1y(E;_*FNc^=xXkOn5Uod}CJ`KzM0{BgvScb<(4-60eg0pLP5&?8gAi%{k$nKvm#7 zGliGJ*P z<1bjccUHkJLt48mFClHvhUkC~?RNc)G}64NVs$BYxgSLuH`;ZcdnL zxZhA(S`Um_Sv06!YPsfgO;RO7D?^Mdl$bSqD$_E-fVW0r(Ilt+w$sUo5dUh73-2c< zRUOhK`xHZ3q4WY5Qx?qHF<%CUC+cTBX%(piUzMIatKF6Ru#_5br6x14`gKN^vgnEB zDyrp+Ds=3N)K!&_nSabvcG~puO5}w2!aO$>sdI1} z*kij(J=IuJrQ<7=*ApBn!?@E27 z{M89nMkU@_e$-Qo#e&{M{E1LrPalhzDoXIrAvl@N@K#!0-h$NwZwaH%_5Srt2BEF~ z_2YYMpRs81GQyy>bog|HbOy;r-x77Bb;}RF+fbN8qqQssFqlR)|S3|GC?b4}<<#O-9KB*D;Ni!&}2+W;rgywOV-NBLr?349dtldqf}lThq~?2 zg`N4+OUTQHviAdYiTKrUb_ZdEhaNBqM8B}I$?s!9Iq&7sKTEaI6UQxMVY(37L#0H^ zIo*4q30Nuo6bfb`wYe-dkCDWGaMhjSXsapKfY@Ua756nGSnU{TI)71n)ld{@8ilH8 zO0pL%2WP%>FF49}LlNVrZ)l^0jKEiTawmqZ5y=!9%_XJyY^#>*f^6T(@@0tjGi9^` z4UdMEPd}~B_6O0AYNm8EMGfv1fHHC2sB<$b-ovxM5fs!BsY|l-BB66PL~?j7cAh++ z548)x(dGB^n!1CoI2Ff*Y$B;c#7}_|sy)(02OS8QBkggp8vZn#eQdvvDkJwQTeoSp zI`(+p-O6#Z27E1wiLmXkaHW!4ScCeN|z>BcXRlh&fxs6BL*etnRi4o0@w}`aFd0d0o#_ zj@sz8J+rO?B=U#2zk4|4D!Xlc8JPJoD6v|Ocm~TlHVvoNG01m(+~#iKmH_*!IG7vZ z_<@Mlq{JApO?m;b=y(6ld*-RxUM zW*|caD@lm$S2)cjoEU|RR&#TGB*9+oN!4VN_{qP%bKv-E*i1Xcn%aLC$n;Eq`pCU9v4=(_hObq_jT82t3Qi zWOcQDQSo#A!^)9)&Y1b)Br(NbPUbTYIa;)T!Psy3XqG_!%8N5pvS$UPcIv4~Sh{R- zww?2-!=Qn4-|Cz!6qF(z&8_B8!c6x$tH0x}CNrO3AJfh2K_ACRzMggDN zjsW_ydULreNTQy{&w}YESIY-2_53*U>4YsNpb#AlWrwjoAfdVIlAlJlp|5>UHlzvYdJnDcpAc`3L$|kFXRS*$eBE_5|@@y)0h}S zktbO*1dRDfb@>(9?slzeb=$fXcPrc$?;D$rK}itDLq5Bz_vi}U)mxzEph1bmh0`kM z+CY=QdtB8z{^$qBAAJH*`PkYs|FxCD0=E{6fVGiC)!;k(T+AidXu*vbn(py)KrLaK zo}(jTKL?HJ3J4}s%Qq92_z5M)Q2D5(=flC&lGs8S{-iC>zsclv+V!2R%>3p_rrZ(Z z{DHyx z5VX}G<*G3s4>p?box18TExcjfC*uEmN2f%}t4}Kq0{(k}z zsnObT+}1*WZQ)0?WRYHKHn-Nspp99N9p_2&0utY;{H?7-h9-lHVckiaF!nk-jGJ&{ zV3?6-96Vo&(oGcPby5Tf?F+Lc`0P9^`Wfu)=Uyv#N-71xo9T)7yX8L1f8r%v6ErY% zq;~rH*pj#_;6yv2{q1pa?*r8LrixWA5>*!~m4Vc)>E~YJ&JL)1_twmCE#69WZR#Si 
zEVdojzNuBo<1nDvSye>eETLsY?;Kp6ZE?u4QA~?}z&UZ3@}9XED=L7p@F}J`1|eV% zU3%R}syfxF=&(poYT|$s0f?7Qrs|dVN6CFZP{ElDplS}6ni3oxQrh)%!RlGG$h8wZ z8h10^-6vn$CuH;1S2j5k6fiyA_rpP!2J*gh=)QzTg+jKZ)e=%H$CFY*VYRVLY6H+~ zQgfVH0S}vap?Gf7`QE@#e@s$%2BWgZ%{Y4Rf^}o9bpWC2kSn3 zC=ViU7Uo~aTJOXWEPWWY?oH-i|N1Ow<%SL*RukTiFqIvN|vWGMZvQdwLnmthwQo5k6o z(4Am%QRsx&mQC&dqOnyDsh#{_Z|{>|v-%(p`q^~LbLhWx_l|QI>=v$DqR*^b!2Lrp zo%09d3u1chg7;4jlj8yg(Xm1&Ao1om+u8B+Cu$&qr3IRIkP#n;xK=xPs}6ej^kNTz z&}@)<#$h=TmFDp$gMMP)T;iD*a~1F%>X_pCSZ;*x9v@!jbO<;QnSh)G9TG(#6;zlR zTYTom74VM%BWP;X3B$spQnGO~^blI`xJD^WY$ys-D$jccXJT%5yHY?|>w-S(OK*K( z8z!86Dw?Oo!>sa6b|ddrLLOEKi!SMc;ZfU8#dHTz8_U{{xZQ|Dmn+!9a;t50iLJ+T zZ}9_!#J)~mp?Xh-aRxlZA~r|;W2n8Zl@?ggUgXfqg4QPda_&=JqPkomOvL4o?AOO~$l zLsv$^79Cz1c%L!gVal%kLrHE_=W36-RGcy7W%obb;(HR*<1fy{oolpebcZu@ z#PXW1+7a0Myxx@^{<%0KB2g?CSMP!UWM!?W01k(r!oZY8mPlcWJNiQ7@XzvT%^B~F z0x2;dm}OQ!83SQk7vWhPZRRWvghI7kcCajtS?Co-^>3rfQN&Og+&VTZ)Iv93>EN#S z4~p&|v&}a&t~~33yvK@JQmqD+8ld3|?nAgUQlQIXP7ryV^mO55O2R96EZuS_SOZ@z zOQ1POne_Al!1Z5P_S$I9cqUr01 zL$gwsA~~eC{_6C|K(N7J;ATAF^~(fd)G7q(4tWkI6vK5^@~1OoiO?g#F0%6YGqj-K zT%qKjXJ|0jI!*1`HCF5K#v62AXN*@v=f+(;#~&ZF756D0$Zi#rvzn>Vu_-9vqO+Zh3?Pig<>Dmp09}+0!^WU8f%K za4jzN_6WkejEpq zZ~S2Mr3!9~B)FRt%2Bng%N(C;>93=W-iD?us&`Y-{|vG|Z^AoYRWw}#eVWjWb?O-! 
z92*5{mRU4hwUMn^o1J}RRp~DCbMs5!tTw6B`|F|f^nDCN;vABWR?3U)$m!^odaX2Z zg`b%@Zi*|7bWOEjck zC`(XgJh4(-D3)yC5(1GksFVWEqSH>m>>IHCBqjPO(%Uf)=vg;d>fY%a*iE#}@*vkZ z8NFMcepWfJKCnCD^#8D!_U^c?UtNpkGZQr<3RqMY@vr0v$U7i3Jw#_V-NgO*swZh4 zz=}?MX)pZ2g?8$c;PHE`i*dzDc=1LNq*3VKJ8)2qDUP9Ddf97D)?3#d_CH0}ibSDq zEC5UCy?8g)zv;V<(szmz6`ScjkIMP=tW&=A!+b;>My+3Ci$YW4Br}gnGrfU<+T-BO z{8_@wYLa$lT?V%pT%|-&_CFOAfDK~u;_M{-R^vPR*HCsGOpk;w*>CwshIMz=FwE;pw)0feq5+zJGKC@?*wKG+arA%bwjt_1^Y z$MTXde$x@A*zh9k?2_y1;_nc!K{U40_V+(uo08bDePdcNgyxNLR!#&8?wqls7d6IM z2!0mQG*be`0@q9mIU7f6k{CNJIF{B4&DbvwX4LN5t$EAs8Q8A^n@axlRw=E~@ZFzF zajXBcUCto;b1(lNzqJqg{};dYKdTrwfRp+E3BNT>UEgtw0}Y*;`+L0YX@kwS1q16c zVzeKp1Xs!JDcKFSzFqXtB-0RVwFI>#`6S6r#d2e6n=qO0788E{DiHV`78K(8QD)W* z)Sd;K;@C9#EPu)eu`es1LHpkKbxm-W@cyMN4S@IN4{!J!sD;tS1znxU2F-lOR#+rY zZxW`!Dp~DF@8EjYGm-~vuK+dZ3c5HQ^C>Qj#t2=o|Ne?WL@wsyNC8ZHPhz4vLC5SY z4Tw@C;mDiK5q67KB<0EZn zn&Lns#bvRX+n#4ToIlsAW8p43+zcO`=|(c|ro4EB*YBnA)W+ku6{Iw^iZlaU*bSx_ z1Ldqkd23~Ckl)-dqeGvfW$zMPt`q0Q)lc2!VBw-S*zwb0g|?%8AApYmGZ!P9k15fO znfXdR_t7yH{HLJ%?4cZ-QG8BSxFqdNS{3&PKH%xn87-P7A-M1kT%bPYGU_~64fMyo zWovCr`D|izsq(SKYumi0?OLk5Qs9OBRj%Z#@@G?%%d3Xy(i!n_?E*^sM2?kI$lmQ^ z5rQU#+nft_xhQh@)j2QgGHq)vo32Xj z5gupE0T=8L5lf}~+Hg#!f}-_>XFMOnW;N!ykA zY|ID70ykhcDDQ2}Kil{AZC=yR62%Kz2CbMbX=5#B%Nv=wyXl)ttka3E>`?bDXyu#e zN_KpPot@^Ky`5MZ%Z8b_?0N;yPTeCXH5c6y60)f{=l}lv&y_>DnFN=ma9QCt7@Xg4 z^Qpy^BF!Zyvl_sYYNi}9-lALK;b9ZEgDak~$OD4=MpNwdrGYjwy8C@=T-^FI6vx*D zo$w-B#0J~Ni{0pRWVVcR6${)!l%cVS-2k74dFHKeHK;4a3fP^h_wxqDjrjh7pXTBo z)vw=1B~D{rX^jJ%#6uNmp`mea8d!!U4Rm^5^UHXT|pdfa5OGSs;qkRk?J3^eAt2OPX{6|?+Te{ZPwC0;$8QX(Pw;Q z-cwJ#ZU~eB*6{3RW8r90!+|*JJ6t=A2K-EW1Q>lZKRSkPre08m2nW=&*JM>;)ks(N zrTq1zd8c`?b#ABip?}(r(d-yXgPW|%e$?0}Ug==vjdl3!l9GG6j^Kiv@zf2A==7T` z-vNd*jDsdS0dPG0h}4)LkpFj6rzne~bos9w0>c9V5&YLp-T&Cb~ef2D1 zg38tm;JKrW#wP#xz8CxFoR9S>@M?CDA~;rT$*{FW`)FW=k4X7b!6(K7BiQ1SuL3 z{K;jOanN?z^^~*Mc#YVdC)U}S={7J*Kw6chY$bGRbw}VabEvJfGwD^O3|wmC4%gx~ zP%F(P-d}*p&{3V3bcGvv0y&%18?%!W5+?p8OjWV_=K8%c>DHz^q3ihvzMr@xRi{jm 
z_wtOxGCD4)cSA{Rwr8!)El707S_TaX{e$5@eKk zl!`0cTOli=3=|Ax44j8zo8Xe8iz7i~%!w=^;Om%ZQmYHC#4Jka*h5dUsfV;T2E6kaQPc`aap#e#|kisFRCd!_JhI z+Be(;RfjX14@}oeM2ycMDY*n`Fzb|#9dwcbYh#M8dP7}&y0)~H-^olhm*y)48Qr*+ z87Kp6$hSujY<^0#sdM;#Ux)PzJmt?V>!^^vHq2h~`#7r4$gJL8sov9C+u~dUj&V5@ zNeT@QF*LPIEK2#OHa(VLW^^lPEZ}JRGF8Udv(RVyFh7BxQk6q)x{HwI7LJtB#I+~N zXU{nD(&k1Z0897( z(t6M)cOhM49ObQfDe6`ZX1n0MBRd+Ez9x>K3VtkbK3**)1`*2c~y z3=kqWMn(>}ieKR?ck*fcV8h&)L%`N{VJzopaS?)Do`U%YI;*21XXXI1SsbXkcdC_C z|Nf%9E8i-?PLI9i%}Y6@k`s#KywbB(CoY8~F^@X0`=Zr3FYYi(@%~(rTffpO_dzGW zeU0KvWb^m}Yo<>D|NEJbuO2!?Z|ip{QhR2Ev3%@_yG7E==hE#iF*k?Di z%#mVq`H*0p;>w=)4_D~7^R~MET|+pweupwA$Fn;vhHazYL+?ss+novUG5Dnxrt!J! z+!b3CG%Lpyf!{b=kYOp8O9Th2CfAmrmq|_7uK1p~ou-c~7YG5(h@wH_J%1Z-ukVf2 z>&CsG$!#Bz!!?l@p`>Vqg}ay$oK4SU1$msrC%JZH?wmYp(V!`>iU;yu{%*YvwL59{ zRhg`H2dghD-&(xN@X4P?kc@eAS;CQvLbuo9(*7+O%*(5Ngb=9GYOoav&V3r&$^gN9kV$v!5FRV5E%O(m^v zu^3CYK1!!CZrH1-^rKLs)26(KYX_^O0{2k>Mbb=4IdYL7StoR3D~zcUI(-V{fBE`z zllCDpGK8mWc(IY6VIIHvK6>!s7AWrDhQd+aO)wr!=oTm;q0))=p^+~licwIB;1qY4 zrs{yvxe6<(&x<$8Vf1UZZi>;$XuMnMSo9-MTXiVVsF{Huj|7s-uR_OQ1uUA-IOQ7D zdFGzxWXi=*O&M`zftJA%k(lPKc-QS7hfq>>9!tqUPHjKl?d)7}{a5Hx-D z!V0H-RT;`9)Wyujhvo)H*s7?kpLl>EbwV=XRT3Y3?SYFLz=*z}j-|Q9rZ<7Cla<-v zvBIZbyb#h+F4!M`*@UR|FY^oeb7A?1=_wbflne(aftW~A zztbW}7%@2@p&h0qo^a3w&@jPX*yNmggNi*tk?z&u>NxnZd_Dik6y?jrEvfhwTP30g zqNk>gC^5{W$NjqXZVa>t59hBqDaCzF+z1gQgE#zG&1Aq6kGk}``HHLxOX3SjLX;YK zjrd-fR1yrNtgghhB<}}?z%T{4T}J{xfzO9NcmO@()DYwN8F_4SvZlQs@o*XKoE)gE zI~XgOV)_QsFJUg}V)Bb#hSNY!^?(KVL@kCYPDYIv3#9mW zOg^14aeDy}T-0JJ-a}Ztz-QON?LaE2SJ`R6Je+!vku;+E=S$YN)Bz?rw+%#+5m<)F zU+|1b_6$SVrpz<$;~3v>$dYVZ`cv#F!)HM)>Vd^VIf5vk?&bICcO*<*v!4#KFx_RTE zlFd`YneFr;44;R3iOK@V{J%y6wKU_Sap9fM*z&p!4h;6tl6v4Q>bF49YhjZDd=D#s zMLQ0IxG#7ebHgiIz$j*L-rZ|nEN;uN;g_z-0G+hKh}pJAUIA_`kGoBJ2EcgoV@+(+NSBqrs~qWg2E(d<#ty^>@yYDU@9VeI^(S&>Dw6HqS z-Dj2J7McTX|*4vm~VCx})CVYZMji+BOQQn}1x4VHuap`oBHs-2jT zU^$}{ENb7rS}1Sk29LEpy`appX>MDyx!#2r*LWGFu=F(@;`TS4oAWO)JJ$nuNc-}4 z6!b~*nfB^ULT0ELkc%dW*vh~g;}C=2Qvd#vSBJMCUw)yNy+Esy)cN>z`d2@^iYKqO 
zvz#|mwBs49=4rIk24o27!VP5M(bN5DPbe*_mj!2ny;FyKu{ z%skltX{=Nvf0-M^aE_U~jWi4fc##Q$XIFOU(a!0Js?fBPzysa{7mMvcDodoe|GC`u zUX!$5G*v>gfUS8pA}v6lsQM{z}%6 zf0{t^l*vLLi`KZ4O^oa`(m34ACtPcpd{$^v^#RV_W5V>b%!O<-pV0HPKM{5Xndl+)5G*@wS}S zonbA-Y}yn`#t)myg-tc1v3B@n`-2@XU|FpT(iYx^ciuGqQ1F{B$S|mkQC0z*6YTPa zG}{nffq9dy%^<0k2)~pruoWMq>s0Bb8p+aCm+pBTW_b?>O;itS$(ARc^Un*7-Mqx9 z;A@@2~?W|*moJ^9fV7vURO}`9Cz>KG5 z#eZ|>&z9D9`*>&8;I#7ht28TiOVBW9RO7-!W>+cPetk2rVsLF~RF}bk?y3W6jnt$a zepi#1FYKh>W12tQH8iHwrE>hfeS94#$wWt)V_i0C$-DP5p1Gt4apbzW{aOHTh%zmZ!X< ztG1fW^zr%HjEWGY6-OVo(z4Oiws9nCU%6S^ye7_9rVe(U5kh^zp`*+TE9QO$Blz?G zh#~p<7a^C9X$5%)3j~Bi3j~DwU-wD>%>?q_+oYNQIwx*K$H#SR6#0ku6HU{L1&kN& zSTfsUeGzp<#YRDIBF)T)1qa@ZEdF?f2oOC2C2tRu4Hwc=?d%E3uycs~eH5eG0VLW+=_}XJ2I{H4A?omF;C7T|r z<6uRHpg0N$_gGV&q(Bo*LsiSLe{0b|(uy40KvSKWo2$K}BdXktZE|WwxDq2O#IuQW zjPh)354g$YJ%Y|$Br(}<;}K;z_YR6aFatafG=jQ^7$ALKX~v{^GJYjLL7QJQDt0_| z8J-STPNt_uUf@V8sT|IlxMl!U`3oBwVQ%7@k0^kbuOKfWXtZo&WNQf#^LXiCW8+|A zG&|TC+;^(QF$g=AEUtoIR)^bxM1o zc}vV&F6?`sSJLSg83y|Jhnq^&L^5{c$*#jb@4jRuC;GhC- zL^_cXA;ZipJU)i0?p&nDSTu5d`}5G;+~6)>K7kb86$}<~O28-&)h*R^-@858N`CgK zQT;e7hGs{Dc&=Jk@CdY_TK&0#gSRk+85iUZ!r0>%dGAy8-vb~1dl<-cYlWPmJ7=gH zQkqWcKl=qvNbqHQ0sWrc>Kif%pV|Z?tU9-U#uK z<;XLEUfxS@(8i5ThN*O{lIh+1>f*3G(>+GNtaM9&)CQnKC^NZ~GCl}P0KEeN%gx;X z_H9>DXdt7W65(bg3CIji>-Xz!(3Ii&OiLBjdY4+FPZ2;}WqpIa#rxV$nbA-)Lm<{+ z>H4^xN-sr zoZSjUq7{uc*TUzuDa#u#7v)zdBU-TSRVJ)1l(A3gbTO+FsUz9R{cSnFq&?TLDZ}8G zf2Bg-si@KXJzk_-A_^Uluu$D}Va(3CX{Vp+{Q8`jbHb1gv%2Z1KkuH^wMFjYO+E zIJYoTnqTphd!M|RUm9KAINg@gh5VYg>7eUQ?Kc2400R(?g^jH5=e6w9`8zHDDNf<9 zLLXBE5aHJuTlUu}ZpR^Zhp{|6(1MJ2cQ-DwkWW&|Tr7GhZP|ykf}K{&Y)UpOSY*TG z&;Hr3hhO>CH!MF8LR#t$z~xFa0oJvk%JZtQ;>*a&|2UQo_^(CiScteLJI>rBP2?TD z9M=YhZBm)tg+P3u*uM|WWcD{mb|{cut&f75ik+`Evk7O7|FXylJB0B-(}6q3Yg9sX z_QvlETH4ZThLxdAE&f3n?!8RJ^&2{bVg$?ngkar!ZdurTW|wxMII)eX5pyNCUFS&= zIMp7C%R_y3(AG5R5yRd%{YC8K3tNtQSS9=Ve6>b!Gwv~PVDYM-C&|f+YRkzB7<5{m zFhvtLMvZ%zpT~7)tgkR7PWevVX6fX~Om* z{=$8Zn5Gq$UQMN5&sI1b7cHnt6;h-^mMQ6dbQB#BhC5vp$>l3ZDwP7AUZF9N&tx4> 
zc7Ti)iNz*_1MSNb65(0y6T8~jG1f@`?B6QL)NUkYF2Sh9Hy(z|7c1s*@3h_L-xwIX z;if%wtBs=9a-SncHlxFTP1jN=+oXiRGesBS+YgBw;i*>}ZV8_qlHK=G#=&BhT*V=0 z>N6LM!YTsysCONNwD(xxkye|S9}%ZuYt6sYF_LCp5*4wrt!0*FwHU0qt77`^ zH6h#lRc4LD-@?%5Pp;|H+#P^9#!AcweH*eoA+?aqpIty*p_8oE_$5 zvfSdz9fvFSmAL|{io1Phb;bJDnp!n`(2GJY9KAqp$ingz4Nd>2_*}k0@be6@Q%M9&B3o`?Dy#=vHccxPDI|J81@;Bb+1-N zG_R7AI30)qN750s*f*c)Ljzvz3&W*;lm34Mi)9ew*43bVq?!c-9AsZR+Y zgBjo!86J>k-N9cZn$P%jdes}@9vX)&(9&^)$&VLrcH|7fBy7Sk`9-wOwaiRC6 z+#1mFu%04A97lpy!+bUPrMzIq+YHnx!!9JGT`CbVCnn-e9GK2gH2_V>Ylj4fZX}1P zF*e>CLdGAuC9$KYu9*mvGcBux%^1{UNNnt&3%J}!9Kiiw<401vVc+_L&x`XUOY;h+ z7%TAz6bLiMNaEaH zx?b26O!i^t>eCl)B6Sf93{_X5VTTQnKKK|$YpRQ`s|ET8Skk%aRS#*4EskK{fkv`W zehu*2ToG)-A50sah2$nSjvz> zxgSZ3{OBR0`~Gl3Cpl z-ioK(?#cck&hfAC#>x8Hncy$P2;j$`{~F)k`z-fQSh8!|;;Ti{iUvzxcm4QBIIZ_ z5BTbU+;TnG@morG!tsc5faXHdaQXb7OyyO5-ChO%@=dY+iFdI7>mK4inWq2igX$G^ z`F{s3NIg%QaK|C!fu;Dxl;{kFR4%e?i6FApNNC_qBoWIb#|%APvC{zs8;Fz1HyrW~ z2f}*JyR&0koV=Dy8TNHvEv!y{?q4we8I6_5DoK6#giL4XyIAY8jEs*j0b_;*vC7NF7b3rRjy%Tns}XSW ztNlCiIGvEek9b-d*DX+6>Wo}_>Y?l#VH%?}kw$E5*Bx4i_HvW?MmC?)#BMY_+>y3C zZepA9*h_v0Bz3udtJZKg{@}GH_s;l+{kzl9Ra308lRQ`s##c(qM6x^dKP0>Zj`wCIi z5Sq(zYHx&XWa@|**&W-BNBsaAYA6e=IB};f*U|2?p+r7g_EFrKJ+i1Q$c1W+4Uo0b z)#j6bp!ZyE{2R9&rroYQD7b$!#MTJYfbLds&g7l&^8NF`>)G6^J875_;I+$lppn-> zblLfZ5sU2Uumszm*j0y9!5Gr45k$deg(d=7lHK}ngl!qt8jL8h7=W^6PPuDGWUz{k#sPMk5%CkLO5GF%t| zHnvs(U_U?afCbcFzxz=@@J+XHnGsVWo!zOeIv9-;73sh<>u+$h@CFDI<8N1$Dy;lBG0(?H-bHW@WyiYeb7A zo|lHO`@HOkt3avLT9olS9gRchcg>13=Uck!Fl40J{W>qwS8(*7ywchgus@oRH`gyx z9!$K?|Mp!d{S-konU5h@=sHjR`Dda8WAOJmK=CP2AZ9J;#biOwe_T@jRnWU&z$h0anu90%$?f19>4|oGYK2dz>F3()fm;FD`#v8`(-SIyHmC?N`)$Lr&R$#@8MIx&%!AuL@ocUn_5_53_=w`QM zXr&Sf73J8XJwHil0~5B|n^{Jb7(V{jt_aIyOD6Rdi6;~LgTt8-#QQFlFd-=OOCQT! 
z?CpEB-ve+qSw#?C5>Pbhi+%nUAOS@jdeZIpd;R0!aDOWX-XvAl%W-5{fh*{iA*6$G z$GOP|%Ft+G^+YNsWt=EfL&LY+b4oFZ;k?etmnT`GEzk@XRKHmeGVO!bRy7D`=Wbqw#&h>Vm)=RT0~G7+-+Iuj;H*BoPO~TV>;u(@lN-^IVO9ed`(fhPEYnQw z+1m6lP?>1j;FqLmusJoJ)a^ZEHq90Ci3$83Nl+WlgCQ?vfZ65ZC0%zB~~*#;h0hcu&+ z1I(&c%3`q7BEdXI>Zox43uo(ahP8%bBt;*Pb;z=p?4_D~?bfS8nbgrTfGz={wODEy zQraA>4v@G3d?6V+AMqLqnwE4`B(fy2uRe9FF;$Y3HfXV0i{*G!mW)t-41oTHg9rhl zqb-E}-C`tC9{#PfZ2#6ujB|+wfxeVp)?ySY-`EUQ6MJ(qL8bC%vw@6RL_3A-o;7w< z>_`grRl`I*8Aja_TAh^q(j#>+e+m~KQ4BHfunscVHx9f7YC^dnwg>Jhp3T{wG$s@R zi<^Iwhx1l$!OE8YAn=W<$$!;vqt_n?wMVd+bLX+!Y)fQg$?Do{P9#BLn8fi#y2r$L zlXAnGDail$$NT+X40pwvNP`0-x1RW|NgYF;Pn`>5onyMNuRAv%Itf_=NP9S^LrNTcEOH)E~4Pq(|bmdBCo;3b$OmgK?whaAd9kT;}+J+avip)Kl>F37*<|J?r{5;a3ZYX@sLL&N{g z%nt+eKcpM~`LdarF{RnRsPt7-ARzqzdO7i*;_@FC)Bn1()M))T?c1%Pvy$L2gx>EN zNlffh@(YV(?!9J>K`JA{*e!9rx=g{G$MMozfZWQN1QCsFWm@pv> z#r(_+vQ&Jlm{|x$=`oNVkTlo``$$U-skc4F=l`4>$bN8HNXQ zBahxNXx1X^f0!{h{n?VOVXUj)b(A+*jHYBm%oIpl&5{46k%LQ9k$v)_V|Rjbqx(>} z6Om_QlFCfJ$RYn&2N4xvbkKV7?lLor$?j; zw+s{@JOj#4`HS@UR%TT}h*@3$1pkr)m)!SvTWJ1@Rb^7-4zkbrCBW#G+!T{`J>wpSYB1thsscX0k zHS=-8>8X{K&V_IT><6U^U5_Y+Y9`?Nqbo6MuCsE$X#K*?4!&hL_lSHBy0i5>M=ss+ zLhU2O_@_w^9!x}jDWF?SVm!lnzet{8N18AQePJ})CrSucE0-}|%2wcORiDH=7w{D9 zfal<$g(OvFD{Fg#!X9I#jtJj$G*-xEI_(Q$Ms-Ps3ynoO9RJ&29>Y-_L8z;pMY*&aC z8-#?#hZ5Zu#N*+&V$HyszA#B-EF`!8AoHwd$&=iN7mkS4nfSf?&D(meg+ePWE) zUmcPy`gYf^V>z3|6UW9>TEVXByt*%?ywad8jXejru?=hO^yg1Wl`D3EOj@f0;fMeX zdt1M>>1u&}q1m{Nc;?DfZGPqGu%*k|L-sy?kok}~;e#4xyQ&Vsi9;p@M49;IJTbV$ zZ3(m-d3aL|-zMuCxzI4Xp)(i6J8NHByj3f9!HkxmcC_zomSJ3lX=ktJVF7ymL@8yo zT(Sl+2iKQ0;E0bRx?ydbgN023M(VhtYnZN2rDA1)!V!IN0aVif`vh^4FCuQj0pD7V z8-8|zLlZ1cXiNwYz0ob4SN7J2yCk~`rpb4yIb$?LIAw@qIjq1t`%#YPa!bW)F|M}z zH89tkqjw5oFSWC1XnZ&pgm=)1aA-Zlt=)x~v72C=*?29uX3T{PmixUQ9QceFZzEKH z%Kt75yB23vm&JfDR-S%oj4ity6;{C$*m!ZDrRnN=bB6UKO;6F8=HsT2%e+c$gg+&5 zZRdh$larS$T_S{n9%i}3;rP*`Tzq|Mx)v3t69*Bg#k{eyenH{xiFxh)ztcmFu0Y`6 z9-6-tX2p-ns+4|ub~P^SL8t)a`6ipg+($mHJbyGI{y_{_b&JJ#Z)GbW3VLQwK#G~`)_`$<`c 
zLq}1e`$&1)+9Xf-2X-;a1Mca7JnL}r>)bGn#b^pnqoFyJ0kn zUBJq~7w!Jme7Zy`JOK*q?PtlyO6nACucmt)pzA0>t+8n=e@f+wzvD@3d+T0x2ITYu zl{%G3IRl(oxO_}#2XulC=^UQg%zgWGEYBRfwm27SLw!DDFdA%1@^mt^GHEOp3&Go> z5!Kc$+QmX<2X8Y(Y4hld(l_7VcaiBxn7+(=^Jr;pE9k~}iyxc89)8VhX{WW$YkkVF zg#NL&r8fLdG0rAGzo<`&MfT;fnKguUNY{)ze*Qyb16<`3Nz~}o4$Es!c)-AsMvy1w?>YO;WYs|nI@_UXgmGZy$Feh6f4u3 z1WPNIwNvJ&FrwBng5t&Uht43F`dgxTWe!@YOd(T3B`iAq_JFEHj@x2RNVQ=s%sf)s zd@>1&w-c{{HwyroQ!hdXr<&hjU!jOhc9B~z`*M{xJ0jADv|LzeA{ zDTD2Nbv2tB$3A$X1U@$TbuP;G5(<=8A|UvOdhe#0r(E1-Aj2OPT9GC1VVpUk)^L

6uUq$4a}- zq;C12ZWV+@mfX(nrgt8xT=eA-e8~XOqzI@`$0;8_Ua1^)*D0CKM8 z?GE|RpK$W@N1gXT$|xw4l&I4`1^fv%QZiLZnxvfFNxp?L1-p!fYR!=s@SEEHHTY64 z$@wJJ`Y;I&qJSHb{-M%r91k2#;$2zoB#kl2mnx|z&et;)GpY66Xbu5O!)%D zul;nVrL_ciC;#Ymh@`Ik6D>`-%PBbvoQey|7p$ey1y|z2!`xPDJjAjSw3^#-`@=J_>MEIcw z{Gyz`&xIl<%pj=}zGX&{06UpcyJ$bki4t;vOdwUj*J+d8YiLJbVwBS7Sg%hVZPADy2=d~d0fBb1}57USCh@GoS? ZZvU>m=}F2Q0AM2>#d`sOl(?O{{{b>wTO|Mh literal 0 HcmV?d00001 diff --git a/third_party/ibis/ibis_cloud_spanner/table.py b/third_party/ibis/ibis_cloud_spanner/table.py index 580886a55..cd8cbe3ef 100644 --- a/third_party/ibis/ibis_cloud_spanner/table.py +++ b/third_party/ibis/ibis_cloud_spanner/table.py @@ -14,6 +14,7 @@ from __future__ import absolute_import from google.api_core.page_iterator import HTTPIterator +from google.cloud.spanner_v1 import TypeCode from third_party.ibis.ibis_cloud_spanner import dataset as dataset_class import copy @@ -261,7 +262,7 @@ def schema(self): db_id = self.dataset_id database = instance.database(db_id) with database.snapshot() as snapshot: - query="select * from {} limit 1".format(self.table_id) + query="select * from {} limit 0".format(self.table_id) results=snapshot.execute_sql(query) for row in results: records_list.append(row) @@ -272,8 +273,8 @@ def schema(self): for item in schema_list: field_name = item.name - if(item.type_.code == 8): - field_type = 'array<{}>'.format(code_to_spanner_dtype_dict[item.type.array_element_type.code]) + if(item.type_.code == TypeCode.ARRAY): + field_type = 'array<{}>'.format(code_to_spanner_dtype_dict[item.type_.array_element_type.code]) else : field_type = code_to_spanner_dtype_dict[item.type_.code] final_item = ( field_name , field_type ) diff --git a/third_party/ibis/ibis_cloud_spanner/tests/__init__.py b/third_party/ibis/ibis_cloud_spanner/tests/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/tests/__init__.py @@ -0,0 +1 @@ + diff --git 
a/third_party/ibis/ibis_cloud_spanner/tests/conftest.py b/third_party/ibis/ibis_cloud_spanner/tests/conftest.py new file mode 100644 index 000000000..8eb911a97 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/tests/conftest.py @@ -0,0 +1,69 @@ +# Copyright 2021 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest + +import ibis +from third_party.ibis.ibis_cloud_spanner.api import connect as connect_to_cs + + +instance_id = 'cloud_spanner_instance_id' +database_id = 'cloud_spanner_databae_id' + + +def connect(instance_id, database_id): + return connect_to_cs(instance_id, database_id ) + +@pytest.fixture(scope='session') +def inst_id(): + return instance_id + +@pytest.fixture(scope='session') +def client(): + return connect(instance_id, database_id) + + +@pytest.fixture(scope='session') +def client2(): + return connect(instance_id, database_id) + + +@pytest.fixture(scope='session') +def alltypes(client): + return client.table('functional_alltypes') + + +@pytest.fixture(scope='session') +def df(alltypes): + return alltypes.execute() + + +@pytest.fixture(scope='session') +def students(client): + return client.table('students_pointer') + + +@pytest.fixture(scope='session') +def students_df(students): + return students.execute() + + +@pytest.fixture(scope='session') +def array_table(client): + return client.table('array_table') + + + diff --git a/third_party/ibis/ibis_cloud_spanner/tests/schema.sql 
b/third_party/ibis/ibis_cloud_spanner/tests/schema.sql new file mode 100644 index 000000000..ab8501d3f --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/tests/schema.sql @@ -0,0 +1,93 @@ +CREATE TABLE students_pointer + ( + id INT64, + name STRING(30), + division INT64, + marks INT64, + exam STRING(30), + overall_pointer FLOAT64, + date_of_exam TIMESTAMP + )PRIMARY KEY(id); + +INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(101,'Ross',12,500,'Biology',9.8,'2002-02-10 15:30:00+00'); + +INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(102,'Rachel',14,460,'Chemistry',9.9,'2018-04-22'); + +INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(103,'Chandler',12,480,'Biology',8.2,'2016-04-14'); + +INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(104,'Monica',12,390,'Maths',9.2,'2019-04-29'); + +INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(105,'Joey',16,410,'Maths',9.7,'2019-06-21'); + +INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(106,'Phoebe',10,490,'Chemistry',9.6,'2019-02-09'); + + + + + + +CREATE TABLE awards + ( + id INT64, + award_name STRING(20) + )PRIMARY KEY(id); + +Insert into awards (id,award_name) values (101,'LOTUS') +Insert into awards (id,award_name) values (102,'ROSE') + + + + + + +CREATE TABLE functional_alltypes ( + id INT64, + bigint_col INT64, + bool_col BOOL, + date DATE, + date_string_col STRING(MAX), + double_col NUMERIC, + float_col NUMERIC, + index INT64, + int_col INT64, + month INT64, + smallint_col INT64, + string_col STRING(MAX), + timestamp_col TIMESTAMP, + tinyint_col INT64, + Unnamed0 INT64, + year INT64 +) PRIMARY KEY (id) + +INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col 
,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values +(1,10001,TRUE,'2016-02-09','01/01/2001',2.5,12.16,101,21,4,16,'David','2002-02-10 15:30:00+00',6,99,2010) + +INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values +(2,10002,FALSE,'2016-10-10','02/02/2002',2.6,13.16,102,22,5,18,'Ryan','2009-02-12 10:06:00+00',7,98,2012) + +INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values +(3,10003,TRUE,'2018-02-09','03/03/2003',9.5,44.16,201,41,6,56,'Steve','2010-06-10 12:12:00+00',12,66,2006) + +INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values +(4,10004,TRUE,'2018-10-10','04/04/2004',9.6,45.16,202,42,9,58,'Chandler','2014-06-12 10:04:00+00',14,69,2009) + + +INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values +(5,10005,FALSE,'2020-06-12','05/05/2005',6.6,66.12,401,62,12,98,'Rose','2018-02-10 10:06:00+00',16,96,2012) + +INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values +(6,10006,TRUE,'2020-12-12','06/06/2006',6.9,66.19,402,69,14,99,'Rachel','2019-04-12 12:09:00+00',18,99,2014) + + + + + + +CREATE TABLE array_table ( + string_col ARRAY, + int_col ARRAY, + id INT64, +) PRIMARY KEY (id) + +INSERT into array_table (id,string_col,int_col) values (1,['Peter','David'],[11,12]) +INSERT into array_table_1 
(id,string_col,int_col) values (2,['Raj','Dev','Neil'],[1,2,3]) diff --git a/third_party/ibis/ibis_cloud_spanner/tests/test_client.py b/third_party/ibis/ibis_cloud_spanner/tests/test_client.py new file mode 100644 index 000000000..a65a0bb27 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/tests/test_client.py @@ -0,0 +1,512 @@ +# Copyright 2021 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import datetime +import decimal + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +import pytz + +import ibis +import ibis.expr.datatypes as dt +import ibis.expr.types as ir + +from third_party.ibis.ibis_cloud_spanner import api as cs_compile + +pytestmark = pytest.mark.cloud_spanner + +from third_party.ibis.ibis_cloud_spanner.tests.conftest import connect + +def test_table(alltypes): + assert isinstance(alltypes, ir.TableExpr) + + +def test_column_execute(alltypes, df): + col_name = 'float_col' + expr = alltypes[col_name] + result = expr.execute() + expected = df[col_name] + tm.assert_series_equal( + (result.sort_values(col_name).reset_index(drop=True)).iloc[:,0], + expected.sort_values().reset_index(drop=True), + ) + + +def test_literal_execute(client): + expected = '1234' + expr = ibis.literal(expected) + result = (client.execute(expr)).iloc[0]['tmp'] + assert result == expected + + +def test_simple_aggregate_execute(alltypes, df): + col_name = 'float_col' + expr = alltypes[col_name].sum() + 
result = expr.execute() + expected = df[col_name].sum() + final_result = result.iloc[0]['sum'] + assert final_result == expected + +def test_list_tables(client): + tables = client.list_tables(like='functional_alltypes') + assert set(tables) == {'functional_alltypes'} + +def test_current_database(client): + assert client.current_database.name == 'spanner_dev_db' + assert client.current_database.name == client.dataset_id + assert client.current_database.tables == client.list_tables() + + +def test_database(client): + database = client.database(client.dataset_id) + assert database.list_tables() == client.list_tables() + + +def test_compile_toplevel(): + t = ibis.table([('foo', 'double')], name='t0') + + expr = t.foo.sum() + result = third_party.ibis.ibis_cloud_spanner.compile(expr) + + expected = """\ +SELECT sum(`foo`) AS `sum` +FROM t0""" # noqa + assert str(result) == expected + + +def test_count_distinct_with_filter(alltypes): + expr = alltypes.float_col.nunique( + where=alltypes.float_col.cast('int64') > 1 + ) + result = expr.execute() + result = result.iloc[:,0] + result = result.iloc[0] + + expected = alltypes.float_col.execute() + expected = expected[expected.astype('int64') > 1].nunique() + expected = expected.iloc[0] + assert result == expected + + +@pytest.mark.parametrize('type', ['date', dt.date]) +def test_cast_string_to_date(alltypes, df, type): + import toolz + + string_col = alltypes.date_string_col + month, day, year = toolz.take(3, string_col.split('/')) + + expr = ibis.literal('-').join([year, month, day]) + expr = expr.cast(type) + + result = ( + expr.execute().iloc[:,0] + .astype('datetime64[ns]') + .sort_values() + .reset_index(drop=True) + .rename('date_string_col') + ) + expected = ( + pd.to_datetime(df.date_string_col ) + .dt.normalize() + .sort_values() + .reset_index(drop=True) + ) + tm.assert_series_equal(result, expected) + + +def test_subquery_scalar_params(alltypes): + t = alltypes + param = ibis.param('timestamp').name('my_param') + 
expr = ( + t[['float_col', 'timestamp_col', 'int_col', 'string_col']][ + lambda t: t.timestamp_col < param + ] + .groupby('string_col') + .aggregate(foo=lambda t: t.float_col.sum()) + .foo.count() + ) + result = cs_compile.compile(expr,params={param: '20140101'}) + expected = """\ +SELECT count(`foo`) AS `count` +FROM ( + SELECT `string_col`, sum(`float_col`) AS `foo` + FROM ( + SELECT `float_col`, `timestamp_col`, `int_col`, `string_col` + FROM functional_alltypes + WHERE `timestamp_col` < @my_param + ) t1 + GROUP BY 1 +) t0""" + assert result == expected + +def test_scalar_param_string(alltypes, df): + param = ibis.param('string') + expr = alltypes[alltypes.string_col == param] + + string_value = 'David' + result = ( + expr.execute(params={param: string_value}) + .sort_values('id') + .reset_index(drop=True) + ) + expected = ( + df.loc[df.string_col == string_value] + .sort_values('id') + .reset_index(drop=True) + ) + tm.assert_frame_equal(result, expected) + + +def test_scalar_param_int64(alltypes, df): + param = ibis.param('int64') + expr = alltypes[alltypes.int_col == param] + + int64_value = 22 + result = ( + expr.execute(params={param: int64_value}) + .sort_values('id') + .reset_index(drop=True) + ) + expected = ( + df.loc[df.int_col == int64_value] + .sort_values('id') + .reset_index(drop=True) + ) + tm.assert_frame_equal(result, expected) + + +def test_scalar_param_double(alltypes, df): + param = ibis.param('double') + expr = alltypes[alltypes.double_col == param] + + double_value = 2.5 + result = ( + expr.execute(params={param: double_value}) + .sort_values('id') + .reset_index(drop=True) + ) + expected = ( + df.loc[df.double_col == double_value] + .sort_values('id') + .reset_index(drop=True) + ) + tm.assert_frame_equal(result, expected) + + +def test_scalar_param_boolean(alltypes, df): + param = ibis.param('boolean') + expr = alltypes[(alltypes.bool_col == param)] + + bool_value = True + result = ( + expr.execute(params={param: bool_value}) + 
.sort_values('id') + .reset_index(drop=True) + ) + expected = ( + df.loc[df.bool_col == bool_value] + .sort_values('id') + .reset_index(drop=True) + ) + tm.assert_frame_equal(result, expected) + +@pytest.mark.parametrize( + 'timestamp_value', + [ + '2019-04-12 12:09:00+00:00' + ], +) +def test_scalar_param_timestamp(alltypes, df, timestamp_value): + param = ibis.param('timestamp') + expr = (alltypes[alltypes.timestamp_col <= param]).select(['timestamp_col']) + + result = ( + expr.execute(params={param: timestamp_value}) + .sort_values('timestamp_col') + .reset_index(drop=True) + ) + value = pd.Timestamp(timestamp_value) + expected = ( + df.loc[df.timestamp_col <= value, ['timestamp_col']] + .sort_values('timestamp_col') + .reset_index(drop=True) + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + 'date_value', + ['2009-02-12', datetime.date(2009, 2, 12), datetime.datetime(2009, 2, 12)], +) +def test_scalar_param_date(alltypes, df, date_value): + param = ibis.param('date') + expr = alltypes[alltypes.timestamp_col.cast('date') <= param] + + result = ( + expr.execute(params={param: date_value}) + .sort_values('timestamp_col') + .reset_index(drop=True) + ) + value = pd.Timestamp(date_value) + value = pd.to_datetime(value).tz_localize('UTC') + expected = ( + df.loc[df.timestamp_col.dt.normalize() <= value] + .sort_values('timestamp_col') + .reset_index(drop=True) + ) + tm.assert_frame_equal(result, expected) + +def test_raw_sql(client): + assert (client.raw_sql('SELECT 1')).iloc[0][0] == 1 + + +def test_scalar_param_scope(alltypes): + t = alltypes + param = ibis.param('timestamp') + mut = t.mutate(param=param).compile(params={param: '2017-01-01'}) + assert ( + mut + == """\ +SELECT *, @param AS `param` +FROM functional_alltypes""" + ) + + +def test_column_names(alltypes): + assert 'bigint_col' in alltypes.columns + assert 'string_col' in alltypes.columns + +def test_column_names_in_schema(alltypes): + assert 'int_col' in alltypes.schema() + 
+def test_exists_table(client): + assert client.exists_table('functional_alltypes') + assert not client.exists_table('footable') + + +def test_exists_database(client): + assert client.exists_database('spanner_dev_db') + assert not client.exists_database('foodataset') + + +def test_set_database(client2): + client2.set_database('demo-db') + tables = client2.list_tables() + assert 'awards' in tables + + +def test_exists_table_different_project(client): + name = 'functional_alltypes' + assert client.exists_table(name) + assert not client.exists_table('foobar') + +@pytest.mark.parametrize( + ('name', 'expected'), + [ + ('spanner_dev_db', True), + ('database_one', False), + ], +) +def test_exists_database(client, name, expected): + assert client.exists_database(name) is expected + +def test_repeated_project_name(inst_id): + con = connect(inst_id,"spanner_dev_db") + assert 'functional_alltypes' in con.list_tables() + +def test_large_timestamp(client): + huge_timestamp = datetime.datetime(2012,10,10,10,10,10,154117) + expr = ibis.timestamp('2012-10-10 10:10:10.154117') + result = client.execute(expr) + + huge_timestamp = (pd.to_datetime(huge_timestamp).tz_localize('UTC')).date() + result = (result['tmp'][0]).date() + assert result == huge_timestamp + +def test_string_to_timestamp(client): + timestamp = pd.Timestamp( + datetime.datetime(year=2017, month=2, day=6), tz=pytz.timezone('UTC') + ) + expr = ibis.literal('2017-02-06').to_timestamp('%F') + result = client.execute(expr) + result = result.iloc[:,0][0] + result = result.date() + timestamp = timestamp.date() + assert result == timestamp + + timestamp_tz = pd.Timestamp( + datetime.datetime(year=2017, month=2, day=6, hour=5), + tz=pytz.timezone('UTC'), + ) + expr_tz = ibis.literal('2017-02-06').to_timestamp('%F', 'America/New_York') + result_tz = client.execute(expr_tz) + result_tz = result_tz.iloc[:,0][0] + result_tz = result_tz.date() + timestamp_tz = timestamp_tz.date() + assert result_tz == timestamp_tz + +def 
test_client_sql_query(client): + expr = client.get_data_using_query('select * from functional_alltypes limit 20') + result = expr + expected = client.table('functional_alltypes').head(20).execute() + tm.assert_frame_equal(result, expected) + +def test_prevent_rewrite(alltypes): + t = alltypes + expr = ( + t.groupby(t.string_col) + .aggregate(collected_double=t.double_col.collect()) + .pipe(ibis.prevent_rewrite) + .filter(lambda t: t.string_col != 'wat') + ) + result = cs_compile.compile(expr) + expected = """\ +SELECT * +FROM ( + SELECT `string_col`, ARRAY_AGG(`double_col`) AS `collected_double` + FROM functional_alltypes + GROUP BY 1 +) t0 +WHERE `string_col` != 'wat'""" + assert result == expected + + +@pytest.mark.parametrize( + ('case', 'dtype'), + [ + (datetime.date(2017, 1, 1), dt.date), + (pd.Timestamp('2017-01-01'), dt.date), + ('2017-01-01', dt.date), + (datetime.datetime(2017, 1, 1, 4, 55, 59), dt.timestamp), + ('2017-01-01 04:55:59', dt.timestamp), + (pd.Timestamp('2017-01-01 04:55:59'), dt.timestamp), + ], +) +def test_day_of_week(client, case, dtype): + date_var = ibis.literal(case, type=dtype) + expr_index = date_var.day_of_week.index() + result = client.execute(expr_index) + result = result['tmp'][0] + assert result == 6 + + expr_name = date_var.day_of_week.full_name() + result = client.execute(expr_name) + result = result['tmp'][0] + assert result == 'Sunday' + +def test_boolean_reducers(alltypes): + b = alltypes.bool_col + bool_avg = b.mean().execute() + bool_avg = bool_avg.iloc[:,0] + bool_avg = bool_avg[0] + assert type(bool_avg) == np.float64 + + bool_sum = b.sum().execute() + bool_sum = bool_sum.iloc[:,0] + bool_sum = bool_sum[0] + assert type(bool_sum) == np.int64 + +def test_students_table_schema(students): + assert students.schema() == ibis.schema( + [('id', dt.int64), ('name', dt.string), ('division',dt.int64), ('marks',dt.int64), ('exam', dt.string), ('overall_pointer',dt.float64), ('date_of_exam',dt.timestamp)] + ) + +def 
test_numeric_sum(students): + t = students + expr = t.overall_pointer.sum() + result = expr.execute() + result = (result.iloc[:,0])[0] + assert isinstance(result, np.float64) + + +def test_boolean_casting(alltypes): + t = alltypes + expr = t.groupby(k=t.string_col.nullif('1') == '9').count() + result = expr.execute().set_index('k') + count = result['count'] + assert count.loc[False] == 6 + + +def test_approx_median(alltypes): + m = alltypes.month + expected = m.execute().median() + expected = expected[0] + assert expected == 7.5 + + +def test_struct_field_access(array_table): + expr = array_table.string_col + result = expr.execute() + result = result.iloc[:,0] + expected = pd.Series( + [ + ['Peter', 'David'], + ['Raj', 'Dev', 'Neil'] + ], + name='string_col', + ) + + tm.assert_series_equal(result, expected) + +def test_array_index(array_table): + expr = array_table.string_col[1] + result = expr.execute() + result = result.iloc[:,0] + expected = pd.Series( + [ + 'David', + 'Dev', + ], + name='tmp', + ) + tm.assert_series_equal(result, expected) + + +def test_array_concat(array_table): + c = array_table.string_col + expr = c + c + result = expr.execute() + result = result.iloc[:,0] + expected = pd.Series( + [ + ['Peter', 'David', 'Peter', 'David'], + ['Raj', 'Dev', 'Neil', 'Raj', 'Dev', 'Neil'] + ], + name='tmp', + ) + tm.assert_series_equal(result, expected) + + +def test_array_length(array_table): + expr = array_table.string_col.length() + result = expr.execute() + result = result.iloc[:,0] + expected = pd.Series([2, 3], name='tmp') + tm.assert_series_equal(result, expected) + + +def test_scalar_param_array(alltypes, df, client): + expr = alltypes.sort_by('id').limit(1).double_col.collect() + result = client.get_data_using_query(cs_compile.compile(expr)) + result = result['tmp'][0] + expected = [df.sort_values('id').double_col.iat[0]] + assert result == expected + + + + diff --git a/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py 
b/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py new file mode 100644 index 000000000..b95a2efc5 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py @@ -0,0 +1,681 @@ +# Copyright 2021 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import pandas as pd +import pytest + +import ibis +from third_party.ibis.ibis_cloud_spanner import api as cs_compile +import third_party.ibis.ibis_cloud_spanner as cs +import ibis.expr.datatypes as dt +import ibis.expr.operations as ops +from ibis.expr.types import TableExpr + +pytestmark = pytest.mark.cloud_spanner + +def test_timestamp_accepts_date_literals(alltypes): + date_string = '2009-03-01' + param = ibis.param(dt.timestamp).name('param_0') + expr = alltypes.mutate(param=param) + params = {param: date_string} + result = expr.compile(params=params) + expected = f"""\ +SELECT *, @param AS `param` +FROM functional_alltypes""" + assert result == expected + +@pytest.mark.parametrize( + ('distinct', 'expected_keyword'), [(True, 'DISTINCT'), (False, 'ALL')] +) +def test_union(alltypes, distinct, expected_keyword): + expr = alltypes.union(alltypes, distinct=distinct) + result = cs_compile.compile(expr) + expected = f"""\ +SELECT * +FROM functional_alltypes +UNION {expected_keyword} +SELECT * +FROM functional_alltypes""" + assert result == expected + + +def test_ieee_divide(alltypes): + expr = alltypes.double_col / 0 + result = cs_compile.compile(expr) + expected = 
f"""\ +SELECT IEEE_DIVIDE(`double_col`, 0) AS `tmp` +FROM functional_alltypes""" + assert result == expected + + + +def test_identical_to(alltypes): + t = alltypes + pred = t.string_col.identical_to('a') & t.date_string_col.identical_to('b') + expr = t[pred] + result = cs_compile.compile(expr) + expected = f"""\ +SELECT * +FROM functional_alltypes +WHERE (((`string_col` IS NULL) AND ('a' IS NULL)) OR (`string_col` = 'a')) AND + (((`date_string_col` IS NULL) AND ('b' IS NULL)) OR (`date_string_col` = 'b'))""" # noqa: E501 + assert result == expected + + + +@pytest.mark.parametrize('timezone', [None, 'America/New_York']) +def test_to_timestamp(alltypes, timezone): + expr = alltypes.date_string_col.to_timestamp('%F', timezone) + result = cs_compile.compile(expr) + if timezone: + expected = f"""\ +SELECT PARSE_TIMESTAMP('%F', `date_string_col`, 'America/New_York') AS `tmp` +FROM functional_alltypes""" + else: + expected = f"""\ +SELECT PARSE_TIMESTAMP('%F', `date_string_col`) AS `tmp` +FROM functional_alltypes""" + assert result == expected + + + +@pytest.mark.parametrize( + ('case', 'expected', 'dtype'), + [ + (datetime.date(2017, 1, 1), "DATE '2017-01-01'", dt.date), + (pd.Timestamp('2017-01-01'), "DATE '2017-01-01'", dt.date,), + ('2017-01-01', "DATE '2017-01-01'", dt.date), + ( + datetime.datetime(2017, 1, 1, 4, 55, 59), + "TIMESTAMP '2017-01-01 04:55:59'", + dt.timestamp, + ), + ( + '2017-01-01 04:55:59', + "TIMESTAMP '2017-01-01 04:55:59'", + dt.timestamp, + ), + ( + pd.Timestamp('2017-01-01 04:55:59'), + "TIMESTAMP '2017-01-01 04:55:59'", + dt.timestamp, + ), + ], +) +def test_literal_date(case, expected, dtype): + expr = ibis.literal(case, type=dtype).year() + result = cs_compile.compile(expr) + assert result == f"SELECT EXTRACT(year from {expected}) AS `tmp`" + + +@pytest.mark.parametrize( + ('case', 'expected', 'dtype', 'strftime_func'), + [ + ( + datetime.date(2017, 1, 1), + "DATE '2017-01-01'", + dt.date, + 'FORMAT_DATE', + ), + ( + 
pd.Timestamp('2017-01-01'), + "DATE '2017-01-01'", + dt.date, + 'FORMAT_DATE', + ), + ('2017-01-01', "DATE '2017-01-01'", dt.date, 'FORMAT_DATE',), + ( + datetime.datetime(2017, 1, 1, 4, 55, 59), + "TIMESTAMP '2017-01-01 04:55:59'", + dt.timestamp, + 'FORMAT_TIMESTAMP', + ), + ( + '2017-01-01 04:55:59', + "TIMESTAMP '2017-01-01 04:55:59'", + dt.timestamp, + 'FORMAT_TIMESTAMP', + ), + ( + pd.Timestamp('2017-01-01 04:55:59'), + "TIMESTAMP '2017-01-01 04:55:59'", + dt.timestamp, + 'FORMAT_TIMESTAMP', + ), + ], +) +def test_day_of_week(case, expected, dtype, strftime_func): + date_var = ibis.literal(case, type=dtype) + expr_index = date_var.day_of_week.index() + result = cs_compile.compile(expr_index) + assert ( + result + == f"SELECT MOD(EXTRACT(DAYOFWEEK FROM {expected}) + 5, 7) AS `tmp`" + ) + + expr_name = date_var.day_of_week.full_name() + result = cs_compile.compile(expr_name) + if strftime_func == 'FORMAT_TIMESTAMP': + assert ( + result + == f"SELECT {strftime_func}('%A', {expected}, 'UTC') AS `tmp`" + ) + else: + assert result == f"SELECT {strftime_func}('%A', {expected}) AS `tmp`" + + +@pytest.mark.parametrize( + ('case', 'expected', 'dtype'), + [ + ( + datetime.datetime(2017, 1, 1, 4, 55, 59), + "TIMESTAMP '2017-01-01 04:55:59'", + dt.timestamp, + ), + ( + '2017-01-01 04:55:59', + "TIMESTAMP '2017-01-01 04:55:59'", + dt.timestamp, + ), + ( + pd.Timestamp('2017-01-01 04:55:59'), + "TIMESTAMP '2017-01-01 04:55:59'", + dt.timestamp, + ), + (datetime.time(4, 55, 59), "TIME '04:55:59'", dt.time), + ('04:55:59', "TIME '04:55:59'", dt.time), + ], +) +def test_literal_timestamp_or_time(case, expected, dtype): + expr = ibis.literal(case, type=dtype).hour() + result = cs_compile.compile(expr) + assert result == f"SELECT EXTRACT(hour from {expected}) AS `tmp`" + + +def test_window_function(alltypes): + t = alltypes + w1 = ibis.window( + preceding=1, following=0, group_by='year', order_by='timestamp_col' + ) + expr = t.mutate(win_avg=t.float_col.mean().over(w1)) + result 
= cs_compile.compile(expr) + expected = f"""\ +SELECT *, + avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `timestamp_col` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS `win_avg` +FROM functional_alltypes""" # noqa: E501 + assert result == expected + + w2 = ibis.window( + preceding=0, following=2, group_by='year', order_by='timestamp_col' + ) + expr = t.mutate(win_avg=t.float_col.mean().over(w2)) + result = cs_compile.compile(expr) + expected = f"""\ +SELECT *, + avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `timestamp_col` ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING) AS `win_avg` +FROM functional_alltypes""" # noqa: E501 + assert result == expected + + w3 = ibis.window( + preceding=(4, 2), group_by='year', order_by='timestamp_col' + ) + expr = t.mutate(win_avg=t.float_col.mean().over(w3)) + result = cs_compile.compile(expr) + expected = f"""\ +SELECT *, + avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `timestamp_col` ROWS BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg` +FROM functional_alltypes""" # noqa: E501 + assert result == expected + + +def test_range_window_function(alltypes): + t = alltypes + w = ibis.range_window( + preceding=1, following=0, group_by='year', order_by='month' + ) + expr = t.mutate(two_month_avg=t.float_col.mean().over(w)) + result = cs_compile.compile(expr) + expected = f"""\ +SELECT *, + avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `month` RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `two_month_avg` +FROM functional_alltypes""" # noqa: E501 + assert result == expected + + w3 = ibis.range_window( + preceding=(4, 2), group_by='year', order_by='timestamp_col' + ) + expr = t.mutate(win_avg=t.float_col.mean().over(w3)) + result = cs_compile.compile(expr) + expected = f"""\ +SELECT *, + avg(`float_col`) OVER (PARTITION BY `year` ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg` +FROM functional_alltypes""" # noqa: E501 + assert result == expected + + +@pytest.mark.parametrize( + 
('preceding', 'value'), + [ + (5, 5), + (ibis.interval(nanoseconds=1), 0.001), + (ibis.interval(microseconds=1), 1), + (ibis.interval(seconds=1), 1000000), + (ibis.interval(minutes=1), 1000000 * 60), + (ibis.interval(hours=1), 1000000 * 60 * 60), + (ibis.interval(days=1), 1000000 * 60 * 60 * 24), + (2 * ibis.interval(days=1), 1000000 * 60 * 60 * 24 * 2), + (ibis.interval(weeks=1), 1000000 * 60 * 60 * 24 * 7), + ], +) +def test_trailing_range_window(alltypes, preceding, value): + t = alltypes + w = ibis.trailing_range_window( + preceding=preceding, order_by=t.timestamp_col + ) + expr = t.mutate(win_avg=t.float_col.mean().over(w)) + result = cs_compile.compile(expr) + expected = f"""\ +SELECT *, + avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN {value} PRECEDING AND CURRENT ROW) AS `win_avg` +FROM functional_alltypes""" # noqa: E501 + assert result == expected + + +@pytest.mark.parametrize( + ('preceding', 'value'), [(ibis.interval(years=1), None)] +) +def test_trailing_range_window_unsupported(alltypes, preceding, value): + t = alltypes + w = ibis.trailing_range_window( + preceding=preceding, order_by=t.timestamp_col + ) + expr = t.mutate(win_avg=t.float_col.mean().over(w)) + with pytest.raises(ValueError): + cs_compile.compile(expr) + + +@pytest.mark.parametrize( + ('distinct1', 'distinct2', 'expected1', 'expected2'), + [ + (True, True, 'UNION DISTINCT', 'UNION DISTINCT'), + (True, False, 'UNION DISTINCT', 'UNION ALL'), + (False, True, 'UNION ALL', 'UNION DISTINCT'), + (False, False, 'UNION ALL', 'UNION ALL'), + ], +) +def test_union_cte( + alltypes, distinct1, distinct2, expected1, expected2 +): + t = alltypes + expr1 = t.group_by(t.string_col).aggregate(metric=t.double_col.sum()) + expr2 = expr1.view() + expr3 = expr1.view() + expr = expr1.union(expr2, distinct=distinct1).union( + expr3, distinct=distinct2 + ) + result = cs_compile.compile(expr) + expected = f"""\ +WITH t0 AS ( + SELECT `string_col`, sum(`double_col`) AS `metric` + FROM 
functional_alltypes + GROUP BY 1 +) +SELECT * +FROM t0 +{expected1} +SELECT `string_col`, sum(`double_col`) AS `metric` +FROM functional_alltypes +GROUP BY 1 +{expected2} +SELECT `string_col`, sum(`double_col`) AS `metric` +FROM functional_alltypes +GROUP BY 1""" + assert result == expected + + +def test_projection_fusion_only_peeks_at_immediate_parent(): + schema = [ + ('file_date', 'timestamp'), + ('PARTITIONTIME', 'date'), + ('val', 'int64'), + ] + table = ibis.table(schema, name='unbound_table') + table = table[table.PARTITIONTIME < ibis.date('2017-01-01')] + table = table.mutate(file_date=table.file_date.cast('date')) + table = table[table.file_date < ibis.date('2017-01-01')] + table = table.mutate(XYZ=table.val * 2) + expr = table.join(table.view())[table] + result = cs_compile.compile(expr) + expected = """\ +WITH t0 AS ( + SELECT * + FROM unbound_table + WHERE `PARTITIONTIME` < DATE '2017-01-01' +), +t1 AS ( + SELECT CAST(`file_date` AS DATE) AS `file_date`, `PARTITIONTIME`, `val` + FROM t0 +), +t2 AS ( + SELECT t1.* + FROM t1 + WHERE t1.`file_date` < DATE '2017-01-01' +), +t3 AS ( + SELECT *, `val` * 2 AS `XYZ` + FROM t2 +) +SELECT t3.* +FROM t3 + CROSS JOIN t3 t4""" + assert result == expected + + +def test_bool_reducers(alltypes): + b = alltypes.bool_col + expr = b.mean() + result = cs_compile.compile(expr) + expected = f"""\ +SELECT avg(CAST(`bool_col` AS INT64)) AS `mean` +FROM functional_alltypes""" + assert result == expected + + expr2 = b.sum() + result = cs_compile.compile(expr2) + expected = f"""\ +SELECT sum(CAST(`bool_col` AS INT64)) AS `sum` +FROM functional_alltypes""" + assert result == expected + + +def test_bool_reducers_where(alltypes): + b = alltypes.bool_col + m = alltypes.month + expr = b.mean(where=m > 6) + result = cs_compile.compile(expr) + expected = f"""\ +SELECT avg(CASE WHEN `month` > 6 THEN CAST(`bool_col` AS INT64) ELSE NULL END) AS `mean` +FROM functional_alltypes""" # noqa: E501 + assert result == expected + + expr2 = 
b.sum(where=((m > 6) & (m < 10))) + result = cs_compile.compile(expr2) + expected = f"""\ +SELECT sum(CASE WHEN (`month` > 6) AND (`month` < 10) THEN CAST(`bool_col` AS INT64) ELSE NULL END) AS `sum` +FROM functional_alltypes""" # noqa: E501 + assert result == expected + + +def test_approx_nunique(alltypes): + d = alltypes.double_col + expr = d.approx_nunique() + result = cs_compile.compile(expr) + expected = f"""\ +SELECT APPROX_COUNT_DISTINCT(`double_col`) AS `approx_nunique` +FROM functional_alltypes""" + assert result == expected + + b = alltypes.bool_col + m = alltypes.month + expr2 = b.approx_nunique(where=m > 6) + result = cs_compile.compile(expr2) + expected = f"""\ +SELECT APPROX_COUNT_DISTINCT(CASE WHEN `month` > 6 THEN `bool_col` ELSE NULL END) AS `approx_nunique` +FROM functional_alltypes""" # noqa: E501 + assert result == expected + + +def test_approx_median(alltypes): + d = alltypes.double_col + expr = d.approx_median() + result = cs_compile.compile(expr) + expected = f"""\ +SELECT APPROX_QUANTILES(`double_col`, 2)[OFFSET(1)] AS `approx_median` +FROM functional_alltypes""" + assert result == expected + + m = alltypes.month + expr2 = d.approx_median(where=m > 6) + result = cs_compile.compile(expr2) + expected = f"""\ +SELECT APPROX_QUANTILES(CASE WHEN `month` > 6 THEN `double_col` ELSE NULL END, 2)[OFFSET(1)] AS `approx_median` +FROM functional_alltypes""" # noqa: E501 + assert result == expected + + +def test_cov(alltypes): + d = alltypes.double_col + expr = d.cov(d) + result = cs_compile.compile(expr) + expected = f"""\ +SELECT + COVAR_SAMP(ref_0 + CloudSpannerTable[table] + name: functional_alltypes + schema: + id : int64 + bigint_col : int64 + bool_col : boolean + date : date + date_string_col : string + double_col : decimal(9, 0) + float_col : decimal(9, 0) + index : int64 + int_col : int64 + month : int64 + smallint_col : int64 + string_col : string + timestamp_col : timestamp + tinyint_col : int64 + Unnamed0 : int64 + year : int64 + + double_col 
= Column[decimal(9, 0)*] 'double_col' from table + ref_0, ref_0 + CloudSpannerTable[table] + name: functional_alltypes + schema: + id : int64 + bigint_col : int64 + bool_col : boolean + date : date + date_string_col : string + double_col : decimal(9, 0) + float_col : decimal(9, 0) + index : int64 + int_col : int64 + month : int64 + smallint_col : int64 + string_col : string + timestamp_col : timestamp + tinyint_col : int64 + Unnamed0 : int64 + year : int64 + + double_col = Column[decimal(9, 0)*] 'double_col' from table + ref_0) AS `tmp` +FROM functional_alltypes""" + assert result == expected + + expr = d.cov(d, how='pop') + result = cs_compile.compile(expr) + expected = f"""\ +SELECT + COVAR_POP(ref_0 + CloudSpannerTable[table] + name: functional_alltypes + schema: + id : int64 + bigint_col : int64 + bool_col : boolean + date : date + date_string_col : string + double_col : decimal(9, 0) + float_col : decimal(9, 0) + index : int64 + int_col : int64 + month : int64 + smallint_col : int64 + string_col : string + timestamp_col : timestamp + tinyint_col : int64 + Unnamed0 : int64 + year : int64 + + double_col = Column[decimal(9, 0)*] 'double_col' from table + ref_0, ref_0 + CloudSpannerTable[table] + name: functional_alltypes + schema: + id : int64 + bigint_col : int64 + bool_col : boolean + date : date + date_string_col : string + double_col : decimal(9, 0) + float_col : decimal(9, 0) + index : int64 + int_col : int64 + month : int64 + smallint_col : int64 + string_col : string + timestamp_col : timestamp + tinyint_col : int64 + Unnamed0 : int64 + year : int64 + + double_col = Column[decimal(9, 0)*] 'double_col' from table + ref_0) AS `tmp` +FROM functional_alltypes""" + assert result == expected + + +@pytest.mark.parametrize( + ('unit', 'expected_unit', 'expected_func'), + [ + ('Y', 'YEAR', 'TIMESTAMP'), + ('Q', 'QUARTER', 'TIMESTAMP'), + ('M', 'MONTH', 'TIMESTAMP'), + ('W', 'WEEK', 'TIMESTAMP'), + ('D', 'DAY', 'TIMESTAMP'), + ('h', 'HOUR', 'TIMESTAMP'), + ('m', 
'MINUTE', 'TIMESTAMP'), + ('s', 'SECOND', 'TIMESTAMP'), + ('ms', 'MILLISECOND', 'TIMESTAMP'), + ('us', 'MICROSECOND', 'TIMESTAMP'), + ('Y', 'YEAR', 'DATE'), + ('Q', 'QUARTER', 'DATE'), + ('M', 'MONTH', 'DATE'), + ('W', 'WEEK', 'DATE'), + ('D', 'DAY', 'DATE'), + ('h', 'HOUR', 'TIME'), + ('m', 'MINUTE', 'TIME'), + ('s', 'SECOND', 'TIME'), + ('ms', 'MILLISECOND', 'TIME'), + ('us', 'MICROSECOND', 'TIME'), + ], +) +def test_temporal_truncate(unit, expected_unit, expected_func): + t = ibis.table([('a', getattr(dt, expected_func.lower()))], name='t') + expr = t.a.truncate(unit) + result = cs_compile.compile(expr) + expected = f"""\ +SELECT {expected_func}_TRUNC(`a`, {expected_unit}) AS `tmp` +FROM t""" + assert result == expected + + +@pytest.mark.parametrize('kind', ['date', 'time']) +def test_extract_temporal_from_timestamp(kind): + t = ibis.table([('ts', dt.timestamp)], name='t') + expr = getattr(t.ts, kind)() + result = cs_compile.compile(expr) + expected = f"""\ +SELECT {kind.upper()}(`ts`) AS `tmp` +FROM t""" + assert result == expected + + +def test_now(): + expr = ibis.now() + result = cs_compile.compile(expr) + expected = 'SELECT CURRENT_TIMESTAMP() AS `tmp`' + assert result == expected + + +def test_bucket(): + t = ibis.table([('value', 'double')], name='t') + buckets = [0, 1, 3] + expr = t.value.bucket(buckets).name('foo') + result = cs_compile.compile(expr) + expected = """\ +SELECT + CASE + WHEN (`value` >= 0) AND (`value` < 1) THEN 0 + WHEN (`value` >= 1) AND (`value` <= 3) THEN 1 + ELSE CAST(NULL AS INT64) + END AS `tmp` +FROM t""" + assert result == expected + + +@pytest.mark.parametrize( + ('kind', 'begin', 'end', 'expected'), + [ + ('preceding', None, 1, 'UNBOUNDED PRECEDING AND 1 PRECEDING'), + ('following', 1, None, '1 FOLLOWING AND UNBOUNDED FOLLOWING'), + ], +) +def test_window_unbounded(kind, begin, end, expected): + t = ibis.table([('a', 'int64')], name='t') + kwargs = {kind: (begin, end)} + expr = t.a.sum().over(ibis.window(**kwargs)) + result = 
cs_compile.compile(expr) + assert ( + result + == f"""\ +SELECT sum(`a`) OVER (ROWS BETWEEN {expected}) AS `tmp` +FROM t""" + ) + + +def test_large_compile(): + """ + Tests that compiling a large expression tree finishes + within a reasonable amount of time + """ + num_columns = 20 + num_joins = 7 + + class MockCloudSpannerClient(cs.CloudSpannerClient): + def __init__(self): + pass + + names = [f"col_{i}" for i in range(num_columns)] + schema = ibis.Schema(names, ['string'] * num_columns) + ibis_client = MockCloudSpannerClient() + table = TableExpr( + ops.SQLQueryResult("select * from t", schema, ibis_client) + ) + for _ in range(num_joins): + table = table.mutate(dummy=ibis.literal("")) + table = table.left_join(table, ["dummy"])[[table]] + + start = datetime.datetime.now() + cs_compile.compile(table) + delta = datetime.datetime.now() - start + assert delta.total_seconds() < 10 + diff --git a/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py b/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py new file mode 100644 index 000000000..fcdfe25ad --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py @@ -0,0 +1,62 @@ +# Copyright 2021 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +from multipledispatch.conflict import ambiguities +from pytest import param + +import ibis.expr.datatypes as dt +from third_party.ibis.ibis_cloud_spanner.datatypes import ( + TypeTranslationContext, + ibis_type_to_cloud_spanner_type +) + +pytestmark = pytest.mark.cloud_spanner + +def test_no_ambiguities(): + ambs = ambiguities(ibis_type_to_cloud_spanner_type.funcs) + assert not ambs + + +@pytest.mark.parametrize( + ('datatype', 'expected'), + [ + (dt.float32, 'FLOAT64'), + (dt.float64, 'FLOAT64'), + (dt.uint8, 'INT64'), + (dt.uint16, 'INT64'), + (dt.uint32, 'INT64'), + (dt.int8, 'INT64'), + (dt.int16, 'INT64'), + (dt.int32, 'INT64'), + (dt.int64, 'INT64'), + (dt.string, 'STRING'), + (dt.Array(dt.int64), 'ARRAY'), + (dt.Array(dt.string), 'ARRAY'), + (dt.date, 'DATE'), + (dt.timestamp, 'TIMESTAMP'), + param( + dt.Timestamp(timezone='US/Eastern'), + 'TIMESTAMP', + ) + ] +) +def test_simple(datatype, expected): + context = TypeTranslationContext() + assert ibis_type_to_cloud_spanner_type(datatype, context) == expected + +@pytest.mark.parametrize('datatype', [dt.uint64, dt.Decimal(8, 3)]) +def test_simple_failure_mode(datatype): + with pytest.raises(TypeError): + ibis_type_to_cloud_spanner_type(datatype) diff --git a/third_party/ibis/ibis_cloud_spanner/to_pandas.py b/third_party/ibis/ibis_cloud_spanner/to_pandas.py index d5554224e..35501440f 100644 --- a/third_party/ibis/ibis_cloud_spanner/to_pandas.py +++ b/third_party/ibis/ibis_cloud_spanner/to_pandas.py @@ -66,7 +66,7 @@ def to_pandas(snapshot, sql, query_parameters): 'INT64':'int64', 'STRING':'object', 'BOOL':'bool', - 'BYTES':'object', + 'BYTES':'object', 'ARRAY':'object', 'DATE':'datetime64[ns, UTC]', 'FLOAT64':'float64', From ced4f4f192aa3cb0b0b0774ece9819f281c16edc Mon Sep 17 00:00:00 2001 From: dollylipare Date: Wed, 10 Feb 2021 11:28:35 +0000 Subject: [PATCH 03/11] Modified compiler.py in ibis_cloud_spanner connector --- third_party/ibis/ibis_cloud_spanner.zip | Bin 38490 -> 0 bytes 
.../ibis/ibis_cloud_spanner/compiler.py | 464 +++++++++++++++++- 2 files changed, 462 insertions(+), 2 deletions(-) delete mode 100644 third_party/ibis/ibis_cloud_spanner.zip diff --git a/third_party/ibis/ibis_cloud_spanner.zip b/third_party/ibis/ibis_cloud_spanner.zip deleted file mode 100644 index 49b9b3dba70e93aa2694d746a54200f52fa1d2c2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 38490 zcmb4q1FSAQyXCuW+dA8}ZQHi3vu&Pj+qUhqZQHiZ`R<*`+}z1b{`s41pqN;gfv)b zATeFYaq7#kW}nCk2QPvCln|0gi&Kl6eF z{0A@)5B+$?Kdp{`GTXlf*4MYRvvkqdr!#i&aIvtrqh)5N`)AnyT*!ZnceIM}e=fv% z|LDjF5QriFGt3NhHUcZ*j}(sQ-ts{ z$C(ZmAzJ=!TJB-`&Y3Rx4wg)c_2x0`dHg<D#TY&oDom_4mEJFF~3v@yhPDax4Pl&tN#)I3C=lQNBJ3$O6Ho8 zG*I8t_`Wwgp2~dbd5P9ejtn`xdFcIi)glx6$#eIuyYKc+OaYCgfS(1tFo0)*n&dsA zT2iYNV=q}oU@sP-(<*-;G?w-VGznFzhpW*`$`?B0spR&Cl)2JO!m+#NI=lBB<|Pf< zVKUJzE>BNO=i4@S%IAj9KcsS78>DSjUroum+kTMe+f});hiso++coP$RyXf#T|(O0 zy1U!zC#~1Eu)DV|UmVk>gl!@Nzy!c%0WSW?{y=~ukDFy_vnRf!J_Q8jMG{nHrw~tQ zRTHoZ2GfQ`SzOOU$_3j8<%(s)3la}cu~!7XakRIJlUU}40C=nR6v`Ef1qwYO_Jjc$ zS}_wQY`V*Q*z8?cbEy)gqv(W-{N#LVPUj%!?%4bJH=~jXiKWd^sg+;Mkv_FAaXdpj zwWjY8#OfH%)T+!EBHVMS3T9H3O@W~pUXU3sq~(?n5gtcfyc>?Qp0f8=eC&GDmsK6XHPuxKQ-aS7dV*dSudR{u_vKGG#|Vsp37+V;aFQ@bzp7o`) z?`E02x-+S8pUMxW3TQ+UM%RMzp}kh6QS8#%;){bU7bzk}W%s3$HymTZ%wz=ktwxQU zIimV5ytbsjQhrdmbt|{dB$#<#I-g7IxNJTmd=&vkU<#02OFV5$;+c_zN$`7{)qwdW zyDboFeIeQdQ(Ro`hO%|AU#_VI58DMB*4uVKUi$t7&JoH4ABUBr_Tb~?Fwmo&8=`_? 
zAe3b`EE3d}%KJhk(p%SdTtF1Kpo-KyR%haZPRj!F{siHBU;&Dz1>ZKo0dM*|7cx?1 z{k)*})`m&TM$^dXC9M2ab8DY{(Z>9jD`hvuzNj7EI<=5KsKRUolhf(!Qt64XAop5_#%5lbo}df~SfzIZO&1ck=!N^5*jyedciu9xGeeE`H6gS!I~`FzR=K4S53>3XztBy z&)m|<_E7B2$>e}j4qtV^zaZ_)pSFG|E+S|YDsYG4j1tVS`B79bsF8V8O$%B$G}c#t z)^Z*3|A+qnmpI)P)3{Xpb1MA9Er9V~i<6;)<-Z|I(JH<62W+Thq?&FvPGeD3g@hw zuW?SKlbH#IIn;1SdB^O{i z4m}${EP*Y^5Qt4Di-6eAKmG+cBT%YARiZD=( zRTMr^=?qigwA~^iZX+tNFV)7r6y$xMmoll~U{&ipG*sq7quTC@3V zvm%G0p8plW|E14+Uf1TM>bFtCi($9nLX-mC(m)U(YdKW0JAqC>`^SjV<4{Rdwt}jv zdkJD{pN87$BSg~Ui?(`TUDegz=N!=~b{uYztbB#s#a7J$n#wex4c%d&+FM)BeG^Zq zWl|-Zz9F!h)DMdCNt{rpH+JjD^%l~7Aa8;9@)?Qm_8aKGi$aUMb%rAv0071a0I>XP zQ82c#G_`a2H*{f;OWPht-P8HtmN+uOR5RXmBxhC5=3VG5VPe>|E8djTak&IHA|_jM zo^gmdK@x$UkJ*JLRA#Puqv#aea?WTD_)HCWovuT4uE3sl_T<~YXG)sjeO~mVg97m} zf9EATeJ`#+$%_CRipP06-TC7>m05jwgc%rxGdq+R6$S>8%kMgw}j{hYWRnjPd zk};h!#%BCh1+Ta!qaw;Dqi_kute_^dgJ$^G1*K?0vFG}vg8TqhkSEvgOxBUJbY}A1 za_+<@-eY{vRF{%-uc>YodCr9=pW_~b=_pR&8I1GBrh8v!y5G)uzHy#Wty=xNh$a4iFS za&_$;B|o*T_v|xzZ|j+nW_hoCq3=Pex&Pvcaja=X?8{1CNQC&|8Ng{xmm}^e?jDy>0Sxi zez_zNtnJq;*v1jOk?n-+7GG67*M;}AU%L^$VSHwKssZh;Y`Uhth z@5Q_51^*&nqdfls>GkVW?AUDV71pXNWgM!Vdss`J6ZIDg)Lu(wg}2fspGJDmBPpDX?Nc>s)Ovb% z$`iES`fi^h<#5afBJ*eyf~=}Go+}ww;6a|pR%m*^lRqBKx>S#AYpdx}Pw076U#|P^f3-}pe?6Xmu%n-y{sslN*Tt!X z!W0{kI*79F(kTV68Xm71T6V0q`uj8<9XaI;D5`bduznB|g&Nci(#Q7wLvgR3x0-c6 zuH9F#aX%s@e>Ub`6q+xYb4htI9t6uzFa5d`62aSC>P^A=*g1P%u3KE`cI1ktP>br* z4$vmwOi_ONoP;nfwWW95KJ>NgfB5ZMeHG!Rcc5HX%CXKdU>Y*=M7~lHqiky+$W|XgAYXa$ut+hvr~A!{2}tdz^i~?rpu$xnrY4(ZzV9#`_^tspFxrUAN_N zze@U=xUQxt!A5oL1vm7F&ijYv;a?(AVR2k1dNf z8#xlYe_$vC?;8nj{k|{CIb8$a`KrgO*>}l_W(alXfK+6L?wP zb`?KR*FT#C%PaFvX<&x*fjGmd#BL4=K4c+db_~PCkse z`%u9yQT`-wC-=(X{tFse{RRF*Jo*3ha_WACuV4!m4!dJjwPX{h_AKHGL8m2xK8B<$ zpZkIC^>==OKiA@tFM?3+k41b(@DWrLP(UT3sM)jBE2!j&&mHAYVRyz4m!N$Uxh~Hm zK?sPE3GnSAdMr%c9(PH$#e~3ix~d7TTui8&#)RDLNHuK|J1|&`4SeN%q+YCbJ<0(m z$J|Nj&{EY)^@5tachY@|&`I|4WS?6~e~+G*b`x%}7@B?yNlp$ypa(*?r%$0vrqgHZ 
zJ7vwt*=y0RCVTN8sf07`V{n^};4JViq0Ia1B9uF!Sb9kA9UblpI#LhGe=b8Pu7Z470mpo8rw>Z zUx(9V!Q+Re2|AV~{|3dCnVZM^DU*9QX|p-|Zj3P3ro|G~8w!(E+!!E{g7+r?fnm~@ zbCG1@Cn<@ilEMl8n>;xyE5IE561WAGv-sYcqKl~ZEMSXNPdjKp*e)@k z!;!BLzu!7X4`d?}{z83R4Y?h8yTqRzbstW(smp~{8K;u?qzo2!v}TxPH&z5=kUN*+ z{rqa47&2wwm7^lIYd(xKmvsGZrT#Yvk!)&(E6$=`_t4JZ^dFIR^oJ3_d#y|ZSX%@! z$S(2$SBZoNF8w|(Pn7Rqj2h-B^PZ-Kv;glg$e80V%=#n}{UL=C$bPPZM>E{wh&i_! zYLr0KxqAWT%dm6kI=CPILAuFwlZ!;D5X1+ej1<1sFp%Mjl(!rRA2M7?K^{)Rho# z&#X%8KSX`GHs|H9*q4Gn#+Gle!tLAlXoqm(L;h$a2aJVC^;zmDm)c&?E9ng_e#A*Jh z5G3ND6^Te-8&#W;2#9z8&L>EB_q^8`hu);D%%Yq*kFc>ei!nk4%*o1cmrsAgjB@5q zM2=U)aHR$s2{+V8|Hgji4@sCwbJJ84(9nX0O?fen&d6^+*zB3*$>8ctNp8~;+gsx) z8t%UWa6HLVjwuFQ6hXMub*R$9Gr_!LOg2&TL0)+|rUxpyMi0MMIoW`uOz1OBybQKk zGL5|U+GAxKeQvW1nJ>Y9_W+p9*L*WLC#ID;RM=h`os%$=r#Rhm67m~7{B`p{%WUvX zY8whKF&FhrYU@+is1>*eV;WKFwhdFv$@eVllrLF$^ofsLL2ti1_|Zl>C5~L>Xjw$m zn7-{&QIGN}{@A>E6`d>m`0pzzAhxJ9oz5yL61K2J3FRmITO_kAjLh1TDo*v&UzrBB z8xylHz1<9%{VhHnr@-N!!TlvJR^%d96P{TKrlMQIkI zxf*NmPA>d#v(<^Z0sN?AbAIIY0=vhEDQma5PB(z+(6U;KAd4hKIIesOtiX>E8X1V6 zRQjmAcTTJ+`LQdM^mEXtaXAurpDr~->H|U?OuQRA1b-eoG<_DU+BRj7NL6-j7$Jnl zNEi0b^e5hwj*-M2K8jGTh~Z#Om-i4&2HDNj9Xh#$`)G_9VvlGAFw-Q*1lA^6ksqJb z5~1uVg|I~V8lD8L@+Uj76(_BhrK_Yr?)k~;Fa9K)NJU}dYnA$I6@(N*eZP4f2d2^J z%1UOJ-ZP{*P~afPK%)&@bl) z_Cf-pNa3U78VdWV+k&%vt$nF^YR`)%uLi~I$nUV7KPA` ze|Z|LOvr5nl53!MWNkjiVd=`cYM7?!3d~Wwcdtn?l;DqX?1{MIN+cQWcsxN;viN)} zKijyPZqOJB>+&q{7 zb&oVFR4PB>rnKEM)J!<;B<$qipoSn{aHM0k>y0NTiz}xfbEJ7z=tJIMTOtIb2;#s@ zVh2`(7s+;K8+PKMQn=HKaJ98KwLJE4zR~+Y=$-r)GHTlx3{f?U5^2UGSC>J{&=CaC ztFc=Ken<`6?3$bgpjSWds!E+?KF)B-4cg+${E1MX{!U zbXZX{rZ7R>rs9i>Bee-T1|t^z6PGV+hTK}tC5V>X81PIU%kx3I9rP^AlC(RDc7z?5&~!5QJYIz4jWqSV?wNE1KiLn#L8uCQ0FY18 z&vjJ|-Ij(H4OVdoHG4|c{=gLJ!TR{U?1AUq`u+G+$o$h$L379J_54%_Fq|eUUpE5u zwnlfe0u5cYMrCoxe8OFdqqpPR@zh1}{QS}Zc(F40zH(D^5(PsMp(4XZqT-E_rvZ*) z+_9EAIuvle9~wMaQH(wUz&zuZd|)l5BH=?W{QR*og;u`UZ2#6-)=o54QxrE7OdiZ# zHneL(t`p8tidXO1o3Y@il;9l!Nv~;e+-DPiT6KhXPb~oA%!uF{uII);P;NFi_ypdN 
za1Q%Vc2$%C?l1JeUR-c8VdxAvb(sZ}yEEk^K1r*@+#E3?wqIBrbh|C;UQn|;*d9<^ zcd0Soz{>tNC7qJHoiw+)*f%PkBmUEUa2~-d=~#J^ zq2KXTh`(|11*2a-!*R>omONVJV{qd^7LuKI{5>(-Ds*i#>8gZ`erFJh$wIu)wig#N zAu1&zhZEs!{M~n^blLO$V}Q=qzrxTLI@gK4yE6=YllGdJowlbhc*pJZ8wd~f7myWsais<{##rb3E8SbxCKg|35>x(`ozlTLWhJ9sRE}hSy{c2PA4?CA|?15@GgYhU*-= z#ma~u*eF5!aCJz5Si;vYwN`g1#O;LCMF@n2{(kMmoQ}Dyar(R0H;zcETll++0=q!1 zU}SD@(isnC690j|zK@6Y2Mqo7EsdY-69LJTSCn#_JC403MR3N;Hz6W&)ELFtJ9I&r zHb!61BlC+;tL&J*?$w9JH*jc0<1+yDHPajCPj4J>v-G>5!nM7Sl4DoH5d!!udmNFy zK)k?Yb}GY9RlwVTpYGUX$vhTorD?McS#hz->zO8P;`HT&j0w}Nw6K2QF-eb%5aQ9t zsogX|B?}&C^SVu&4pD|@X0?@Z5DAHc34tG1jWvwm_pJP&O6{Ma8Os)hS{L1O^0sb6 z{$sz$r*?yQr%Ef;j7@MM^~;o13?FH~hHbYltBHEe+cC=47Ms(xT$~wt z{eR7K41dF#yenAB7|0*`k2p}WoOtu>kLnF>CeCz;o9K?M&$qHBVLy%h#le!Ih^P6s z&ooO6WPlYhTo=I<(h{B}Y!XMppaaF|xPyfj%7@{-${J2*FK>&}JR&VRw70nDYurn+ zVZwI;0w8cr059vkTK5cfh5+q`^c3Ou8D67xoEOj8x53&3 zd&vYRD`%Acdr`Q#d-<}K0Q~Chi7g@%ac!x?@!d?uWI5uLp22JpsxiO+!;t1g+l3;> z1purC{=ckwV|!Z%OB++Ce)VuQNuFrQHPvKkzAls5 zle5s2-m!4T_%*j|YyR%Dv!P2PFe}MoE4p{(MX-JCyYq2|LZ=H+y+w^7jQTJN+kcc7wJQ2;W~sGvVCumt}f;9t;jivGA}AU3yPq+aYB1EIT~2$QShp@zc@lN`Xz z*QOh*Zm-9Djj>l6s*Nw1FSWb|d=~H+1c5VuFSu|Fx%CxxGsU~?@rMw+ok2E4cX)=~ z8gcfpVjFoc$=dKu*&DTY-$DP>YwvkRZ4KLnSqE^tC@L#*mL z$TkFBBI=N@QgFkrpj_IyA?V0AkX-7u$u)eeemVFg@b=*cUr;_HdSTWmUm`tIct!C} z$25>{Lz%*NF5){rc7~oh4xgYiRV{m$O~wLoHl}XL%!sc;8?hzm4A>jW0;+qY_n8Nt8(y)@}L-~>5 z0e9`~p=#=CmRQug;|)bf0FVgMB_5?_IZ^f3F;j=U7YytIk278VH5vwaZ|JXMN*m}6 zy4^Xj4RzyF+K#dPu87(DhZo`CWQyDHlhJc#WG}|O@nse-mvEFEs9{`>aI_Ti-qOD- zwL6xu7UdAmziYGx>5%!7^qI+v|DzAb^SLp2=wx?8eYNuO3R`d)=w9#{TaZsaE*EGt zY&8t(9xTusiIBjMfOItVl$IanbMPWH`$9eIVy1sGwm$Yi<}LNh!xMPYJl7z%i2M^+ za2NiW^4T)bJDKp7=9$YYh)|ydlJAQ7vJv!6RHJaFo>ileb$5xrpUP+J)AKnsJwt8( z!oGlCVn`LK;;7yBNP0FpO8ZG`n~o*}SFBY7qsntyuO^+dGlM#*0xl(}mKrS)1+^p? 
zX31%as{heHTrS1>O^MGoWipV~ZCsJeFW;m@)~jHA!efK2J^Zk33>$2tLQUkS%XP@4 zou~EE6v;y6-n)I*UA^O?#?F6OrGZMh;h(@mvZY8~zYF0IV&^bod()sSu|t<(a@rV7 z#RJ9CTh!hGtc4AjovI^HDEl^?LZMyCac{w`VA_Y;-mOuy4Zhiuk|n2lgTc_`-_*(R z;=iO91AFTY-5S-K-0!=49j2LKR&N7JzWr4MEf}=ZPHi0%VUCpLXF4N)uivAWQ-%a; znTl1Q~6AgfxopT<^>(30~Tv>$=(H3uc>8@x{(eus178GlHDU$I?}7 zyqU7oWui?pe4MhDNWqS?A+~pF|mgO=&qS|v> z>lqy}RE6R(?gJoWalpUM(Y^ju1sEGVGTdxzqHA@tc<3`i~n>G*37Kh8$Ix8h7A2T&w^rEjD@fR7C z)Tz#2Q9ZWLojx2;V=ZSPTyk4m4=!nsXvn-Q(3b|`{Yaa-V#bR4JrXFP2B_s+s&834Af?BwikjAViBTU2 znbjAA?evwigVsOm#gF_TnU?(vPu7y^>g@(}1P@Ih9c7)&| zQGn|@!J&}^4z{WaPD*PrfKZ?fvi)yI2VHX~XHr_WY}Q0TMKr0y*@5hp0qiWVP#Z?2 zn&|0~w=DAE97LAo!a~RyARJ|e{zM=ZMnFnCr(czKF50$l=ox&VrEHZDQ52402}s#k z)l1b;=%&-J~|HLp?LSucZ9NIWm*_PjAn9PO#ml@ zngp^MFl`W}w9h9S&gFK+JAN}--W?LAHHX=I2K>EIlR^C%w3D!tT~UU6;JE|0TKPI- zr4DtvDGE#YkpEkK$5i#WkPSYA&9cZe9Miz z6MX1hc&CkTKkDAlCzFAi26BI-WkUibaB$fyP<*sBW&J1-`pvK__AfewAfq_~x^ofs zUS6=o`3PSWV}LgGdF8rW2s*ThbLnA+0wZ~ZhHE8oxC(TM@nmIl2c)cE*7O6ZWauGh ziJ*lW_0}l-36FTZb4l0w@pt%mX-E4F)Nxtv2euPIayi&(4Q8I6Ek4c#wuGoaa!VGI)rZ|YHpBiH*p{n!$z6SBkM0j4?T@fjSU{yKu1iq zx2W8jxZIk~FPAf=3f^&Gyd*IH1_NpeN*qTDXWn~x0gWz71ZAJ846t}|B=+_bx@6< zA)N)=+nOQ3eb}Wbz@zcHD6X?8aeVTyZ&kKL?$Yo0eB8q*R9GyNh!-+&HsAK(u|qSF znZ42c^r0vJH{~E9@?+C{vXO^X(};XrM*$J1f6um4Dqipzn}D~C8BaPlFLV010^zd^ z7i0V+C;XM&t+^OGTyXm~tvt@+jv$vly~Z;8hK4xO1@o58Z54e9X<8T>(YXEsz+J;F z6;D**Ps((Yde#tu>5d=TKFX}Wu(BlVd`Ut%N_rwPCWM*_^dApBh$}#T{wf&- zQ64R?cN5OMtD^h!mBiulK*84)TLg*3KIc&CbeS)5ScC{HIWBO&^aRHgfT*sWK?+@G zG#`}iNE!}h;9f7G=Whg-)cg(y(YiYtz52mPG9B|F5o1l452mD`NLp-yAaq{Wth+&! 
zT3{KfK~`W5GN?3~(&o34Yky;hnERw849+>0EjGrzYa+kY`k$?-AXyTKo7bHEHub?EVCAB~6RKIxbP+JSu0-6b}MDQr#)(l~P;WQxVfLg-wqD=|vebzlq4zd;PSL&Dm~~E_ z7V)(@WWAH%0N zY>rb}rl(~{n&7w5Z(ribnE)@~MO*tc(&5mr!V6LKQGuh=F z%Y+m5o!Qryb!0kEq!M3_bI8vIp-{Xa6QMeOGq7oQvZl^n9=uI(&KFz*4-7Hf7P8Z|r{ zRW42Bi#jxemh6g^8743{kQ2|QglP+YOsm(LiGa)=ff?Cz5v^4{zO~9dL$84tcYBnk z8lf0ZcC2EV0TQv;zPS)l-u~-o-KU3w$6_k8n5zzKVnj%qG#Gw@&TUGyt8`|OLAbdh z`5KXT)|4!yN28ktI2A*-hv+5xkmvKey1G|5uQTYDaj|TgK0Y*8n#@e}w7h-LhK!Rs zih1mi5-Baz;;CqM1m%SAQf&t6i84P2>AXo{M6|bxSV0jSLRmR+n($Pwlw8?4y@)|> zdU9$2qzPO>s|E(`J2j+Zbx)wX0cHIRBN=1KV~Uao=!XkN-j1oo7UW^fIDQCJtOj#s zHd3k+o3PV!l5Lq`6=b6kU8__Gq+VYeMlWa(kEVvs_M#qMY*=Hd>O+&D(F3FqSEi$e zt3WY!ZAk;9i79k{9=D?-VPbN+c$jAa_nK)+C`={S8FyJ%T9&Gr=oSNFWadPiSnYyQ3McA{Ik-X;6$41bkNh!j6q>Bnc5uiDueH{98D{K$$zJAs1-EH0eH zcc4gIg)m1f?_&v3*h#+{qK*?f zDz930ZfzzZ5cSmPQgTifdFC6CO$Ud3#n#G#BE*sL85`rShh;)={_|{lGVwE{F@77- z+W|X-9wk!^*OR-`Nd`5_Du`DVoBh+$V?}d|iwg#`>$jskBHtcLEZZW~#%Jut4N5%i zXX3AeQ=*W7X@y4*Iwj1BxUooqcCC7hkm>u00gWoQHprA}DG(kl1@s6}zcngB067E^ zxvUq;kqqd3X(<`Ay;W1vtt?YK5>h(HAu_pK`0k;Im7SH;<5(52pyFDxVD)M|hmWV* zG0otmbGgj)RvWUI=e>y3=>qeCQJ*kJDki}RJ;@rqp)*`2M!g3?PtVoI9wQ10ag@jc z?aV=zyU_!kjar@M9<3Af;HJ^V>gJ^lGQ%zYzGw#?4k86FI%v#^xPT}jSO7o_XsC}t z?4~9Yw<0mbNiLGC?<7%TkH8wZG5Y(1X{6a$w^nq zyHo>ayNlhU+dRv8FEd)K+l{)NH8Xf6P+A^MP_b)gS9oH}gznok`9jqM?tW_r1N725 z)74`*ka_IPn!@LkoYlJGdad9?gESNvbVfc>(~H=)t<(nGK@Y+euq1nx@_qp~E5!Pa zh9V~yXCTc{=?ql$5qgW@mT@MOggE!^kupoat@wH#7 zy&lfEil=-xni2x>KP^t(uJ^WyVRvTKMnmZyE{us9cG!&Ehnebupve@iMhrA9*!SHTy6B38i^ z&KXJuXdRA+_#z*3hxEWWL7w6EJx;u!Poq?vPX;G1U5FDOy6y#`7xw>o78AwV>gIT83`}c3oz4T)uWRN+mYgC@!ORYBn2~DC=>|%guUVaj2;nm18bj|+g-smA z&DZqOIFU>^vT;0{o~RQ(>b=&TUwUc(^t#V?P&t~u6+;p7kH>5%C{fspL^Jtg$UedY z#9jS3TC)xYqdpNNH_UpCMkCY^zUP$R>Fs!MHfY1O_H6o{VC8InL(k3(xdk)fmpj?e zd5cML$L|mwf}q#Ro@KatpwCCcytd?acuz|ovmBbm!6n3q@lnCufXP4=CR#=e9iZG`v&695`9ur!xMI$or$Qh z7s1f(iQUd_nD^T@u4HpaY>CY|Zx?MNqgCeHE9dGLmKDCQHnAA{=FUOgK(1S);|9zZ zBg>BSm6S%YfEs!Lq zJLC#?1-@O;oC@Oa3PkEH2h+yHQ;+ 
z;{h7gNXeEG!P_@`eSbCeN5F@G1va#*_68etH}HrsxY6$KW-X?N9hN7yupe_TTD8<64+5{Ydx;70|oaNV@FaG$kI|ztR`yw+NxnDopHohV}lpxjo zDNTrRp(sDi5Vn5uC|Sz<8ZraWD0=mmpz_x;)m#Hg_LcZAtZa7lf_8Z~N}IC-yf#dW z(^C}ir<iFr>vQi_tZte-|bI8l&CT6|m(w3!tE^Q@>^)-1$O{&Ma9h7&Q49AMs z*RK_&dT;{XZVcLil$Yfu8!mlp--9&QbYsE%rE}rr09eCaM(c<>Es;?LQ_Z zM{+Zyi-#+m&#=h|RW(sa z%Ctz;Yyt5X9^<2|Et+Ht>x?G+!SN(UXjefB^umMZnJ9{7S@lt?ne1_axik}ExFYp_ zbKn5AA){HN`;#NNtJ>4N+pCF_h3&z`s6{;M6JzzWxYe7@62=zowPGck=aj~H!E&xNY$vPWA^qUy)XFHSoE9KGWny5>!(?>UKuup5X3vzLR)0EthEN`fYvwa=LMrhCF7R%(oc^)W5eh2hhJw!=}cv$<2$us zE#7U?{xiSHIjj2s@Yt3-vg>vL<)u%!LGArzZ(-K#CJ=k@T6^mu(u3fvgA-W9RjOp0 zH&3yW!bwbtF!t%6$5BWbIv$;18^CEi9jnbD#%H242zgx|4SaWGWxCj<#9*zm8q5(X zKs+C+h@@vzh;!@xK{2u6@#x`-_4eF6();J2`YN5Y(&!jN2FZz=%VqHSIoM57Rb<^~ zy$nrzec|*>0KNOyj<4jGk>%n>#FF8lVo_a+dQF6IPi}tH{9`n~@ZDUX&dl?KhYGa| zbQoXw3tf!eALIT|V?6%@eAtm$8CaK33DIveB7xqvIDXaecfZ4b+^f8Y50~Suw|UMs zxn>EjNt&6#P*Xva>w<0@9!o-@#CudmJUsM0M?$ioe#o4|0_7T)-$^l+^Yua=kI*x% zbBqjJIHC}-r*-hzKKTnoP8aW-HV)F7c7D$%4qjQ$3+G0o7^P#p&Gr+VDsl17fzjC8 zf}Pse-(cBXBl`$Hc){P`TS-Z!OVfLFVvATZ3ua=2SkWh`-InBz*2aC=WbPcBhFaIQ zuwFYYFWlhsDwS!;c%jcJz= zggo=X8}H_K6j~YZXpt0fxGY)mFXt#->t&quFmltSgwd$On8#d-Tzr5tgs-Hk9W2Qz z3x6mfucRRx~r|s$;3QCnoPIu0X(;*7?Lj3$M&&V!HUAr_ya3n?{xD zeiIxx3B-|Imaz^(C*dXJm=DqO(J;ipFGOivkd}VA6?*7CST`r&K5b5k$xTp;QAgJ3 z8akDh37Z)Q%b#7Cx7UM2Kj4KZ;^uc}U??Jx0-$7IC=d{Rj2F%<>lPhIz(?x@^adq? zGGUy~AxH0h|A^s9s2;Z(i_z9ke41iCz zadwG1M^^0lul#jZ^879Rn;X0wiKGBKW7EX-o5fpu_QUx@qac~cHttwbVldA6v<&t! 
zl;tz3jO5&*PvJ;k1Ho#p?-nq*7$3OTciXZTkUy(H@EG(reosOr%-Z6&!|&3fpHRRg z)ZEA`!GHgC5|X)aXNm*>s8Rs{%>Vk=iHo6;jp@JRNoq~l*)(K!!mM87Ew~3Ig^k z>@ZE*AFQEN)M?!>?*;i+Sf79#CmkZ;j`~uqM;r^fBuCcy`Sp24ZO_P!*JD>}U7qyq zN{5ymHLc{kJLOB5`*uj}P8?~3gos;ljmRXVG$4g>nxLie%7`nt)DcS?>KM_-tvk;q{9|7Wt z!%3KktbB}|%41Qdh%C~R-D%P`=Nuh>Zj9IL36T#KE4^=QkHw@yQLmAgO+f+L(v24l zK&lmioxNWdmVy}se$WGQJ;+9Bzgp4N?;LCNEWO(;_sRL1i?!8qP`T9ifUT=cY}jJO zIfS%@4dy@Gax-SajRwqS2my^v2QId)ns3-5#6Hyqg+_&j%6%o_vQU{!Y!riKxU1ZB zv*KJQTy>@FOlIL0d*J?L{CNABlN&)cA~NS{@F`f#-FtGU8Verlq;8 z=sO!%`_;E-v9tK0I&UxS^C?ydLjjvLU&=$#^Y@7tAkxRtx0n3b!YPoE-`2s)`k`@ILg=I!GL ze;zG%om%70?~Z z_#tNR5q35ozaEQ`@9Qw9{-VAHuL`TFc}0u4hLg8zBfnY$OItZnUz_>4tM4}(c!+LV z8|(X6kk9%*H}Pf&x;Q*}6ONv_Aaa8m)`On}C6mSaK`X`nn@%~za=I(g_mdXxJa?=I z4HeV{HlX-8@HA2@nVSGzfe(#ma6!K~-JyKxyXGixW~|||8@Mz~H37-@bf(9ao*SCVI(F`SJ?B6QZRv!S0wW)!ry#p`zUgX!S-`8_s6)wLEC6AzGU=ZppvJ{fWX;e6U{2xb zfqJ>=_y-4_Ul^mJ9Bkn+br?&niW4^ry5khLR*y4@Cp|#NCXc-S9OK?3x3G3-zS1jj zubo5BpK#lMdxP7fuCXgoT9T~J@Em1M?9JpK-V%ymr9N2FnedVXa_gbf);Aei3}MUi zS|`LGI=yY6qpe_>aIxu~EqDOo^#B8SRE`Rw7C_QJ@4YHD!z(@?H8Y z2?CHB;7C|c@a$(vpl~=tWYcv|z0@CWrBQ53RQh8Lc`8rkjgeG}Wa@(k_=?WZ#1G=N9wF?fEERJoKrBZqlgdo^|AZ&>=+DCiB zmaf`CK?nwC-CUk#H)F*q1H9%W;^Lw}evCa|2JnGTMUcc}^N5j_f`6hUEw|S%5kG>C zp6y;`*I$OXKO`Q`s63V+le#ubfdk?=fvP+u7o} zO^XI=;b_w*8?EiA49}8GN`d^G$DYThwG6AF04-{*75C~`G&`=tv6_cPGJqx*;jnaD zZ5^G>77jI=#&c#tB$x<)?d7YS`KTy5Y17#c_~`ROv!Meft^+Fq3HoFyQ)edXS3+Rm zK%tv>Qii%Xfxdu&13r0piU@&%!P#xhct%MBh$MJC4W2re4G{GrGLU>ZNQo7m3fl)q z|Ne&ueey1E8!Wule~AI z0=VTbed;?-rFu}$z>YNwvJbzrsViv^ID>oFoz{3-R0Tb{dmJ0aK@rqKW}>XzH`$mGuC~N7s=$NF%jG zxhlp3nAV8sNNo0bL_v9B15uhwU4!~4tDO_xIJT=q1Y!Ba!}5LL*A2D<@u^=4bXSBb z*~Aq<*>f{sp+icw{9s8p1N)=H~911z$#r~cMZAJ?OE!6>XqPYOr!$@d(*>_dc zPvCt3bVtN~IEwX0Yu{Y2XLl^o-x1>U=cet>rldM7R%IF~_fH(d_sAIe4!L%b8RQTq$Zq6>dE0(7BT63ecGZxfMo4F1>!Du$+s3J2aw>7hSknv5`0V2t4 z3TCFe3d2;#vnT?g+;!&So#yWREKUuEO>}N94|&Pv$aSD|?C&hnjeOwb%4uEI4Dc`f zgF0UC_IA4ir?2%J{ihHdoRkxDBzkIViX~i}G#PBa^~w**O2%8RT7Ex@bSP;|(GYSL 
zxmQq`kOn2b2mxyAr-Hl-!?qRue#aj0MC1#8BzF2Gtp?$G!*u~ulKJKxG+>eMpQ2rH z5!jI;-5_}5$IC^ezI;_U^En-BnB(#AACme)gFV7~IqcouD3eftrioD^Fn#fki>!F& zXR`}OqQiYGpYfh-gQ+v{jTVZxe^DP8M)vg1>1y(E;_*FNc^=xXkOn5Uod}CJ`KzM0{BgvScb<(4-60eg0pLP5&?8gAi%{k$nKvm#7 zGliGJ*P z<1bjccUHkJLt48mFClHvhUkC~?RNc)G}64NVs$BYxgSLuH`;ZcdnL zxZhA(S`Um_Sv06!YPsfgO;RO7D?^Mdl$bSqD$_E-fVW0r(Ilt+w$sUo5dUh73-2c< zRUOhK`xHZ3q4WY5Qx?qHF<%CUC+cTBX%(piUzMIatKF6Ru#_5br6x14`gKN^vgnEB zDyrp+Ds=3N)K!&_nSabvcG~puO5}w2!aO$>sdI1} z*kij(J=IuJrQ<7=*ApBn!?@E27 z{M89nMkU@_e$-Qo#e&{M{E1LrPalhzDoXIrAvl@N@K#!0-h$NwZwaH%_5Srt2BEF~ z_2YYMpRs81GQyy>bog|HbOy;r-x77Bb;}RF+fbN8qqQssFqlR)|S3|GC?b4}<<#O-9KB*D;Ni!&}2+W;rgywOV-NBLr?349dtldqf}lThq~?2 zg`N4+OUTQHviAdYiTKrUb_ZdEhaNBqM8B}I$?s!9Iq&7sKTEaI6UQxMVY(37L#0H^ zIo*4q30Nuo6bfb`wYe-dkCDWGaMhjSXsapKfY@Ua756nGSnU{TI)71n)ld{@8ilH8 zO0pL%2WP%>FF49}LlNVrZ)l^0jKEiTawmqZ5y=!9%_XJyY^#>*f^6T(@@0tjGi9^` z4UdMEPd}~B_6O0AYNm8EMGfv1fHHC2sB<$b-ovxM5fs!BsY|l-BB66PL~?j7cAh++ z548)x(dGB^n!1CoI2Ff*Y$B;c#7}_|sy)(02OS8QBkggp8vZn#eQdvvDkJwQTeoSp zI`(+p-O6#Z27E1wiLmXkaHW!4ScCeN|z>BcXRlh&fxs6BL*etnRi4o0@w}`aFd0d0o#_ zj@sz8J+rO?B=U#2zk4|4D!Xlc8JPJoD6v|Ocm~TlHVvoNG01m(+~#iKmH_*!IG7vZ z_<@Mlq{JApO?m;b=y(6ld*-RxUM zW*|caD@lm$S2)cjoEU|RR&#TGB*9+oN!4VN_{qP%bKv-E*i1Xcn%aLC$n;Eq`pCU9v4=(_hObq_jT82t3Qi zWOcQDQSo#A!^)9)&Y1b)Br(NbPUbTYIa;)T!Psy3XqG_!%8N5pvS$UPcIv4~Sh{R- zww?2-!=Qn4-|Cz!6qF(z&8_B8!c6x$tH0x}CNrO3AJfh2K_ACRzMggDN zjsW_ydULreNTQy{&w}YESIY-2_53*U>4YsNpb#AlWrwjoAfdVIlAlJlp|5>UHlzvYdJnDcpAc`3L$|kFXRS*$eBE_5|@@y)0h}S zktbO*1dRDfb@>(9?slzeb=$fXcPrc$?;D$rK}itDLq5Bz_vi}U)mxzEph1bmh0`kM z+CY=QdtB8z{^$qBAAJH*`PkYs|FxCD0=E{6fVGiC)!;k(T+AidXu*vbn(py)KrLaK zo}(jTKL?HJ3J4}s%Qq92_z5M)Q2D5(=flC&lGs8S{-iC>zsclv+V!2R%>3p_rrZ(Z z{DHyx z5VX}G<*G3s4>p?box18TExcjfC*uEmN2f%}t4}Kq0{(k}z zsnObT+}1*WZQ)0?WRYHKHn-Nspp99N9p_2&0utY;{H?7-h9-lHVckiaF!nk-jGJ&{ zV3?6-96Vo&(oGcPby5Tf?F+Lc`0P9^`Wfu)=Uyv#N-71xo9T)7yX8L1f8r%v6ErY% zq;~rH*pj#_;6yv2{q1pa?*r8LrixWA5>*!~m4Vc)>E~YJ&JL)1_twmCE#69WZR#Si 
zEVdojzNuBo<1nDvSye>eETLsY?;Kp6ZE?u4QA~?}z&UZ3@}9XED=L7p@F}J`1|eV% zU3%R}syfxF=&(poYT|$s0f?7Qrs|dVN6CFZP{ElDplS}6ni3oxQrh)%!RlGG$h8wZ z8h10^-6vn$CuH;1S2j5k6fiyA_rpP!2J*gh=)QzTg+jKZ)e=%H$CFY*VYRVLY6H+~ zQgfVH0S}vap?Gf7`QE@#e@s$%2BWgZ%{Y4Rf^}o9bpWC2kSn3 zC=ViU7Uo~aTJOXWEPWWY?oH-i|N1Ow<%SL*RukTiFqIvN|vWGMZvQdwLnmthwQo5k6o z(4Am%QRsx&mQC&dqOnyDsh#{_Z|{>|v-%(p`q^~LbLhWx_l|QI>=v$DqR*^b!2Lrp zo%09d3u1chg7;4jlj8yg(Xm1&Ao1om+u8B+Cu$&qr3IRIkP#n;xK=xPs}6ej^kNTz z&}@)<#$h=TmFDp$gMMP)T;iD*a~1F%>X_pCSZ;*x9v@!jbO<;QnSh)G9TG(#6;zlR zTYTom74VM%BWP;X3B$spQnGO~^blI`xJD^WY$ys-D$jccXJT%5yHY?|>w-S(OK*K( z8z!86Dw?Oo!>sa6b|ddrLLOEKi!SMc;ZfU8#dHTz8_U{{xZQ|Dmn+!9a;t50iLJ+T zZ}9_!#J)~mp?Xh-aRxlZA~r|;W2n8Zl@?ggUgXfqg4QPda_&=JqPkomOvL4o?AOO~$l zLsv$^79Cz1c%L!gVal%kLrHE_=W36-RGcy7W%obb;(HR*<1fy{oolpebcZu@ z#PXW1+7a0Myxx@^{<%0KB2g?CSMP!UWM!?W01k(r!oZY8mPlcWJNiQ7@XzvT%^B~F z0x2;dm}OQ!83SQk7vWhPZRRWvghI7kcCajtS?Co-^>3rfQN&Og+&VTZ)Iv93>EN#S z4~p&|v&}a&t~~33yvK@JQmqD+8ld3|?nAgUQlQIXP7ryV^mO55O2R96EZuS_SOZ@z zOQ1POne_Al!1Z5P_S$I9cqUr01 zL$gwsA~~eC{_6C|K(N7J;ATAF^~(fd)G7q(4tWkI6vK5^@~1OoiO?g#F0%6YGqj-K zT%qKjXJ|0jI!*1`HCF5K#v62AXN*@v=f+(;#~&ZF756D0$Zi#rvzn>Vu_-9vqO+Zh3?Pig<>Dmp09}+0!^WU8f%K za4jzN_6WkejEpq zZ~S2Mr3!9~B)FRt%2Bng%N(C;>93=W-iD?us&`Y-{|vG|Z^AoYRWw}#eVWjWb?O-! 
z92*5{mRU4hwUMn^o1J}RRp~DCbMs5!tTw6B`|F|f^nDCN;vABWR?3U)$m!^odaX2Z zg`b%@Zi*|7bWOEjck zC`(XgJh4(-D3)yC5(1GksFVWEqSH>m>>IHCBqjPO(%Uf)=vg;d>fY%a*iE#}@*vkZ z8NFMcepWfJKCnCD^#8D!_U^c?UtNpkGZQr<3RqMY@vr0v$U7i3Jw#_V-NgO*swZh4 zz=}?MX)pZ2g?8$c;PHE`i*dzDc=1LNq*3VKJ8)2qDUP9Ddf97D)?3#d_CH0}ibSDq zEC5UCy?8g)zv;V<(szmz6`ScjkIMP=tW&=A!+b;>My+3Ci$YW4Br}gnGrfU<+T-BO z{8_@wYLa$lT?V%pT%|-&_CFOAfDK~u;_M{-R^vPR*HCsGOpk;w*>CwshIMz=FwE;pw)0feq5+zJGKC@?*wKG+arA%bwjt_1^Y z$MTXde$x@A*zh9k?2_y1;_nc!K{U40_V+(uo08bDePdcNgyxNLR!#&8?wqls7d6IM z2!0mQG*be`0@q9mIU7f6k{CNJIF{B4&DbvwX4LN5t$EAs8Q8A^n@axlRw=E~@ZFzF zajXBcUCto;b1(lNzqJqg{};dYKdTrwfRp+E3BNT>UEgtw0}Y*;`+L0YX@kwS1q16c zVzeKp1Xs!JDcKFSzFqXtB-0RVwFI>#`6S6r#d2e6n=qO0788E{DiHV`78K(8QD)W* z)Sd;K;@C9#EPu)eu`es1LHpkKbxm-W@cyMN4S@IN4{!J!sD;tS1znxU2F-lOR#+rY zZxW`!Dp~DF@8EjYGm-~vuK+dZ3c5HQ^C>Qj#t2=o|Ne?WL@wsyNC8ZHPhz4vLC5SY z4Tw@C;mDiK5q67KB<0EZn zn&Lns#bvRX+n#4ToIlsAW8p43+zcO`=|(c|ro4EB*YBnA)W+ku6{Iw^iZlaU*bSx_ z1Ldqkd23~Ckl)-dqeGvfW$zMPt`q0Q)lc2!VBw-S*zwb0g|?%8AApYmGZ!P9k15fO znfXdR_t7yH{HLJ%?4cZ-QG8BSxFqdNS{3&PKH%xn87-P7A-M1kT%bPYGU_~64fMyo zWovCr`D|izsq(SKYumi0?OLk5Qs9OBRj%Z#@@G?%%d3Xy(i!n_?E*^sM2?kI$lmQ^ z5rQU#+nft_xhQh@)j2QgGHq)vo32Xj z5gupE0T=8L5lf}~+Hg#!f}-_>XFMOnW;N!ykA zY|ID70ykhcDDQ2}Kil{AZC=yR62%Kz2CbMbX=5#B%Nv=wyXl)ttka3E>`?bDXyu#e zN_KpPot@^Ky`5MZ%Z8b_?0N;yPTeCXH5c6y60)f{=l}lv&y_>DnFN=ma9QCt7@Xg4 z^Qpy^BF!Zyvl_sYYNi}9-lALK;b9ZEgDak~$OD4=MpNwdrGYjwy8C@=T-^FI6vx*D zo$w-B#0J~Ni{0pRWVVcR6${)!l%cVS-2k74dFHKeHK;4a3fP^h_wxqDjrjh7pXTBo z)vw=1B~D{rX^jJ%#6uNmp`mea8d!!U4Rm^5^UHXT|pdfa5OGSs;qkRk?J3^eAt2OPX{6|?+Te{ZPwC0;$8QX(Pw;Q z-cwJ#ZU~eB*6{3RW8r90!+|*JJ6t=A2K-EW1Q>lZKRSkPre08m2nW=&*JM>;)ks(N zrTq1zd8c`?b#ABip?}(r(d-yXgPW|%e$?0}Ug==vjdl3!l9GG6j^Kiv@zf2A==7T` z-vNd*jDsdS0dPG0h}4)LkpFj6rzne~bos9w0>c9V5&YLp-T&Cb~ef2D1 zg38tm;JKrW#wP#xz8CxFoR9S>@M?CDA~;rT$*{FW`)FW=k4X7b!6(K7BiQ1SuL3 z{K;jOanN?z^^~*Mc#YVdC)U}S={7J*Kw6chY$bGRbw}VabEvJfGwD^O3|wmC4%gx~ zP%F(P-d}*p&{3V3bcGvv0y&%18?%!W5+?p8OjWV_=K8%c>DHz^q3ihvzMr@xRi{jm 
z_wtOxGCD4)cSA{Rwr8!)El707S_TaX{e$5@eKk zl!`0cTOli=3=|Ax44j8zo8Xe8iz7i~%!w=^;Om%ZQmYHC#4Jka*h5dUsfV;T2E6kaQPc`aap#e#|kisFRCd!_JhI z+Be(;RfjX14@}oeM2ycMDY*n`Fzb|#9dwcbYh#M8dP7}&y0)~H-^olhm*y)48Qr*+ z87Kp6$hSujY<^0#sdM;#Ux)PzJmt?V>!^^vHq2h~`#7r4$gJL8sov9C+u~dUj&V5@ zNeT@QF*LPIEK2#OHa(VLW^^lPEZ}JRGF8Udv(RVyFh7BxQk6q)x{HwI7LJtB#I+~N zXU{nD(&k1Z0897( z(t6M)cOhM49ObQfDe6`ZX1n0MBRd+Ez9x>K3VtkbK3**)1`*2c~y z3=kqWMn(>}ieKR?ck*fcV8h&)L%`N{VJzopaS?)Do`U%YI;*21XXXI1SsbXkcdC_C z|Nf%9E8i-?PLI9i%}Y6@k`s#KywbB(CoY8~F^@X0`=Zr3FYYi(@%~(rTffpO_dzGW zeU0KvWb^m}Yo<>D|NEJbuO2!?Z|ip{QhR2Ev3%@_yG7E==hE#iF*k?Di z%#mVq`H*0p;>w=)4_D~7^R~MET|+pweupwA$Fn;vhHazYL+?ss+novUG5Dnxrt!J! z+!b3CG%Lpyf!{b=kYOp8O9Th2CfAmrmq|_7uK1p~ou-c~7YG5(h@wH_J%1Z-ukVf2 z>&CsG$!#Bz!!?l@p`>Vqg}ay$oK4SU1$msrC%JZH?wmYp(V!`>iU;yu{%*YvwL59{ zRhg`H2dghD-&(xN@X4P?kc@eAS;CQvLbuo9(*7+O%*(5Ngb=9GYOoav&V3r&$^gN9kV$v!5FRV5E%O(m^v zu^3CYK1!!CZrH1-^rKLs)26(KYX_^O0{2k>Mbb=4IdYL7StoR3D~zcUI(-V{fBE`z zllCDpGK8mWc(IY6VIIHvK6>!s7AWrDhQd+aO)wr!=oTm;q0))=p^+~licwIB;1qY4 zrs{yvxe6<(&x<$8Vf1UZZi>;$XuMnMSo9-MTXiVVsF{Huj|7s-uR_OQ1uUA-IOQ7D zdFGzxWXi=*O&M`zftJA%k(lPKc-QS7hfq>>9!tqUPHjKl?d)7}{a5Hx-D z!V0H-RT;`9)Wyujhvo)H*s7?kpLl>EbwV=XRT3Y3?SYFLz=*z}j-|Q9rZ<7Cla<-v zvBIZbyb#h+F4!M`*@UR|FY^oeb7A?1=_wbflne(aftW~A zztbW}7%@2@p&h0qo^a3w&@jPX*yNmggNi*tk?z&u>NxnZd_Dik6y?jrEvfhwTP30g zqNk>gC^5{W$NjqXZVa>t59hBqDaCzF+z1gQgE#zG&1Aq6kGk}``HHLxOX3SjLX;YK zjrd-fR1yrNtgghhB<}}?z%T{4T}J{xfzO9NcmO@()DYwN8F_4SvZlQs@o*XKoE)gE zI~XgOV)_QsFJUg}V)Bb#hSNY!^?(KVL@kCYPDYIv3#9mW zOg^14aeDy}T-0JJ-a}Ztz-QON?LaE2SJ`R6Je+!vku;+E=S$YN)Bz?rw+%#+5m<)F zU+|1b_6$SVrpz<$;~3v>$dYVZ`cv#F!)HM)>Vd^VIf5vk?&bICcO*<*v!4#KFx_RTE zlFd`YneFr;44;R3iOK@V{J%y6wKU_Sap9fM*z&p!4h;6tl6v4Q>bF49YhjZDd=D#s zMLQ0IxG#7ebHgiIz$j*L-rZ|nEN;uN;g_z-0G+hKh}pJAUIA_`kGoBJ2EcgoV@+(+NSBqrs~qWg2E(d<#ty^>@yYDU@9VeI^(S&>Dw6HqS z-Dj2J7McTX|*4vm~VCx})CVYZMji+BOQQn}1x4VHuap`oBHs-2jT zU^$}{ENb7rS}1Sk29LEpy`appX>MDyx!#2r*LWGFu=F(@;`TS4oAWO)JJ$nuNc-}4 z6!b~*nfB^ULT0ELkc%dW*vh~g;}C=2Qvd#vSBJMCUw)yNy+Esy)cN>z`d2@^iYKqO 
zvz#|mwBs49=4rIk24o27!VP5M(bN5DPbe*_mj!2ny;FyKu{ z%skltX{=Nvf0-M^aE_U~jWi4fc##Q$XIFOU(a!0Js?fBPzysa{7mMvcDodoe|GC`u zUX!$5G*v>gfUS8pA}v6lsQM{z}%6 zf0{t^l*vLLi`KZ4O^oa`(m34ACtPcpd{$^v^#RV_W5V>b%!O<-pV0HPKM{5Xndl+)5G*@wS}S zonbA-Y}yn`#t)myg-tc1v3B@n`-2@XU|FpT(iYx^ciuGqQ1F{B$S|mkQC0z*6YTPa zG}{nffq9dy%^<0k2)~pruoWMq>s0Bb8p+aCm+pBTW_b?>O;itS$(ARc^Un*7-Mqx9 z;A@@2~?W|*moJ^9fV7vURO}`9Cz>KG5 z#eZ|>&z9D9`*>&8;I#7ht28TiOVBW9RO7-!W>+cPetk2rVsLF~RF}bk?y3W6jnt$a zepi#1FYKh>W12tQH8iHwrE>hfeS94#$wWt)V_i0C$-DP5p1Gt4apbzW{aOHTh%zmZ!X< ztG1fW^zr%HjEWGY6-OVo(z4Oiws9nCU%6S^ye7_9rVe(U5kh^zp`*+TE9QO$Blz?G zh#~p<7a^C9X$5%)3j~Bi3j~DwU-wD>%>?q_+oYNQIwx*K$H#SR6#0ku6HU{L1&kN& zSTfsUeGzp<#YRDIBF)T)1qa@ZEdF?f2oOC2C2tRu4Hwc=?d%E3uycs~eH5eG0VLW+=_}XJ2I{H4A?omF;C7T|r z<6uRHpg0N$_gGV&q(Bo*LsiSLe{0b|(uy40KvSKWo2$K}BdXktZE|WwxDq2O#IuQW zjPh)354g$YJ%Y|$Br(}<;}K;z_YR6aFatafG=jQ^7$ALKX~v{^GJYjLL7QJQDt0_| z8J-STPNt_uUf@V8sT|IlxMl!U`3oBwVQ%7@k0^kbuOKfWXtZo&WNQf#^LXiCW8+|A zG&|TC+;^(QF$g=AEUtoIR)^bxM1o zc}vV&F6?`sSJLSg83y|Jhnq^&L^5{c$*#jb@4jRuC;GhC- zL^_cXA;ZipJU)i0?p&nDSTu5d`}5G;+~6)>K7kb86$}<~O28-&)h*R^-@858N`CgK zQT;e7hGs{Dc&=Jk@CdY_TK&0#gSRk+85iUZ!r0>%dGAy8-vb~1dl<-cYlWPmJ7=gH zQkqWcKl=qvNbqHQ0sWrc>Kif%pV|Z?tU9-U#uK z<;XLEUfxS@(8i5ThN*O{lIh+1>f*3G(>+GNtaM9&)CQnKC^NZ~GCl}P0KEeN%gx;X z_H9>DXdt7W65(bg3CIji>-Xz!(3Ii&OiLBjdY4+FPZ2;}WqpIa#rxV$nbA-)Lm<{+ z>H4^xN-sr zoZSjUq7{uc*TUzuDa#u#7v)zdBU-TSRVJ)1l(A3gbTO+FsUz9R{cSnFq&?TLDZ}8G zf2Bg-si@KXJzk_-A_^Uluu$D}Va(3CX{Vp+{Q8`jbHb1gv%2Z1KkuH^wMFjYO+E zIJYoTnqTphd!M|RUm9KAINg@gh5VYg>7eUQ?Kc2400R(?g^jH5=e6w9`8zHDDNf<9 zLLXBE5aHJuTlUu}ZpR^Zhp{|6(1MJ2cQ-DwkWW&|Tr7GhZP|ykf}K{&Y)UpOSY*TG z&;Hr3hhO>CH!MF8LR#t$z~xFa0oJvk%JZtQ;>*a&|2UQo_^(CiScteLJI>rBP2?TD z9M=YhZBm)tg+P3u*uM|WWcD{mb|{cut&f75ik+`Evk7O7|FXylJB0B-(}6q3Yg9sX z_QvlETH4ZThLxdAE&f3n?!8RJ^&2{bVg$?ngkar!ZdurTW|wxMII)eX5pyNCUFS&= zIMp7C%R_y3(AG5R5yRd%{YC8K3tNtQSS9=Ve6>b!Gwv~PVDYM-C&|f+YRkzB7<5{m zFhvtLMvZ%zpT~7)tgkR7PWevVX6fX~Om* z{=$8Zn5Gq$UQMN5&sI1b7cHnt6;h-^mMQ6dbQB#BhC5vp$>l3ZDwP7AUZF9N&tx4> 
zc7Ti)iNz*_1MSNb65(0y6T8~jG1f@`?B6QL)NUkYF2Sh9Hy(z|7c1s*@3h_L-xwIX z;if%wtBs=9a-SncHlxFTP1jN=+oXiRGesBS+YgBw;i*>}ZV8_qlHK=G#=&BhT*V=0 z>N6LM!YTsysCONNwD(xxkye|S9}%ZuYt6sYF_LCp5*4wrt!0*FwHU0qt77`^ zH6h#lRc4LD-@?%5Pp;|H+#P^9#!AcweH*eoA+?aqpIty*p_8oE_$5 zvfSdz9fvFSmAL|{io1Phb;bJDnp!n`(2GJY9KAqp$ingz4Nd>2_*}k0@be6@Q%M9&B3o`?Dy#=vHccxPDI|J81@;Bb+1-N zG_R7AI30)qN750s*f*c)Ljzvz3&W*;lm34Mi)9ew*43bVq?!c-9AsZR+Y zgBjo!86J>k-N9cZn$P%jdes}@9vX)&(9&^)$&VLrcH|7fBy7Sk`9-wOwaiRC6 z+#1mFu%04A97lpy!+bUPrMzIq+YHnx!!9JGT`CbVCnn-e9GK2gH2_V>Ylj4fZX}1P zF*e>CLdGAuC9$KYu9*mvGcBux%^1{UNNnt&3%J}!9Kiiw<401vVc+_L&x`XUOY;h+ z7%TAz6bLiMNaEaH zx?b26O!i^t>eCl)B6Sf93{_X5VTTQnKKK|$YpRQ`s|ET8Skk%aRS#*4EskK{fkv`W zehu*2ToG)-A50sah2$nSjvz> zxgSZ3{OBR0`~Gl3Cpl z-ioK(?#cck&hfAC#>x8Hncy$P2;j$`{~F)k`z-fQSh8!|;;Ti{iUvzxcm4QBIIZ_ z5BTbU+;TnG@morG!tsc5faXHdaQXb7OyyO5-ChO%@=dY+iFdI7>mK4inWq2igX$G^ z`F{s3NIg%QaK|C!fu;Dxl;{kFR4%e?i6FApNNC_qBoWIb#|%APvC{zs8;Fz1HyrW~ z2f}*JyR&0koV=Dy8TNHvEv!y{?q4we8I6_5DoK6#giL4XyIAY8jEs*j0b_;*vC7NF7b3rRjy%Tns}XSW ztNlCiIGvEek9b-d*DX+6>Wo}_>Y?l#VH%?}kw$E5*Bx4i_HvW?MmC?)#BMY_+>y3C zZepA9*h_v0Bz3udtJZKg{@}GH_s;l+{kzl9Ra308lRQ`s##c(qM6x^dKP0>Zj`wCIi z5Sq(zYHx&XWa@|**&W-BNBsaAYA6e=IB};f*U|2?p+r7g_EFrKJ+i1Q$c1W+4Uo0b z)#j6bp!ZyE{2R9&rroYQD7b$!#MTJYfbLds&g7l&^8NF`>)G6^J875_;I+$lppn-> zblLfZ5sU2Uumszm*j0y9!5Gr45k$deg(d=7lHK}ngl!qt8jL8h7=W^6PPuDGWUz{k#sPMk5%CkLO5GF%t| zHnvs(U_U?afCbcFzxz=@@J+XHnGsVWo!zOeIv9-;73sh<>u+$h@CFDI<8N1$Dy;lBG0(?H-bHW@WyiYeb7A zo|lHO`@HOkt3avLT9olS9gRchcg>13=Uck!Fl40J{W>qwS8(*7ywchgus@oRH`gyx z9!$K?|Mp!d{S-konU5h@=sHjR`Dda8WAOJmK=CP2AZ9J;#biOwe_T@jRnWU&z$h0anu90%$?f19>4|oGYK2dz>F3()fm;FD`#v8`(-SIyHmC?N`)$Lr&R$#@8MIx&%!AuL@ocUn_5_53_=w`QM zXr&Sf73J8XJwHil0~5B|n^{Jb7(V{jt_aIyOD6Rdi6;~LgTt8-#QQFlFd-=OOCQT! 
z?CpEB-ve+qSw#?C5>Pbhi+%nUAOS@jdeZIpd;R0!aDOWX-XvAl%W-5{fh*{iA*6$G z$GOP|%Ft+G^+YNsWt=EfL&LY+b4oFZ;k?etmnT`GEzk@XRKHmeGVO!bRy7D`=Wbqw#&h>Vm)=RT0~G7+-+Iuj;H*BoPO~TV>;u(@lN-^IVO9ed`(fhPEYnQw z+1m6lP?>1j;FqLmusJoJ)a^ZEHq90Ci3$83Nl+WlgCQ?vfZ65ZC0%zB~~*#;h0hcu&+ z1I(&c%3`q7BEdXI>Zox43uo(ahP8%bBt;*Pb;z=p?4_D~?bfS8nbgrTfGz={wODEy zQraA>4v@G3d?6V+AMqLqnwE4`B(fy2uRe9FF;$Y3HfXV0i{*G!mW)t-41oTHg9rhl zqb-E}-C`tC9{#PfZ2#6ujB|+wfxeVp)?ySY-`EUQ6MJ(qL8bC%vw@6RL_3A-o;7w< z>_`grRl`I*8Aja_TAh^q(j#>+e+m~KQ4BHfunscVHx9f7YC^dnwg>Jhp3T{wG$s@R zi<^Iwhx1l$!OE8YAn=W<$$!;vqt_n?wMVd+bLX+!Y)fQg$?Do{P9#BLn8fi#y2r$L zlXAnGDail$$NT+X40pwvNP`0-x1RW|NgYF;Pn`>5onyMNuRAv%Itf_=NP9S^LrNTcEOH)E~4Pq(|bmdBCo;3b$OmgK?whaAd9kT;}+J+avip)Kl>F37*<|J?r{5;a3ZYX@sLL&N{g z%nt+eKcpM~`LdarF{RnRsPt7-ARzqzdO7i*;_@FC)Bn1()M))T?c1%Pvy$L2gx>EN zNlffh@(YV(?!9J>K`JA{*e!9rx=g{G$MMozfZWQN1QCsFWm@pv> z#r(_+vQ&Jlm{|x$=`oNVkTlo``$$U-skc4F=l`4>$bN8HNXQ zBahxNXx1X^f0!{h{n?VOVXUj)b(A+*jHYBm%oIpl&5{46k%LQ9k$v)_V|Rjbqx(>} z6Om_QlFCfJ$RYn&2N4xvbkKV7?lLor$?j; zw+s{@JOj#4`HS@UR%TT}h*@3$1pkr)m)!SvTWJ1@Rb^7-4zkbrCBW#G+!T{`J>wpSYB1thsscX0k zHS=-8>8X{K&V_IT><6U^U5_Y+Y9`?Nqbo6MuCsE$X#K*?4!&hL_lSHBy0i5>M=ss+ zLhU2O_@_w^9!x}jDWF?SVm!lnzet{8N18AQePJ})CrSucE0-}|%2wcORiDH=7w{D9 zfal<$g(OvFD{Fg#!X9I#jtJj$G*-xEI_(Q$Ms-Ps3ynoO9RJ&29>Y-_L8z;pMY*&aC z8-#?#hZ5Zu#N*+&V$HyszA#B-EF`!8AoHwd$&=iN7mkS4nfSf?&D(meg+ePWE) zUmcPy`gYf^V>z3|6UW9>TEVXByt*%?ywad8jXejru?=hO^yg1Wl`D3EOj@f0;fMeX zdt1M>>1u&}q1m{Nc;?DfZGPqGu%*k|L-sy?kok}~;e#4xyQ&Vsi9;p@M49;IJTbV$ zZ3(m-d3aL|-zMuCxzI4Xp)(i6J8NHByj3f9!HkxmcC_zomSJ3lX=ktJVF7ymL@8yo zT(Sl+2iKQ0;E0bRx?ydbgN023M(VhtYnZN2rDA1)!V!IN0aVif`vh^4FCuQj0pD7V z8-8|zLlZ1cXiNwYz0ob4SN7J2yCk~`rpb4yIb$?LIAw@qIjq1t`%#YPa!bW)F|M}z zH89tkqjw5oFSWC1XnZ&pgm=)1aA-Zlt=)x~v72C=*?29uX3T{PmixUQ9QceFZzEKH z%Kt75yB23vm&JfDR-S%oj4ity6;{C$*m!ZDrRnN=bB6UKO;6F8=HsT2%e+c$gg+&5 zZRdh$larS$T_S{n9%i}3;rP*`Tzq|Mx)v3t69*Bg#k{eyenH{xiFxh)ztcmFu0Y`6 z9-6-tX2p-ns+4|ub~P^SL8t)a`6ipg+($mHJbyGI{y_{_b&JJ#Z)GbW3VLQwK#G~`)_`$<`c 
zLq}1e`$&1)+9Xf-2X-;a1Mca7JnL}r>)bGn#b^pnqoFyJ0kn zUBJq~7w!Jme7Zy`JOK*q?PtlyO6nACucmt)pzA0>t+8n=e@f+wzvD@3d+T0x2ITYu zl{%G3IRl(oxO_}#2XulC=^UQg%zgWGEYBRfwm27SLw!DDFdA%1@^mt^GHEOp3&Go> z5!Kc$+QmX<2X8Y(Y4hld(l_7VcaiBxn7+(=^Jr;pE9k~}iyxc89)8VhX{WW$YkkVF zg#NL&r8fLdG0rAGzo<`&MfT;fnKguUNY{)ze*Qyb16<`3Nz~}o4$Es!c)-AsMvy1w?>YO;WYs|nI@_UXgmGZy$Feh6f4u3 z1WPNIwNvJ&FrwBng5t&Uht43F`dgxTWe!@YOd(T3B`iAq_JFEHj@x2RNVQ=s%sf)s zd@>1&w-c{{HwyroQ!hdXr<&hjU!jOhc9B~z`*M{xJ0jADv|LzeA{ zDTD2Nbv2tB$3A$X1U@$TbuP;G5(<=8A|UvOdhe#0r(E1-Aj2OPT9GC1VVpUk)^L

6uUq$4a}- zq;C12ZWV+@mfX(nrgt8xT=eA-e8~XOqzI@`$0;8_Ua1^)*D0CKM8 z?GE|RpK$W@N1gXT$|xw4l&I4`1^fv%QZiLZnxvfFNxp?L1-p!fYR!=s@SEEHHTY64 z$@wJJ`Y;I&qJSHb{-M%r91k2#;$2zoB#kl2mnx|z&et;)GpY66Xbu5O!)%D zul;nVrL_ciC;#Ymh@`Ik6D>`-%PBbvoQey|7p$ey1y|z2!`xPDJjAjSw3^#-`@=J_>MEIcw z{Gyz`&xIl<%pj=}zGX&{06UpcyJ$bki4t;vOdwUj*J+d8YiLJbVwBS7Sg%hVZPADy2=d~d0fBb1}57USCh@GoS? ZZvU>m=}F2Q0AM2>#d`sOl(?O{{{b>wTO|Mh diff --git a/third_party/ibis/ibis_cloud_spanner/compiler.py b/third_party/ibis/ibis_cloud_spanner/compiler.py index 6aef76dc7..a32ee87be 100644 --- a/third_party/ibis/ibis_cloud_spanner/compiler.py +++ b/third_party/ibis/ibis_cloud_spanner/compiler.py @@ -74,9 +74,307 @@ def _to_sql(self, expr, ctx): return to_sql(expr, context=ctx) +def _extract_field(sql_attr): + def extract_field_formatter(translator, expr): + op = expr.op() + arg = translator.translate(op.args[0]) + if sql_attr == 'epochseconds': + return f'UNIX_SECONDS({arg})' + else: + return f'EXTRACT({sql_attr} from {arg})' -_operation_registry = impala_compiler._operation_registry.copy() + return extract_field_formatter + + +cloud_spanner_cast = Dispatcher('cloud_spanner_cast') + + +@cloud_spanner_cast.register(str, dt.Timestamp, dt.Integer) +def cloud_spanner_cast_timestamp_to_integer(compiled_arg, from_, to): + return 'UNIX_MICROS({})'.format(compiled_arg) + + +@cloud_spanner_cast.register(str, dt.DataType, dt.DataType) +def cloud_spanner_cast_generate(compiled_arg, from_, to): + sql_type = ibis_type_to_cloud_spanner_type(to) + return 'CAST({} AS {})'.format(compiled_arg, sql_type) + + +def _cast(translator, expr): + op = expr.op() + arg, target_type = op.args + arg_formatted = translator.translate(arg) + return cloud_spanner_cast(arg_formatted, arg.type(), target_type) + + +# def _struct_field(translator, expr): +# arg, field = expr.op().args +# arg_formatted = translator.translate(arg) +# return '{}.`{}`'.format(arg_formatted, field) + + +def _array_concat(translator, expr): + return 'ARRAY_CONCAT({})'.format( + ', 
'.join(map(translator.translate, expr.op().args)) + ) + + +def _array_index(translator, expr): + # SAFE_OFFSET returns NULL if out of bounds + return '{}[OFFSET({})]'.format( + *map(translator.translate, expr.op().args) + ) + + +def _string_find(translator, expr): + haystack, needle, start, end = expr.op().args + + if start is not None: + raise NotImplementedError('start not implemented for string find') + if end is not None: + raise NotImplementedError('end not implemented for string find') + + return 'STRPOS({}, {}) - 1'.format( + translator.translate(haystack), translator.translate(needle) + ) + + +def _translate_pattern(translator, pattern): + # add 'r' to string literals to indicate to Cloud Spanner this is a raw string + return 'r' * isinstance(pattern.op(), ops.Literal) + translator.translate( + pattern + ) + + +def _regex_search(translator, expr): + arg, pattern = expr.op().args + regex = _translate_pattern(translator, pattern) + result = 'REGEXP_CONTAINS({}, {})'.format(translator.translate(arg), regex) + return result + + +def _regex_extract(translator, expr): + arg, pattern, index = expr.op().args + regex = _translate_pattern(translator, pattern) + result = 'REGEXP_EXTRACT({}, {})'.format( + translator.translate(arg), regex + ) + return result + + +def _regex_replace(translator, expr): + arg, pattern, replacement = expr.op().args + regex = _translate_pattern(translator, pattern) + result = 'REGEXP_REPLACE({}, {}, {})'.format( + translator.translate(arg), regex, translator.translate(replacement) + ) + return result + + +def _string_concat(translator, expr): + return 'CONCAT({})'.format( + ', '.join(map(translator.translate, expr.op().arg)) + ) + + +def _string_join(translator, expr): + sep, args = expr.op().args + return 'ARRAY_TO_STRING([{}], {})'.format( + ', '.join(map(translator.translate, args)), translator.translate(sep) + ) + + +def _string_ascii(translator, expr): + (arg,) = expr.op().args + return 'TO_CODE_POINTS({})[SAFE_OFFSET(0)]'.format( + 
translator.translate(arg) + ) + + +def _string_right(translator, expr): + arg, nchars = map(translator.translate, expr.op().args) + return 'SUBSTR({arg}, -LEAST(LENGTH({arg}), {nchars}))'.format( + arg=arg, nchars=nchars + ) + + +def _array_literal_format(expr): + return str(list(expr.op().value)) + + +def _log(translator, expr): + op = expr.op() + arg, base = op.args + arg_formatted = translator.translate(arg) + + if base is None: + return 'ln({})'.format(arg_formatted) + + base_formatted = translator.translate(base) + return 'log({}, {})'.format(arg_formatted, base_formatted) + + +def _literal(translator, expr): + + if isinstance(expr, ir.NumericValue): + value = expr.op().value + if not np.isfinite(value): + return 'CAST({!r} AS FLOAT64)'.format(str(value)) + + # special case literal timestamp, date, and time scalars + if isinstance(expr.op(), ops.Literal): + value = expr.op().value + if isinstance(expr, ir.DateScalar): + if isinstance(value, datetime.datetime): + raw_value = value.date() + else: + raw_value = value + return "DATE '{}'".format(raw_value) + elif isinstance(expr, ir.TimestampScalar): + return "TIMESTAMP '{}'".format(value) + elif isinstance(expr, ir.TimeScalar): + # TODO: define extractors on TimeValue expressions + return "TIME '{}'".format(value) + + try: + return impala_compiler._literal(translator, expr) + except NotImplementedError: + if isinstance(expr, ir.ArrayValue): + return _array_literal_format(expr) + raise NotImplementedError(type(expr).__name__) + + +def _arbitrary(translator, expr): + arg, how, where = expr.op().args + + if where is not None: + arg = where.ifelse(arg, ibis.NA) + if how not in (None, 'first'): + raise com.UnsupportedOperationError( + '{!r} value not supported for arbitrary in Cloud Spanner'.format(how) + ) + + return 'ANY_VALUE({})'.format(translator.translate(arg)) + + +_date_units = { + 'Y': 'YEAR', + 'Q': 'QUARTER', + 'W': 'WEEK', + 'M': 'MONTH', + 'D': 'DAY', +} + + +_timestamp_units = { + 'us': 'MICROSECOND', + 
'ms': 'MILLISECOND', + 's': 'SECOND', + 'm': 'MINUTE', + 'h': 'HOUR', +} +_time_units = _timestamp_units.copy() +_timestamp_units.update(_date_units) + + +def _truncate(kind, units): + def truncator(translator, expr): + arg, unit = expr.op().args + trans_arg = translator.translate(arg) + valid_unit = units.get(unit) + if valid_unit is None: + raise com.UnsupportedOperationError( + 'Cloud Spanner does not support truncating {} values to unit ' + '{!r}'.format(arg.type(), unit) + ) + return '{}_TRUNC({}, {})'.format(kind, trans_arg, valid_unit) + + return truncator + + +def _timestamp_op(func, units): + def _formatter(translator, expr): + op = expr.op() + arg, offset = op.args + + unit = offset.type().unit + if unit not in units: + raise com.UnsupportedOperationError( + 'Cloud Spanner does not allow binary operation ' + '{} with INTERVAL offset {}'.format(func, unit) + ) + formatted_arg = translator.translate(arg) + formatted_offset = translator.translate(offset) + result = '{}({}, {})'.format(func, formatted_arg, formatted_offset) + return result + + return _formatter + + +STRFTIME_FORMAT_FUNCTIONS = { + dt.Date: 'DATE', + dt.Time: 'TIME', + dt.Timestamp: 'TIMESTAMP', +} + + +_operation_registry = impala_compiler._operation_registry.copy() +_operation_registry.update( + { + ops.ExtractYear: _extract_field('year'), + #ops.ExtractQuarter: _extract_field('quarter'), + ops.ExtractMonth: _extract_field('month'), + ops.ExtractDay: _extract_field('day'), + ops.ExtractHour: _extract_field('hour'), + ops.ExtractMinute: _extract_field('minute'), + ops.ExtractSecond: _extract_field('second'), + ops.ExtractMillisecond: _extract_field('millisecond'), + #ops.ExtractEpochSeconds: _extract_field('epochseconds'), + ops.StringReplace: fixed_arity('REPLACE', 3), + ops.StringSplit: fixed_arity('SPLIT', 2), + ops.StringConcat: _string_concat, + ops.StringJoin: _string_join, + ops.StringAscii: _string_ascii, + ops.StringFind: _string_find, + ops.StrRight: _string_right, + ops.Repeat: 
fixed_arity('REPEAT', 2), + ops.RegexSearch: _regex_search, + ops.RegexExtract: _regex_extract, + ops.RegexReplace: _regex_replace, + ops.GroupConcat: _reduction('STRING_AGG'), + ops.IfNull: fixed_arity('IFNULL', 2), + ops.Cast: _cast, + #ops.StructField: _struct_field, + ops.ArrayCollect: unary('ARRAY_AGG'), + ops.ArrayConcat: _array_concat, + ops.ArrayIndex: _array_index, + ops.ArrayLength: unary('ARRAY_LENGTH'), + ops.HLLCardinality: _reduction('APPROX_COUNT_DISTINCT'), + ops.Log: _log, + ops.Sign: unary('SIGN'), + ops.Modulus: fixed_arity('MOD', 2), + ops.Date: unary('DATE'), + # Cloud Spanner doesn't have these operations built in. + # ops.ArrayRepeat: _array_repeat, + # ops.ArraySlice: _array_slice, + ops.Literal: _literal, + ops.Arbitrary: _arbitrary, + ops.TimestampTruncate: _truncate('TIMESTAMP', _timestamp_units), + ops.DateTruncate: _truncate('DATE', _date_units), + ops.TimeTruncate: _truncate('TIME', _timestamp_units), + ops.Time: unary('TIME'), + ops.TimestampAdd: _timestamp_op( + 'TIMESTAMP_ADD', {'h', 'm', 's', 'ms', 'us'} + ), + ops.TimestampSub: _timestamp_op( + 'TIMESTAMP_DIFF', {'h', 'm', 's', 'ms', 'us'} + ), + ops.DateAdd: _timestamp_op('DATE_ADD', {'D', 'W', 'M', 'Q', 'Y'}), + ops.DateSub: _timestamp_op('DATE_SUB', {'D', 'W', 'M', 'Q', 'Y'}), + ops.TimestampNow: fixed_arity('CURRENT_TIMESTAMP', 0), + } +) _invalid_operations = { ops.Translate, @@ -110,7 +408,54 @@ def _trans_param(self, expr): rewrites = CloudSpannerExprTranslator.rewrites - +@compiles(ops.DayOfWeekIndex) +def cloud_spanner_day_of_week_index(t, e): + arg = e.op().args[0] + arg_formatted = t.translate(arg) + return 'MOD(EXTRACT(DAYOFWEEK FROM {}) + 5, 7)'.format(arg_formatted) + + +@rewrites(ops.DayOfWeekName) +def cloud_spanner_day_of_week_name(e): + arg = e.op().args[0] + return arg.strftime('%A') + + +@compiles(ops.Divide) +def cloud_spanner_compiles_divide(t, e): + return 'IEEE_DIVIDE({}, {})'.format(*map(t.translate, e.op().args)) + + +@compiles(ops.Strftime) +def 
compiles_strftime(translator, expr): + arg, format_string = expr.op().args + arg_type = arg.type() + strftime_format_func_name = STRFTIME_FORMAT_FUNCTIONS[type(arg_type)] + fmt_string = translator.translate(format_string) + arg_formatted = translator.translate(arg) + if isinstance(arg_type, dt.Timestamp): + return 'FORMAT_{}({}, {}, {!r})'.format( + strftime_format_func_name, + fmt_string, + arg_formatted, + arg_type.timezone if arg_type.timezone is not None else 'UTC', + ) + return 'FORMAT_{}({}, {})'.format( + strftime_format_func_name, fmt_string, arg_formatted + ) + + +@compiles(ops.StringToTimestamp) +def compiles_string_to_timestamp(translator, expr): + arg, format_string, timezone_arg = expr.op().args + fmt_string = translator.translate(format_string) + arg_formatted = translator.translate(arg) + if timezone_arg is not None: + timezone_str = translator.translate(timezone_arg) + return 'PARSE_TIMESTAMP({}, {}, {})'.format( + fmt_string, arg_formatted, timezone_str + ) + return 'PARSE_TIMESTAMP({}, {})'.format(fmt_string, arg_formatted) class CloudSpannerTableSetFormatter(ImpalaTableSetFormatter): @@ -129,8 +474,122 @@ def table_set_formatter(self): return CloudSpannerTableSetFormatter +@rewrites(ops.IdenticalTo) +def identical_to(expr): + left, right = expr.op().args + return (left.isnull() & right.isnull()) | (left == right) + + +@rewrites(ops.Log2) +def log2(expr): + (arg,) = expr.op().args + return arg.log(2) + + +@rewrites(ops.Sum) +def bq_sum(expr): + arg = expr.op().args[0] + where = expr.op().args[1] + if isinstance(arg, ir.BooleanColumn): + return arg.cast('int64').sum(where=where) + else: + return expr + + +@rewrites(ops.Mean) +def bq_mean(expr): + arg = expr.op().args[0] + where = expr.op().args[1] + if isinstance(arg, ir.BooleanColumn): + return arg.cast('int64').mean(where=where) + else: + return expr +UNIT_FUNCS = {'s': 'SECONDS', 'ms': 'MILLIS', 'us': 'MICROS'} + + +@compiles(ops.TimestampFromUNIX) +def compiles_timestamp_from_unix(t, e): + 
value, unit = e.op().args + return 'TIMESTAMP_{}({})'.format(UNIT_FUNCS[unit], t.translate(value)) + + +@compiles(ops.Floor) +def compiles_floor(t, e): + cs_type = ibis_type_to_cloud_spanner_type(e.type()) + arg = e.op().arg + return 'CAST(FLOOR({}) AS {})'.format(t.translate(arg), cs_type) + + +@compiles(ops.CMSMedian) +def compiles_approx(translator, expr): + expr = expr.op() + arg = expr.arg + where = expr.where + + if where is not None: + arg = where.ifelse(arg, ibis.NA) + + return 'APPROX_QUANTILES({}, 2)[OFFSET(1)]'.format( + translator.translate(arg) + ) + + +@compiles(ops.Covariance) +def compiles_covar(translator, expr): + expr = expr.op() + left = expr.left + right = expr.right + where = expr.where + + if expr.how == 'sample': + how = 'SAMP' + elif expr.how == 'pop': + how = 'POP' + else: + raise ValueError( + "Covariance with how={!r} is not supported.".format(how) + ) + + if where is not None: + left = where.ifelse(left, ibis.NA) + right = where.ifelse(right, ibis.NA) + + return "COVAR_{}({}, {})".format(how, left, right) + + +@rewrites(ops.Any) +@rewrites(ops.All) +@rewrites(ops.NotAny) +@rewrites(ops.NotAll) +def cloud_spanner_any_all_no_op(expr): + return expr + + +@compiles(ops.Any) +def cloud_spanner_compile_any(translator, expr): + return "LOGICAL_OR({})".format(*map(translator.translate, expr.op().args)) + + +@compiles(ops.NotAny) +def cloud_spanner_compile_notany(translator, expr): + return "LOGICAL_AND(NOT ({}))".format( + *map(translator.translate, expr.op().args) + ) + + +@compiles(ops.All) +def cloud_spanner_compile_all(translator, expr): + return "LOGICAL_AND({})".format(*map(translator.translate, expr.op().args)) + + +@compiles(ops.NotAll) +def cloud_spanner_compile_notall(translator, expr): + return "LOGICAL_OR(NOT ({}))".format( + *map(translator.translate, expr.op().args) + ) + class CloudSpannerDialect(impala_compiler.ImpalaDialect): translator = CloudSpannerExprTranslator @@ -139,3 +598,4 @@ class 
CloudSpannerDialect(impala_compiler.ImpalaDialect): dialect = CloudSpannerDialect + From 531b3ff9613f1e6e0e9fbba5195382d097f38b63 Mon Sep 17 00:00:00 2001 From: dollylipare Date: Sun, 21 Feb 2021 13:30:46 +0000 Subject: [PATCH 04/11] Updated compiler.py by adding Bigquery Connector Reference --- third_party/ibis/ibis_cloud_spanner/client.py | 1 - .../ibis/ibis_cloud_spanner/compiler.py | 543 +----------------- 2 files changed, 12 insertions(+), 532 deletions(-) diff --git a/third_party/ibis/ibis_cloud_spanner/client.py b/third_party/ibis/ibis_cloud_spanner/client.py index 9ab05bd70..620b0f08b 100644 --- a/third_party/ibis/ibis_cloud_spanner/client.py +++ b/third_party/ibis/ibis_cloud_spanner/client.py @@ -264,7 +264,6 @@ class CloudSpannerClient(SQLClient): query_class = CloudSpannerQuery database_class = CloudSpannerDatabase table_class = CloudSpannerTable - dialect = comp.CloudSpannerDialect def __init__(self, instance_id, database_id=None, credentials=None): """Construct a CloudSpannerClient. 
diff --git a/third_party/ibis/ibis_cloud_spanner/compiler.py b/third_party/ibis/ibis_cloud_spanner/compiler.py index a32ee87be..e0c56c90f 100644 --- a/third_party/ibis/ibis_cloud_spanner/compiler.py +++ b/third_party/ibis/ibis_cloud_spanner/compiler.py @@ -27,131 +27,31 @@ import ibis.expr.operations as ops import ibis.expr.types as ir import ibis.sql.compiler as comp -from third_party.ibis.ibis_cloud_spanner.datatypes import ibis_type_to_cloud_spanner_type -from ibis.impala import compiler as impala_compiler -from ibis.impala.compiler import ( - ImpalaSelect, - ImpalaTableSetFormatter, - _reduction, - fixed_arity, - unary, -) - - -class CloudSpannerSelectBuilder(comp.SelectBuilder): - @property - def _select_class(self): - return CloudSpannerSelect - - +from ibis.bigquery import compiler as bigquery_compiler -class CloudSpannerUnion(comp.Union): - @staticmethod - def keyword(distinct): - return 'UNION DISTINCT' if distinct else 'UNION ALL' -class CloudSpannerQueryBuilder(comp.QueryBuilder): - - select_builder = CloudSpannerSelectBuilder - union_class = CloudSpannerUnion - - def build_ast(expr, context): - builder = CloudSpannerQueryBuilder(expr, context=context) + builder = bigquery_compiler.BigQueryQueryBuilder(expr, context=context) return builder.get_result() - def to_sql(expr, context): query_ast = build_ast(expr, context) compiled = query_ast.compile() return compiled - -class CloudSpannerContext(comp.QueryContext): - def _to_sql(self, expr, ctx): - return to_sql(expr, context=ctx) - - -def _extract_field(sql_attr): - def extract_field_formatter(translator, expr): - op = expr.op() - arg = translator.translate(op.args[0]) - if sql_attr == 'epochseconds': - return f'UNIX_SECONDS({arg})' - else: - return f'EXTRACT({sql_attr} from {arg})' - - return extract_field_formatter - - -cloud_spanner_cast = Dispatcher('cloud_spanner_cast') - - -@cloud_spanner_cast.register(str, dt.Timestamp, dt.Integer) -def cloud_spanner_cast_timestamp_to_integer(compiled_arg, from_, 
to): - return 'UNIX_MICROS({})'.format(compiled_arg) - - -@cloud_spanner_cast.register(str, dt.DataType, dt.DataType) -def cloud_spanner_cast_generate(compiled_arg, from_, to): - sql_type = ibis_type_to_cloud_spanner_type(to) - return 'CAST({} AS {})'.format(compiled_arg, sql_type) - - -def _cast(translator, expr): - op = expr.op() - arg, target_type = op.args - arg_formatted = translator.translate(arg) - return cloud_spanner_cast(arg_formatted, arg.type(), target_type) - - -# def _struct_field(translator, expr): -# arg, field = expr.op().args -# arg_formatted = translator.translate(arg) -# return '{}.`{}`'.format(arg_formatted, field) - - -def _array_concat(translator, expr): - return 'ARRAY_CONCAT({})'.format( - ', '.join(map(translator.translate, expr.op().args)) - ) - - def _array_index(translator, expr): # SAFE_OFFSET returns NULL if out of bounds return '{}[OFFSET({})]'.format( *map(translator.translate, expr.op().args) ) - - -def _string_find(translator, expr): - haystack, needle, start, end = expr.op().args - - if start is not None: - raise NotImplementedError('start not implemented for string find') - if end is not None: - raise NotImplementedError('end not implemented for string find') - - return 'STRPOS({}, {}) - 1'.format( - translator.translate(haystack), translator.translate(needle) - ) - - -def _translate_pattern(translator, pattern): - # add 'r' to string literals to indicate to Cloud Spanner this is a raw string - return 'r' * isinstance(pattern.op(), ops.Literal) + translator.translate( - pattern - ) - - -def _regex_search(translator, expr): - arg, pattern = expr.op().args - regex = _translate_pattern(translator, pattern) - result = 'REGEXP_CONTAINS({}, {})'.format(translator.translate(arg), regex) - return result - + +def _translate_pattern(translator, pattern): + # add 'r' to string literals to indicate to Cloud Spanner this is a raw string + return 'r' * isinstance(pattern.op(), ops.Literal) + translator.translate( + pattern + ) def 
_regex_extract(translator, expr): arg, pattern, index = expr.op().args @@ -162,440 +62,21 @@ def _regex_extract(translator, expr): return result -def _regex_replace(translator, expr): - arg, pattern, replacement = expr.op().args - regex = _translate_pattern(translator, pattern) - result = 'REGEXP_REPLACE({}, {}, {})'.format( - translator.translate(arg), regex, translator.translate(replacement) - ) - return result - - -def _string_concat(translator, expr): - return 'CONCAT({})'.format( - ', '.join(map(translator.translate, expr.op().arg)) - ) - - -def _string_join(translator, expr): - sep, args = expr.op().args - return 'ARRAY_TO_STRING([{}], {})'.format( - ', '.join(map(translator.translate, args)), translator.translate(sep) - ) - - -def _string_ascii(translator, expr): - (arg,) = expr.op().args - return 'TO_CODE_POINTS({})[SAFE_OFFSET(0)]'.format( - translator.translate(arg) - ) - - -def _string_right(translator, expr): - arg, nchars = map(translator.translate, expr.op().args) - return 'SUBSTR({arg}, -LEAST(LENGTH({arg}), {nchars}))'.format( - arg=arg, nchars=nchars - ) - - -def _array_literal_format(expr): - return str(list(expr.op().value)) - - -def _log(translator, expr): - op = expr.op() - arg, base = op.args - arg_formatted = translator.translate(arg) - - if base is None: - return 'ln({})'.format(arg_formatted) - - base_formatted = translator.translate(base) - return 'log({}, {})'.format(arg_formatted, base_formatted) - - -def _literal(translator, expr): - - if isinstance(expr, ir.NumericValue): - value = expr.op().value - if not np.isfinite(value): - return 'CAST({!r} AS FLOAT64)'.format(str(value)) - - # special case literal timestamp, date, and time scalars - if isinstance(expr.op(), ops.Literal): - value = expr.op().value - if isinstance(expr, ir.DateScalar): - if isinstance(value, datetime.datetime): - raw_value = value.date() - else: - raw_value = value - return "DATE '{}'".format(raw_value) - elif isinstance(expr, ir.TimestampScalar): - return 
"TIMESTAMP '{}'".format(value) - elif isinstance(expr, ir.TimeScalar): - # TODO: define extractors on TimeValue expressions - return "TIME '{}'".format(value) - - try: - return impala_compiler._literal(translator, expr) - except NotImplementedError: - if isinstance(expr, ir.ArrayValue): - return _array_literal_format(expr) - raise NotImplementedError(type(expr).__name__) - - -def _arbitrary(translator, expr): - arg, how, where = expr.op().args - - if where is not None: - arg = where.ifelse(arg, ibis.NA) - - if how not in (None, 'first'): - raise com.UnsupportedOperationError( - '{!r} value not supported for arbitrary in Cloud Spanner'.format(how) - ) - - return 'ANY_VALUE({})'.format(translator.translate(arg)) - - -_date_units = { - 'Y': 'YEAR', - 'Q': 'QUARTER', - 'W': 'WEEK', - 'M': 'MONTH', - 'D': 'DAY', -} - - -_timestamp_units = { - 'us': 'MICROSECOND', - 'ms': 'MILLISECOND', - 's': 'SECOND', - 'm': 'MINUTE', - 'h': 'HOUR', -} -_time_units = _timestamp_units.copy() -_timestamp_units.update(_date_units) - - -def _truncate(kind, units): - def truncator(translator, expr): - arg, unit = expr.op().args - trans_arg = translator.translate(arg) - valid_unit = units.get(unit) - if valid_unit is None: - raise com.UnsupportedOperationError( - 'Cloud Spanner does not support truncating {} values to unit ' - '{!r}'.format(arg.type(), unit) - ) - return '{}_TRUNC({}, {})'.format(kind, trans_arg, valid_unit) - - return truncator - - -def _timestamp_op(func, units): - def _formatter(translator, expr): - op = expr.op() - arg, offset = op.args - - unit = offset.type().unit - if unit not in units: - raise com.UnsupportedOperationError( - 'Cloud Spanner does not allow binary operation ' - '{} with INTERVAL offset {}'.format(func, unit) - ) - formatted_arg = translator.translate(arg) - formatted_offset = translator.translate(offset) - result = '{}({}, {})'.format(func, formatted_arg, formatted_offset) - return result - - return _formatter - - -STRFTIME_FORMAT_FUNCTIONS = { - 
dt.Date: 'DATE', - dt.Time: 'TIME', - dt.Timestamp: 'TIMESTAMP', -} - - -_operation_registry = impala_compiler._operation_registry.copy() +_operation_registry = bigquery_compiler._operation_registry.copy() _operation_registry.update( { - ops.ExtractYear: _extract_field('year'), - #ops.ExtractQuarter: _extract_field('quarter'), - ops.ExtractMonth: _extract_field('month'), - ops.ExtractDay: _extract_field('day'), - ops.ExtractHour: _extract_field('hour'), - ops.ExtractMinute: _extract_field('minute'), - ops.ExtractSecond: _extract_field('second'), - ops.ExtractMillisecond: _extract_field('millisecond'), - #ops.ExtractEpochSeconds: _extract_field('epochseconds'), - ops.StringReplace: fixed_arity('REPLACE', 3), - ops.StringSplit: fixed_arity('SPLIT', 2), - ops.StringConcat: _string_concat, - ops.StringJoin: _string_join, - ops.StringAscii: _string_ascii, - ops.StringFind: _string_find, - ops.StrRight: _string_right, - ops.Repeat: fixed_arity('REPEAT', 2), - ops.RegexSearch: _regex_search, ops.RegexExtract: _regex_extract, - ops.RegexReplace: _regex_replace, - ops.GroupConcat: _reduction('STRING_AGG'), - ops.IfNull: fixed_arity('IFNULL', 2), - ops.Cast: _cast, - #ops.StructField: _struct_field, - ops.ArrayCollect: unary('ARRAY_AGG'), - ops.ArrayConcat: _array_concat, ops.ArrayIndex: _array_index, - ops.ArrayLength: unary('ARRAY_LENGTH'), - ops.HLLCardinality: _reduction('APPROX_COUNT_DISTINCT'), - ops.Log: _log, - ops.Sign: unary('SIGN'), - ops.Modulus: fixed_arity('MOD', 2), - ops.Date: unary('DATE'), - # Cloud Spanner doesn't have these operations built in. 
- # ops.ArrayRepeat: _array_repeat, - # ops.ArraySlice: _array_slice, - ops.Literal: _literal, - ops.Arbitrary: _arbitrary, - ops.TimestampTruncate: _truncate('TIMESTAMP', _timestamp_units), - ops.DateTruncate: _truncate('DATE', _date_units), - ops.TimeTruncate: _truncate('TIME', _timestamp_units), - ops.Time: unary('TIME'), - ops.TimestampAdd: _timestamp_op( - 'TIMESTAMP_ADD', {'h', 'm', 's', 'ms', 'us'} - ), - ops.TimestampSub: _timestamp_op( - 'TIMESTAMP_DIFF', {'h', 'm', 's', 'ms', 'us'} - ), - ops.DateAdd: _timestamp_op('DATE_ADD', {'D', 'W', 'M', 'Q', 'Y'}), - ops.DateSub: _timestamp_op('DATE_SUB', {'D', 'W', 'M', 'Q', 'Y'}), - ops.TimestampNow: fixed_arity('CURRENT_TIMESTAMP', 0), } ) -_invalid_operations = { - ops.Translate, - ops.FindInSet, - ops.Capitalize, - ops.DateDiff, - ops.TimestampDiff, -} - -_operation_registry = { - k: v - for k, v in _operation_registry.items() - if k not in _invalid_operations -} - - -class CloudSpannerExprTranslator(impala_compiler.ImpalaExprTranslator): - _registry = _operation_registry - _rewrites = impala_compiler.ImpalaExprTranslator._rewrites.copy() - - context_class = CloudSpannerContext - - def _trans_param(self, expr): - op = expr.op() - if op not in self.context.params: - raise KeyError(op) - return '@{}'.format(expr.get_name()) - - -compiles = CloudSpannerExprTranslator.compiles -rewrites = CloudSpannerExprTranslator.rewrites - - -@compiles(ops.DayOfWeekIndex) -def cloud_spanner_day_of_week_index(t, e): - arg = e.op().args[0] - arg_formatted = t.translate(arg) - return 'MOD(EXTRACT(DAYOFWEEK FROM {}) + 5, 7)'.format(arg_formatted) - - -@rewrites(ops.DayOfWeekName) -def cloud_spanner_day_of_week_name(e): - arg = e.op().args[0] - return arg.strftime('%A') - - -@compiles(ops.Divide) -def cloud_spanner_compiles_divide(t, e): - return 'IEEE_DIVIDE({}, {})'.format(*map(t.translate, e.op().args)) - - -@compiles(ops.Strftime) -def compiles_strftime(translator, expr): - arg, format_string = expr.op().args - arg_type = 
arg.type() - strftime_format_func_name = STRFTIME_FORMAT_FUNCTIONS[type(arg_type)] - fmt_string = translator.translate(format_string) - arg_formatted = translator.translate(arg) - if isinstance(arg_type, dt.Timestamp): - return 'FORMAT_{}({}, {}, {!r})'.format( - strftime_format_func_name, - fmt_string, - arg_formatted, - arg_type.timezone if arg_type.timezone is not None else 'UTC', - ) - return 'FORMAT_{}({}, {})'.format( - strftime_format_func_name, fmt_string, arg_formatted - ) - - -@compiles(ops.StringToTimestamp) -def compiles_string_to_timestamp(translator, expr): - arg, format_string, timezone_arg = expr.op().args - fmt_string = translator.translate(format_string) - arg_formatted = translator.translate(arg) - if timezone_arg is not None: - timezone_str = translator.translate(timezone_arg) - return 'PARSE_TIMESTAMP({}, {}, {})'.format( - fmt_string, arg_formatted, timezone_str - ) - return 'PARSE_TIMESTAMP({}, {})'.format(fmt_string, arg_formatted) - - -class CloudSpannerTableSetFormatter(ImpalaTableSetFormatter): - def _quote_identifier(self, name): - if re.match(r'^[A-Za-z][A-Za-z_0-9]*$', name): - return name - return '`{}`'.format(name) - - -class CloudSpannerSelect(ImpalaSelect): - - translator = CloudSpannerExprTranslator - - @property - def table_set_formatter(self): - return CloudSpannerTableSetFormatter - - -@rewrites(ops.IdenticalTo) -def identical_to(expr): - left, right = expr.op().args - return (left.isnull() & right.isnull()) | (left == right) - - -@rewrites(ops.Log2) -def log2(expr): - (arg,) = expr.op().args - return arg.log(2) - - -@rewrites(ops.Sum) -def bq_sum(expr): - arg = expr.op().args[0] - where = expr.op().args[1] - if isinstance(arg, ir.BooleanColumn): - return arg.cast('int64').sum(where=where) - else: - return expr - - -@rewrites(ops.Mean) -def bq_mean(expr): - arg = expr.op().args[0] - where = expr.op().args[1] - if isinstance(arg, ir.BooleanColumn): - return arg.cast('int64').mean(where=where) - else: - return expr - - 
-UNIT_FUNCS = {'s': 'SECONDS', 'ms': 'MILLIS', 'us': 'MICROS'} - - -@compiles(ops.TimestampFromUNIX) -def compiles_timestamp_from_unix(t, e): - value, unit = e.op().args - return 'TIMESTAMP_{}({})'.format(UNIT_FUNCS[unit], t.translate(value)) - - -@compiles(ops.Floor) -def compiles_floor(t, e): - cs_type = ibis_type_to_cloud_spanner_type(e.type()) - arg = e.op().arg - return 'CAST(FLOOR({}) AS {})'.format(t.translate(arg), cs_type) - - -@compiles(ops.CMSMedian) -def compiles_approx(translator, expr): - expr = expr.op() - arg = expr.arg - where = expr.where - - if where is not None: - arg = where.ifelse(arg, ibis.NA) - - return 'APPROX_QUANTILES({}, 2)[OFFSET(1)]'.format( - translator.translate(arg) - ) - - -@compiles(ops.Covariance) -def compiles_covar(translator, expr): - expr = expr.op() - left = expr.left - right = expr.right - where = expr.where - - if expr.how == 'sample': - how = 'SAMP' - elif expr.how == 'pop': - how = 'POP' - else: - raise ValueError( - "Covariance with how={!r} is not supported.".format(how) - ) - - if where is not None: - left = where.ifelse(left, ibis.NA) - right = where.ifelse(right, ibis.NA) - - return "COVAR_{}({}, {})".format(how, left, right) - - -@rewrites(ops.Any) -@rewrites(ops.All) -@rewrites(ops.NotAny) -@rewrites(ops.NotAll) -def cloud_spanner_any_all_no_op(expr): - return expr - - -@compiles(ops.Any) -def cloud_spanner_compile_any(translator, expr): - return "LOGICAL_OR({})".format(*map(translator.translate, expr.op().args)) - - -@compiles(ops.NotAny) -def cloud_spanner_compile_notany(translator, expr): - return "LOGICAL_AND(NOT ({}))".format( - *map(translator.translate, expr.op().args) - ) - - -@compiles(ops.All) -def cloud_spanner_compile_all(translator, expr): - return "LOGICAL_AND({})".format(*map(translator.translate, expr.op().args)) - - -@compiles(ops.NotAll) -def cloud_spanner_compile_notall(translator, expr): - return "LOGICAL_OR(NOT ({}))".format( - *map(translator.translate, expr.op().args) - ) +compiles = 
bigquery_compiler.BigQueryExprTranslator.compiles +rewrites = bigquery_compiler.BigQueryExprTranslator.rewrites -class CloudSpannerDialect(impala_compiler.ImpalaDialect): - translator = CloudSpannerExprTranslator +dialect = bigquery_compiler.BigQueryDialect -dialect = CloudSpannerDialect From 41627c79135bd77d9d29e92ef671a8b26cfec80d Mon Sep 17 00:00:00 2001 From: dollylipare Date: Fri, 26 Feb 2021 14:13:58 +0000 Subject: [PATCH 05/11] Changed table.py file --- third_party/ibis/ibis_cloud_spanner/api.py | 30 +- third_party/ibis/ibis_cloud_spanner/client.py | 231 +++++----- .../ibis/ibis_cloud_spanner/compiler.py | 42 +- .../ibis/ibis_cloud_spanner/dataset.py | 149 ------- .../ibis/ibis_cloud_spanner/datatypes.py | 33 +- third_party/ibis/ibis_cloud_spanner/table.py | 395 ++++-------------- .../ibis/ibis_cloud_spanner/tests/conftest.py | 37 +- .../ibis/ibis_cloud_spanner/tests/schema.sql | 2 +- .../ibis_cloud_spanner/tests/test_client.py | 283 +++++++------ .../ibis_cloud_spanner/tests/test_compiler.py | 209 +++++---- .../tests/test_datatypes.py | 44 +- .../ibis/ibis_cloud_spanner/to_pandas.py | 81 ++-- 12 files changed, 511 insertions(+), 1025 deletions(-) delete mode 100644 third_party/ibis/ibis_cloud_spanner/dataset.py diff --git a/third_party/ibis/ibis_cloud_spanner/api.py b/third_party/ibis/ibis_cloud_spanner/api.py index 199998d41..52a9d078e 100644 --- a/third_party/ibis/ibis_cloud_spanner/api.py +++ b/third_party/ibis/ibis_cloud_spanner/api.py @@ -13,29 +13,16 @@ # limitations under the License. 
- """CloudScanner public API.""" from third_party.ibis.ibis_cloud_spanner.client import CloudSpannerClient from third_party.ibis.ibis_cloud_spanner.compiler import dialect -from ibis.config import options -from typing import Optional import google.cloud.spanner # noqa: F401, fail early if spanner is missing import ibis.common.exceptions as com -import pydata_google_auth - - - - -__all__ = ('compile', 'connect', 'verify', '' - '' - '' - '' - '' - '') +__all__ = ("compile", "connect", "verify") def compile(expr, params=None): @@ -64,12 +51,9 @@ def verify(expr, params=None): return False - - def connect( - instance_id: Optional[str] = None, - database_id: Optional[str] = None, - + instance_id, + database_id, ) -> CloudSpannerClient: """Create a CloudSpannerClient for use with Ibis. @@ -78,8 +62,7 @@ def connect( instance_id : str A Cloud Spanner Instance id. database_id : str - A database id that lives inside of the Cloud Spanner Instance indicated by - `instance_id`. + A database id inside of the Cloud Spanner Instance Returns ------- @@ -87,7 +70,4 @@ def connect( """ - return CloudSpannerClient( - instance_id=instance_id, database_id=database_id - ) - + return CloudSpannerClient(instance_id=instance_id, database_id=database_id) diff --git a/third_party/ibis/ibis_cloud_spanner/client.py b/third_party/ibis/ibis_cloud_spanner/client.py index 620b0f08b..7c40a34be 100644 --- a/third_party/ibis/ibis_cloud_spanner/client.py +++ b/third_party/ibis/ibis_cloud_spanner/client.py @@ -15,41 +15,37 @@ """Cloud Spanner ibis client implementation.""" import datetime -from collections import OrderedDict from typing import Optional, Tuple import google.cloud.spanner as cs from google.cloud import spanner import pandas as pd -import regex as re -from google.api_core.exceptions import NotFound +import re from multipledispatch import Dispatcher -from pkg_resources import parse_version import ibis import ibis.common.exceptions as com import ibis.expr.datatypes as dt import 
ibis.expr.lineage as lin import ibis.expr.operations as ops -import ibis.expr.schema as sch import ibis.expr.types as ir from third_party.ibis.ibis_cloud_spanner import compiler as comp -from third_party.ibis.ibis_cloud_spanner.datatypes import ibis_type_to_cloud_spanner_type +from third_party.ibis.ibis_cloud_spanner.datatypes import ( + ibis_type_to_cloud_spanner_type, +) from ibis.client import Database, Query, SQLClient -from third_party.ibis.ibis_cloud_spanner import dataset as dataset_class + from third_party.ibis.ibis_cloud_spanner import table -from google.cloud import spanner -from google.cloud import spanner -from pandas import DataFrame -from third_party.ibis.ibis_cloud_spanner.to_pandas import pandas_df +from google.cloud.spanner_v1 import TypeCode +from third_party.ibis.ibis_cloud_spanner.to_pandas import pandas_df def parse_instance_and_dataset( instance: str, dataset: Optional[str] = None ) -> Tuple[str, str, Optional[str]]: try: - data_instance, dataset = dataset.split('.') + data_instance, dataset = dataset.split(".") except (ValueError, AttributeError): billing_instance = data_instance = instance else: @@ -57,9 +53,11 @@ def parse_instance_and_dataset( return data_instance, billing_instance, dataset + class CloudSpannerTable(ops.DatabaseTable): pass + def _find_scalar_parameter(expr): """Find all :class:`~ibis.expr.types.ScalarParameter` instances. 
@@ -81,23 +79,25 @@ def _find_scalar_parameter(expr): result = None return lin.proceed, result + def convert_to_cs_type(dtype): - if (dtype == 'FLOAT64'): + if dtype == "FLOAT64": return spanner.param_types.FLOAT64 - elif (dtype == 'INT64'): + elif dtype == "INT64": return spanner.param_types.INT64 - elif (dtype == 'DATE'): + elif dtype == "DATE": return spanner.param_types.DATE - elif (dtype == 'TIMESTAMP'): + elif dtype == "TIMESTAMP": return spanner.param_types.TIMESTAMP - elif (dtype == 'NUMERIC'): + elif dtype == "NUMERIC": return spanner.param_types.NUMERIC - elif (dtype == 'INT64'): + elif dtype == "INT64": return spanner.param_types.INT64 else: return spanner.param_types.STRING -cloud_spanner_param = Dispatcher('cloud_spanner_param') + +cloud_spanner_param = Dispatcher("cloud_spanner_param") @cloud_spanner_param.register(ir.ArrayValue, list) @@ -111,18 +111,15 @@ def cs_param_array(param, value): raise com.UnsupportedBackendType(param_type) else: if isinstance(param_type.value_type, dt.Struct): - raise TypeError('ARRAY> is not supported in Cloud Spanner') + raise TypeError("ARRAY> is not supported in Cloud Spanner") elif isinstance(param_type.value_type, dt.Array): - raise TypeError('ARRAY> is not supported in Cloud Spanner') + raise TypeError("ARRAY> is not supported in Cloud Spanner") else: query_value = value - params={param.get_name(): query_value}, - param_types={param.get_name(): convert_to_cs_type(spanner_type)} - final_dict={ - 'params':params, - 'param_types':param_types - } + params = ({param.get_name(): query_value},) + param_types = {param.get_name(): convert_to_cs_type(spanner_type)} + final_dict = {"params": params, "param_types": param_types} return final_dict @@ -133,102 +130,66 @@ def cs_param_array(param, value): def cs_param_timestamp(param, value): assert isinstance(param.type(), dt.Timestamp), str(param.type()) - timestamp_value = pd.Timestamp(value, tz='UTC').to_pydatetime() - params={param.get_name(): timestamp_value}, - 
param_types={param.get_name(): spanner.param_types.TIMESTAMP} - final_dict={ - 'params':params[0], - 'param_types':param_types - - } + timestamp_value = pd.Timestamp(value, tz="UTC").to_pydatetime() + params = ({param.get_name(): timestamp_value},) + param_types = {param.get_name(): spanner.param_types.TIMESTAMP} + final_dict = {"params": params[0], "param_types": param_types} return final_dict @cloud_spanner_param.register(ir.StringScalar, str) def cs_param_string(param, value): - params={param.get_name(): value}, - param_types={param.get_name(): spanner.param_types.STRING} - final_dict={ - 'params':params[0], - 'param_types':param_types - - } + params = ({param.get_name(): value},) + param_types = {param.get_name(): spanner.param_types.STRING} + final_dict = {"params": params[0], "param_types": param_types} return final_dict - @cloud_spanner_param.register(ir.IntegerScalar, int) def cs_param_integer(param, value): - params={param.get_name(): value}, - param_types={param.get_name(): spanner.param_types.INT64} - final_dict={ - 'params':params[0], - 'param_types':param_types - - } + params = ({param.get_name(): value},) + param_types = {param.get_name(): spanner.param_types.INT64} + final_dict = {"params": params[0], "param_types": param_types} return final_dict - @cloud_spanner_param.register(ir.FloatingScalar, float) def cs_param_double(param, value): - params={param.get_name(): value}, - param_types={param.get_name(): spanner.param_types.FLOAT64} - final_dict={ - 'params':params[0], - 'param_types':param_types - - } + params = ({param.get_name(): value},) + param_types = {param.get_name(): spanner.param_types.FLOAT64} + final_dict = {"params": params[0], "param_types": param_types} return final_dict @cloud_spanner_param.register(ir.BooleanScalar, bool) def cs_param_boolean(param, value): - params={param.get_name(): value}, - param_types={param.get_name(): spanner.param_types.BOOL} - final_dict={ - 'params':params[0], - 'param_types':param_types - - } + params = 
({param.get_name(): value},) + param_types = {param.get_name(): spanner.param_types.BOOL} + final_dict = {"params": params[0], "param_types": param_types} return final_dict - - @cloud_spanner_param.register(ir.DateScalar, str) def cs_param_date_string(param, value): - params={param.get_name(): pd.Timestamp(value).to_pydatetime().date()}, - param_types={param.get_name(): spanner.param_types.DATE} - final_dict={ - 'params':params[0], - 'param_types':param_types - - } + params = ({param.get_name(): pd.Timestamp(value).to_pydatetime().date()},) + param_types = {param.get_name(): spanner.param_types.DATE} + final_dict = {"params": params[0], "param_types": param_types} return final_dict @cloud_spanner_param.register(ir.DateScalar, datetime.datetime) def cs_param_date_datetime(param, value): - params={param.get_name(): value.date()}, - param_types={param.get_name(): spanner.param_types.DATE} - final_dict={ - 'params':params[0], - 'param_types':param_types - - } + params = ({param.get_name(): value.date()},) + param_types = {param.get_name(): spanner.param_types.DATE} + final_dict = {"params": params[0], "param_types": param_types} return final_dict @cloud_spanner_param.register(ir.DateScalar, datetime.date) def cs_param_date(param, value): - params={param.get_name(): value}, - param_types={param.get_name(): spanner.param_types.DATE} - final_dict={ - 'params':params[0], - 'param_types':param_types - - } + params = ({param.get_name(): value},) + param_types = {param.get_name(): spanner.param_types.DATE} + final_dict = {"params": params[0], "param_types": param_types} return final_dict @@ -237,9 +198,7 @@ def __init__(self, client, ddl, query_parameters=None): super().__init__(client, ddl) # self.expr comes from the parent class - query_parameter_names = dict( - lin.traverse(_find_scalar_parameter, self.expr) - ) + query_parameter_names = dict(lin.traverse(_find_scalar_parameter, self.expr)) self.query_parameters = [ cloud_spanner_param( @@ -248,14 +207,16 @@ def 
__init__(self, client, ddl, query_parameters=None): for param, value in (query_parameters or {}).items() ] - def execute(self): - dataframe_output = self.client._execute(self.compiled_sql,results=True,query_parameters=self.query_parameters) + dataframe_output = self.client._execute( + self.compiled_sql, results=True, query_parameters=self.query_parameters + ) return dataframe_output + class CloudSpannerDatabase(Database): - """A Cloud scanner dataset.""" + """A Cloud scanner dataset.""" class CloudSpannerClient(SQLClient): @@ -287,17 +248,15 @@ def __init__(self, instance_id, database_id=None, credentials=None): ) = parse_instance_and_dataset(instance_id, database_id) self.client = cs.Client() - - def _parse_instance_and_dataset(self, dataset): if not dataset and not self.dataset: raise ValueError("Unable to determine Cloud Spanner dataset.") instance, _, dataset = parse_instance_and_dataset( self.billing_instance, - dataset or '{}.{}'.format(self.data_instance, self.dataset), + dataset or "{}.{}".format(self.data_instance, self.dataset), ) return instance, dataset - + def get_data_using_query(self, query, results=False): return self._execute(query, results=results) @@ -309,14 +268,8 @@ def instance_id(self): def dataset_id(self): return self.dataset - def table(self,name,database=None): + def table(self, name, database=None): t = super().table(name, database=database) - name = t.op().name - instance = self.instance_id - dataset = self.dataset_id - dataset_ref = dataset_class.DatasetReference(instance,dataset) - table_ref = dataset_ref.table(name) - cs_table = table.Table(table_ref) return t def _build_ast(self, expr, context): @@ -338,26 +291,24 @@ def _get_table_schema(self, qualified_name): @property def current_database(self): - return self.database(self.dataset) - + return self.database(self.dataset) + def list_databases(self, like=None): - databases=self.instance.list_databases() - list_db=[] + databases = self.instance.list_databases() + list_db = [] for row 
in databases: - list_db.append((row.name).rsplit('/', 1)[1]) + list_db.append((row.name).rsplit("/", 1)[1]) return list_db - - - def list_tables(self,like=None,database=None): + def list_tables(self, like=None, database=None): if database is None: db_value = self.dataset_id else: db_value = database db = self.instance.database(db_value) - tables=[] + tables = [] with db.snapshot() as snapshot: - query="SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES where SPANNER_STATE = 'COMMITTED' " + query = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES where SPANNER_STATE = 'COMMITTED' " results = snapshot.execute_sql(query) for row in results: tables.append(row[0]) @@ -377,27 +328,42 @@ def exists_table(self, name, database=None): db_value = database db = self.instance.database(db_value) with db.snapshot() as snapshot: - query = "SELECT EXISTS(SELECT * FROM INFORMATION_SCHEMA.TABLES where TABLE_NAME = '{}' )".format(name) + query = "SELECT EXISTS(SELECT * FROM INFORMATION_SCHEMA.TABLES where TABLE_NAME = '{}' )".format( + name + ) output = snapshot.execute_sql(query) - result = '' + result = "" for row in output: result = row[0] return result - - def get_schema(self, name, database=None): + def get_schema(self, table_id, database=None): if database is None: database = self.dataset_id - instance, dataset = self._parse_instance_and_dataset(database) - dataset_ref = dataset_class.DatasetReference(instance,dataset) - table_ref = dataset_ref.table(name) - cs_table = table.Table(table_ref).schema - return (ibis.schema(cs_table)) + db_value = self.instance.database(database) + table_schema = table.Table(table_id, db_value).schema + + t_schema = [] + for item in table_schema: + field_name = item.name + + if item.type_.code == TypeCode.ARRAY: + field_type = "array<{}>".format(item.type_.array_element_type.code.name) + elif item.type_.code == TypeCode.BYTES: + field_type = "binary" + elif item.type_.code == TypeCode.NUMERIC: + field_type = "decimal" + else: + field_type = 
item.type_.code.name + + final_item = (field_name, field_type) + t_schema.append(final_item) + + return ibis.schema(t_schema) def _execute(self, stmt, results=True, query_parameters=None): - from google.cloud import spanner spanner_client = spanner.Client() instance_id = self.instance_id instance = spanner_client.instance(instance_id) @@ -405,11 +371,9 @@ def _execute(self, stmt, results=True, query_parameters=None): database_1 = instance.database(database_id) with database_1.snapshot() as snapshot: - data_qry = pandas_df.to_pandas(snapshot,stmt,query_parameters) + data_qry = pandas_df.to_pandas(snapshot, stmt, query_parameters) return data_qry - - def database(self, name=None): if name is None and self.dataset is None: raise ValueError( @@ -422,17 +386,10 @@ def database(self, name=None): def set_database(self, name): self.data_instance, self.dataset = self._parse_instance_and_dataset(name) - def dataset(self,database): + def dataset(self, database): + spanner_client = spanner.Client() instance = spanner_client.instance(self.data_instance) database = instance.database(database) - def exists_database(self, name): return self.instance.database(name).exists() - - - - - - - diff --git a/third_party/ibis/ibis_cloud_spanner/compiler.py b/third_party/ibis/ibis_cloud_spanner/compiler.py index e0c56c90f..1d492e701 100644 --- a/third_party/ibis/ibis_cloud_spanner/compiler.py +++ b/third_party/ibis/ibis_cloud_spanner/compiler.py @@ -12,53 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import datetime -from functools import partial - -import numpy as np -import regex as re -import toolz -from multipledispatch import Dispatcher - -import ibis -import ibis.common.exceptions as com -import ibis.expr.datatypes as dt -import ibis.expr.lineage as lin import ibis.expr.operations as ops -import ibis.expr.types as ir -import ibis.sql.compiler as comp from ibis.bigquery import compiler as bigquery_compiler - - def build_ast(expr, context): builder = bigquery_compiler.BigQueryQueryBuilder(expr, context=context) return builder.get_result() + def to_sql(expr, context): query_ast = build_ast(expr, context) compiled = query_ast.compile() return compiled + def _array_index(translator, expr): # SAFE_OFFSET returns NULL if out of bounds - return '{}[OFFSET({})]'.format( - *map(translator.translate, expr.op().args) - ) - -def _translate_pattern(translator, pattern): - # add 'r' to string literals to indicate to Cloud Spanner this is a raw string - return 'r' * isinstance(pattern.op(), ops.Literal) + translator.translate( - pattern - ) + return "{}[OFFSET({})]".format(*map(translator.translate, expr.op().args)) + + +def _translate_pattern(translator, pattern): + # add 'r' to string literals to indicate to Cloud Spanner this is a raw string + return "r" * isinstance(pattern.op(), ops.Literal) + translator.translate(pattern) + def _regex_extract(translator, expr): arg, pattern, index = expr.op().args regex = _translate_pattern(translator, pattern) - result = 'REGEXP_EXTRACT({}, {})'.format( - translator.translate(arg), regex - ) + result = "REGEXP_EXTRACT({}, {})".format(translator.translate(arg), regex) return result @@ -76,7 +58,3 @@ def _regex_extract(translator, expr): dialect = bigquery_compiler.BigQueryDialect - - - - diff --git a/third_party/ibis/ibis_cloud_spanner/dataset.py b/third_party/ibis/ibis_cloud_spanner/dataset.py deleted file mode 100644 index 34be92610..000000000 --- a/third_party/ibis/ibis_cloud_spanner/dataset.py +++ /dev/null @@ -1,149 +0,0 @@ 
-# Copyright 2021 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from third_party.ibis.ibis_cloud_spanner import table - -import six -import copy -import google.cloud._helpers -import ibis -import third_party.ibis.ibis_cloud_spanner - - - -def _get_table_reference(self, table_id): - """Constructs a TableReference. - Args: - table_id (str): The ID of the table. - Returns: - cloud_spanner.table.TableReference: - A table reference for a table in this dataset. - """ - return table.TableReference(self, table_id) - -class DatasetReference(object): - """DatasetReferences are pointers to datasets. - - Args: - instance_id (str): The ID of the instance - dataset_id (str): The ID of the dataset - Raises: - ValueError: If either argument is not of type ``str``. 
- """ - - def __init__(self, instance_id, dataset_id): - '''if not isinstance(instance_id, six.string_types): - raise ValueError("Pass a string for instance_id") - if not isinstance(dataset_id, six.string_types): - raise ValueError("Pass a string for dataset_id")''' - self._instance_id = instance_id - self._dataset_id = dataset_id - - table = _get_table_reference - - @property - def instance_id(self): - """str: Project ID of the dataset.""" - return self._instance_id - - @property - def dataset_id(self): - """str: Dataset ID.""" - return self._dataset_id - - - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a dataset reference given its API representation - Args: - resource (Dict[str, str]): - Dataset reference resource representation returned from the API - Returns: - cloud_spanner.dataset.DatasetReference: - Dataset reference parsed from ``resource``. - """ - instance_id = resource["instanceId"] - dataset_id = resource["datasetId"] - return cls(instance_id, dataset_id) - - @classmethod - def from_string(cls, dataset_id, default_instance_id=None): - """Construct a dataset reference from dataset ID string. - Args: - dataset_id (str): - A dataset ID in standard SQL format. If ``instance_id`` - is not specified, this must include both the instance ID and - the dataset ID, separated by ``.``. - default_instance_id (Optional[str]): - The instance ID to use when ``dataset_id`` does not include a - instance ID. - Returns: - DatasetReference: - Dataset reference parsed from ``dataset_id``. 
- - """ - output_dataset_id = dataset_id - output_instance_id = default_instance_id - parts = dataset_id.split(".") - parts = [part for part in parts if part] - - - if len(parts) == 1 and not default_instance_id: - raise ValueError( - "When instance is not set, dataset_id must be a " - "fully-qualified dataset ID in standard SQL format, " - 'e.g., "instance_id.dataset_id" got {}'.format(dataset_id) - ) - elif len(parts) == 2: - output_instance_id, output_dataset_id = parts - elif len(parts) > 2: - raise ValueError( - "Too many parts in dataset_id. Expected a fully-qualified " - "dataset ID in standard SQL format. e.g. " - '"instance _id.dataset_id", got {}'.format(dataset_id) - ) - - return cls(output_instance_id, output_dataset_id) - - def to_api_repr(self): - """Construct the API resource representation of this dataset reference - Returns: - Dict[str, str]: dataset reference represented as an API resource - """ - return {"instanceId": self._instance_id, "datasetId": self._dataset_id} - - def _key(self): - """A tuple key that uniquely describes this field. - Used to compute this instance's hashcode and evaluate equality. - Returns: - Tuple[str]: The contents of this :class:`.DatasetReference`. 
- """ - return (self._instance_id, self._dataset_id) - - def __eq__(self, other): - if not isinstance(other, DatasetReference): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(self._key()) - - def __repr__(self): - return "DatasetReference{}".format(self._key()) - diff --git a/third_party/ibis/ibis_cloud_spanner/datatypes.py b/third_party/ibis/ibis_cloud_spanner/datatypes.py index b69313a83..4f1c2e093 100644 --- a/third_party/ibis/ibis_cloud_spanner/datatypes.py +++ b/third_party/ibis/ibis_cloud_spanner/datatypes.py @@ -24,7 +24,7 @@ class TypeTranslationContext: __slots__ = () -ibis_type_to_cloud_spanner_type = Dispatcher('ibis_type_to_cloud_spanner_type') +ibis_type_to_cloud_spanner_type = Dispatcher("ibis_type_to_cloud_spanner_type") @ibis_type_to_cloud_spanner_type.register(str) @@ -44,55 +44,46 @@ def trans_string_context(datatype, context): @ibis_type_to_cloud_spanner_type.register(dt.Floating, TypeTranslationContext) def trans_float64(t, context): - return 'FLOAT64' + return "FLOAT64" @ibis_type_to_cloud_spanner_type.register(dt.Integer, TypeTranslationContext) def trans_integer(t, context): - return 'INT64' + return "INT64" @ibis_type_to_cloud_spanner_type.register(dt.Array, TypeTranslationContext) def trans_array(t, context): - return 'ARRAY<{}>'.format( - ibis_type_to_cloud_spanner_type(t.value_type, context) - ) - + return "ARRAY<{}>".format(ibis_type_to_cloud_spanner_type(t.value_type, context)) @ibis_type_to_cloud_spanner_type.register(dt.Date, TypeTranslationContext) def trans_date(t, context): - return 'DATE' + return "DATE" @ibis_type_to_cloud_spanner_type.register(dt.Timestamp, TypeTranslationContext) def trans_timestamp(t, context): - return 'TIMESTAMP' + return "TIMESTAMP" @ibis_type_to_cloud_spanner_type.register(dt.DataType, TypeTranslationContext) def trans_type(t, context): return str(t).upper() 
-@ibis_type_to_cloud_spanner_type.register( - dt.UInt64, TypeTranslationContext -) + +@ibis_type_to_cloud_spanner_type.register(dt.UInt64, TypeTranslationContext) def trans_lossy_integer(t, context): raise TypeError( - 'Conversion from uint64 to Cloud Spanner integer type (int64) is lossy' + "Conversion from uint64 to Cloud Spanner integer type (int64) is lossy" ) - @ibis_type_to_cloud_spanner_type.register(dt.Decimal, TypeTranslationContext) def trans_numeric(t, context): if (t.precision, t.scale) != (38, 9): raise TypeError( - 'Cloud Spanner only supports decimal types with precision of 38 and ' - 'scale of 9' + "Cloud Spanner only supports decimal types with precision of 38 and " + "scale of 9" ) - return 'NUMERIC' - - - - + return "NUMERIC" diff --git a/third_party/ibis/ibis_cloud_spanner/table.py b/third_party/ibis/ibis_cloud_spanner/table.py index cd8cbe3ef..68d8649eb 100644 --- a/third_party/ibis/ibis_cloud_spanner/table.py +++ b/third_party/ibis/ibis_cloud_spanner/table.py @@ -1,10 +1,10 @@ -# Copyright 2021 Google Inc. +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -12,343 +12,102 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import absolute_import -from google.api_core.page_iterator import HTTPIterator -from google.cloud.spanner_v1 import TypeCode -from third_party.ibis.ibis_cloud_spanner import dataset as dataset_class - -import copy -import datetime -import functools -import logging -import operator -import pytz -import warnings -import six -import google.api_core.exceptions -import google.cloud._helpers - -try: - import pandas -except ImportError: # pragma: NO COVER - pandas = None - -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - +"""User friendly container for Cloud Spanner Table.""" +from google.cloud.exceptions import NotFound -code_to_spanner_dtype_dict = { - 1 : 'bool', - 2 : 'int64', - 3 : 'float64', - 4 : 'timestamp', - 5 : 'date', - 6 : 'string', - 7 : 'binary', - 8 : 'array', - 10 : 'decimal' -} +from google.cloud.spanner_v1 import Type +from google.cloud.spanner_v1 import TypeCode -def _parse_3_part_id(full_id, default_instance=None, property_name="table_id"): - output_instance_id = default_instance - output_dataset_id = None - output_resource_id = None - parts = full_id.split(".") - parts = [part for part in parts if part] - if len(parts) != 2 and len(parts) != 3: - raise ValueError( - "{property_name} must be a fully-qualified ID in " - 'standard SQL format, e.g., "instance.dataset.{property_name}", ' - "got {}".format(full_id, property_name=property_name) - ) - if len(parts) == 2 and not default_instance: - raise ValueError( - "When default_instance is not set, {property_name} must be a " - "fully-qualified ID in standard SQL format, " - 'e.g., "instance.dataset_id.{property_name}", got {}'.format( - full_id, property_name=property_name - ) - ) - if len(parts) == 2: - output_dataset_id, output_resource_id = parts - else: - output_instance_id, output_dataset_id, output_resource_id = parts - return output_instance_id, output_dataset_id, output_resource_id +_EXISTS_TEMPLATE = """ +SELECT EXISTS( + SELECT TABLE_NAME + FROM 
INFORMATION_SCHEMA.TABLES + WHERE TABLE_NAME = @table_id +) +""" +_GET_SCHEMA_TEMPLATE = "SELECT * FROM {} LIMIT 0" -class TableReference(object): - """TableReferences are pointers to tables. - Args: - dataset_ref (cloud_spanner.dataset.DatasetReference): - A pointer to the dataset - table_id (str): The ID of the table +class Table(object): + """Representation of a Cloud Spanner Table. + :type table_id: str + :param table_id: The ID of the table. + :type database: :class:`~google.cloud.spanner_v1.database.Database` + :param database: The database that owns the table. """ - def __init__(self, dataset_ref, table_id): - self._instance = dataset_ref.instance_id - self._dataset_id = dataset_ref.dataset_id + def __init__(self, table_id, database): self._table_id = table_id + self._database = database - @property - def instance(self): - """str: instance bound to the table""" - return self._instance - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return self._dataset_id + # Calculated properties. + self._schema = None @property def table_id(self): - """str: The table ID.""" - return self._table_id - - - # we use this in Tableclass.py so keep it - @classmethod - def from_string(cls, table_id, default_instance=None): - """Construct a table reference from table ID string. - Args: - table_id (str): - A table ID in standard SQL format. If ``instance`` - is not specified, this must included a instance ID, dataset - ID, and table ID, each separated by ``.``. - default_instance (Optional[str]): - The instance ID to use when ``table_id`` does not - include a instance ID. - Returns: - TableReference: Table reference parsed from ``table_id``. - Examples: - >>> TableReference.from_string('my-instance.mydataset.mytable') - TableRef...(DatasetRef...('my-instance', 'mydataset'), 'mytable') - Raises: - ValueError: - If ``table_id`` is not a fully-qualified table ID in - standard SQL format. + """The ID of the table used in SQL. 
+ :rtype: str + :returns: The table ID. """ - - - - ( - output_instance_id, - output_dataset_id, - output_table_id, - ) = _parse_3_part_id( - table_id, default_instance=default_instance, property_name="table_id" - ) - - return cls( - dataset_class.DatasetReference(output_instance_id, output_dataset_id), output_table_id - ) + return self._table_id - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a table reference given its API representation - Args: - resource (Dict[str, object]): - Table reference representation returned from the API - Returns: - cloud_spanner.table.TableReference: - Table reference parsed from ``resource``. + def exists(self): + """Test whether this table exists. + :rtype: bool + :returns: True if the table exists, else false. """ - - - instance = resource["instanceId"] - dataset_id = resource["datasetId"] - table_id = resource["tableId"] - return cls(dataset_class.DatasetReference(instance, dataset_id), table_id) - - def to_api_repr(self): - """Construct the API resource representation of this table reference. - Returns: - Dict[str, object]: Table reference represented as an API resource + with self._database.snapshot() as snapshot: + return self._exists(snapshot) + + def _exists(self, snapshot): + """Query to check that the table exists. + :type snapshot: :class:`~google.cloud.spanner_v1.snapshot.Snapshot` + :param snapshot: snapshot to use for database queries + :rtype: bool + :returns: True if the table exists, else false. 
""" - return { - "instanceId": self._instance, - "datasetId": self._dataset_id, - "tableId": self._table_id, - } - - - - def __eq__(self, other): - if not isinstance(other, TableReference): - return NotImplemented - return self._key() == other._key() - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(self._key()) - - def __repr__(self): - - dataset_ref = dataset_class.DatasetReference(self._instance, self._dataset_id) - return "TableReference({}, '{}')".format(repr(dataset_ref), self._table_id) - - - -def _table_arg_to_table_ref(value, default_instance=None): - """Helper to convert a string or Table to TableReference. - This function keeps TableReference and other kinds of objects unchanged. - """ - if isinstance(value, six.string_types): - value = TableReference.from_string(value, default_instance=default_instance) - - return value - -class Table(object): - """Tables represent a set of rows whose values correspond to a schema. - - Args: - table_ref (Union[cloud_spanner.table.TableReference, str]): - A pointer to a table. If ``table_ref`` is a string, it must - included a instance ID, dataset ID, and table ID, each separated - by ``.``. - schema (Optional): - The table's schema. If any item is a mapping, its content must be - compatible with - """ - - - def __init__(self, table_ref, schema=None): - table_ref = _table_arg_to_table_ref(table_ref) - self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}} - # Let the @property do validation. 
- if schema is not None: - self.schema = schema - - @property - def instance(self): - """str: instance bound to the table.""" - return self._properties["tableReference"]["instanceId"] - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return self._properties["tableReference"]["datasetId"] - - @property - def table_id(self): - """str: ID of the table.""" - return self._properties["tableReference"]["tableId"] - - - @property - def path(self): - """str: URL path for the table's APIs.""" - return "/instances/%s/datasets/%s/tables/%s" % ( - self.instance, - self.dataset_id, - self.table_id, + results = snapshot.execute_sql( + _EXISTS_TEMPLATE, + params={"table_id": self.table_id}, + param_types={"table_id": Type(code=TypeCode.STRING)}, ) - - + return next(iter(results))[0] @property def schema(self): - - records_list=[] - from google.cloud import spanner - spanner_client = spanner.Client() - instance_id = self.instance - instance = spanner_client.instance(instance_id) - db_id = self.dataset_id - database = instance.database(db_id) - with database.snapshot() as snapshot: - query="select * from {} limit 0".format(self.table_id) - results=snapshot.execute_sql(query) - for row in results: - records_list.append(row) - - schema_list=list(results.fields) - - final=[] - - for item in schema_list: - field_name = item.name - if(item.type_.code == TypeCode.ARRAY): - field_type = 'array<{}>'.format(code_to_spanner_dtype_dict[item.type_.array_element_type.code]) - else : - field_type = code_to_spanner_dtype_dict[item.type_.code] - final_item = ( field_name , field_type ) - - final.append(final_item) - - - return final - - @schema.setter - def schema(self, value): - if value is None: - self._properties["schema"] = None - else: - value = _to_schema_fields(value) - self._properties["schema"] = {"fields": _build_schema_resource(value)} - - - @classmethod - def from_string(cls, full_table_id): - """Construct a table from fully-qualified table ID. 
- Args: - full_table_id (str): - A fully-qualified table ID in standard SQL format. Must - included a instance ID, dataset ID, and table ID, each - separated by ``.``. - Returns: - Table: Table parsed from ``full_table_id``. - + """The schema of this table. + :rtype: list of :class:`~google.cloud.spanner_v1.types.StructType.Field` + :returns: The table schema. """ - return cls(TableReference.from_string(full_table_id)) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a table given its API representation - Args: - resource (Dict[str, object]): - Table resource representation from the API - Returns: - cloud_spanner.table.Table: Table parsed from ``resource``. - + if self._schema is None: + with self._database.snapshot() as snapshot: + self._schema = self._get_schema(snapshot) + return self._schema + + def _get_schema(self, snapshot): + """Get the schema of this table. + :type snapshot: :class:`~google.cloud.spanner_v1.snapshot.Snapshot` + :param snapshot: snapshot to use for database queries + :rtype: list of :class:`~google.cloud.spanner_v1.types.StructType.Field` + :returns: The table schema. """ - - - if ( - "tableReference" not in resource - or "tableId" not in resource["tableReference"] - ): - raise KeyError( - "Resource lacks required identity information:" - '["tableReference"]["tableId"]' - ) - instance_id = resource["tableReference"]["instanceId"] - table_id = resource["tableReference"]["tableId"] - dataset_id = resource["tableReference"]["datasetId"] - dataset_ref = dataset_class.DatasetReference(instance_id, dataset_id) - - table = cls(dataset_ref.table(table_id)) - table._properties = resource - - return table - - def to_api_repr(self): - """Constructs the API resource of this table - Returns: - Dict[str, object]: Table represented as an API resource + query = _GET_SCHEMA_TEMPLATE.format(self.table_id) + results = snapshot.execute_sql(query) + # Start iterating to force the schema to download. 
+ try: + next(iter(results)) + except StopIteration: + pass + return list(results.fields) + + def reload(self): + """Reload this table. + Refresh any configured schema into :attr:`schema`. + :raises NotFound: if the table does not exist """ - return copy.deepcopy(self._properties) - - - - def __repr__(self): - d_ref = dataset_class.DatasetReference(self.instance, self.dataset_id) - return "Table({})".format(TableReference(d_ref, self.table_id)) - - - + with self._database.snapshot() as snapshot: + if not self._exists(snapshot): + raise NotFound("table '{}' does not exist".format(self.table_id)) + self._schema = self._get_schema(snapshot) diff --git a/third_party/ibis/ibis_cloud_spanner/tests/conftest.py b/third_party/ibis/ibis_cloud_spanner/tests/conftest.py index 8eb911a97..e30880b71 100644 --- a/third_party/ibis/ibis_cloud_spanner/tests/conftest.py +++ b/third_party/ibis/ibis_cloud_spanner/tests/conftest.py @@ -12,58 +12,53 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os - import pytest - -import ibis from third_party.ibis.ibis_cloud_spanner.api import connect as connect_to_cs -instance_id = 'cloud_spanner_instance_id' -database_id = 'cloud_spanner_databae_id' +instance_id = "cloud_spanner_instance_id" +database_id = "cloud_spanner_databae_id" def connect(instance_id, database_id): - return connect_to_cs(instance_id, database_id ) + return connect_to_cs(instance_id, database_id) -@pytest.fixture(scope='session') + +@pytest.fixture(scope="session") def inst_id(): return instance_id -@pytest.fixture(scope='session') + +@pytest.fixture(scope="session") def client(): return connect(instance_id, database_id) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def client2(): return connect(instance_id, database_id) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def alltypes(client): - return client.table('functional_alltypes') + return client.table("functional_alltypes") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def df(alltypes): return alltypes.execute() -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def students(client): - return client.table('students_pointer') + return client.table("students_pointer") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def students_df(students): return students.execute() -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def array_table(client): - return client.table('array_table') - - - + return client.table("array_table") diff --git a/third_party/ibis/ibis_cloud_spanner/tests/schema.sql b/third_party/ibis/ibis_cloud_spanner/tests/schema.sql index ab8501d3f..efcdca231 100644 --- a/third_party/ibis/ibis_cloud_spanner/tests/schema.sql +++ b/third_party/ibis/ibis_cloud_spanner/tests/schema.sql @@ -90,4 +90,4 @@ CREATE TABLE array_table ( ) PRIMARY KEY (id) INSERT into array_table (id,string_col,int_col) values (1,['Peter','David'],[11,12]) -INSERT into array_table_1 
(id,string_col,int_col) values (2,['Raj','Dev','Neil'],[1,2,3]) +INSERT into array_table_1 (id,string_col,int_col) values (2,['Raj','Dev','Neil'],[1,2,3]) \ No newline at end of file diff --git a/third_party/ibis/ibis_cloud_spanner/tests/test_client.py b/third_party/ibis/ibis_cloud_spanner/tests/test_client.py index a65a0bb27..d66fb083f 100644 --- a/third_party/ibis/ibis_cloud_spanner/tests/test_client.py +++ b/third_party/ibis/ibis_cloud_spanner/tests/test_client.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import collections import datetime -import decimal - import numpy as np import pandas as pd import pandas.util.testing as tm @@ -32,42 +29,45 @@ from third_party.ibis.ibis_cloud_spanner.tests.conftest import connect + def test_table(alltypes): assert isinstance(alltypes, ir.TableExpr) def test_column_execute(alltypes, df): - col_name = 'float_col' + col_name = "float_col" expr = alltypes[col_name] result = expr.execute() expected = df[col_name] tm.assert_series_equal( - (result.sort_values(col_name).reset_index(drop=True)).iloc[:,0], + (result.sort_values(col_name).reset_index(drop=True)).iloc[:, 0], expected.sort_values().reset_index(drop=True), ) def test_literal_execute(client): - expected = '1234' + expected = "1234" expr = ibis.literal(expected) - result = (client.execute(expr)).iloc[0]['tmp'] + result = (client.execute(expr)).iloc[0]["tmp"] assert result == expected def test_simple_aggregate_execute(alltypes, df): - col_name = 'float_col' + col_name = "float_col" expr = alltypes[col_name].sum() result = expr.execute() expected = df[col_name].sum() - final_result = result.iloc[0]['sum'] + final_result = result.iloc[0]["sum"] assert final_result == expected + def test_list_tables(client): - tables = client.list_tables(like='functional_alltypes') - assert set(tables) == {'functional_alltypes'} + tables = client.list_tables(like="functional_alltypes") + assert set(tables) == 
{"functional_alltypes"} + def test_current_database(client): - assert client.current_database.name == 'spanner_dev_db' + assert client.current_database.name == "spanner_dev_db" assert client.current_database.name == client.dataset_id assert client.current_database.tables == client.list_tables() @@ -78,7 +78,7 @@ def test_database(client): def test_compile_toplevel(): - t = ibis.table([('foo', 'double')], name='t0') + t = ibis.table([("foo", "double")], name="t0") expr = t.foo.sum() result = third_party.ibis.ibis_cloud_spanner.compile(expr) @@ -90,38 +90,37 @@ def test_compile_toplevel(): def test_count_distinct_with_filter(alltypes): - expr = alltypes.float_col.nunique( - where=alltypes.float_col.cast('int64') > 1 - ) + expr = alltypes.float_col.nunique(where=alltypes.float_col.cast("int64") > 1) result = expr.execute() - result = result.iloc[:,0] + result = result.iloc[:, 0] result = result.iloc[0] expected = alltypes.float_col.execute() - expected = expected[expected.astype('int64') > 1].nunique() + expected = expected[expected.astype("int64") > 1].nunique() expected = expected.iloc[0] assert result == expected -@pytest.mark.parametrize('type', ['date', dt.date]) +@pytest.mark.parametrize("type", ["date", dt.date]) def test_cast_string_to_date(alltypes, df, type): import toolz - string_col = alltypes.date_string_col - month, day, year = toolz.take(3, string_col.split('/')) + string_col = alltypes.date_string_col + month, day, year = toolz.take(3, string_col.split("/")) - expr = ibis.literal('-').join([year, month, day]) + expr = ibis.literal("-").join([year, month, day]) expr = expr.cast(type) result = ( - expr.execute().iloc[:,0] - .astype('datetime64[ns]') + expr.execute() + .iloc[:, 0] + .astype("datetime64[ns]") .sort_values() .reset_index(drop=True) - .rename('date_string_col') + .rename("date_string_col") ) expected = ( - pd.to_datetime(df.date_string_col ) + pd.to_datetime(df.date_string_col) .dt.normalize() .sort_values() .reset_index(drop=True) @@ 
-131,16 +130,16 @@ def test_cast_string_to_date(alltypes, df, type): def test_subquery_scalar_params(alltypes): t = alltypes - param = ibis.param('timestamp').name('my_param') + param = ibis.param("timestamp").name("my_param") expr = ( - t[['float_col', 'timestamp_col', 'int_col', 'string_col']][ + t[["float_col", "timestamp_col", "int_col", "string_col"]][ lambda t: t.timestamp_col < param ] - .groupby('string_col') + .groupby("string_col") .aggregate(foo=lambda t: t.float_col.sum()) .foo.count() ) - result = cs_compile.compile(expr,params={param: '20140101'}) + result = cs_compile.compile(expr, params={param: "20140101"}) expected = """\ SELECT count(`foo`) AS `count` FROM ( @@ -154,131 +153,124 @@ def test_subquery_scalar_params(alltypes): ) t0""" assert result == expected + def test_scalar_param_string(alltypes, df): - param = ibis.param('string') + param = ibis.param("string") expr = alltypes[alltypes.string_col == param] - string_value = 'David' + string_value = "David" result = ( expr.execute(params={param: string_value}) - .sort_values('id') + .sort_values("id") .reset_index(drop=True) ) expected = ( - df.loc[df.string_col == string_value] - .sort_values('id') - .reset_index(drop=True) + df.loc[df.string_col == string_value].sort_values("id").reset_index(drop=True) ) tm.assert_frame_equal(result, expected) def test_scalar_param_int64(alltypes, df): - param = ibis.param('int64') + param = ibis.param("int64") expr = alltypes[alltypes.int_col == param] int64_value = 22 result = ( expr.execute(params={param: int64_value}) - .sort_values('id') + .sort_values("id") .reset_index(drop=True) ) expected = ( - df.loc[df.int_col == int64_value] - .sort_values('id') - .reset_index(drop=True) + df.loc[df.int_col == int64_value].sort_values("id").reset_index(drop=True) ) tm.assert_frame_equal(result, expected) def test_scalar_param_double(alltypes, df): - param = ibis.param('double') + param = ibis.param("double") expr = alltypes[alltypes.double_col == param] double_value 
= 2.5 result = ( expr.execute(params={param: double_value}) - .sort_values('id') + .sort_values("id") .reset_index(drop=True) ) expected = ( - df.loc[df.double_col == double_value] - .sort_values('id') - .reset_index(drop=True) + df.loc[df.double_col == double_value].sort_values("id").reset_index(drop=True) ) tm.assert_frame_equal(result, expected) def test_scalar_param_boolean(alltypes, df): - param = ibis.param('boolean') + param = ibis.param("boolean") expr = alltypes[(alltypes.bool_col == param)] bool_value = True result = ( expr.execute(params={param: bool_value}) - .sort_values('id') + .sort_values("id") .reset_index(drop=True) ) expected = ( - df.loc[df.bool_col == bool_value] - .sort_values('id') - .reset_index(drop=True) + df.loc[df.bool_col == bool_value].sort_values("id").reset_index(drop=True) ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( - 'timestamp_value', - [ - '2019-04-12 12:09:00+00:00' - ], + "timestamp_value", + ["2019-04-12 12:09:00+00:00"], ) def test_scalar_param_timestamp(alltypes, df, timestamp_value): - param = ibis.param('timestamp') - expr = (alltypes[alltypes.timestamp_col <= param]).select(['timestamp_col']) + param = ibis.param("timestamp") + expr = (alltypes[alltypes.timestamp_col <= param]).select(["timestamp_col"]) result = ( expr.execute(params={param: timestamp_value}) - .sort_values('timestamp_col') + .sort_values("timestamp_col") .reset_index(drop=True) ) value = pd.Timestamp(timestamp_value) expected = ( - df.loc[df.timestamp_col <= value, ['timestamp_col']] - .sort_values('timestamp_col') + df.loc[df.timestamp_col <= value, ["timestamp_col"]] + .sort_values("timestamp_col") .reset_index(drop=True) ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( - 'date_value', - ['2009-02-12', datetime.date(2009, 2, 12), datetime.datetime(2009, 2, 12)], + "date_value", + ["2009-02-12", datetime.date(2009, 2, 12), datetime.datetime(2009, 2, 12)], ) def test_scalar_param_date(alltypes, df, 
date_value): - param = ibis.param('date') - expr = alltypes[alltypes.timestamp_col.cast('date') <= param] + param = ibis.param("date") + expr = alltypes[alltypes.timestamp_col.cast("date") <= param] result = ( expr.execute(params={param: date_value}) - .sort_values('timestamp_col') + .sort_values("timestamp_col") .reset_index(drop=True) ) value = pd.Timestamp(date_value) - value = pd.to_datetime(value).tz_localize('UTC') + value = pd.to_datetime(value).tz_localize("UTC") expected = ( df.loc[df.timestamp_col.dt.normalize() <= value] - .sort_values('timestamp_col') + .sort_values("timestamp_col") .reset_index(drop=True) ) tm.assert_frame_equal(result, expected) + def test_raw_sql(client): - assert (client.raw_sql('SELECT 1')).iloc[0][0] == 1 + assert (client.raw_sql("SELECT 1")).iloc[0][0] == 1 def test_scalar_param_scope(alltypes): t = alltypes - param = ibis.param('timestamp') - mut = t.mutate(param=param).compile(params={param: '2017-01-01'}) + param = ibis.param("timestamp") + mut = t.mutate(param=param).compile(params={param: "2017-01-01"}) assert ( mut == """\ @@ -288,91 +280,99 @@ def test_scalar_param_scope(alltypes): def test_column_names(alltypes): - assert 'bigint_col' in alltypes.columns - assert 'string_col' in alltypes.columns + assert "bigint_col" in alltypes.columns + assert "string_col" in alltypes.columns + def test_column_names_in_schema(alltypes): - assert 'int_col' in alltypes.schema() + assert "int_col" in alltypes.schema() + def test_exists_table(client): - assert client.exists_table('functional_alltypes') - assert not client.exists_table('footable') + assert client.exists_table("functional_alltypes") + assert not client.exists_table("footable") def test_exists_database(client): - assert client.exists_database('spanner_dev_db') - assert not client.exists_database('foodataset') + assert client.exists_database("spanner_dev_db") + assert not client.exists_database("foodataset") def test_set_database(client2): - client2.set_database('demo-db') + 
client2.set_database("demo-db") tables = client2.list_tables() - assert 'awards' in tables + assert "awards" in tables def test_exists_table_different_project(client): - name = 'functional_alltypes' + name = "functional_alltypes" assert client.exists_table(name) - assert not client.exists_table('foobar') + assert not client.exists_table("foobar") + @pytest.mark.parametrize( - ('name', 'expected'), + ("name", "expected"), [ - ('spanner_dev_db', True), - ('database_one', False), + ("spanner_dev_db", True), + ("database_one", False), ], ) -def test_exists_database(client, name, expected): +def test_exists_databases(client, name, expected): assert client.exists_database(name) is expected + def test_repeated_project_name(inst_id): - con = connect(inst_id,"spanner_dev_db") - assert 'functional_alltypes' in con.list_tables() + con = connect(inst_id, "spanner_dev_db") + assert "functional_alltypes" in con.list_tables() + def test_large_timestamp(client): - huge_timestamp = datetime.datetime(2012,10,10,10,10,10,154117) - expr = ibis.timestamp('2012-10-10 10:10:10.154117') + huge_timestamp = datetime.datetime(2012, 10, 10, 10, 10, 10, 154117) + expr = ibis.timestamp("2012-10-10 10:10:10.154117") result = client.execute(expr) - huge_timestamp = (pd.to_datetime(huge_timestamp).tz_localize('UTC')).date() - result = (result['tmp'][0]).date() + huge_timestamp = (pd.to_datetime(huge_timestamp).tz_localize("UTC")).date() + result = (result["tmp"][0]).date() assert result == huge_timestamp + def test_string_to_timestamp(client): timestamp = pd.Timestamp( - datetime.datetime(year=2017, month=2, day=6), tz=pytz.timezone('UTC') + datetime.datetime(year=2017, month=2, day=6), tz=pytz.timezone("UTC") ) - expr = ibis.literal('2017-02-06').to_timestamp('%F') + expr = ibis.literal("2017-02-06").to_timestamp("%F") result = client.execute(expr) - result = result.iloc[:,0][0] + result = result.iloc[:, 0][0] result = result.date() timestamp = timestamp.date() assert result == timestamp 
timestamp_tz = pd.Timestamp( datetime.datetime(year=2017, month=2, day=6, hour=5), - tz=pytz.timezone('UTC'), + tz=pytz.timezone("UTC"), ) - expr_tz = ibis.literal('2017-02-06').to_timestamp('%F', 'America/New_York') + expr_tz = ibis.literal("2017-02-06").to_timestamp("%F", "America/New_York") result_tz = client.execute(expr_tz) - result_tz = result_tz.iloc[:,0][0] + result_tz = result_tz.iloc[:, 0][0] result_tz = result_tz.date() timestamp_tz = timestamp_tz.date() assert result_tz == timestamp_tz + def test_client_sql_query(client): - expr = client.get_data_using_query('select * from functional_alltypes limit 20') + expr = client.get_data_using_query("select * from functional_alltypes limit 20") result = expr - expected = client.table('functional_alltypes').head(20).execute() + expected = client.table("functional_alltypes").head(20).execute() tm.assert_frame_equal(result, expected) + def test_prevent_rewrite(alltypes): t = alltypes expr = ( t.groupby(t.string_col) .aggregate(collected_double=t.double_col.collect()) .pipe(ibis.prevent_rewrite) - .filter(lambda t: t.string_col != 'wat') + .filter(lambda t: t.string_col != "wat") ) result = cs_compile.compile(expr) expected = """\ @@ -387,58 +387,69 @@ def test_prevent_rewrite(alltypes): @pytest.mark.parametrize( - ('case', 'dtype'), + ("case", "dtype"), [ (datetime.date(2017, 1, 1), dt.date), - (pd.Timestamp('2017-01-01'), dt.date), - ('2017-01-01', dt.date), + (pd.Timestamp("2017-01-01"), dt.date), + ("2017-01-01", dt.date), (datetime.datetime(2017, 1, 1, 4, 55, 59), dt.timestamp), - ('2017-01-01 04:55:59', dt.timestamp), - (pd.Timestamp('2017-01-01 04:55:59'), dt.timestamp), + ("2017-01-01 04:55:59", dt.timestamp), + (pd.Timestamp("2017-01-01 04:55:59"), dt.timestamp), ], ) def test_day_of_week(client, case, dtype): date_var = ibis.literal(case, type=dtype) expr_index = date_var.day_of_week.index() result = client.execute(expr_index) - result = result['tmp'][0] + result = result["tmp"][0] assert result == 6 
expr_name = date_var.day_of_week.full_name() result = client.execute(expr_name) - result = result['tmp'][0] - assert result == 'Sunday' + result = result["tmp"][0] + assert result == "Sunday" + def test_boolean_reducers(alltypes): b = alltypes.bool_col bool_avg = b.mean().execute() - bool_avg = bool_avg.iloc[:,0] + bool_avg = bool_avg.iloc[:, 0] bool_avg = bool_avg[0] assert type(bool_avg) == np.float64 bool_sum = b.sum().execute() - bool_sum = bool_sum.iloc[:,0] + bool_sum = bool_sum.iloc[:, 0] bool_sum = bool_sum[0] assert type(bool_sum) == np.int64 + def test_students_table_schema(students): assert students.schema() == ibis.schema( - [('id', dt.int64), ('name', dt.string), ('division',dt.int64), ('marks',dt.int64), ('exam', dt.string), ('overall_pointer',dt.float64), ('date_of_exam',dt.timestamp)] + [ + ("id", dt.int64), + ("name", dt.string), + ("division", dt.int64), + ("marks", dt.int64), + ("exam", dt.string), + ("overall_pointer", dt.float64), + ("date_of_exam", dt.timestamp), + ] ) + def test_numeric_sum(students): t = students expr = t.overall_pointer.sum() result = expr.execute() - result = (result.iloc[:,0])[0] + result = (result.iloc[:, 0])[0] assert isinstance(result, np.float64) def test_boolean_casting(alltypes): t = alltypes - expr = t.groupby(k=t.string_col.nullif('1') == '9').count() - result = expr.execute().set_index('k') - count = result['count'] + expr = t.groupby(k=t.string_col.nullif("1") == "9").count() + result = expr.execute().set_index("k") + count = result["count"] assert count.loc[False] == 6 @@ -452,27 +463,25 @@ def test_approx_median(alltypes): def test_struct_field_access(array_table): expr = array_table.string_col result = expr.execute() - result = result.iloc[:,0] + result = result.iloc[:, 0] expected = pd.Series( - [ - ['Peter', 'David'], - ['Raj', 'Dev', 'Neil'] - ], - name='string_col', + [["Peter", "David"], ["Raj", "Dev", "Neil"]], + name="string_col", ) - + tm.assert_series_equal(result, expected) + def 
test_array_index(array_table): expr = array_table.string_col[1] result = expr.execute() - result = result.iloc[:,0] + result = result.iloc[:, 0] expected = pd.Series( [ - 'David', - 'Dev', + "David", + "Dev", ], - name='tmp', + name="tmp", ) tm.assert_series_equal(result, expected) @@ -481,13 +490,13 @@ def test_array_concat(array_table): c = array_table.string_col expr = c + c result = expr.execute() - result = result.iloc[:,0] + result = result.iloc[:, 0] expected = pd.Series( [ - ['Peter', 'David', 'Peter', 'David'], - ['Raj', 'Dev', 'Neil', 'Raj', 'Dev', 'Neil'] + ["Peter", "David", "Peter", "David"], + ["Raj", "Dev", "Neil", "Raj", "Dev", "Neil"], ], - name='tmp', + name="tmp", ) tm.assert_series_equal(result, expected) @@ -495,18 +504,14 @@ def test_array_concat(array_table): def test_array_length(array_table): expr = array_table.string_col.length() result = expr.execute() - result = result.iloc[:,0] - expected = pd.Series([2, 3], name='tmp') + result = result.iloc[:, 0] + expected = pd.Series([2, 3], name="tmp") tm.assert_series_equal(result, expected) def test_scalar_param_array(alltypes, df, client): - expr = alltypes.sort_by('id').limit(1).double_col.collect() + expr = alltypes.sort_by("id").limit(1).double_col.collect() result = client.get_data_using_query(cs_compile.compile(expr)) - result = result['tmp'][0] - expected = [df.sort_values('id').double_col.iat[0]] + result = result["tmp"][0] + expected = [df.sort_values("id").double_col.iat[0]] assert result == expected - - - - diff --git a/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py b/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py index b95a2efc5..f1be898c0 100644 --- a/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py +++ b/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py @@ -26,9 +26,10 @@ pytestmark = pytest.mark.cloud_spanner + def test_timestamp_accepts_date_literals(alltypes): - date_string = '2009-03-01' - param = 
ibis.param(dt.timestamp).name('param_0') + date_string = "2009-03-01" + param = ibis.param(dt.timestamp).name("param_0") expr = alltypes.mutate(param=param) params = {param: date_string} result = expr.compile(params=params) @@ -37,8 +38,9 @@ def test_timestamp_accepts_date_literals(alltypes): FROM functional_alltypes""" assert result == expected + @pytest.mark.parametrize( - ('distinct', 'expected_keyword'), [(True, 'DISTINCT'), (False, 'ALL')] + ("distinct", "expected_keyword"), [(True, "DISTINCT"), (False, "ALL")] ) def test_union(alltypes, distinct, expected_keyword): expr = alltypes.union(alltypes, distinct=distinct) @@ -61,10 +63,9 @@ def test_ieee_divide(alltypes): assert result == expected - def test_identical_to(alltypes): t = alltypes - pred = t.string_col.identical_to('a') & t.date_string_col.identical_to('b') + pred = t.string_col.identical_to("a") & t.date_string_col.identical_to("b") expr = t[pred] result = cs_compile.compile(expr) expected = f"""\ @@ -75,10 +76,9 @@ def test_identical_to(alltypes): assert result == expected - -@pytest.mark.parametrize('timezone', [None, 'America/New_York']) +@pytest.mark.parametrize("timezone", [None, "America/New_York"]) def test_to_timestamp(alltypes, timezone): - expr = alltypes.date_string_col.to_timestamp('%F', timezone) + expr = alltypes.date_string_col.to_timestamp("%F", timezone) result = cs_compile.compile(expr) if timezone: expected = f"""\ @@ -91,25 +91,28 @@ def test_to_timestamp(alltypes, timezone): assert result == expected - @pytest.mark.parametrize( - ('case', 'expected', 'dtype'), + ("case", "expected", "dtype"), [ (datetime.date(2017, 1, 1), "DATE '2017-01-01'", dt.date), - (pd.Timestamp('2017-01-01'), "DATE '2017-01-01'", dt.date,), - ('2017-01-01', "DATE '2017-01-01'", dt.date), + ( + pd.Timestamp("2017-01-01"), + "DATE '2017-01-01'", + dt.date, + ), + ("2017-01-01", "DATE '2017-01-01'", dt.date), ( datetime.datetime(2017, 1, 1, 4, 55, 59), "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp, ), ( - 
'2017-01-01 04:55:59', + "2017-01-01 04:55:59", "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp, ), ( - pd.Timestamp('2017-01-01 04:55:59'), + pd.Timestamp("2017-01-01 04:55:59"), "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp, ), @@ -122,38 +125,43 @@ def test_literal_date(case, expected, dtype): @pytest.mark.parametrize( - ('case', 'expected', 'dtype', 'strftime_func'), + ("case", "expected", "dtype", "strftime_func"), [ ( datetime.date(2017, 1, 1), "DATE '2017-01-01'", dt.date, - 'FORMAT_DATE', + "FORMAT_DATE", + ), + ( + pd.Timestamp("2017-01-01"), + "DATE '2017-01-01'", + dt.date, + "FORMAT_DATE", ), ( - pd.Timestamp('2017-01-01'), + "2017-01-01", "DATE '2017-01-01'", dt.date, - 'FORMAT_DATE', + "FORMAT_DATE", ), - ('2017-01-01', "DATE '2017-01-01'", dt.date, 'FORMAT_DATE',), ( datetime.datetime(2017, 1, 1, 4, 55, 59), "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp, - 'FORMAT_TIMESTAMP', + "FORMAT_TIMESTAMP", ), ( - '2017-01-01 04:55:59', + "2017-01-01 04:55:59", "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp, - 'FORMAT_TIMESTAMP', + "FORMAT_TIMESTAMP", ), ( - pd.Timestamp('2017-01-01 04:55:59'), + pd.Timestamp("2017-01-01 04:55:59"), "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp, - 'FORMAT_TIMESTAMP', + "FORMAT_TIMESTAMP", ), ], ) @@ -161,24 +169,18 @@ def test_day_of_week(case, expected, dtype, strftime_func): date_var = ibis.literal(case, type=dtype) expr_index = date_var.day_of_week.index() result = cs_compile.compile(expr_index) - assert ( - result - == f"SELECT MOD(EXTRACT(DAYOFWEEK FROM {expected}) + 5, 7) AS `tmp`" - ) + assert result == f"SELECT MOD(EXTRACT(DAYOFWEEK FROM {expected}) + 5, 7) AS `tmp`" expr_name = date_var.day_of_week.full_name() result = cs_compile.compile(expr_name) - if strftime_func == 'FORMAT_TIMESTAMP': - assert ( - result - == f"SELECT {strftime_func}('%A', {expected}, 'UTC') AS `tmp`" - ) + if strftime_func == "FORMAT_TIMESTAMP": + assert result == f"SELECT {strftime_func}('%A', {expected}, 'UTC') AS `tmp`" else: assert 
result == f"SELECT {strftime_func}('%A', {expected}) AS `tmp`" @pytest.mark.parametrize( - ('case', 'expected', 'dtype'), + ("case", "expected", "dtype"), [ ( datetime.datetime(2017, 1, 1, 4, 55, 59), @@ -186,17 +188,17 @@ def test_day_of_week(case, expected, dtype, strftime_func): dt.timestamp, ), ( - '2017-01-01 04:55:59', + "2017-01-01 04:55:59", "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp, ), ( - pd.Timestamp('2017-01-01 04:55:59'), + pd.Timestamp("2017-01-01 04:55:59"), "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp, ), (datetime.time(4, 55, 59), "TIME '04:55:59'", dt.time), - ('04:55:59', "TIME '04:55:59'", dt.time), + ("04:55:59", "TIME '04:55:59'", dt.time), ], ) def test_literal_timestamp_or_time(case, expected, dtype): @@ -208,7 +210,7 @@ def test_literal_timestamp_or_time(case, expected, dtype): def test_window_function(alltypes): t = alltypes w1 = ibis.window( - preceding=1, following=0, group_by='year', order_by='timestamp_col' + preceding=1, following=0, group_by="year", order_by="timestamp_col" ) expr = t.mutate(win_avg=t.float_col.mean().over(w1)) result = cs_compile.compile(expr) @@ -219,7 +221,7 @@ def test_window_function(alltypes): assert result == expected w2 = ibis.window( - preceding=0, following=2, group_by='year', order_by='timestamp_col' + preceding=0, following=2, group_by="year", order_by="timestamp_col" ) expr = t.mutate(win_avg=t.float_col.mean().over(w2)) result = cs_compile.compile(expr) @@ -229,9 +231,7 @@ def test_window_function(alltypes): FROM functional_alltypes""" # noqa: E501 assert result == expected - w3 = ibis.window( - preceding=(4, 2), group_by='year', order_by='timestamp_col' - ) + w3 = ibis.window(preceding=(4, 2), group_by="year", order_by="timestamp_col") expr = t.mutate(win_avg=t.float_col.mean().over(w3)) result = cs_compile.compile(expr) expected = f"""\ @@ -243,9 +243,7 @@ def test_window_function(alltypes): def test_range_window_function(alltypes): t = alltypes - w = ibis.range_window( - preceding=1, 
following=0, group_by='year', order_by='month' - ) + w = ibis.range_window(preceding=1, following=0, group_by="year", order_by="month") expr = t.mutate(two_month_avg=t.float_col.mean().over(w)) result = cs_compile.compile(expr) expected = f"""\ @@ -254,9 +252,7 @@ def test_range_window_function(alltypes): FROM functional_alltypes""" # noqa: E501 assert result == expected - w3 = ibis.range_window( - preceding=(4, 2), group_by='year', order_by='timestamp_col' - ) + w3 = ibis.range_window(preceding=(4, 2), group_by="year", order_by="timestamp_col") expr = t.mutate(win_avg=t.float_col.mean().over(w3)) result = cs_compile.compile(expr) expected = f"""\ @@ -267,7 +263,7 @@ def test_range_window_function(alltypes): @pytest.mark.parametrize( - ('preceding', 'value'), + ("preceding", "value"), [ (5, 5), (ibis.interval(nanoseconds=1), 0.001), @@ -282,9 +278,7 @@ def test_range_window_function(alltypes): ) def test_trailing_range_window(alltypes, preceding, value): t = alltypes - w = ibis.trailing_range_window( - preceding=preceding, order_by=t.timestamp_col - ) + w = ibis.trailing_range_window(preceding=preceding, order_by=t.timestamp_col) expr = t.mutate(win_avg=t.float_col.mean().over(w)) result = cs_compile.compile(expr) expected = f"""\ @@ -294,38 +288,30 @@ def test_trailing_range_window(alltypes, preceding, value): assert result == expected -@pytest.mark.parametrize( - ('preceding', 'value'), [(ibis.interval(years=1), None)] -) +@pytest.mark.parametrize(("preceding", "value"), [(ibis.interval(years=1), None)]) def test_trailing_range_window_unsupported(alltypes, preceding, value): t = alltypes - w = ibis.trailing_range_window( - preceding=preceding, order_by=t.timestamp_col - ) + w = ibis.trailing_range_window(preceding=preceding, order_by=t.timestamp_col) expr = t.mutate(win_avg=t.float_col.mean().over(w)) with pytest.raises(ValueError): cs_compile.compile(expr) @pytest.mark.parametrize( - ('distinct1', 'distinct2', 'expected1', 'expected2'), + ("distinct1", 
"distinct2", "expected1", "expected2"), [ - (True, True, 'UNION DISTINCT', 'UNION DISTINCT'), - (True, False, 'UNION DISTINCT', 'UNION ALL'), - (False, True, 'UNION ALL', 'UNION DISTINCT'), - (False, False, 'UNION ALL', 'UNION ALL'), + (True, True, "UNION DISTINCT", "UNION DISTINCT"), + (True, False, "UNION DISTINCT", "UNION ALL"), + (False, True, "UNION ALL", "UNION DISTINCT"), + (False, False, "UNION ALL", "UNION ALL"), ], ) -def test_union_cte( - alltypes, distinct1, distinct2, expected1, expected2 -): +def test_union_cte(alltypes, distinct1, distinct2, expected1, expected2): t = alltypes expr1 = t.group_by(t.string_col).aggregate(metric=t.double_col.sum()) expr2 = expr1.view() expr3 = expr1.view() - expr = expr1.union(expr2, distinct=distinct1).union( - expr3, distinct=distinct2 - ) + expr = expr1.union(expr2, distinct=distinct1).union(expr3, distinct=distinct2) result = cs_compile.compile(expr) expected = f"""\ WITH t0 AS ( @@ -348,14 +334,14 @@ def test_union_cte( def test_projection_fusion_only_peeks_at_immediate_parent(): schema = [ - ('file_date', 'timestamp'), - ('PARTITIONTIME', 'date'), - ('val', 'int64'), + ("file_date", "timestamp"), + ("PARTITIONTIME", "date"), + ("val", "int64"), ] - table = ibis.table(schema, name='unbound_table') - table = table[table.PARTITIONTIME < ibis.date('2017-01-01')] - table = table.mutate(file_date=table.file_date.cast('date')) - table = table[table.file_date < ibis.date('2017-01-01')] + table = ibis.table(schema, name="unbound_table") + table = table[table.PARTITIONTIME < ibis.date("2017-01-01")] + table = table.mutate(file_date=table.file_date.cast("date")) + table = table[table.file_date < ibis.date("2017-01-01")] table = table.mutate(XYZ=table.val * 2) expr = table.join(table.view())[table] result = cs_compile.compile(expr) @@ -510,7 +496,7 @@ def test_cov(alltypes): FROM functional_alltypes""" assert result == expected - expr = d.cov(d, how='pop') + expr = d.cov(d, how="pop") result = cs_compile.compile(expr) 
expected = f"""\ SELECT @@ -564,32 +550,32 @@ def test_cov(alltypes): @pytest.mark.parametrize( - ('unit', 'expected_unit', 'expected_func'), + ("unit", "expected_unit", "expected_func"), [ - ('Y', 'YEAR', 'TIMESTAMP'), - ('Q', 'QUARTER', 'TIMESTAMP'), - ('M', 'MONTH', 'TIMESTAMP'), - ('W', 'WEEK', 'TIMESTAMP'), - ('D', 'DAY', 'TIMESTAMP'), - ('h', 'HOUR', 'TIMESTAMP'), - ('m', 'MINUTE', 'TIMESTAMP'), - ('s', 'SECOND', 'TIMESTAMP'), - ('ms', 'MILLISECOND', 'TIMESTAMP'), - ('us', 'MICROSECOND', 'TIMESTAMP'), - ('Y', 'YEAR', 'DATE'), - ('Q', 'QUARTER', 'DATE'), - ('M', 'MONTH', 'DATE'), - ('W', 'WEEK', 'DATE'), - ('D', 'DAY', 'DATE'), - ('h', 'HOUR', 'TIME'), - ('m', 'MINUTE', 'TIME'), - ('s', 'SECOND', 'TIME'), - ('ms', 'MILLISECOND', 'TIME'), - ('us', 'MICROSECOND', 'TIME'), + ("Y", "YEAR", "TIMESTAMP"), + ("Q", "QUARTER", "TIMESTAMP"), + ("M", "MONTH", "TIMESTAMP"), + ("W", "WEEK", "TIMESTAMP"), + ("D", "DAY", "TIMESTAMP"), + ("h", "HOUR", "TIMESTAMP"), + ("m", "MINUTE", "TIMESTAMP"), + ("s", "SECOND", "TIMESTAMP"), + ("ms", "MILLISECOND", "TIMESTAMP"), + ("us", "MICROSECOND", "TIMESTAMP"), + ("Y", "YEAR", "DATE"), + ("Q", "QUARTER", "DATE"), + ("M", "MONTH", "DATE"), + ("W", "WEEK", "DATE"), + ("D", "DAY", "DATE"), + ("h", "HOUR", "TIME"), + ("m", "MINUTE", "TIME"), + ("s", "SECOND", "TIME"), + ("ms", "MILLISECOND", "TIME"), + ("us", "MICROSECOND", "TIME"), ], ) def test_temporal_truncate(unit, expected_unit, expected_func): - t = ibis.table([('a', getattr(dt, expected_func.lower()))], name='t') + t = ibis.table([("a", getattr(dt, expected_func.lower()))], name="t") expr = t.a.truncate(unit) result = cs_compile.compile(expr) expected = f"""\ @@ -598,9 +584,9 @@ def test_temporal_truncate(unit, expected_unit, expected_func): assert result == expected -@pytest.mark.parametrize('kind', ['date', 'time']) +@pytest.mark.parametrize("kind", ["date", "time"]) def test_extract_temporal_from_timestamp(kind): - t = ibis.table([('ts', dt.timestamp)], name='t') + t = 
ibis.table([("ts", dt.timestamp)], name="t") expr = getattr(t.ts, kind)() result = cs_compile.compile(expr) expected = f"""\ @@ -612,14 +598,14 @@ def test_extract_temporal_from_timestamp(kind): def test_now(): expr = ibis.now() result = cs_compile.compile(expr) - expected = 'SELECT CURRENT_TIMESTAMP() AS `tmp`' + expected = "SELECT CURRENT_TIMESTAMP() AS `tmp`" assert result == expected def test_bucket(): - t = ibis.table([('value', 'double')], name='t') + t = ibis.table([("value", "double")], name="t") buckets = [0, 1, 3] - expr = t.value.bucket(buckets).name('foo') + expr = t.value.bucket(buckets).name("foo") result = cs_compile.compile(expr) expected = """\ SELECT @@ -633,14 +619,14 @@ def test_bucket(): @pytest.mark.parametrize( - ('kind', 'begin', 'end', 'expected'), + ("kind", "begin", "end", "expected"), [ - ('preceding', None, 1, 'UNBOUNDED PRECEDING AND 1 PRECEDING'), - ('following', 1, None, '1 FOLLOWING AND UNBOUNDED FOLLOWING'), + ("preceding", None, 1, "UNBOUNDED PRECEDING AND 1 PRECEDING"), + ("following", 1, None, "1 FOLLOWING AND UNBOUNDED FOLLOWING"), ], ) def test_window_unbounded(kind, begin, end, expected): - t = ibis.table([('a', 'int64')], name='t') + t = ibis.table([("a", "int64")], name="t") kwargs = {kind: (begin, end)} expr = t.a.sum().over(ibis.window(**kwargs)) result = cs_compile.compile(expr) @@ -665,11 +651,9 @@ def __init__(self): pass names = [f"col_{i}" for i in range(num_columns)] - schema = ibis.Schema(names, ['string'] * num_columns) + schema = ibis.Schema(names, ["string"] * num_columns) ibis_client = MockCloudSpannerClient() - table = TableExpr( - ops.SQLQueryResult("select * from t", schema, ibis_client) - ) + table = TableExpr(ops.SQLQueryResult("select * from t", schema, ibis_client)) for _ in range(num_joins): table = table.mutate(dummy=ibis.literal("")) table = table.left_join(table, ["dummy"])[[table]] @@ -678,4 +662,3 @@ def __init__(self): cs_compile.compile(table) delta = datetime.datetime.now() - start assert 
delta.total_seconds() < 10 - diff --git a/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py b/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py index fcdfe25ad..6bdff60b9 100644 --- a/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py +++ b/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py @@ -19,44 +19,46 @@ import ibis.expr.datatypes as dt from third_party.ibis.ibis_cloud_spanner.datatypes import ( TypeTranslationContext, - ibis_type_to_cloud_spanner_type + ibis_type_to_cloud_spanner_type, ) pytestmark = pytest.mark.cloud_spanner + def test_no_ambiguities(): ambs = ambiguities(ibis_type_to_cloud_spanner_type.funcs) assert not ambs @pytest.mark.parametrize( - ('datatype', 'expected'), + ("datatype", "expected"), [ - (dt.float32, 'FLOAT64'), - (dt.float64, 'FLOAT64'), - (dt.uint8, 'INT64'), - (dt.uint16, 'INT64'), - (dt.uint32, 'INT64'), - (dt.int8, 'INT64'), - (dt.int16, 'INT64'), - (dt.int32, 'INT64'), - (dt.int64, 'INT64'), - (dt.string, 'STRING'), - (dt.Array(dt.int64), 'ARRAY'), - (dt.Array(dt.string), 'ARRAY'), - (dt.date, 'DATE'), - (dt.timestamp, 'TIMESTAMP'), + (dt.float32, "FLOAT64"), + (dt.float64, "FLOAT64"), + (dt.uint8, "INT64"), + (dt.uint16, "INT64"), + (dt.uint32, "INT64"), + (dt.int8, "INT64"), + (dt.int16, "INT64"), + (dt.int32, "INT64"), + (dt.int64, "INT64"), + (dt.string, "STRING"), + (dt.Array(dt.int64), "ARRAY"), + (dt.Array(dt.string), "ARRAY"), + (dt.date, "DATE"), + (dt.timestamp, "TIMESTAMP"), param( - dt.Timestamp(timezone='US/Eastern'), - 'TIMESTAMP', - ) - ] + dt.Timestamp(timezone="US/Eastern"), + "TIMESTAMP", + ), + ], ) def test_simple(datatype, expected): context = TypeTranslationContext() assert ibis_type_to_cloud_spanner_type(datatype, context) == expected -@pytest.mark.parametrize('datatype', [dt.uint64, dt.Decimal(8, 3)]) + +@pytest.mark.parametrize("datatype", [dt.uint64, dt.Decimal(8, 3)]) def test_simple_failure_mode(datatype): with pytest.raises(TypeError): 
ibis_type_to_cloud_spanner_type(datatype) diff --git a/third_party/ibis/ibis_cloud_spanner/to_pandas.py b/third_party/ibis/ibis_cloud_spanner/to_pandas.py index 35501440f..902f88334 100644 --- a/third_party/ibis/ibis_cloud_spanner/to_pandas.py +++ b/third_party/ibis/ibis_cloud_spanner/to_pandas.py @@ -13,70 +13,55 @@ # limitations under the License. from pandas import DataFrame -from google.cloud import spanner -code_to_spanner_dtype_dict = { - 1 : 'BOOL', - 2 : 'INT64', - 3 : 'FLOAT64', - 4 : 'TIMESTAMP', - 5 : 'DATE', - 6 : 'STRING', - 7 : 'BYTES', - 8 : 'ARRAY', - 10 : 'NUMERIC' -} - -class pandas_df(): +class pandas_df: def to_pandas(snapshot, sql, query_parameters): - - if(query_parameters): - param={} - param_type={} - for i in query_parameters: - param.update(i['params']) - param_type.update(i['param_types']) + if query_parameters: + param = {} + param_type = {} + for i in query_parameters: + param.update(i["params"]) + param_type.update(i["param_types"]) - data_qry=snapshot.execute_sql(sql, params=param, param_types=param_type) + data_qry = snapshot.execute_sql(sql, params=param, param_types=param_type) else: - data_qry=snapshot.execute_sql(sql) + data_qry = snapshot.execute_sql(sql) - data=[] + data = [] for row in data_qry: data.append(row) - - columns_dict={} - - for item in data_qry.fields : - columns_dict[item.name]=code_to_spanner_dtype_dict[item.type_.code] + columns_dict = {} - #Creating list of columns to be mapped with the data - column_list=[k for k,v in columns_dict.items()] + for item in data_qry.fields: + columns_dict[item.name] = item.type_.code.name - #Creating pandas dataframe from data and columns_list - df = DataFrame(data,columns=column_list) + # Creating list of columns to be mapped with the data + column_list = [k for k, v in columns_dict.items()] + # Creating pandas dataframe from data and columns_list + df = DataFrame(data, columns=column_list) - #Mapping dictionary to map every spanner datatype to a pandas compatible datatype - 
mapping_dict={ - 'INT64':'int64', - 'STRING':'object', - 'BOOL':'bool', - 'BYTES':'object', - 'ARRAY':'object', - 'DATE':'datetime64[ns, UTC]', - 'FLOAT64':'float64', - 'NUMERIC':'object', - 'TIMESTAMP':'datetime64[ns, UTC]' + # Dictionary to map spanner datatype to a pandas compatible datatype + SPANNER_TO_PANDAS_DTYPE = { + "INT64": "int64", + "STRING": "object", + "BOOL": "bool", + "BYTES": "object", + "ARRAY": "object", + "DATE": "datetime64[ns, UTC]", + "FLOAT64": "float64", + "NUMERIC": "object", + "TIMESTAMP": "datetime64[ns, UTC]", } - for k,v in columns_dict.items() : + + for k, v in columns_dict.items(): try: - df[k]= df[k].astype(mapping_dict[v]) + df[k] = df[k].astype(SPANNER_TO_PANDAS_DTYPE[v]) except KeyError: - print("Spanner Datatype not present in datatype mapping dictionary") - + print("Spanner Datatype is not present in datatype mapping dictionary") + return df From ad1782a5e1b5ecfe43210a0dc2919fc04258c969 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 26 Feb 2021 16:49:12 -0600 Subject: [PATCH 06/11] update tests and add to cloudbuild --- cloudbuild.yaml | 7 + noxfile.py | 17 ++ requirements.txt | 1 + third_party/ibis/ibis_cloud_spanner/api.py | 5 +- .../ibis/ibis_cloud_spanner/compiler.py | 7 +- .../ibis/ibis_cloud_spanner/tests/conftest.py | 102 ++++++++- .../ibis/ibis_cloud_spanner/tests/ddl.sql | 54 +++++ .../ibis/ibis_cloud_spanner/tests/ddl2.sql | 20 ++ .../ibis/ibis_cloud_spanner/tests/dml.sql | 78 +++++++ .../ibis/ibis_cloud_spanner/tests/dml2.sql | 23 ++ .../ibis/ibis_cloud_spanner/tests/schema.sql | 93 -------- .../ibis_cloud_spanner/tests/test_client.py | 46 ++-- .../ibis_cloud_spanner/tests/test_compiler.py | 206 +----------------- .../tests/test_datatypes.py | 5 +- 14 files changed, 317 insertions(+), 347 deletions(-) create mode 100644 third_party/ibis/ibis_cloud_spanner/tests/ddl.sql create mode 100644 third_party/ibis/ibis_cloud_spanner/tests/ddl2.sql create mode 100644 third_party/ibis/ibis_cloud_spanner/tests/dml.sql 
create mode 100644 third_party/ibis/ibis_cloud_spanner/tests/dml2.sql delete mode 100644 third_party/ibis/ibis_cloud_spanner/tests/schema.sql diff --git a/cloudbuild.yaml b/cloudbuild.yaml index acb9367a0..fe18a65ab 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -41,3 +41,10 @@ steps: - 'NOX_SESSION=integration_bigquery' - 'PROJECT_ID=pso-kokoro-resources' waitFor: ['-'] +- id: integration_spanner + name: 'gcr.io/pso-kokoro-resources/python-multi' + args: ['bash', './ci/build.sh'] + env: + - 'NOX_SESSION=integration_spanner' + - 'PROJECT_ID=pso-kokoro-resources' + waitFor: ['-'] diff --git a/noxfile.py b/noxfile.py index e8dd0ec1c..0327cc0ff 100644 --- a/noxfile.py +++ b/noxfile.py @@ -137,3 +137,20 @@ def integration_bigquery(session): raise Exception("Expected Env Var: %s" % env_var) session.run("pytest", test_path, *session.posargs) + + +@nox.session(python=PYTHON_VERSIONS, venv_backend="venv") +def integration_spanner(session): + """Run Spanner integration tests. + Ensure Spanner validation is running as expected. + """ + _setup_session_requirements(session, extra_packages=[]) + + expected_env_vars = ["PROJECT_ID"] + for env_var in expected_env_vars: + if not os.environ.get(env_var, ""): + raise Exception("Expected Env Var: %s" % env_var) + + # TODO: Add tests for DVT data sources. See integration_bigquery. 
+ session.run("pytest", "third_party/ibis/ibis_cloud_spanner/tests", *session.posargs) + diff --git a/requirements.txt b/requirements.txt index b24beb0ea..aa6804968 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,5 +16,6 @@ pyarrow==3.0.0 pydata-google-auth==1.1.0 google-cloud-bigquery==2.7.0 google-cloud-bigquery-storage==2.2.1 +google-cloud-spanner==3.1.0 setuptools>=34.0.0 jellyfish==0.8.2 diff --git a/third_party/ibis/ibis_cloud_spanner/api.py b/third_party/ibis/ibis_cloud_spanner/api.py index 52a9d078e..a09bbeb75 100644 --- a/third_party/ibis/ibis_cloud_spanner/api.py +++ b/third_party/ibis/ibis_cloud_spanner/api.py @@ -51,10 +51,7 @@ def verify(expr, params=None): return False -def connect( - instance_id, - database_id, -) -> CloudSpannerClient: +def connect(instance_id, database_id,) -> CloudSpannerClient: """Create a CloudSpannerClient for use with Ibis. Parameters diff --git a/third_party/ibis/ibis_cloud_spanner/compiler.py b/third_party/ibis/ibis_cloud_spanner/compiler.py index 1d492e701..2f0044a6d 100644 --- a/third_party/ibis/ibis_cloud_spanner/compiler.py +++ b/third_party/ibis/ibis_cloud_spanner/compiler.py @@ -13,7 +13,7 @@ # limitations under the License. 
import ibis.expr.operations as ops -from ibis.bigquery import compiler as bigquery_compiler +from ibis.backends.bigquery import compiler as bigquery_compiler def build_ast(expr, context): @@ -46,10 +46,7 @@ def _regex_extract(translator, expr): _operation_registry = bigquery_compiler._operation_registry.copy() _operation_registry.update( - { - ops.RegexExtract: _regex_extract, - ops.ArrayIndex: _array_index, - } + {ops.RegexExtract: _regex_extract, ops.ArrayIndex: _array_index,} ) diff --git a/third_party/ibis/ibis_cloud_spanner/tests/conftest.py b/third_party/ibis/ibis_cloud_spanner/tests/conftest.py index e30880b71..052e6f55e 100644 --- a/third_party/ibis/ibis_cloud_spanner/tests/conftest.py +++ b/third_party/ibis/ibis_cloud_spanner/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google Inc. +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,30 +12,112 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import datetime +import random +import pathlib + +from google.cloud import spanner_v1 import pytest -from third_party.ibis.ibis_cloud_spanner.api import connect as connect_to_cs +from third_party.ibis.ibis_cloud_spanner.api import connect + + +DATA_DIR = pathlib.Path(__file__).parent + +RANDOM_MAX = 0xFFFFFFFF +INSTANCE_ID_TEMPLATE = "data-validation-tool-{timestamp}" +DATABASE_ID_TEMPLATE = "db_{timestamp}_{randint}" + + +def load_sql(filename): + lines = [] + with open(DATA_DIR / filename) as sql_file: + for line in sql_file: + if line.startswith("--"): + continue + lines.append(line) + return [ + statement.strip() + for statement in "".join(lines).split(";") + if statement.strip() + ] + + +def insert_rows(transaction): + dml_statements = load_sql("dml.sql") + for dml in dml_statements: + transaction.execute_update(dml) -instance_id = "cloud_spanner_instance_id" -database_id = "cloud_spanner_databae_id" +def insert_rows2(transaction): + dml_statements = load_sql("dml2.sql") + for dml in dml_statements: + transaction.execute_update(dml) -def connect(instance_id, database_id): - return connect_to_cs(instance_id, database_id) +@pytest.fixture(scope="session") +def spanner_client(): + return spanner_v1.Client() + + +@pytest.fixture(scope="session") +def instance_id(spanner_client): + config_name = "{}/instanceConfigs/regional-us-central1".format( + spanner_client.project_name + ) + instance_id = INSTANCE_ID_TEMPLATE.format( + timestamp=datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S") + ) + instance = spanner_client.instance( + instance_id, + configuration_name=config_name, + display_name="Test for Data Validation Tool", + node_count=1, + ) + operation = instance.create() + operation.result() + yield instance_id + instance.delete() + + +@pytest.fixture(scope="session") +def database_id(spanner_client, instance_id): + database_id = DATABASE_ID_TEMPLATE.format( + timestamp=datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S"), + randint=random.randint(0, 
RANDOM_MAX), + ) + ddl_statements = load_sql("ddl.sql") + instance = spanner_client.instance(instance_id) + database = instance.database(database_id, ddl_statements=ddl_statements) + operation = database.create() + operation.result() + database.run_in_transaction(insert_rows) + yield database_id + database.drop() @pytest.fixture(scope="session") -def inst_id(): - return instance_id +def database_id2(spanner_client, instance_id): + database_id = DATABASE_ID_TEMPLATE.format( + timestamp=datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S"), + randint=random.randint(0, RANDOM_MAX), + ) + ddl_statements = load_sql("ddl2.sql") + instance = spanner_client.instance(instance_id) + database = instance.database(database_id, ddl_statements=ddl_statements) + operation = database.create() + operation.result() + database.run_in_transaction(insert_rows2) + yield database_id + database.drop() @pytest.fixture(scope="session") -def client(): +def client(instance_id, database_id): return connect(instance_id, database_id) @pytest.fixture(scope="session") -def client2(): +def client2(instance_id, database_id): return connect(instance_id, database_id) diff --git a/third_party/ibis/ibis_cloud_spanner/tests/ddl.sql b/third_party/ibis/ibis_cloud_spanner/tests/ddl.sql new file mode 100644 index 000000000..a6be8533e --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/tests/ddl.sql @@ -0,0 +1,54 @@ +-- Copyright 2021 Google LLC +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+ +CREATE TABLE students_pointer +( + id INT64, + name STRING(30), + division INT64, + marks INT64, + exam STRING(30), + overall_pointer FLOAT64, + date_of_exam TIMESTAMP +) +PRIMARY KEY (id); + +CREATE TABLE functional_alltypes +( + id INT64, + bigint_col INT64, + bool_col BOOL, + date DATE, + date_string_col STRING(MAX), + double_col NUMERIC, + float_col NUMERIC, + index INT64, + int_col INT64, + month INT64, + smallint_col INT64, + string_col STRING(MAX), + timestamp_col TIMESTAMP, + tinyint_col INT64, + Unnamed0 INT64, + year INT64 +) +PRIMARY KEY (id); + +CREATE TABLE array_table +( + string_col ARRAY, + int_col ARRAY, + id INT64, +) +PRIMARY KEY (id); diff --git a/third_party/ibis/ibis_cloud_spanner/tests/ddl2.sql b/third_party/ibis/ibis_cloud_spanner/tests/ddl2.sql new file mode 100644 index 000000000..ca90d1dea --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/tests/ddl2.sql @@ -0,0 +1,20 @@ +-- Copyright 2021 Google LLC +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+ +CREATE TABLE awards +( + id INT64, + award_name STRING(20) +) +PRIMARY KEY (id); diff --git a/third_party/ibis/ibis_cloud_spanner/tests/dml.sql b/third_party/ibis/ibis_cloud_spanner/tests/dml.sql new file mode 100644 index 000000000..5849f7dac --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/tests/dml.sql @@ -0,0 +1,78 @@ +-- Copyright 2021 Google LLC +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +INSERT INTO students_pointer + (id,name,division,marks,exam,overall_pointer,date_of_exam) +VALUES(101, 'Ross', 12, 500, 'Biology', 9.8, '2002-02-10 15:30:00+00'); + +INSERT INTO students_pointer + (id,name,division,marks,exam,overall_pointer,date_of_exam) +VALUES(102, 'Rachel', 14, 460, 'Chemistry', 9.9, '2018-04-22'); + +INSERT INTO students_pointer + (id,name,division,marks,exam,overall_pointer,date_of_exam) +VALUES(103, 'Chandler', 12, 480, 'Biology', 8.2, '2016-04-14'); + +INSERT INTO students_pointer + (id,name,division,marks,exam,overall_pointer,date_of_exam) +VALUES(104, 'Monica', 12, 390, 'Maths', 9.2, '2019-04-29'); + +INSERT INTO students_pointer + (id,name,division,marks,exam,overall_pointer,date_of_exam) +VALUES(105, 'Joey', 16, 410, 'Maths', 9.7, '2019-06-21'); + +INSERT INTO students_pointer + (id,name,division,marks,exam,overall_pointer,date_of_exam) +VALUES(106, 'Phoebe', 10, 490, 'Chemistry', 9.6, '2019-02-09'); + + +INSERT INTO functional_alltypes + (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index 
,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) +VALUES + (1, 10001, TRUE, '2016-02-09', '01/01/2001', 2.5, 12.16, 101, 21, 4, 16, 'David', '2002-02-10 15:30:00+00', 6, 99, 2010); + +INSERT INTO functional_alltypes + (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) +VALUES + (2, 10002, FALSE, '2016-10-10', '02/02/2002', 2.6, 13.16, 102, 22, 5, 18, 'Ryan', '2009-02-12 10:06:00+00', 7, 98, 2012); + +INSERT INTO functional_alltypes + (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) +VALUES + (3, 10003, TRUE, '2018-02-09', '03/03/2003', 9.5, 44.16, 201, 41, 6, 56, 'Steve', '2010-06-10 12:12:00+00', 12, 66, 2006); + +INSERT INTO functional_alltypes + (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) +VALUES + (4, 10004, TRUE, '2018-10-10', '04/04/2004', 9.6, 45.16, 202, 42, 9, 58, 'Chandler', '2014-06-12 10:04:00+00', 14, 69, 2009); + +INSERT INTO functional_alltypes + (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) +VALUES + (5, 10005, FALSE, '2020-06-12', '05/05/2005', 6.6, 66.12, 401, 62, 12, 98, 'Rose', '2018-02-10 10:06:00+00', 16, 96, 2012); + +INSERT INTO functional_alltypes + (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) +VALUES + (6, 10006, TRUE, '2020-12-12', '06/06/2006', 6.9, 66.19, 402, 69, 14, 99, 'Rachel', '2019-04-12 12:09:00+00', 18, 99, 2014); + +INSERT INTO array_table + (id,string_col,int_col) +VALUES + (1, ['Peter','David'], 
[11,12]); + +INSERT INTO array_table + (id,string_col,int_col) +VALUES + (2, ['Raj','Dev','Neil'], [1,2,3]); diff --git a/third_party/ibis/ibis_cloud_spanner/tests/dml2.sql b/third_party/ibis/ibis_cloud_spanner/tests/dml2.sql new file mode 100644 index 000000000..c5a097562 --- /dev/null +++ b/third_party/ibis/ibis_cloud_spanner/tests/dml2.sql @@ -0,0 +1,23 @@ +-- Copyright 2021 Google LLC +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +INSERT INTO awards + (id,award_name) +VALUES + (101, 'LOTUS'); + +INSERT INTO awards + (id,award_name) +VALUES + (102, 'ROSE'); diff --git a/third_party/ibis/ibis_cloud_spanner/tests/schema.sql b/third_party/ibis/ibis_cloud_spanner/tests/schema.sql deleted file mode 100644 index efcdca231..000000000 --- a/third_party/ibis/ibis_cloud_spanner/tests/schema.sql +++ /dev/null @@ -1,93 +0,0 @@ -CREATE TABLE students_pointer - ( - id INT64, - name STRING(30), - division INT64, - marks INT64, - exam STRING(30), - overall_pointer FLOAT64, - date_of_exam TIMESTAMP - )PRIMARY KEY(id); - -INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(101,'Ross',12,500,'Biology',9.8,'2002-02-10 15:30:00+00'); - -INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(102,'Rachel',14,460,'Chemistry',9.9,'2018-04-22'); - -INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(103,'Chandler',12,480,'Biology',8.2,'2016-04-14'); - 
-INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(104,'Monica',12,390,'Maths',9.2,'2019-04-29'); - -INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(105,'Joey',16,410,'Maths',9.7,'2019-06-21'); - -INSERT into students_pointer(id,name,division,marks,exam,overall_pointer,date_of_exam) values(106,'Phoebe',10,490,'Chemistry',9.6,'2019-02-09'); - - - - - - -CREATE TABLE awards - ( - id INT64, - award_name STRING(20) - )PRIMARY KEY(id); - -Insert into awards (id,award_name) values (101,'LOTUS') -Insert into awards (id,award_name) values (102,'ROSE') - - - - - - -CREATE TABLE functional_alltypes ( - id INT64, - bigint_col INT64, - bool_col BOOL, - date DATE, - date_string_col STRING(MAX), - double_col NUMERIC, - float_col NUMERIC, - index INT64, - int_col INT64, - month INT64, - smallint_col INT64, - string_col STRING(MAX), - timestamp_col TIMESTAMP, - tinyint_col INT64, - Unnamed0 INT64, - year INT64 -) PRIMARY KEY (id) - -INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values -(1,10001,TRUE,'2016-02-09','01/01/2001',2.5,12.16,101,21,4,16,'David','2002-02-10 15:30:00+00',6,99,2010) - -INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values -(2,10002,FALSE,'2016-10-10','02/02/2002',2.6,13.16,102,22,5,18,'Ryan','2009-02-12 10:06:00+00',7,98,2012) - -INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values -(3,10003,TRUE,'2018-02-09','03/03/2003',9.5,44.16,201,41,6,56,'Steve','2010-06-10 12:12:00+00',12,66,2006) - -INSERT into 
functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values -(4,10004,TRUE,'2018-10-10','04/04/2004',9.6,45.16,202,42,9,58,'Chandler','2014-06-12 10:04:00+00',14,69,2009) - - -INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values -(5,10005,FALSE,'2020-06-12','05/05/2005',6.6,66.12,401,62,12,98,'Rose','2018-02-10 10:06:00+00',16,96,2012) - -INSERT into functional_alltypes (id ,bigint_col ,bool_col ,date ,date_string_col ,double_col ,float_col ,index ,int_col ,month ,smallint_col ,string_col ,timestamp_col ,tinyint_col ,Unnamed0 ,year ) values -(6,10006,TRUE,'2020-12-12','06/06/2006',6.9,66.19,402,69,14,99,'Rachel','2019-04-12 12:09:00+00',18,99,2014) - - - - - - -CREATE TABLE array_table ( - string_col ARRAY, - int_col ARRAY, - id INT64, -) PRIMARY KEY (id) - -INSERT into array_table (id,string_col,int_col) values (1,['Peter','David'],[11,12]) -INSERT into array_table_1 (id,string_col,int_col) values (2,['Raj','Dev','Neil'],[1,2,3]) \ No newline at end of file diff --git a/third_party/ibis/ibis_cloud_spanner/tests/test_client.py b/third_party/ibis/ibis_cloud_spanner/tests/test_client.py index d66fb083f..32ca523df 100644 --- a/third_party/ibis/ibis_cloud_spanner/tests/test_client.py +++ b/third_party/ibis/ibis_cloud_spanner/tests/test_client.py @@ -66,8 +66,8 @@ def test_list_tables(client): assert set(tables) == {"functional_alltypes"} -def test_current_database(client): - assert client.current_database.name == "spanner_dev_db" +def test_current_database(client, database_id): + assert client.current_database.name == database_id assert client.current_database.name == client.dataset_id assert client.current_database.tables == client.list_tables() @@ -81,7 +81,7 @@ def 
test_compile_toplevel(): t = ibis.table([("foo", "double")], name="t0") expr = t.foo.sum() - result = third_party.ibis.ibis_cloud_spanner.compile(expr) + result = cs_compile.compile(expr) expected = """\ SELECT sum(`foo`) AS `sum` @@ -219,8 +219,7 @@ def test_scalar_param_boolean(alltypes, df): @pytest.mark.parametrize( - "timestamp_value", - ["2019-04-12 12:09:00+00:00"], + "timestamp_value", ["2019-04-12 12:09:00+00:00"], ) def test_scalar_param_timestamp(alltypes, df, timestamp_value): param = ibis.param("timestamp") @@ -293,13 +292,13 @@ def test_exists_table(client): assert not client.exists_table("footable") -def test_exists_database(client): - assert client.exists_database("spanner_dev_db") +def test_exists_database(client, database_id): + assert client.exists_database(database_id) assert not client.exists_database("foodataset") -def test_set_database(client2): - client2.set_database("demo-db") +def test_set_database(client2, database_id2): + client2.set_database(database_id2) tables = client2.list_tables() assert "awards" in tables @@ -310,19 +309,8 @@ def test_exists_table_different_project(client): assert not client.exists_table("foobar") -@pytest.mark.parametrize( - ("name", "expected"), - [ - ("spanner_dev_db", True), - ("database_one", False), - ], -) -def test_exists_databases(client, name, expected): - assert client.exists_database(name) is expected - - -def test_repeated_project_name(inst_id): - con = connect(inst_id, "spanner_dev_db") +def test_repeated_project_name(instance_id, database_id): + con = connect(instance_id, database_id) assert "functional_alltypes" in con.list_tables() @@ -348,8 +336,7 @@ def test_string_to_timestamp(client): assert result == timestamp timestamp_tz = pd.Timestamp( - datetime.datetime(year=2017, month=2, day=6, hour=5), - tz=pytz.timezone("UTC"), + datetime.datetime(year=2017, month=2, day=6, hour=5), tz=pytz.timezone("UTC"), ) expr_tz = ibis.literal("2017-02-06").to_timestamp("%F", "America/New_York") result_tz = 
client.execute(expr_tz) @@ -465,8 +452,7 @@ def test_struct_field_access(array_table): result = expr.execute() result = result.iloc[:, 0] expected = pd.Series( - [["Peter", "David"], ["Raj", "Dev", "Neil"]], - name="string_col", + [["Peter", "David"], ["Raj", "Dev", "Neil"]], name="string_col", ) tm.assert_series_equal(result, expected) @@ -476,13 +462,7 @@ def test_array_index(array_table): expr = array_table.string_col[1] result = expr.execute() result = result.iloc[:, 0] - expected = pd.Series( - [ - "David", - "Dev", - ], - name="tmp", - ) + expected = pd.Series(["David", "Dev",], name="tmp",) tm.assert_series_equal(result, expected) diff --git a/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py b/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py index f1be898c0..acd89e0f7 100644 --- a/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py +++ b/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py @@ -95,22 +95,14 @@ def test_to_timestamp(alltypes, timezone): ("case", "expected", "dtype"), [ (datetime.date(2017, 1, 1), "DATE '2017-01-01'", dt.date), - ( - pd.Timestamp("2017-01-01"), - "DATE '2017-01-01'", - dt.date, - ), + (pd.Timestamp("2017-01-01"), "DATE '2017-01-01'", dt.date,), ("2017-01-01", "DATE '2017-01-01'", dt.date), ( datetime.datetime(2017, 1, 1, 4, 55, 59), "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp, ), - ( - "2017-01-01 04:55:59", - "TIMESTAMP '2017-01-01 04:55:59'", - dt.timestamp, - ), + ("2017-01-01 04:55:59", "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp,), ( pd.Timestamp("2017-01-01 04:55:59"), "TIMESTAMP '2017-01-01 04:55:59'", @@ -127,24 +119,9 @@ def test_literal_date(case, expected, dtype): @pytest.mark.parametrize( ("case", "expected", "dtype", "strftime_func"), [ - ( - datetime.date(2017, 1, 1), - "DATE '2017-01-01'", - dt.date, - "FORMAT_DATE", - ), - ( - pd.Timestamp("2017-01-01"), - "DATE '2017-01-01'", - dt.date, - "FORMAT_DATE", - ), - ( - "2017-01-01", - "DATE '2017-01-01'", - dt.date, - 
"FORMAT_DATE", - ), + (datetime.date(2017, 1, 1), "DATE '2017-01-01'", dt.date, "FORMAT_DATE",), + (pd.Timestamp("2017-01-01"), "DATE '2017-01-01'", dt.date, "FORMAT_DATE",), + ("2017-01-01", "DATE '2017-01-01'", dt.date, "FORMAT_DATE",), ( datetime.datetime(2017, 1, 1, 4, 55, 59), "TIMESTAMP '2017-01-01 04:55:59'", @@ -187,11 +164,7 @@ def test_day_of_week(case, expected, dtype, strftime_func): "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp, ), - ( - "2017-01-01 04:55:59", - "TIMESTAMP '2017-01-01 04:55:59'", - dt.timestamp, - ), + ("2017-01-01 04:55:59", "TIMESTAMP '2017-01-01 04:55:59'", dt.timestamp,), ( pd.Timestamp("2017-01-01 04:55:59"), "TIMESTAMP '2017-01-01 04:55:59'", @@ -241,62 +214,6 @@ def test_window_function(alltypes): assert result == expected -def test_range_window_function(alltypes): - t = alltypes - w = ibis.range_window(preceding=1, following=0, group_by="year", order_by="month") - expr = t.mutate(two_month_avg=t.float_col.mean().over(w)) - result = cs_compile.compile(expr) - expected = f"""\ -SELECT *, - avg(`float_col`) OVER (PARTITION BY `year` ORDER BY `month` RANGE BETWEEN 1 PRECEDING AND CURRENT ROW) AS `two_month_avg` -FROM functional_alltypes""" # noqa: E501 - assert result == expected - - w3 = ibis.range_window(preceding=(4, 2), group_by="year", order_by="timestamp_col") - expr = t.mutate(win_avg=t.float_col.mean().over(w3)) - result = cs_compile.compile(expr) - expected = f"""\ -SELECT *, - avg(`float_col`) OVER (PARTITION BY `year` ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg` -FROM functional_alltypes""" # noqa: E501 - assert result == expected - - -@pytest.mark.parametrize( - ("preceding", "value"), - [ - (5, 5), - (ibis.interval(nanoseconds=1), 0.001), - (ibis.interval(microseconds=1), 1), - (ibis.interval(seconds=1), 1000000), - (ibis.interval(minutes=1), 1000000 * 60), - (ibis.interval(hours=1), 1000000 * 60 * 60), - (ibis.interval(days=1), 1000000 * 60 * 60 * 24), - (2 * 
ibis.interval(days=1), 1000000 * 60 * 60 * 24 * 2), - (ibis.interval(weeks=1), 1000000 * 60 * 60 * 24 * 7), - ], -) -def test_trailing_range_window(alltypes, preceding, value): - t = alltypes - w = ibis.trailing_range_window(preceding=preceding, order_by=t.timestamp_col) - expr = t.mutate(win_avg=t.float_col.mean().over(w)) - result = cs_compile.compile(expr) - expected = f"""\ -SELECT *, - avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN {value} PRECEDING AND CURRENT ROW) AS `win_avg` -FROM functional_alltypes""" # noqa: E501 - assert result == expected - - -@pytest.mark.parametrize(("preceding", "value"), [(ibis.interval(years=1), None)]) -def test_trailing_range_window_unsupported(alltypes, preceding, value): - t = alltypes - w = ibis.trailing_range_window(preceding=preceding, order_by=t.timestamp_col) - expr = t.mutate(win_avg=t.float_col.mean().over(w)) - with pytest.raises(ValueError): - cs_compile.compile(expr) - - @pytest.mark.parametrize( ("distinct1", "distinct2", "expected1", "expected2"), [ @@ -366,7 +283,7 @@ def test_projection_fusion_only_peeks_at_immediate_parent(): ) SELECT t3.* FROM t3 - CROSS JOIN t3 t4""" + INNER JOIN t3 t4""" assert result == expected @@ -442,113 +359,6 @@ def test_approx_median(alltypes): assert result == expected -def test_cov(alltypes): - d = alltypes.double_col - expr = d.cov(d) - result = cs_compile.compile(expr) - expected = f"""\ -SELECT - COVAR_SAMP(ref_0 - CloudSpannerTable[table] - name: functional_alltypes - schema: - id : int64 - bigint_col : int64 - bool_col : boolean - date : date - date_string_col : string - double_col : decimal(9, 0) - float_col : decimal(9, 0) - index : int64 - int_col : int64 - month : int64 - smallint_col : int64 - string_col : string - timestamp_col : timestamp - tinyint_col : int64 - Unnamed0 : int64 - year : int64 - - double_col = Column[decimal(9, 0)*] 'double_col' from table - ref_0, ref_0 - CloudSpannerTable[table] - name: functional_alltypes - schema: - id : 
int64 - bigint_col : int64 - bool_col : boolean - date : date - date_string_col : string - double_col : decimal(9, 0) - float_col : decimal(9, 0) - index : int64 - int_col : int64 - month : int64 - smallint_col : int64 - string_col : string - timestamp_col : timestamp - tinyint_col : int64 - Unnamed0 : int64 - year : int64 - - double_col = Column[decimal(9, 0)*] 'double_col' from table - ref_0) AS `tmp` -FROM functional_alltypes""" - assert result == expected - - expr = d.cov(d, how="pop") - result = cs_compile.compile(expr) - expected = f"""\ -SELECT - COVAR_POP(ref_0 - CloudSpannerTable[table] - name: functional_alltypes - schema: - id : int64 - bigint_col : int64 - bool_col : boolean - date : date - date_string_col : string - double_col : decimal(9, 0) - float_col : decimal(9, 0) - index : int64 - int_col : int64 - month : int64 - smallint_col : int64 - string_col : string - timestamp_col : timestamp - tinyint_col : int64 - Unnamed0 : int64 - year : int64 - - double_col = Column[decimal(9, 0)*] 'double_col' from table - ref_0, ref_0 - CloudSpannerTable[table] - name: functional_alltypes - schema: - id : int64 - bigint_col : int64 - bool_col : boolean - date : date - date_string_col : string - double_col : decimal(9, 0) - float_col : decimal(9, 0) - index : int64 - int_col : int64 - month : int64 - smallint_col : int64 - string_col : string - timestamp_col : timestamp - tinyint_col : int64 - Unnamed0 : int64 - year : int64 - - double_col = Column[decimal(9, 0)*] 'double_col' from table - ref_0) AS `tmp` -FROM functional_alltypes""" - assert result == expected - - @pytest.mark.parametrize( ("unit", "expected_unit", "expected_func"), [ @@ -646,7 +456,7 @@ def test_large_compile(): num_columns = 20 num_joins = 7 - class MockCloudSpannerClient(cs.CloudSpannerClient): + class MockCloudSpannerClient(cs_compile.CloudSpannerClient): def __init__(self): pass diff --git a/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py 
b/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py index 6bdff60b9..f5490367f 100644 --- a/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py +++ b/third_party/ibis/ibis_cloud_spanner/tests/test_datatypes.py @@ -47,10 +47,7 @@ def test_no_ambiguities(): (dt.Array(dt.string), "ARRAY"), (dt.date, "DATE"), (dt.timestamp, "TIMESTAMP"), - param( - dt.Timestamp(timezone="US/Eastern"), - "TIMESTAMP", - ), + param(dt.Timestamp(timezone="US/Eastern"), "TIMESTAMP",), ], ) def test_simple(datatype, expected): From 8034bfb21ad125012099e0228545eb95cab593e7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 26 Feb 2021 17:02:25 -0600 Subject: [PATCH 07/11] blacken --- noxfile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 0327cc0ff..a3fc3687c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -153,4 +153,3 @@ def integration_spanner(session): # TODO: Add tests for DVT data sources. See integration_bigquery. session.run("pytest", "third_party/ibis/ibis_cloud_spanner/tests", *session.posargs) - From fa59391052102ae279436a581cb49bce383a1498 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 1 Mar 2021 11:00:02 -0600 Subject: [PATCH 08/11] increase large compile time for cloud build --- third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py b/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py index acd89e0f7..8e8e9d25e 100644 --- a/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py +++ b/third_party/ibis/ibis_cloud_spanner/tests/test_compiler.py @@ -471,4 +471,4 @@ def __init__(self): start = datetime.datetime.now() cs_compile.compile(table) delta = datetime.datetime.now() - start - assert delta.total_seconds() < 10 + assert delta.total_seconds() < 60 From e6384a6fff8e68ddac6688effeecb29b547b349b Mon Sep 17 00:00:00 2001 From: dollylipare Date: Tue, 2 Mar 2021 13:32:48 +0000 Subject: [PATCH 
09/11] added TODO comment for list tables function --- third_party/ibis/ibis_cloud_spanner/api.py | 7 ++- third_party/ibis/ibis_cloud_spanner/client.py | 52 ++++++++----------- 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/third_party/ibis/ibis_cloud_spanner/api.py b/third_party/ibis/ibis_cloud_spanner/api.py index 52a9d078e..d7b52d05f 100644 --- a/third_party/ibis/ibis_cloud_spanner/api.py +++ b/third_party/ibis/ibis_cloud_spanner/api.py @@ -13,7 +13,7 @@ # limitations under the License. -"""CloudScanner public API.""" +"""CloudSpanner public API.""" from third_party.ibis.ibis_cloud_spanner.client import CloudSpannerClient @@ -54,6 +54,7 @@ def verify(expr, params=None): def connect( instance_id, database_id, + project=None ) -> CloudSpannerClient: """Create a CloudSpannerClient for use with Ibis. @@ -63,6 +64,8 @@ def connect( A Cloud Spanner Instance id. database_id : str A database id inside of the Cloud Spanner Instance + project : str (Optional) + The ID of the project which owns the instances, tables and data. 
Returns ------- @@ -70,4 +73,4 @@ def connect( """ - return CloudSpannerClient(instance_id=instance_id, database_id=database_id) + return CloudSpannerClient(instance_id=instance_id, database_id=database_id, project=project) diff --git a/third_party/ibis/ibis_cloud_spanner/client.py b/third_party/ibis/ibis_cloud_spanner/client.py index 7c40a34be..93736b32d 100644 --- a/third_party/ibis/ibis_cloud_spanner/client.py +++ b/third_party/ibis/ibis_cloud_spanner/client.py @@ -44,14 +44,11 @@ def parse_instance_and_dataset( instance: str, dataset: Optional[str] = None ) -> Tuple[str, str, Optional[str]]: - try: - data_instance, dataset = dataset.split(".") - except (ValueError, AttributeError): - billing_instance = data_instance = instance - else: - billing_instance = instance - return data_instance, billing_instance, dataset + data_instance = instance + dataset = dataset + + return data_instance, dataset class CloudSpannerTable(ops.DatabaseTable): @@ -216,7 +213,7 @@ def execute(self): class CloudSpannerDatabase(Database): - """A Cloud scanner dataset.""" + """A Cloud spanner dataset.""" class CloudSpannerClient(SQLClient): @@ -226,7 +223,7 @@ class CloudSpannerClient(SQLClient): database_class = CloudSpannerDatabase table_class = CloudSpannerTable - def __init__(self, instance_id, database_id=None, credentials=None): + def __init__(self, instance_id, database_id, project=None, credentials=None): """Construct a CloudSpannerClient. Parameters @@ -235,15 +232,16 @@ def __init__(self, instance_id, database_id=None, credentials=None): A instance name database_id : Optional[str] A ``.`` string or just a dataset name + project : str (Optional) + The ID of the project which owns the instances, tables and data. 
""" - self.spanner_client = spanner.Client() + self.spanner_client = spanner.Client(project=project) self.instance = self.spanner_client.instance(instance_id) self.database_name = self.instance.database(database_id) ( self.data_instance, - self.billing_instance, self.dataset, ) = parse_instance_and_dataset(instance_id, database_id) self.client = cs.Client() @@ -251,10 +249,10 @@ def __init__(self, instance_id, database_id=None, credentials=None): def _parse_instance_and_dataset(self, dataset): if not dataset and not self.dataset: raise ValueError("Unable to determine Cloud Spanner dataset.") - instance, _, dataset = parse_instance_and_dataset( - self.billing_instance, - dataset or "{}.{}".format(self.data_instance, self.dataset), + instance, dataset = parse_instance_and_dataset( + self.data_instance,(dataset or self.dataset) ) + return instance, dataset def get_data_using_query(self, query, results=False): @@ -280,8 +278,7 @@ def _get_query(self, dml, **kwargs): return self.query_class(self, dml, query_parameters=dml.context.params) def _fully_qualified_name(self, name, database): - instance, dataset = self._parse_instance_and_dataset(database) - return "{}".format(name) + return name def _get_table_schema(self, qualified_name): table = qualified_name @@ -301,6 +298,8 @@ def list_databases(self, like=None): return list_db def list_tables(self, like=None, database=None): + # TODO: use list_tables from the Database class when available. 
+ if database is None: db_value = self.dataset_id else: @@ -322,19 +321,12 @@ def list_tables(self, like=None, database=None): return tables def exists_table(self, name, database=None): + if database is None: - db_value = self.dataset_id - else: - db_value = database - db = self.instance.database(db_value) - with db.snapshot() as snapshot: - query = "SELECT EXISTS(SELECT * FROM INFORMATION_SCHEMA.TABLES where TABLE_NAME = '{}' )".format( - name - ) - output = snapshot.execute_sql(query) - result = "" - for row in output: - result = row[0] + database = self.dataset_id + + db_value = self.instance.database(database) + result = table.Table(name, db_value).exists() return result def get_schema(self, table_id, database=None): @@ -377,7 +369,7 @@ def _execute(self, stmt, results=True, query_parameters=None): def database(self, name=None): if name is None and self.dataset is None: raise ValueError( - "Unable to determine Cloud Scanner dataset. Call " + "Unable to determine Cloud Spanner dataset. Call " "client.database('my_dataset') or set_database('my_dataset') " "to assign your client a dataset." 
) @@ -393,3 +385,5 @@ def dataset(self, database): def exists_database(self, name): return self.instance.database(name).exists() + + From 67223eedb48964c12708e7727912f81ca5df58b9 Mon Sep 17 00:00:00 2001 From: dollylipare Date: Tue, 2 Mar 2021 13:57:05 +0000 Subject: [PATCH 10/11] added TODO comment for list tables function --- third_party/ibis/ibis_cloud_spanner/api.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/third_party/ibis/ibis_cloud_spanner/api.py b/third_party/ibis/ibis_cloud_spanner/api.py index 795041b82..d7b52d05f 100644 --- a/third_party/ibis/ibis_cloud_spanner/api.py +++ b/third_party/ibis/ibis_cloud_spanner/api.py @@ -51,15 +51,11 @@ def verify(expr, params=None): return False -<<<<<<< HEAD def connect( instance_id, database_id, project=None ) -> CloudSpannerClient: -======= -def connect(instance_id, database_id,) -> CloudSpannerClient: ->>>>>>> fa59391052102ae279436a581cb49bce383a1498 """Create a CloudSpannerClient for use with Ibis. Parameters From 0b5cea4f9d26b9a5bab71d1ccaeee5867a93186f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 8 Mar 2021 16:49:15 -0600 Subject: [PATCH 11/11] rename project param to project_id This is consistent with the BigQuery Ibis connector --- third_party/ibis/ibis_cloud_spanner/api.py | 10 +++++++--- third_party/ibis/ibis_cloud_spanner/client.py | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/third_party/ibis/ibis_cloud_spanner/api.py b/third_party/ibis/ibis_cloud_spanner/api.py index d7b52d05f..d035b5320 100644 --- a/third_party/ibis/ibis_cloud_spanner/api.py +++ b/third_party/ibis/ibis_cloud_spanner/api.py @@ -54,7 +54,7 @@ def verify(expr, params=None): def connect( instance_id, database_id, - project=None + project_id=None, ) -> CloudSpannerClient: """Create a CloudSpannerClient for use with Ibis. @@ -64,7 +64,7 @@ def connect( A Cloud Spanner Instance id. 
database_id : str A database id inside of the Cloud Spanner Instance - project : str (Optional) + project_id : str (Optional) The ID of the project which owns the instances, tables and data. Returns @@ -73,4 +73,8 @@ def connect( """ - return CloudSpannerClient(instance_id=instance_id, database_id=database_id, project=project) + return CloudSpannerClient( + instance_id=instance_id, + database_id=database_id, + project_id=project_id, + ) diff --git a/third_party/ibis/ibis_cloud_spanner/client.py b/third_party/ibis/ibis_cloud_spanner/client.py index 93736b32d..1f1692e69 100644 --- a/third_party/ibis/ibis_cloud_spanner/client.py +++ b/third_party/ibis/ibis_cloud_spanner/client.py @@ -223,7 +223,7 @@ class CloudSpannerClient(SQLClient): database_class = CloudSpannerDatabase table_class = CloudSpannerTable - def __init__(self, instance_id, database_id, project=None, credentials=None): + def __init__(self, instance_id, database_id, project_id=None, credentials=None): """Construct a CloudSpannerClient. Parameters @@ -232,12 +232,12 @@ def __init__(self, instance_id, database_id, project=None, credentials=None): A instance name database_id : Optional[str] A ``.`` string or just a dataset name - project : str (Optional) + project_id : str (Optional) The ID of the project which owns the instances, tables and data. """ - self.spanner_client = spanner.Client(project=project) + self.spanner_client = spanner.Client(project=project_id) self.instance = self.spanner_client.instance(instance_id) self.database_name = self.instance.database(database_id) (