From 81a8491bd9674633b6d0532410515fc0a511b5fc Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Wed, 9 Oct 2019 17:46:30 +0200 Subject: [PATCH 01/21] Get geographies from metadata --- .../data/enrichment/enrichment_service.py | 49 ++++++++----------- .../data/enrichment/points_enrichment.py | 8 +-- .../data/enrichment/polygons_enrichment.py | 8 +-- 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/cartoframes/data/enrichment/enrichment_service.py b/cartoframes/data/enrichment/enrichment_service.py index 42b669482..2afc7fad9 100644 --- a/cartoframes/data/enrichment/enrichment_service.py +++ b/cartoframes/data/enrichment/enrichment_service.py @@ -9,6 +9,7 @@ from ...auth import get_default_credentials from ...utils.geom_utils import _compute_geometry_from_geom from ..observatory.variable import Variable +from ..observatory.dataset import Dataset as DatasetCatalog _ENRICHMENT_ID = 'enrichment_id' @@ -61,8 +62,8 @@ def _enrichment_queries(user_dataset, tablename, query_function, **kwargs): variables = __process_variables(kwargs['variables']) - table_to_geotable, table_to_variables, table_to_project, table_to_dataset =\ - __process_enrichment_variables(variables, user_dataset) + table_to_geotable, table_to_variables, table_to_project, table_to_dataset,\ + geotable_to_project, geotable_to_dataset = __process_enrichment_variables(variables, user_dataset) filters_str = __process_filters(kwargs['filters']) @@ -70,7 +71,7 @@ def _enrichment_queries(user_dataset, tablename, query_function, **kwargs): kwargs['agg_operators'] = __process_agg_operators(kwargs['agg_operators'], variables) return query_function(_ENRICHMENT_ID, filters_str, table_to_geotable, table_to_variables, table_to_project, - table_to_dataset, user_dataset, _WORKING_PROJECT, tablename, **kwargs) + table_to_dataset, geotable_to_project, geotable_to_dataset, user_dataset, _WORKING_PROJECT, tablename, **kwargs) def _execute_enrichment(bq_client, queries, data_copy, data_geom_column): @@ -162,30 
+163,25 @@ def __process_agg_operators(agg_operators, variables): return agg_operators_result -def __get_tables_and_variables(variables, user_dataset): - - table_to_geotable, table_to_variables, table_to_project, table_to_dataset =\ - __process_enrichment_variables(variables, user_dataset) - - return table_to_geotable, table_to_variables, table_to_project, table_to_dataset - - def __process_enrichment_variables(variables, user_dataset): table_to_geotable = dict() table_to_variables = defaultdict(list) table_to_project = dict() table_to_dataset = dict() + geotable_to_project = dict() + geotable_to_dataset = dict() for variable in variables: project_name = variable.project_name dataset_name = variable.schema_name table_name = variable.dataset_name variable_name = variable.column_name + project_geotable, dataset_geotable, geotable = __get_properties_geotable(variable) if project_name != _PUBLIC_PROJECT: - table_name = '{dataset}_{table}'.format(dataset=dataset_name, - table=table_name, - user_dataset=user_dataset) + table_name = 'view_{dataset}_{table}'.format(dataset=dataset_name, + table=table_name, + user_dataset=user_dataset) if table_name not in table_to_dataset: if project_name != _PUBLIC_PROJECT: @@ -194,13 +190,12 @@ def __process_enrichment_variables(variables, user_dataset): table_to_dataset[table_name] = _PUBLIC_DATASET if table_name not in table_to_geotable: - geotable = __get_name_geotable_from_datatable(table_name) - if project_name != _PUBLIC_PROJECT: - geotable = '{dataset}_{geotable}'.format(dataset=dataset_name, - geotable=geotable, - user_dataset=user_dataset) + geotable = 'view_{dataset}_{geotable}'.format(dataset=dataset_geotable, + geotable=geotable) + geotable_to_project[table_name] = project_geotable + geotable_to_dataset[table_name] = dataset_geotable table_to_geotable[table_name] = geotable if table_name not in table_to_project: @@ -211,18 +206,14 @@ def __process_enrichment_variables(variables, user_dataset): 
table_to_variables[table_name].append(variable_name) - return table_to_geotable, table_to_variables, table_to_project, table_to_dataset - + return table_to_geotable, table_to_variables, table_to_project,\ + table_to_dataset, geotable_to_project, geotable_to_dataset -def __get_name_geotable_from_datatable(datatable): - datatable_split = datatable.split('_') +def __get_properties_geotable(variable): - if len(datatable_split) == 8: - geo_information = datatable_split[3:6] - elif len(datatable_split) == 7: - geo_information = datatable_split[2:5] + geography_id = DatasetCatalog.get(variable.dataset).geography - geotable = 'geography_{geo_information_joined}'.format(geo_information_joined='_'.join(geo_information)) + geo_project, geo_dataset, geo_table = geography_id.split('.') - return geotable + return geo_project, geo_dataset, geo_table diff --git a/cartoframes/data/enrichment/points_enrichment.py b/cartoframes/data/enrichment/points_enrichment.py index 1c219bfe9..f1cf28021 100644 --- a/cartoframes/data/enrichment/points_enrichment.py +++ b/cartoframes/data/enrichment/points_enrichment.py @@ -25,7 +25,8 @@ def enrich_points(data, variables, data_geom_column='geometry', filters=dict(), def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_variables, - table_to_project, table_to_dataset, user_dataset, working_project, data_table, **kwargs): + table_to_project, table_to_dataset, geotable_to_project, geotable_to_dataset, + user_dataset, working_project, data_table, **kwargs): sqls = list() @@ -37,7 +38,7 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v ST_Area(enrichment_geo_table.geom) AS {variables_underscored}_area, NULL AS {variables_underscored}_population FROM `{project}.{dataset}.{enrichment_table}` enrichment_table - JOIN `{project}.{dataset}.{enrichment_geo_table}` enrichment_geo_table + JOIN `{geo_project}.{geo_dataset}.{enrichment_geo_table}` enrichment_geo_table ON enrichment_table.geoid = 
enrichment_geo_table.geoid JOIN `{working_project}.{user_dataset}.{data_table}` data_table ON ST_Within(data_table.{data_geom_column}, enrichment_geo_table.geom) @@ -47,7 +48,8 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v enrichment_geo_table=table_to_geotable[table], user_dataset=user_dataset, working_project=working_project, data_table=data_table, data_geom_column=kwargs['data_geom_column'], filters=filters_processed, - project=table_to_project[table], dataset=table_to_dataset[table]) + project=table_to_project[table], dataset=table_to_dataset[table], + geo_project=geotable_to_project[table], geo_dataset=geotable_to_dataset[table]) sqls.append(sql) diff --git a/cartoframes/data/enrichment/polygons_enrichment.py b/cartoframes/data/enrichment/polygons_enrichment.py index 094c4023d..ace2827c9 100644 --- a/cartoframes/data/enrichment/polygons_enrichment.py +++ b/cartoframes/data/enrichment/polygons_enrichment.py @@ -28,7 +28,8 @@ def enrich_polygons(data, variables, data_geom_column='geometry', agg_operators= def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_variables, - table_to_project, table_to_dataset, user_dataset, working_project, data_table, **kwargs): + table_to_project, table_to_dataset, geotable_to_project, geotable_to_dataset, + user_dataset, working_project, data_table, **kwargs): grouper = 'group by data_table.{enrichment_id}'.format(enrichment_id=enrichment_id) @@ -58,7 +59,7 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v sql = ''' SELECT data_table.{enrichment_id}, {variables} FROM `{project}.{dataset}.{enrichment_table}` enrichment_table - JOIN `{project}.{dataset}.{enrichment_geo_table}` enrichment_geo_table + JOIN `{geo_project}.{geo_dataset}.{enrichment_geo_table}` enrichment_geo_table ON enrichment_table.geoid = enrichment_geo_table.geoid JOIN `{working_project}.{user_dataset}.{data_table}` data_table ON 
ST_Intersects(data_table.{data_geom_column}, enrichment_geo_table.geom) @@ -69,7 +70,8 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v user_dataset=user_dataset, working_project=working_project, data_table=data_table, data_geom_column=kwargs['data_geom_column'], filters=filters_processed, grouper=grouper, project=table_to_project[table], - dataset=table_to_dataset[table]) + dataset=table_to_dataset[table], geo_project=geotable_to_project[table], + geo_dataset=geotable_to_dataset[table]) sqls.append(sql) From acaea5ab87972f67a8a02d28f8f2727bd051ce97 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Wed, 9 Oct 2019 19:25:43 +0200 Subject: [PATCH 02/21] Add external table to query --- .../data/enrichment/enrichment_service.py | 20 +++++++------------ .../data/enrichment/points_enrichment.py | 13 ++++++------ .../data/enrichment/polygons_enrichment.py | 9 ++++----- 3 files changed, 17 insertions(+), 25 deletions(-) diff --git a/cartoframes/data/enrichment/enrichment_service.py b/cartoframes/data/enrichment/enrichment_service.py index 2afc7fad9..3d5b67e6e 100644 --- a/cartoframes/data/enrichment/enrichment_service.py +++ b/cartoframes/data/enrichment/enrichment_service.py @@ -62,8 +62,8 @@ def _enrichment_queries(user_dataset, tablename, query_function, **kwargs): variables = __process_variables(kwargs['variables']) - table_to_geotable, table_to_variables, table_to_project, table_to_dataset,\ - geotable_to_project, geotable_to_dataset = __process_enrichment_variables(variables, user_dataset) + table_to_geotable, table_to_variables,\ + table_to_project, table_to_dataset = __process_enrichment_variables(variables, user_dataset) filters_str = __process_filters(kwargs['filters']) @@ -71,7 +71,7 @@ def _enrichment_queries(user_dataset, tablename, query_function, **kwargs): kwargs['agg_operators'] = __process_agg_operators(kwargs['agg_operators'], variables) return query_function(_ENRICHMENT_ID, filters_str, table_to_geotable, 
table_to_variables, table_to_project, - table_to_dataset, geotable_to_project, geotable_to_dataset, user_dataset, _WORKING_PROJECT, tablename, **kwargs) + table_to_dataset, user_dataset, _WORKING_PROJECT, tablename, **kwargs) def _execute_enrichment(bq_client, queries, data_copy, data_geom_column): @@ -168,15 +168,13 @@ def __process_enrichment_variables(variables, user_dataset): table_to_variables = defaultdict(list) table_to_project = dict() table_to_dataset = dict() - geotable_to_project = dict() - geotable_to_dataset = dict() for variable in variables: project_name = variable.project_name dataset_name = variable.schema_name table_name = variable.dataset_name variable_name = variable.column_name - project_geotable, dataset_geotable, geotable = __get_properties_geotable(variable) + dataset_geotable, geotable = __get_properties_geotable(variable) if project_name != _PUBLIC_PROJECT: table_name = 'view_{dataset}_{table}'.format(dataset=dataset_name, @@ -193,9 +191,6 @@ def __process_enrichment_variables(variables, user_dataset): if project_name != _PUBLIC_PROJECT: geotable = 'view_{dataset}_{geotable}'.format(dataset=dataset_geotable, geotable=geotable) - - geotable_to_project[table_name] = project_geotable - geotable_to_dataset[table_name] = dataset_geotable table_to_geotable[table_name] = geotable if table_name not in table_to_project: @@ -206,14 +201,13 @@ def __process_enrichment_variables(variables, user_dataset): table_to_variables[table_name].append(variable_name) - return table_to_geotable, table_to_variables, table_to_project,\ - table_to_dataset, geotable_to_project, geotable_to_dataset + return table_to_geotable, table_to_variables, table_to_project, table_to_dataset def __get_properties_geotable(variable): geography_id = DatasetCatalog.get(variable.dataset).geography - geo_project, geo_dataset, geo_table = geography_id.split('.') + _, geo_dataset, geo_table = geography_id.split('.') - return geo_project, geo_dataset, geo_table + return geo_dataset, 
geo_table diff --git a/cartoframes/data/enrichment/points_enrichment.py b/cartoframes/data/enrichment/points_enrichment.py index f1cf28021..876903653 100644 --- a/cartoframes/data/enrichment/points_enrichment.py +++ b/cartoframes/data/enrichment/points_enrichment.py @@ -25,8 +25,8 @@ def enrich_points(data, variables, data_geom_column='geometry', filters=dict(), def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_variables, - table_to_project, table_to_dataset, geotable_to_project, geotable_to_dataset, - user_dataset, working_project, data_table, **kwargs): + table_to_project, table_to_dataset, user_dataset, working_project, + data_table, **kwargs): sqls = list() @@ -35,10 +35,10 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v sql = ''' SELECT data_table.{enrichment_id}, {variables}, - ST_Area(enrichment_geo_table.geom) AS {variables_underscored}_area, - NULL AS {variables_underscored}_population + ST_Area(enrichment_geo_table.geom) AS {enrichment_table}_area, + NULL AS {enrichment_table}_population FROM `{project}.{dataset}.{enrichment_table}` enrichment_table - JOIN `{geo_project}.{geo_dataset}.{enrichment_geo_table}` enrichment_geo_table + JOIN `{project}.{dataset}.{enrichment_geo_table}` enrichment_geo_table ON enrichment_table.geoid = enrichment_geo_table.geoid JOIN `{working_project}.{user_dataset}.{data_table}` data_table ON ST_Within(data_table.{data_geom_column}, enrichment_geo_table.geom) @@ -48,8 +48,7 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v enrichment_geo_table=table_to_geotable[table], user_dataset=user_dataset, working_project=working_project, data_table=data_table, data_geom_column=kwargs['data_geom_column'], filters=filters_processed, - project=table_to_project[table], dataset=table_to_dataset[table], - geo_project=geotable_to_project[table], geo_dataset=geotable_to_dataset[table]) + project=table_to_project[table], 
dataset=table_to_dataset[table]) sqls.append(sql) diff --git a/cartoframes/data/enrichment/polygons_enrichment.py b/cartoframes/data/enrichment/polygons_enrichment.py index ace2827c9..370f8d625 100644 --- a/cartoframes/data/enrichment/polygons_enrichment.py +++ b/cartoframes/data/enrichment/polygons_enrichment.py @@ -28,8 +28,8 @@ def enrich_polygons(data, variables, data_geom_column='geometry', agg_operators= def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_variables, - table_to_project, table_to_dataset, geotable_to_project, geotable_to_dataset, - user_dataset, working_project, data_table, **kwargs): + table_to_project, table_to_dataset, user_dataset, working_project, + data_table, **kwargs): grouper = 'group by data_table.{enrichment_id}'.format(enrichment_id=enrichment_id) @@ -59,7 +59,7 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v sql = ''' SELECT data_table.{enrichment_id}, {variables} FROM `{project}.{dataset}.{enrichment_table}` enrichment_table - JOIN `{geo_project}.{geo_dataset}.{enrichment_geo_table}` enrichment_geo_table + JOIN `{project}.{dataset}.{enrichment_geo_table}` enrichment_geo_table ON enrichment_table.geoid = enrichment_geo_table.geoid JOIN `{working_project}.{user_dataset}.{data_table}` data_table ON ST_Intersects(data_table.{data_geom_column}, enrichment_geo_table.geom) @@ -70,8 +70,7 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v user_dataset=user_dataset, working_project=working_project, data_table=data_table, data_geom_column=kwargs['data_geom_column'], filters=filters_processed, grouper=grouper, project=table_to_project[table], - dataset=table_to_dataset[table], geo_project=geotable_to_project[table], - geo_dataset=geotable_to_dataset[table]) + dataset=table_to_dataset[table]) sqls.append(sql) From 8bb6da96991eff3ec40fe1feb7fb873ca7f3cf0c Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Wed, 9 Oct 2019 21:04:37 +0200 Subject: 
[PATCH 03/21] Add alias in variables to enrich --- cartoframes/data/enrichment/points_enrichment.py | 10 +++++----- cartoframes/data/enrichment/polygons_enrichment.py | 7 ++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/cartoframes/data/enrichment/points_enrichment.py b/cartoframes/data/enrichment/points_enrichment.py index 876903653..a8f625f4e 100644 --- a/cartoframes/data/enrichment/points_enrichment.py +++ b/cartoframes/data/enrichment/points_enrichment.py @@ -43,12 +43,12 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v JOIN `{working_project}.{user_dataset}.{data_table}` data_table ON ST_Within(data_table.{data_geom_column}, enrichment_geo_table.geom) {filters}; - '''.format(enrichment_id=enrichment_id, variables=', '.join(variables), - variables_underscored='_'.join(variables), enrichment_table=table, - enrichment_geo_table=table_to_geotable[table], user_dataset=user_dataset, - working_project=working_project, data_table=data_table, + '''.format(enrichment_id=enrichment_id, variables_underscored='_'.join(variables), + enrichment_table=table, enrichment_geo_table=table_to_geotable[table], + user_dataset=user_dataset, working_project=working_project, data_table=data_table, data_geom_column=kwargs['data_geom_column'], filters=filters_processed, - project=table_to_project[table], dataset=table_to_dataset[table]) + project=table_to_project[table], dataset=table_to_dataset[table], + variables=', '.join(['enrichment_table.{}'.format(variable) for variable in variables])) sqls.append(sql) diff --git a/cartoframes/data/enrichment/polygons_enrichment.py b/cartoframes/data/enrichment/polygons_enrichment.py index 370f8d625..9d831d801 100644 --- a/cartoframes/data/enrichment/polygons_enrichment.py +++ b/cartoframes/data/enrichment/polygons_enrichment.py @@ -65,12 +65,13 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v ON ST_Intersects(data_table.{data_geom_column}, 
enrichment_geo_table.geom) {filters} {grouper}; - '''.format(enrichment_id=enrichment_id, variables=', '.join(variables_sql), - enrichment_table=table, enrichment_geo_table=table_to_geotable[table], + '''.format(enrichment_id=enrichment_id, enrichment_table=table, + enrichment_geo_table=table_to_geotable[table], user_dataset=user_dataset, working_project=working_project, data_table=data_table, data_geom_column=kwargs['data_geom_column'], filters=filters_processed, grouper=grouper, project=table_to_project[table], - dataset=table_to_dataset[table]) + dataset=table_to_dataset[table], + variables=', '.join(['enrichment_table.{}'.format(variable) for variable in variables])) sqls.append(sql) From 9e6491a6566248876c6f68a76d24924d8ba561e5 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Wed, 9 Oct 2019 21:34:52 +0200 Subject: [PATCH 04/21] Revert "Add alias in variables to enrich" This reverts commit 8bb6da96991eff3ec40fe1feb7fb873ca7f3cf0c. --- cartoframes/data/enrichment/points_enrichment.py | 10 +++++----- cartoframes/data/enrichment/polygons_enrichment.py | 7 +++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cartoframes/data/enrichment/points_enrichment.py b/cartoframes/data/enrichment/points_enrichment.py index a8f625f4e..876903653 100644 --- a/cartoframes/data/enrichment/points_enrichment.py +++ b/cartoframes/data/enrichment/points_enrichment.py @@ -43,12 +43,12 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v JOIN `{working_project}.{user_dataset}.{data_table}` data_table ON ST_Within(data_table.{data_geom_column}, enrichment_geo_table.geom) {filters}; - '''.format(enrichment_id=enrichment_id, variables_underscored='_'.join(variables), - enrichment_table=table, enrichment_geo_table=table_to_geotable[table], - user_dataset=user_dataset, working_project=working_project, data_table=data_table, + '''.format(enrichment_id=enrichment_id, variables=', '.join(variables), + variables_underscored='_'.join(variables), 
enrichment_table=table, + enrichment_geo_table=table_to_geotable[table], user_dataset=user_dataset, + working_project=working_project, data_table=data_table, data_geom_column=kwargs['data_geom_column'], filters=filters_processed, - project=table_to_project[table], dataset=table_to_dataset[table], - variables=', '.join(['enrichment_table.{}'.format(variable) for variable in variables])) + project=table_to_project[table], dataset=table_to_dataset[table]) sqls.append(sql) diff --git a/cartoframes/data/enrichment/polygons_enrichment.py b/cartoframes/data/enrichment/polygons_enrichment.py index 9d831d801..370f8d625 100644 --- a/cartoframes/data/enrichment/polygons_enrichment.py +++ b/cartoframes/data/enrichment/polygons_enrichment.py @@ -65,13 +65,12 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v ON ST_Intersects(data_table.{data_geom_column}, enrichment_geo_table.geom) {filters} {grouper}; - '''.format(enrichment_id=enrichment_id, enrichment_table=table, - enrichment_geo_table=table_to_geotable[table], + '''.format(enrichment_id=enrichment_id, variables=', '.join(variables_sql), + enrichment_table=table, enrichment_geo_table=table_to_geotable[table], user_dataset=user_dataset, working_project=working_project, data_table=data_table, data_geom_column=kwargs['data_geom_column'], filters=filters_processed, grouper=grouper, project=table_to_project[table], - dataset=table_to_dataset[table], - variables=', '.join(['enrichment_table.{}'.format(variable) for variable in variables])) + dataset=table_to_dataset[table]) sqls.append(sql) From 2137acbad29b949ed2c1eec17b7621ad11e6bce0 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Wed, 9 Oct 2019 22:10:55 +0200 Subject: [PATCH 05/21] Multiple fixes --- .../data/enrichment/enrichment_service.py | 15 +++++++++++---- .../data/enrichment/points_enrichment.py | 13 ++++++------- .../data/enrichment/polygons_enrichment.py | 19 ++++++++++--------- 3 files changed, 27 insertions(+), 20 deletions(-) 
diff --git a/cartoframes/data/enrichment/enrichment_service.py b/cartoframes/data/enrichment/enrichment_service.py index 3d5b67e6e..2b561ab02 100644 --- a/cartoframes/data/enrichment/enrichment_service.py +++ b/cartoframes/data/enrichment/enrichment_service.py @@ -28,7 +28,11 @@ def enrich(query_function, **kwargs): queries = _enrichment_queries(user_dataset, tablename, query_function, **kwargs) - return _execute_enrichment(bq_client, queries, data_copy, kwargs['data_geom_column']) + data_enriched = _execute_enrichment(bq_client, queries, data_copy, kwargs['data_geom_column']) + + data_enriched[kwargs['data_geom_column']] = _compute_geometry_from_geom(data_enriched[kwargs['data_geom_column']]) + + return data_enriched def _get_credentials(credentials=None): @@ -59,8 +63,8 @@ def _upload_dataframe(bq_client, user_dataset, data_copy, data_geom_column): def _enrichment_queries(user_dataset, tablename, query_function, **kwargs): - - variables = __process_variables(kwargs['variables']) + is_polygon_enrichment = 'agg_operators' in kwargs + variables = __process_variables(kwargs['variables'], is_polygon_enrichment) table_to_geotable, table_to_variables,\ table_to_project, table_to_dataset = __process_enrichment_variables(variables, user_dataset) @@ -118,7 +122,7 @@ def __copy_data_and_generate_enrichment_id(data, enrichment_id_column, geometry_ return data_copy -def __process_variables(variables): +def __process_variables(variables, is_polygon_enrichment): variables_result = list() if isinstance(variables, Variable): @@ -135,6 +139,9 @@ def __process_variables(variables): else: raise EnrichmentException('Variable(s) to enrich should be an instance of Variable / CatalogList / str / list') + if is_polygon_enrichment: + variables_result = [variable for variable in variables_result if variable.agg_method is not None] + return variables_result diff --git a/cartoframes/data/enrichment/points_enrichment.py b/cartoframes/data/enrichment/points_enrichment.py index 
876903653..2f950dc1b 100644 --- a/cartoframes/data/enrichment/points_enrichment.py +++ b/cartoframes/data/enrichment/points_enrichment.py @@ -35,20 +35,19 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v sql = ''' SELECT data_table.{enrichment_id}, {variables}, - ST_Area(enrichment_geo_table.geom) AS {enrichment_table}_area, - NULL AS {enrichment_table}_population + ST_Area(enrichment_geo_table.geom) AS {enrichment_table}_area FROM `{project}.{dataset}.{enrichment_table}` enrichment_table JOIN `{project}.{dataset}.{enrichment_geo_table}` enrichment_geo_table ON enrichment_table.geoid = enrichment_geo_table.geoid JOIN `{working_project}.{user_dataset}.{data_table}` data_table ON ST_Within(data_table.{data_geom_column}, enrichment_geo_table.geom) {filters}; - '''.format(enrichment_id=enrichment_id, variables=', '.join(variables), - variables_underscored='_'.join(variables), enrichment_table=table, - enrichment_geo_table=table_to_geotable[table], user_dataset=user_dataset, - working_project=working_project, data_table=data_table, + '''.format(enrichment_id=enrichment_id, variables_underscored='_'.join(variables), + enrichment_table=table, enrichment_geo_table=table_to_geotable[table], + user_dataset=user_dataset, working_project=working_project, data_table=data_table, data_geom_column=kwargs['data_geom_column'], filters=filters_processed, - project=table_to_project[table], dataset=table_to_dataset[table]) + project=table_to_project[table], dataset=table_to_dataset[table], + variables=', '.join(['enrichment_table.{}'.format(variable) for variable in variables])) sqls.append(sql) diff --git a/cartoframes/data/enrichment/polygons_enrichment.py b/cartoframes/data/enrichment/polygons_enrichment.py index 370f8d625..c15c9a5bd 100644 --- a/cartoframes/data/enrichment/polygons_enrichment.py +++ b/cartoframes/data/enrichment/polygons_enrichment.py @@ -36,24 +36,25 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, 
table_to_v sqls = list() for table, variables in table_to_variables.items(): + agg_operators = kwargs.get('agg_operators') - if 'agg_operators' in kwargs: + if agg_operators is not None: - if isinstance(kwargs['agg_operators'], str): - agg_operators = {variable: kwargs['agg_operators'] for variable in variables} - else: - agg_operators = kwargs['agg_operators'] + if isinstance(agg_operators, str): + agg_operators = {variable: agg_operators for variable in variables} - variables_sql = ['{operator}({variable} * \ + variables_sql = ['{operator}(enrichment_table.{variable} * \ (ST_Area(ST_Intersection(enrichment_geo_table.geom, data_table.{data_geom_column}))\ / ST_area(data_table.{data_geom_column}))) as {variable}'.format(variable=variable, data_geom_column=kwargs['data_geom_column'], operator=agg_operators[variable]) for variable in variables] else: - variables_sql = variables + ['ST_Area(ST_Intersection(geo_table.geom, data_table.{data_geom_column}))\ - / ST_area(data_table.{data_geom_column}) AS measures_proportion'.format( - data_geom_column=kwargs['data_geom_column'])] + variables_sql = ['enrichment_table.{}'.format(variable) for variable in variables] +\ + ['ST_Area(ST_Intersection(enrichment_geo_table.geom, data_table.{data_geom_column}))\ + / ST_area(data_table.{data_geom_column}) AS measures_proportion'.format( + data_geom_column=kwargs['data_geom_column'])] + grouper = '' sql = ''' From 55f392d940270263cc2d7ba4d37294679b4c1f4f Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Wed, 9 Oct 2019 22:40:35 +0200 Subject: [PATCH 06/21] Import catalog class well --- cartoframes/data/enrichment/enrichment_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cartoframes/data/enrichment/enrichment_service.py b/cartoframes/data/enrichment/enrichment_service.py index 2b561ab02..dc4c76baa 100644 --- a/cartoframes/data/enrichment/enrichment_service.py +++ b/cartoframes/data/enrichment/enrichment_service.py @@ -8,8 +8,8 @@ from ...exceptions 
import EnrichmentException from ...auth import get_default_credentials from ...utils.geom_utils import _compute_geometry_from_geom -from ..observatory.variable import Variable -from ..observatory.dataset import Dataset as DatasetCatalog +from ..observatory import Variable +from ..observatory import Dataset as DatasetCatalog _ENRICHMENT_ID = 'enrichment_id' From c1276912a31a11bf38c313d3d6520f11425a0951 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Mon, 14 Oct 2019 22:21:06 -0400 Subject: [PATCH 07/21] Fix test --- test/data/enrichment/test_service.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test/data/enrichment/test_service.py b/test/data/enrichment/test_service.py index f797eb3ea..f03d41905 100644 --- a/test/data/enrichment/test_service.py +++ b/test/data/enrichment/test_service.py @@ -76,9 +76,8 @@ def test_enrichment_query_by_points_one_variable(self): queries = _enrichment_queries(user_dataset, tablename, query_function, **kwargs) expected_queries = ['''SELECT data_table.enrichment_id, - CRMCYBURG, - ST_Area(enrichment_geo_table.geom) AS CRMCYBURG_area, - NULL AS CRMCYBURG_population + enrichment_table.CRMCYBURG, + ST_Area(enrichment_geo_table.geom) AS view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018_area FROM `carto-do-customers.{user_dataset}\ .ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table JOIN `carto-do-customers.{user_dataset}\ @@ -111,9 +110,8 @@ def test_enrichment_query_by_points_two_variables(self): queries = _enrichment_queries(user_dataset, tablename, query_function, **kwargs) expected_queries = ['''SELECT data_table.enrichment_id, - CRMCYBURG, - ST_Area(enrichment_geo_table.geom) AS CRMCYBURG_area, - NULL AS CRMCYBURG_population + enrichment_table.CRMCYBURG, + ST_Area(enrichment_geo_table.geom) AS view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018_area, FROM `carto-do-customers.{user_dataset}\ 
.ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table JOIN `carto-do-customers.{user_dataset}\ @@ -158,7 +156,7 @@ def test_enrichment_query_by_polygons_one_variable(self): queries = _enrichment_queries(user_dataset, tablename, query_function, **kwargs) - expected_queries = ['''SELECT data_table.enrichment_id, avg(CRMCYBURG *\ + expected_queries = ['''SELECT data_table.enrichment_id, avg(enrichment_table.CRMCYBURG *\ (ST_Area(ST_Intersection(enrichment_geo_table.geom, data_table.{geometry_column}))\ / ST_area(data_table.{geometry_column}))) as CRMCYBURG FROM `carto-do-customers.{user_dataset}.ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018`\ @@ -194,7 +192,7 @@ def test_enrichment_query_by_polygons_two_variables(self): queries = _enrichment_queries(user_dataset, tablename, query_function, **kwargs) - expected_queries = ['''SELECT data_table.enrichment_id, avg(CRMCYBURG *\ + expected_queries = ['''SELECT data_table.enrichment_id, avg(enrichment_table.CRMCYBURG *\ (ST_Area(ST_Intersection(enrichment_geo_table.geom, data_table.{geometry_column}))\ / ST_area(data_table.{geometry_column}))) as CRMCYBURG FROM `carto-do-customers.{user_dataset}.ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018`\ From ab9c05e6e0b2b101ca77354bc6cefc61956caa9c Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Mon, 14 Oct 2019 22:36:30 -0400 Subject: [PATCH 08/21] Fix test --- test/data/enrichment/test_service.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/data/enrichment/test_service.py b/test/data/enrichment/test_service.py index f03d41905..9ba353924 100644 --- a/test/data/enrichment/test_service.py +++ b/test/data/enrichment/test_service.py @@ -79,9 +79,9 @@ def test_enrichment_query_by_points_one_variable(self): enrichment_table.CRMCYBURG, ST_Area(enrichment_geo_table.geom) AS view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018_area FROM `carto-do-customers.{user_dataset}\ - 
.ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table + .view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table JOIN `carto-do-customers.{user_dataset}\ - .ags_geography_usa_blockgroup_2015` enrichment_geo_table + .view_ags_geography_usa_blockgroup_2015` enrichment_geo_table ON enrichment_table.geoid = enrichment_geo_table.geoid JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table ON ST_Within(data_table.{geometry_column}, enrichment_geo_table.geom) @@ -113,9 +113,9 @@ def test_enrichment_query_by_points_two_variables(self): enrichment_table.CRMCYBURG, ST_Area(enrichment_geo_table.geom) AS view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018_area, FROM `carto-do-customers.{user_dataset}\ - .ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table + .view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table JOIN `carto-do-customers.{user_dataset}\ - .ags_geography_usa_blockgroup_2015` enrichment_geo_table + .view_ags_geography_usa_blockgroup_2015` enrichment_geo_table ON enrichment_table.geoid = enrichment_geo_table.geoid JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table ON ST_Within(data_table.{geometry_column}, enrichment_geo_table.geom) @@ -159,9 +159,9 @@ def test_enrichment_query_by_polygons_one_variable(self): expected_queries = ['''SELECT data_table.enrichment_id, avg(enrichment_table.CRMCYBURG *\ (ST_Area(ST_Intersection(enrichment_geo_table.geom, data_table.{geometry_column}))\ / ST_area(data_table.{geometry_column}))) as CRMCYBURG - FROM `carto-do-customers.{user_dataset}.ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018`\ + FROM `carto-do-customers.{user_dataset}.view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018`\ enrichment_table - JOIN `carto-do-customers.{user_dataset}.ags_geography_usa_blockgroup_2015` enrichment_geo_table + JOIN 
`carto-do-customers.{user_dataset}.view_ags_geography_usa_blockgroup_2015` enrichment_geo_table ON enrichment_table.geoid = enrichment_geo_table.geoid JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table ON ST_Intersects(data_table.{geometry_column}, enrichment_geo_table.geom) @@ -195,9 +195,9 @@ def test_enrichment_query_by_polygons_two_variables(self): expected_queries = ['''SELECT data_table.enrichment_id, avg(enrichment_table.CRMCYBURG *\ (ST_Area(ST_Intersection(enrichment_geo_table.geom, data_table.{geometry_column}))\ / ST_area(data_table.{geometry_column}))) as CRMCYBURG - FROM `carto-do-customers.{user_dataset}.ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018`\ + FROM `carto-do-customers.{user_dataset}.view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018`\ enrichment_table - JOIN `carto-do-customers.{user_dataset}.ags_geography_usa_blockgroup_2015` enrichment_geo_table + JOIN `carto-do-customers.{user_dataset}.view_ags_geography_usa_blockgroup_2015` enrichment_geo_table ON enrichment_table.geoid = enrichment_geo_table.geoid JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table ON ST_Intersects(data_table.{geometry_column}, enrichment_geo_table.geom) @@ -206,9 +206,9 @@ def test_enrichment_query_by_polygons_two_variables(self): SELECT data_table.enrichment_id, avg(ticket_size_score *\ (ST_Area(ST_Intersection(enrichment_geo_table.geom, data_table.{geometry_column}))\ / ST_area(data_table.{geometry_column}))) as ticket_size_score - FROM `carto-do-customers.{user_dataset}.mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019`\ + FROM `carto-do-customers.{user_dataset}.view_mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019`\ enrichment_table - JOIN `carto-do-customers.{user_dataset}.mastercard_geography_usa_blockgroup_2019` enrichment_geo_table + JOIN `carto-do-customers.{user_dataset}.view_mastercard_geography_usa_blockgroup_2019` enrichment_geo_table ON enrichment_table.geoid = 
enrichment_geo_table.geoid JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table ON ST_Intersects(data_table.{geometry_column}, enrichment_geo_table.geom) From 17f79a540cb6a2f00807490e5cb4a90e26aab8b7 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Mon, 14 Oct 2019 22:55:28 -0400 Subject: [PATCH 09/21] Fix test --- test/data/enrichment/test_service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/data/enrichment/test_service.py b/test/data/enrichment/test_service.py index 9ba353924..7a642a85f 100644 --- a/test/data/enrichment/test_service.py +++ b/test/data/enrichment/test_service.py @@ -111,7 +111,7 @@ def test_enrichment_query_by_points_two_variables(self): expected_queries = ['''SELECT data_table.enrichment_id, enrichment_table.CRMCYBURG, - ST_Area(enrichment_geo_table.geom) AS view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018_area, + ST_Area(enrichment_geo_table.geom) AS view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018_area FROM `carto-do-customers.{user_dataset}\ .view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018` enrichment_table JOIN `carto-do-customers.{user_dataset}\ @@ -121,7 +121,7 @@ def test_enrichment_query_by_points_two_variables(self): ON ST_Within(data_table.{geometry_column}, enrichment_geo_table.geom) WHERE a='b';''', ''' SELECT data_table.enrichment_id, - ticket_size_score, + enrichment_table.ticket_size_score, ST_Area(enrichment_geo_table.geom) AS ticket_size_score_area, NULL AS ticket_size_score_population FROM `carto-do-customers.{user_dataset}\ @@ -203,7 +203,7 @@ def test_enrichment_query_by_polygons_two_variables(self): ON ST_Intersects(data_table.{geometry_column}, enrichment_geo_table.geom) WHERE a='b' group by data_table.enrichment_id;''', ''' - SELECT data_table.enrichment_id, avg(ticket_size_score *\ + SELECT data_table.enrichment_id, avg(enrichment_table.ticket_size_score *\ (ST_Area(ST_Intersection(enrichment_geo_table.geom, 
data_table.{geometry_column}))\ / ST_area(data_table.{geometry_column}))) as ticket_size_score FROM `carto-do-customers.{user_dataset}.view_mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019`\ From 379685090f87e1f0c18127853b0a10bf43401969 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Mon, 14 Oct 2019 23:02:29 -0400 Subject: [PATCH 10/21] Fix test --- test/data/enrichment/test_service.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/data/enrichment/test_service.py b/test/data/enrichment/test_service.py index 7a642a85f..ff35a30f7 100644 --- a/test/data/enrichment/test_service.py +++ b/test/data/enrichment/test_service.py @@ -122,8 +122,7 @@ def test_enrichment_query_by_points_two_variables(self): WHERE a='b';''', ''' SELECT data_table.enrichment_id, enrichment_table.ticket_size_score, - ST_Area(enrichment_geo_table.geom) AS ticket_size_score_area, - NULL AS ticket_size_score_population + ST_Area(enrichment_geo_table.geom) AS view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018_area, FROM `carto-do-customers.{user_dataset}\ .mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019` enrichment_table JOIN `carto-do-customers.{user_dataset}\ From f5c71bc79a521fc118e09bd016479b66de794476 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Mon, 14 Oct 2019 23:08:36 -0400 Subject: [PATCH 11/21] Fix test --- test/data/enrichment/test_service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/data/enrichment/test_service.py b/test/data/enrichment/test_service.py index ff35a30f7..78827ec90 100644 --- a/test/data/enrichment/test_service.py +++ b/test/data/enrichment/test_service.py @@ -122,11 +122,11 @@ def test_enrichment_query_by_points_two_variables(self): WHERE a='b';''', ''' SELECT data_table.enrichment_id, enrichment_table.ticket_size_score, - ST_Area(enrichment_geo_table.geom) AS view_ags_demographics_crimerisk_usa_blockgroup_2015_yearly_2018_area, + ST_Area(enrichment_geo_table.geom) AS 
view_mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019_area, FROM `carto-do-customers.{user_dataset}\ - .mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019` enrichment_table + .view_mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019` enrichment_table JOIN `carto-do-customers.{user_dataset}\ - .mastercard_geography_usa_blockgroup_2019` enrichment_geo_table + .view_mastercard_geography_usa_blockgroup_2019` enrichment_geo_table ON enrichment_table.geoid = enrichment_geo_table.geoid JOIN `carto-do-customers.{user_dataset}.{tablename}` data_table ON ST_Within(data_table.{geometry_column}, enrichment_geo_table.geom) From ab02cdd758fee5e8a3328c813b834ee4aec935ec Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Mon, 14 Oct 2019 23:15:56 -0400 Subject: [PATCH 12/21] Fix test --- test/data/enrichment/test_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/data/enrichment/test_service.py b/test/data/enrichment/test_service.py index 78827ec90..56767353f 100644 --- a/test/data/enrichment/test_service.py +++ b/test/data/enrichment/test_service.py @@ -122,7 +122,7 @@ def test_enrichment_query_by_points_two_variables(self): WHERE a='b';''', ''' SELECT data_table.enrichment_id, enrichment_table.ticket_size_score, - ST_Area(enrichment_geo_table.geom) AS view_mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019_area, + ST_Area(enrichment_geo_table.geom) AS view_mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019_area FROM `carto-do-customers.{user_dataset}\ .view_mastercard_financial_mrli_usa_blockgroup_2019_monthly_2019` enrichment_table JOIN `carto-do-customers.{user_dataset}\ From 58df0b56b91a0c43a0c88c3913caba9ff0b04b47 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Tue, 15 Oct 2019 13:11:37 -0400 Subject: [PATCH 13/21] Remove unused module --- .../data/enrichment/enrichment_utils.py | 74 ------------------- 1 file changed, 74 deletions(-) delete mode 100644 
cartoframes/data/enrichment/enrichment_utils.py diff --git a/cartoframes/data/enrichment/enrichment_utils.py b/cartoframes/data/enrichment/enrichment_utils.py deleted file mode 100644 index 41d6eefb9..000000000 --- a/cartoframes/data/enrichment/enrichment_utils.py +++ /dev/null @@ -1,74 +0,0 @@ -from __future__ import absolute_import - -import pandas as pd -import geopandas as gpd - -from ..dataset.dataset import Dataset -from collections import defaultdict -from ...exceptions import EnrichmentException - - -def copy_data_and_generate_enrichment_id(data, enrichment_id_column, geometry_column): - - if isinstance(data, Dataset): - data = data.dataframe - - data_copy = data.copy() - data_copy[enrichment_id_column] = range(data_copy.shape[0]) - - if isinstance(data_copy, gpd.GeoDataFrame): - data_copy[geometry_column] = data_copy[geometry_column].apply(lambda geometry: geometry.wkt) - - return data_copy - - -def process_filters(filters_dict): - filters = '' - # TODO: Add data table ref in fields of filters - if filters_dict: - filters_list = list() - - for key, value in filters_dict.items(): - filters_list.append('='.join(["{}".format(key), "'{}'".format(value)])) - - filters = ' AND '.join(filters_list) - filters = 'WHERE {filters}'.format(filters=filters) - - return filters - - -def get_tables_and_variables(variables): - - if isinstance(variables, pd.Series): - variables_id = [variables['id']] - elif isinstance(variables, pd.DataFrame): - variables_id = variables['id'].tolist() - else: - raise EnrichmentException('Variable(s) to enrich should be an instance of Series or DataFrame') - - table_to_variables = __process_enrichment_variables(variables_id) - table_data_enrichment = list(table_to_variables.keys()).pop() - table_geo_enrichment = __get_name_geotable_from_datatable(table_data_enrichment) - variables_list = list(table_to_variables.values()).pop() - - return table_data_enrichment, table_geo_enrichment, variables_list - - -def 
__process_enrichment_variables(variables): - table_to_variables = defaultdict(list) - - for variable in variables: - variable_split = variable.split('.') - table, variable = variable_split[-2], variable_split[-1] - - table_to_variables[table].append(variable) - - return table_to_variables - - -def __get_name_geotable_from_datatable(datatable): - datatable_split = datatable.split('_') - geo_information = datatable_split[2:5] - geotable = 'geography_{geo_information_joined}'.format(geo_information_joined='_'.join(geo_information)) - - return geotable From f7d54d0a9a8136040b153fd98bbe2effc2598b61 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Tue, 15 Oct 2019 14:06:38 -0400 Subject: [PATCH 14/21] Rename catalog dataset --- cartoframes/data/enrichment/enrichment_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cartoframes/data/enrichment/enrichment_service.py b/cartoframes/data/enrichment/enrichment_service.py index dc4c76baa..f4f4a95ee 100644 --- a/cartoframes/data/enrichment/enrichment_service.py +++ b/cartoframes/data/enrichment/enrichment_service.py @@ -9,7 +9,7 @@ from ...auth import get_default_credentials from ...utils.geom_utils import _compute_geometry_from_geom from ..observatory import Variable -from ..observatory import Dataset as DatasetCatalog +from ..observatory import CatalogDataset _ENRICHMENT_ID = 'enrichment_id' @@ -213,7 +213,7 @@ def __process_enrichment_variables(variables, user_dataset): def __get_properties_geotable(variable): - geography_id = DatasetCatalog.get(variable.dataset).geography + geography_id = CatalogDataset.get(variable.dataset).geography _, geo_dataset, geo_table = geography_id.split('.') From 32b1545445a8690c8d0058b347c8570579eecde9 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Mon, 21 Oct 2019 14:04:39 +0200 Subject: [PATCH 15/21] Use variable get_list feature instead making one request for every variable --- cartoframes/data/enrichment/enrichment_service.py | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/cartoframes/data/enrichment/enrichment_service.py b/cartoframes/data/enrichment/enrichment_service.py index f4f4a95ee..8a91e586c 100644 --- a/cartoframes/data/enrichment/enrichment_service.py +++ b/cartoframes/data/enrichment/enrichment_service.py @@ -123,7 +123,6 @@ def __copy_data_and_generate_enrichment_id(data, enrichment_id_column, geometry_ def __process_variables(variables, is_polygon_enrichment): - variables_result = list() if isinstance(variables, Variable): variables_result = [variables] @@ -133,7 +132,7 @@ def __process_variables(variables, is_polygon_enrichment): first_element = variables[0] if isinstance(first_element, str): - variables_result = [Variable.get(variable) for variable in variables] + variables_result = Variable.get_list(variables) else: variables_result = variables else: From 23e50ea030dbdb25d9679b7fdfb9abac65ef8af6 Mon Sep 17 00:00:00 2001 From: Alejandro Hall Date: Tue, 22 Oct 2019 11:05:28 +0200 Subject: [PATCH 16/21] Fix bugs regarding polygons enrichment and enhacements in points enrichment --- .../data/enrichment/enrichment_service.py | 17 +++++++++++++---- .../data/enrichment/points_enrichment.py | 6 +++--- .../data/enrichment/polygons_enrichment.py | 4 ++-- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/cartoframes/data/enrichment/enrichment_service.py b/cartoframes/data/enrichment/enrichment_service.py index 8a91e586c..247e6d3ea 100644 --- a/cartoframes/data/enrichment/enrichment_service.py +++ b/cartoframes/data/enrichment/enrichment_service.py @@ -160,11 +160,20 @@ def __process_filters(filters_dict): def __process_agg_operators(agg_operators, variables): - agg_operators_result = agg_operators.copy() + if isinstance(agg_operators, str): + agg_operators_result = dict() - for variable in variables: - if variable.column_name not in agg_operators_result: - agg_operators_result[variable.column_name] = variable.agg_method + for variable in variables: + 
agg_operators_result[variable.column_name] = agg_operators + + elif isinstance(agg_operators, dict): + agg_operators_result = agg_operators.copy() + + for variable in variables: + if variable.column_name not in agg_operators_result: + agg_operators_result[variable.column_name] = variable.agg_method + else: + raise EnrichmentException('agg_operators param must be a string or a dict') return agg_operators_result diff --git a/cartoframes/data/enrichment/points_enrichment.py b/cartoframes/data/enrichment/points_enrichment.py index dfe9c4326..abf452dc4 100644 --- a/cartoframes/data/enrichment/points_enrichment.py +++ b/cartoframes/data/enrichment/points_enrichment.py @@ -103,9 +103,9 @@ def _prepare_sql(enrichment_id, filters_processed, table_to_geotable, table_to_v JOIN `{working_project}.{user_dataset}.{data_table}` data_table ON ST_Within(data_table.{data_geom_column}, enrichment_geo_table.geom) {filters}; - '''.format(enrichment_id=enrichment_id, variables_underscored='_'.join(variables), - enrichment_table=table, enrichment_geo_table=table_to_geotable[table], - user_dataset=user_dataset, working_project=working_project, data_table=data_table, + '''.format(enrichment_id=enrichment_id, enrichment_table=table, + enrichment_geo_table=table_to_geotable[table], user_dataset=user_dataset, + working_project=working_project, data_table=data_table, data_geom_column=kwargs['data_geom_column'], filters=filters_processed, project=table_to_project[table], dataset=table_to_dataset[table], variables=', '.join(['enrichment_table.{}'.format(variable) for variable in variables])) diff --git a/cartoframes/data/enrichment/polygons_enrichment.py b/cartoframes/data/enrichment/polygons_enrichment.py index ac1471ebe..c0c9ce22e 100644 --- a/cartoframes/data/enrichment/polygons_enrichment.py +++ b/cartoframes/data/enrichment/polygons_enrichment.py @@ -3,7 +3,7 @@ from .enrichment_service import enrich -def enrich_polygons(data, variables, agg_operators, data_geom_column='geometry', +def 
enrich_polygons(data, variables, agg_operators=dict(), data_geom_column='geometry', filters=dict(), credentials=None): """enrich_polygons @@ -61,7 +61,7 @@ def enrich_polygons(data, variables, agg_operators, data_geom_column='geometry', filters = {'do_date': '2019-09-01'} dataset_enrich = enrichment.enrich_polygons(dataset, variables, filters) - + Enrich a polygons dataset with custom aggregation methods: From fec43d9909a27c7589384bc2d04128caaf817bf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Arroyo=20Torrens?= Date: Wed, 23 Oct 2019 10:42:21 +0200 Subject: [PATCH 17/21] Extract imports in decode_geometry --- cartoframes/utils/geom_utils.py | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/cartoframes/utils/geom_utils.py b/cartoframes/utils/geom_utils.py index 108af0b81..d1977b0aa 100644 --- a/cartoframes/utils/geom_utils.py +++ b/cartoframes/utils/geom_utils.py @@ -1,11 +1,13 @@ import re import sys import binascii as ba -from warnings import warn -from copy import deepcopy import geojson import geopandas +from copy import deepcopy +from warnings import warn +from shapely import wkb, wkt, geometry, geos + from carto.exceptions import CartoException from ..lib import context @@ -101,7 +103,6 @@ def _compute_geometry_from_geom(geom_column): def _compute_geometry_from_latlng(lat, lng): - from shapely import geometry return [geometry.Point(xy) for xy in zip(lng, lat)] @@ -147,9 +148,7 @@ def detect_encoding_type(input_geom): - ENC_WKT: 'POINT (1234 5789)' - ENC_EWKT: 'SRID=4326;POINT (1234 5789)' """ - from shapely.geometry.base import BaseGeometry - - if isinstance(input_geom, BaseGeometry): + if isinstance(input_geom, geometry.base.BaseGeometry): return ENC_SHAPELY if isinstance(input_geom, str): @@ -179,28 +178,24 @@ def detect_encoding_type(input_geom): def _load_wkb(geom): """Load WKB or EWKB geometry.""" - from shapely.wkb import loads - return loads(geom) + return wkb.loads(geom) def 
_load_wkb_hex(geom): """Load WKB_HEX or EWKB_HEX geometry.""" - from shapely.wkb import loads - return loads(geom, hex=True) + return wkb.loads(geom, hex=True) def _load_wkb_bhex(geom): """Load WKB_BHEX or EWKB_BHEX geometry. The geom must be converted to WKB/EWKB before loading. """ - from shapely.wkb import loads - return loads(ba.unhexlify(geom)) + return wkb.loads(ba.unhexlify(geom)) def _load_wkt(geom): """Load WKT geometry.""" - from shapely.wkt import loads - return loads(geom) + return wkt.loads(geom) def _load_ewkt(egeom): @@ -210,8 +205,7 @@ def _load_ewkt(egeom): srid, geom = _extract_srid(egeom) ogeom = _load_wkt(geom) if srid: - from shapely.geos import lgeos - lgeos.GEOSSetSRID(ogeom._geom, int(srid)) + geos.lgeos.GEOSSetSRID(ogeom._geom, int(srid)) return ogeom @@ -235,9 +229,8 @@ def wkt_to_geojson(wkt): def geojson_to_wkt(geojson_str): - from shapely.geometry import shape geojson_geom = geojson.loads(geojson_str) - wkt_geometry = shape(geojson_geom) + wkt_geometry = geometry.shape(geojson_geom) shapely_geom = _load_wkt(wkt_geometry.wkt) From b64546111734460d81f3e6c6116ab355163bdfe1 Mon Sep 17 00:00:00 2001 From: elenatorro Date: Wed, 23 Oct 2019 11:51:22 +0200 Subject: [PATCH 18/21] Add information about DO 2.0 --- docs/includes/data_enrichment.rst | 7 ++++++- docs/includes/data_observatory.rst | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/includes/data_enrichment.rst b/docs/includes/data_enrichment.rst index 0562e13d1..6685c8509 100644 --- a/docs/includes/data_enrichment.rst +++ b/docs/includes/data_enrichment.rst @@ -2,7 +2,12 @@ Data Enrichment --------------- With CARTOframes it is possible to enrich your data by using our `Data Observatory <#data-observatory>`__ -Catalog through the enrichment methods. +Catalogue through the enrichment methods. + +**Important:** Since the data enrichment depends on the Data Observatory 2.0, +is only accessible to selected CARTO Enterprise users in a private beta. 
+We're still open to more beta testers, so if you're interested, +`please get in touch. <#https://carto.com/contact/?campaign_details=data-observatory-betatester>`__ .. automodule:: cartoframes.data.enrichment :members: diff --git a/docs/includes/data_observatory.rst b/docs/includes/data_observatory.rst index 2cc64a419..d6e43f8a0 100644 --- a/docs/includes/data_observatory.rst +++ b/docs/includes/data_observatory.rst @@ -1,6 +1,10 @@ Data Observatory ---------------- +**Important:** The new Data Observatory 2.0 is accessible to selected CARTO Enterprise +users in a private beta. We're still open to more beta testers, so if you're interested, +`please get in touch. <#https://carto.com/contact/?campaign_details=data-observatory-betatester>`__ + .. automodule:: cartoframes.data.observatory :members: :member-order: bysource From 752847de4084b4dfdc6c1164fb187edbdc874155 Mon Sep 17 00:00:00 2001 From: elenatorro Date: Wed, 23 Oct 2019 11:55:46 +0200 Subject: [PATCH 19/21] Rearrange args --- .../data/enrichment/points_enrichment.py | 44 +++++++-------- .../data/enrichment/polygons_enrichment.py | 54 +++++++++---------- 2 files changed, 48 insertions(+), 50 deletions(-) diff --git a/cartoframes/data/enrichment/points_enrichment.py b/cartoframes/data/enrichment/points_enrichment.py index abf452dc4..691590e9b 100644 --- a/cartoframes/data/enrichment/points_enrichment.py +++ b/cartoframes/data/enrichment/points_enrichment.py @@ -12,6 +12,28 @@ def enrich_points(data, variables, data_geom_column='geometry', filters=dict(), your points with our geographies. Extra columns as area and population will be provided with the aims of normalize these columns. + Args: + data (:py:class:`Dataset `, DataFrame, GeoDataFrame): + a Dataset, DataFrame or GeoDataFrame object to be enriched. + variables (:py:class:`Variable `, CatalogList, list, str): + variable(s), discovered through Catalog, for enriching the `data` argument. 
+ data_geom_column (str): string indicating the 4326 geometry column in `data`. + filters (dict, optional): dictionary with either a `column` key + with the name of the column to filter or a `value` value with the value to filter by. + Filters will be used using the `AND` operator + credentials (:py:class:`Credentials `, optional): + credentials of user account. If not provided, + a default credentials (if set with :py:meth:`set_default_credentials + `) will attempted to be + used. + + Returns: + A dataframe as the provided one but with the variables to enrich appended to it + + Note that if the geometry of the `data` you provide intersects with more than one geometry + in the enrichment dataset, the number of rows of the returned dataframe could be different + than the `data` argument number of rows. + Examples: Enrich a points dataset with Catalog classes: @@ -55,28 +77,6 @@ def enrich_points(data, variables, data_geom_column='geometry', filters=dict(), variables = Catalog().country('usa').category('demographics').datasets[0].variables filters = {'do_date': '2019-09-01'} dataset_enrich = enrichment.enrich_points(dataset, variables, filters) - - Args: - data (:py:class:`Dataset `, DataFrame, GeoDataFrame): - a Dataset, DataFrame or GeoDataFrame object to be enriched. - variables (:py:class:`Variable `, CatalogList, list, str): - variable(s), discovered through Catalog, for enriching the `data` argument. - data_geom_column (str): string indicating the 4326 geometry column in `data`. - filters (dict, optional): dictionary with either a `column` key - with the name of the column to filter or a `value` value with the value to filter by. - Filters will be used using the `AND` operator - credentials (:py:class:`Credentials `, optional): - credentials of user account. If not provided, - a default credentials (if set with :py:meth:`set_default_credentials - `) will attempted to be - used. 
- - Returns: - A dataframe as the provided one but with the variables to enrich appended to it - - Note that if the geometry of the `data` you provide intersects with more than one geometry - in the enrichment dataset, the number of rows of the returned dataframe could be different - than the `data` argument number of rows. """ data_enriched = enrich(_prepare_sql, data=data, variables=variables, data_geom_column=data_geom_column, diff --git a/cartoframes/data/enrichment/polygons_enrichment.py b/cartoframes/data/enrichment/polygons_enrichment.py index c0c9ce22e..7cd07bf0d 100644 --- a/cartoframes/data/enrichment/polygons_enrichment.py +++ b/cartoframes/data/enrichment/polygons_enrichment.py @@ -14,6 +14,32 @@ def enrich_polygons(data, variables, agg_operators=dict(), data_geom_column='geo dataset, the proportional part of the intersection will be used to interpolate the quantity of the polygon value intersected, aggregating them with the operator provided by `agg_operators` argument. + Args: + data (Dataset, DataFrame, GeoDataFrame): a Dataset, DataFrame or GeoDataFrame object to be enriched. + variables (Variable, CatalogList, list, str): variable(s), discovered through Catalog, + for enriching the `data` argument. + agg_operators (dict, str, None, optional): dictionary with either a `column` key + with the name of the column to aggregate or a `operator` value with the operator to group by. + If `agg_operators`' dictionary is empty (default argument value) then aggregation operators + will be retrieved from metadata column. + If `agg_operators` is a string then all columns will be aggregated by this operator. + If `agg_operators` is `None` then no aggregations will be computed. All the values which + data geometry intersects with will be returned. + data_geom_column (str): string indicating the 4326 geometry column in `data`. 
+ filters (dict, optional): dictionary with either a `column` key + with the name of the column to filter or a `value` value with the value to filter by. + credentials (:py:class:`Credentials `, optional): + credentials of user account. If not provided, + a default credentials (if set with :py:meth:`set_default_credentials + `) will attempted to be + used. + + Returns: + A dataframe as the provided one but with the variables to enrich appended to it + + Note that if the geometry of the `data` you provide intersects with more than one geometry + in the enrichment dataset, the number of rows of the returned dataframe could be different + than the `data` argument number of rows. Examples: @@ -96,34 +122,6 @@ def enrich_polygons(data, variables, agg_operators=dict(), data_geom_column='geo agg_operators = None dataset_enrich = enrichment.enrich_polygons(dataset, variables, agg_operators=agg_operators) - - - Args: - data (Dataset, DataFrame, GeoDataFrame): a Dataset, DataFrame or GeoDataFrame object to be enriched. - variables (Variable, CatalogList, list, str): variable(s), discovered through Catalog, - for enriching the `data` argument. - agg_operators (dict, str, None, optional): dictionary with either a `column` key - with the name of the column to aggregate or a `operator` value with the operator to group by. - If `agg_operators`' dictionary is empty (default argument value) then aggregation operators - will be retrieved from metadata column. - If `agg_operators` is a string then all columns will be aggregated by this operator. - If `agg_operators` is `None` then no aggregations will be computed. All the values which - data geometry intersects with will be returned. - data_geom_column (str): string indicating the 4326 geometry column in `data`. - filters (dict, optional): dictionary with either a `column` key - with the name of the column to filter or a `value` value with the value to filter by. 
- credentials (:py:class:`Credentials `, optional): - credentials of user account. If not provided, - a default credentials (if set with :py:meth:`set_default_credentials - `) will attempted to be - used. - - Returns: - A dataframe as the provided one but with the variables to enrich appended to it - - Note that if the geometry of the `data` you provide intersects with more than one geometry - in the enrichment dataset, the number of rows of the returned dataframe could be different - than the `data` argument number of rows. """ data_enriched = enrich(_prepare_sql, data=data, variables=variables, agg_operators=agg_operators, From 57aee4b81e54376ed17e7c0830526008be7bc813 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Arroyo=20Torrens?= Date: Wed, 23 Oct 2019 12:01:06 +0200 Subject: [PATCH 20/21] Use CustomJSONDecoder to convert a DataFrame --- cartoframes/utils/utils.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cartoframes/utils/utils.py b/cartoframes/utils/utils.py index bf43674c0..da05b8d63 100644 --- a/cartoframes/utils/utils.py +++ b/cartoframes/utils/utils.py @@ -5,7 +5,9 @@ import os import re import sys +import json import base64 +import decimal import hashlib import requests import geopandas @@ -239,9 +241,16 @@ def get_geodataframe_bounds(data): return [[xmin, ymin], [xmax, ymax]] +class CustomJSONEncoder(json.JSONEncoder): + def default(self, o): + if isinstance(o, decimal.Decimal): + return float(o) + return super(CustomJSONEncoder, self).default(o) + + def encode_geodataframe(data): filtered_geometries = _filter_null_geometries(data) - data = _set_time_cols_epoc(filtered_geometries).to_json() + data = _set_time_cols_epoc(filtered_geometries).to_json(cls=CustomJSONEncoder) encoded_data = base64.b64encode(data.encode('utf-8')).decode('utf-8') return encoded_data From 41372bf831bd6d3b5f4291e8c92416fff1efcd8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Arroyo=20Torrens?= Date: Wed, 23 Oct 2019 12:02:06 
+0200 Subject: [PATCH 21/21] Improve legend exceptions --- cartoframes/viz/legend.py | 8 ++++---- test/viz/test_legend.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cartoframes/viz/legend.py b/cartoframes/viz/legend.py index 00834f68a..6aac014f6 100644 --- a/cartoframes/viz/legend.py +++ b/cartoframes/viz/legend.py @@ -114,15 +114,15 @@ def _get_prop(self, _type): def _check_type(self, _type): if _type and _type not in constants.LEGEND_TYPES: raise ValueError( - 'Legend type is not valid. Valid legend types are: {}.'.format( - ', '.join(constants.LEGEND_TYPES) + 'Legend type "{}" is not valid. Valid legend types are: {}.'.format( + _type, ', '.join(constants.LEGEND_TYPES) )) def _check_prop(self, _prop): if _prop and _prop not in constants.LEGEND_PROPERTIES: raise ValueError( - 'Legend property is not valid. Valid legend properties are: {}.'.format( - ', '.join(constants.LEGEND_PROPERTIES) + 'Legend property "{}" is not valid. Valid legend properties are: {}.'.format( + _prop, ', '.join(constants.LEGEND_PROPERTIES) )) def _infer_prop(self, _type): diff --git a/test/viz/test_legend.py b/test/viz/test_legend.py index d3b8dcd97..47d30c473 100644 --- a/test/viz/test_legend.py +++ b/test/viz/test_legend.py @@ -86,7 +86,7 @@ def test_wrong_input(self): def test_wrong_type(self): """Legend should raise an error if legend type is not valid""" - msg = 'Legend type is not valid. Valid legend types are: default, color-bins, ' + msg = 'Legend type "xxx" is not valid. Valid legend types are: default, color-bins, ' 'color-bins-line, color-bins-point, color-bins-polygon, color-category, ' 'color-category-line, color-category-point, color-category-polygon, ' 'color-continuous, color-continuous-line, color-continuous-point, ' @@ -97,7 +97,7 @@ def test_wrong_type(self): def test_wrong_prop(self): """Legend should raise an error if legend prop is not valid""" - msg = 'Legend property is not valid.
Valid legend properties are: ' + msg = 'Legend property "xxx" is not valid. Valid legend properties are: ' 'color, strokeColor, width, strokeWidth.' with self.assertRaisesRegexp(ValueError, msg): Legend({'type': 'color-category', 'prop': 'xxx'}).get_info()