Skip to content

Commit

Permalink
Merge pull request #1086 from CartoDB/1038-filter-by-slug
Browse files Browse the repository at this point in the history
Support filtering by slug
  • Loading branch information
alejandrohall committed Oct 10, 2019
2 parents 346a6f6 + 4afc1bf commit e7cfab5
Show file tree
Hide file tree
Showing 14 changed files with 156 additions and 39 deletions.
15 changes: 11 additions & 4 deletions cartoframes/data/observatory/catalog.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import absolute_import

from .entity import is_slug_value
from .category import Category
from .country import Country
from .geography import Geography
Expand Down Expand Up @@ -62,7 +63,7 @@ def country(self, country_id):
Args:
country_id (str):
Value for the column 'country_id' to be used when querying the Catalog.
Id value of the country to be used for filtering the Catalog.
Returns:
:py:class:`Catalog <cartoframes.data.observatory.catalog.Catalog>`
Expand All @@ -77,7 +78,7 @@ def category(self, category_id):
Args:
category_id (str):
Value for the column 'category_id' to be used when querying the Catalog.
Id value of the category to be used for filtering the Catalog.
Returns:
:py:class:`Catalog <cartoframes.data.observatory.catalog.Catalog>`
Expand All @@ -92,14 +93,20 @@ def geography(self, geography_id):
Args:
geography_id (str):
Value for the column 'geography_id' to be used when querying the Catalog
Id or slug value of the geography to be used for filtering the Catalog.
Returns:
:py:class:`Catalog <cartoframes.data.observatory.catalog.Catalog>`
"""

self.filters[GEOGRAPHY_FILTER] = geography_id
filter_value = geography_id

if is_slug_value(geography_id):
geography = Geography.get(geography_id)
filter_value = geography.id

self.filters[GEOGRAPHY_FILTER] = filter_value
return self

def clear_filters(self):
Expand Down
4 changes: 4 additions & 0 deletions cartoframes/data/observatory/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ def _get_bigquery_client(project, credentials):
return BigQueryClient(project, credentials)


def is_slug_value(id_value):
    """Tell whether ``id_value`` is a slug rather than a dotted identifier.

    A value counts as a slug when it contains no ``.`` separator; any value
    with at least one dot is treated as a full id.

    Args:
        id_value (str): Identifier string to inspect.

    Returns:
        bool: True when ``id_value`` contains no dot, False otherwise.
    """
    # A membership test gives the same answer as splitting on '.' and
    # counting the pieces, without building an intermediate list.
    return '.' not in id_value


class CatalogList(list):

def __init__(self, data):
Expand Down
4 changes: 2 additions & 2 deletions cartoframes/data/observatory/repository/entity_repo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from cartoframes.exceptions import DiscoveryException
from cartoframes.data.observatory.entity import CatalogList
from cartoframes.data.observatory.entity import CatalogList, is_slug_value
from .repo_client import RepoClient

try:
Expand Down Expand Up @@ -47,7 +47,7 @@ def _get_filters(self, filters):
return cleaned_filters

def _get_id_filter(self, id_):
if self.slug_field is not None and len(id_.split('.')) == 1:
if self.slug_field is not None and is_slug_value(id_):
return {self.slug_field: id_}

return {self.id_field: id_}
Expand Down
26 changes: 13 additions & 13 deletions cartoframes/data/observatory/repository/repo_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,43 +19,43 @@ def set_user_credentials(self, credentials):
self._user_credentials = credentials or get_default_credentials()

def get_countries(self, filters=None):
query = 'SELECT DISTINCT view.country_id AS id FROM datasets_public view'
query = 'SELECT DISTINCT t.country_id AS id FROM datasets_public t'
return self._run_query(query, filters)

def get_categories(self, filters=None):
query = 'SELECT view.* FROM categories_public view'
query = 'SELECT t.* FROM categories_public t'
return self._run_query(query, filters)

def get_categories_joined_datasets(self, filters=None):
query = 'SELECT DISTINCT c.* FROM categories_public c, datasets_public view'
return self._run_query(query, filters, ['c.id = view.category_id'])
query = 'SELECT DISTINCT c.* FROM categories_public c, datasets_public t'
return self._run_query(query, filters, ['c.id = t.category_id'])

def get_providers(self, filters=None):
query = 'SELECT view.* FROM providers_public view'
query = 'SELECT t.* FROM providers_public t'
return self._run_query(query, filters)

def get_variables(self, filters=None):
query = 'SELECT view.* FROM variables_public view'
query = 'SELECT t.* FROM variables_public t'
return self._run_query(query, filters)

def get_variables_groups(self, filters=None):
query = 'SELECT view.* FROM variables_groups_public view'
query = 'SELECT t.* FROM variables_groups_public t'
return self._run_query(query, filters)

def get_geographies(self, filters=None):
query = 'SELECT view.* FROM geographies_public view'
query = 'SELECT t.* FROM geographies_public t'
return self._run_query(query, filters)

def get_geographies_joined_datasets(self, filters=None):
query = 'SELECT DISTINCT g.* FROM geographies_public g, datasets_public view'
return self._run_query(query, filters, ['g.id = view.geography_id'])
query = 'SELECT DISTINCT g.* FROM geographies_public g, datasets_public t'
return self._run_query(query, filters, ['g.id = t.geography_id'])

def get_datasets(self, filters=None):
query = 'SELECT view.* FROM datasets_public view'
query = 'SELECT t.* FROM datasets_public t'

extra_condition = []
if self._user_credentials is not None:
extra_condition.append('view.id IN ({})'.format(self._get_purchased_dataset_ids()))
extra_condition.append('t.id IN ({})'.format(self._get_purchased_dataset_ids()))

return self._run_query(query, filters, extra_condition)

Expand All @@ -72,7 +72,7 @@ def _compute_conditions(self, filters, extra_conditions):
conditions = extra_conditions or []

if filters is not None and len(filters) > 0:
conditions.extend(["view.{} = '{}'".format(key, value) for key, value in filters.items()])
conditions.extend(["t.{} = '{}'".format(key, value) for key, value in filters.items()])

return conditions

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


_VARIABLE_GROUP_ID_FIELD = 'id'
_VARIABLE_GROUP_SLUG_FIELD = 'slug'
_ALLOWED_FILTERS = [DATASET_FILTER]


Expand All @@ -15,7 +16,8 @@ def get_variable_group_repo():
class VariableGroupRepository(EntityRepository):

def __init__(self):
super(VariableGroupRepository, self).__init__(_VARIABLE_GROUP_ID_FIELD, _ALLOWED_FILTERS)
super(VariableGroupRepository, self).__init__(_VARIABLE_GROUP_ID_FIELD, _ALLOWED_FILTERS,
_VARIABLE_GROUP_SLUG_FIELD)

def get_by_dataset(self, dataset_id):
return self._get_filtered_entities({DATASET_FILTER: dataset_id})
Expand All @@ -31,6 +33,7 @@ def _get_rows(self, filters=None):
def _map_row(self, row):
return {
'id': self._normalize_field(row, self.id_field),
'slug': self._normalize_field(row, 'slug'),
'name': self._normalize_field(row, 'name'),
'dataset_id': self._normalize_field(row, 'dataset_id'),
'starred': self._normalize_field(row, 'starred')
Expand Down
4 changes: 3 additions & 1 deletion cartoframes/data/observatory/repository/variable_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


_VARIABLE_ID_FIELD = 'id'
_VARIABLE_SLUG_FIELD = 'slug'
_ALLOWED_DATASETS = [DATASET_FILTER, VARIABLE_GROUP_FILTER]


Expand All @@ -15,7 +16,7 @@ def get_variable_repo():
class VariableRepository(EntityRepository):

def __init__(self):
super(VariableRepository, self).__init__(_VARIABLE_ID_FIELD, _ALLOWED_DATASETS)
super(VariableRepository, self).__init__(_VARIABLE_ID_FIELD, _ALLOWED_DATASETS, _VARIABLE_SLUG_FIELD)

def get_by_dataset(self, dataset_id):
return self._get_filtered_entities({DATASET_FILTER: dataset_id})
Expand All @@ -34,6 +35,7 @@ def _get_rows(self, filters=None):
def _map_row(self, row):
return {
'id': self._normalize_field(row, self.id_field),
'slug': self._normalize_field(row, 'slug'),
'name': self._normalize_field(row, 'name'),
'description': self._normalize_field(row, 'description'),
'column_name': self._normalize_field(row, 'column_name'),
Expand Down
64 changes: 63 additions & 1 deletion examples/07_catalog/discovery.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,69 @@
"outputs": [],
"source": [
"catalog.clear_filters()\n",
"catalog.country('usa').category('demographics').geography('carto-do-public-data.tiger.geography_usa_schooldistrictunifiedclipped_2015').datasets"
"catalog.country('usa').category('demographics').geography('ags_blockgroup_1c63771c').datasets"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SELECT t.* FROM datasets_public t WHERE t.country_id = 'usa' AND t.category_id = 'demographics' AND t.geography_id = 'carto-do-public-data.tiger.geography_usa_countyclipped_2015'\n"
]
},
{
"data": {
"text/plain": [
"[<Dataset('od_acs_1f614ee8')>,\n",
" <Dataset('od_acs_c2868f47')>,\n",
" <Dataset('od_acs_c1c86582')>,\n",
" <Dataset('od_bls_c2f65959')>,\n",
" <Dataset('od_acs_b581bfd1')>,\n",
" <Dataset('od_acsquantile_1fc24f44')>,\n",
" <Dataset('od_tiger_66b9092c')>,\n",
" <Dataset('od_acs_c5eb4b5e')>,\n",
" <Dataset('od_acsquantile_7985540b')>,\n",
" <Dataset('od_bls_c334336e')>,\n",
" <Dataset('od_bls_b29cadd6')>,\n",
" <Dataset('od_bls_2b95fc6c')>,\n",
" <Dataset('od_bls_5c92ccfa')>,\n",
" <Dataset('od_acs_5c10acf4')>,\n",
" <Dataset('od_acs_5b8fdefd')>,\n",
" <Dataset('od_acsquantile_16d4b47e')>,\n",
" <Dataset('od_acsquantile_55a55662')>,\n",
" <Dataset('od_acsquantile_7ee89012')>,\n",
" <Dataset('od_acsquantile_9efa084')>,\n",
" <Dataset('od_bls_b11879b8')>,\n",
" <Dataset('od_bls_b7d3bb53')>,\n",
" <Dataset('od_bls_2edaeae9')>,\n",
" <Dataset('od_bls_59ddda7f')>,\n",
" <Dataset('od_bls_c7b94fdc')>,\n",
" <Dataset('od_bls_b611d164')>,\n",
" <Dataset('od_bls_2f1880de')>,\n",
" <Dataset('od_bls_581fb048')>,\n",
" <Dataset('od_bls_c67b25eb')>,\n",
" <Dataset('od_bls_b35ec7e1')>,\n",
" <Dataset('od_bls_2a57965b')>,\n",
" <Dataset('od_bls_5d50a6cd')>,\n",
" <Dataset('od_bls_5ed472a3')>,\n",
" <Dataset('od_bls_c0b0e700')>,\n",
" <Dataset('od_bls_b0da138f')>,\n",
" <Dataset('od_bls_29d34235')>,\n",
" <Dataset('od_acs_550657ce')>]"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Catalog().country('usa').category('demographics').geography('carto-do-public-data.tiger.geography_usa_countyclipped_2015').datasets"
]
}
],
Expand Down
20 changes: 12 additions & 8 deletions test/data/observatory/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

db_geography1 = {
'id': 'carto-do-public.tiger.geography_esp_census_2019',
'slug': 'geography_esp_census_2019',
'slug': 'esp_census_2019_4567890d',
'name': 'ESP - Census',
'description': 'Geography data for Spanish census',
'provider_id': 'bbva',
Expand All @@ -41,7 +41,7 @@
}
db_geography2 = {
'id': 'carto-do-public.tiger.geography_esp_municipalities_2019',
'slug': 'carto-do-public-esp-municipalities_2019',
'slug': 'esp_municipalities_2019_3456789c',
'name': 'ESP - Municipalities',
'description': 'Geography data for Spanish municipalities',
'provider_id': 'bbva',
Expand All @@ -59,7 +59,7 @@

db_dataset1 = {
'id': 'carto-do-public.project.basicstats-census',
'slug': 'carto-do-public-basicstats-census',
'slug': 'basicstats_census_1234567a',
'name': 'Basic Stats - Census',
'description': 'Basic stats on 2019 Spanish census',
'provider_id': 'bbva',
Expand All @@ -77,7 +77,7 @@
}
db_dataset2 = {
'id': 'carto-do-public.project.basicstats-municipalities',
'slug': 'carto-do-public-basicstats-municipalities',
'slug': 'basicstats_municipalities_2345678b',
'name': 'Basic Stats - Municipalities',
'description': 'Basic stats on 2019 Spanish municipalities',
'provider_id': 'bbva',
Expand All @@ -98,7 +98,8 @@
test_datasets = CatalogList([test_dataset1, test_dataset2])

db_variable1 = {
'id': 'var1',
'id': 'carto-do.variable.var1',
'slug': 'var1',
'name': 'Population',
'description': 'The number of people within each geography',
'column_name': 'pop',
Expand All @@ -110,7 +111,8 @@
'summary_jsonb': {}
}
db_variable2 = {
'id': 'var2',
'id': 'carto-do.variable.var2',
'slug': 'var2',
'name': 'Date',
'description': 'The date the data refers to (YYYY-MM format for month and YYYY-MM-DD for day).',
'column_name': 'Date',
Expand Down Expand Up @@ -138,13 +140,15 @@
test_providers = CatalogList([test_provider1, test_provider2])

db_variable_group1 = {
'id': 'vargroup1',
'id': 'carto-do.variable_group.vargroup1',
'slug': 'vargroup1',
'name': 'Population',
'dataset_id': 'dataset1',
'starred': True
}
db_variable_group2 = {
'id': 'vargroup2',
'id': 'carto-do.variable_group.vargroup2',
'slug': 'vargroup2',
'name': 'Date',
'dataset_id': 'dataset1',
'starred': False
Expand Down
4 changes: 2 additions & 2 deletions test/data/observatory/repository/test_category_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def test_get_by_country(self, mocked_repo):
categories = repo.get_all({'country_id': country_code})

# Then
query = 'SELECT DISTINCT c.* FROM categories_public c, datasets_public view'
mocked_repo.assert_called_once_with(query, {'country_id': country_code}, ['c.id = view.category_id'])
query = 'SELECT DISTINCT c.* FROM categories_public c, datasets_public t'
mocked_repo.assert_called_once_with(query, {'country_id': country_code}, ['c.id = t.category_id'])
assert isinstance(categories, CatalogList)
assert categories == test_categories

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def test_missing_fields_are_mapped_as_None(self, mocked_repo):

expected_variables_groups = CatalogList([VariableGroup({
'id': 'variable_group1',
'slug': None,
'name': None,
'dataset_id': None,
'starred': None
Expand Down
15 changes: 15 additions & 0 deletions test/data/observatory/repository/test_variable_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,20 @@ def test_get_by_id_unknown_fails(self, mocked_repo):
with self.assertRaises(DiscoveryException):
repo.get_by_id(requested_id)

@patch.object(RepoClient, 'get_variables')
def test_get_by_slug(self, mocked_repo):
    """Passing a variable's slug to ``get_by_id`` queries the client by 'slug'."""
    # Given: the client yields a single variable row and we ask for its slug
    slug = db_variable1['slug']
    mocked_repo.return_value = [db_variable1]
    variable_repo = VariableRepository()

    # When: the repository is asked for that identifier
    found = variable_repo.get_by_id(slug)

    # Then: the client received a slug filter and the mapped entity matches
    mocked_repo.assert_called_once_with({'slug': slug})
    assert found == test_variable1

@patch.object(RepoClient, 'get_variables')
def test_get_by_dataset(self, mocked_repo):
# Given
Expand Down Expand Up @@ -106,6 +120,7 @@ def test_missing_fields_are_mapped_as_None(self, mocked_repo):

expected_variables = CatalogList([Variable({
'id': 'variable1',
'slug': None,
'name': None,
'description': None,
'column_name': None,
Expand Down

0 comments on commit e7cfab5

Please sign in to comment.