Skip to content

Commit

Permalink
Merge pull request #1606 from CartoDB/remove_bq_check
Browse the repository files at this point in the history
Remove bq check
  • Loading branch information
Jesus89 committed Apr 3, 2020
2 parents 78b3238 + 67a413e commit c96383a
Show file tree
Hide file tree
Showing 8 changed files with 11 additions and 88 deletions.
15 changes: 1 addition & 14 deletions cartoframes/data/observatory/catalog/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@

from carto.do_dataset import DODataset
from ....utils.logger import log
from ....exceptions import DOError


_PLATFORM_BQ = 'bq'

_DATASET_READ_MSG = '''To load it as a DataFrame you can do:
Expand Down Expand Up @@ -37,7 +33,7 @@ class CatalogEntity(ABC):
"""
id_field = 'id'
_entity_repo = None
export_excluded_fields = ['summary_json', 'available_in', 'geom_coverage']
export_excluded_fields = ['summary_json', 'geom_coverage']

def __init__(self, data):
self.data = data
Expand Down Expand Up @@ -123,9 +119,6 @@ def _get_print_id(self):
return self.id

def _download(self, credentials, file_path=None, limit=None, order_by=None):
if not self._is_available_in('bq'):
raise DOError('{} is not ready for Download. Please, contact us for more information.'.format(self))

auth_client = credentials.get_api_key_auth_client()
rows = DODataset(auth_client=auth_client).name(self.id).download_stream(limit=limit, order_by=order_by)
if file_path:
Expand All @@ -142,9 +135,6 @@ def _download(self, credentials, file_path=None, limit=None, order_by=None):
dataframe = pd.read_csv(rows)
return dataframe

def _is_available_in(self, platform=_PLATFORM_BQ):
return self.data['available_in'] and platform in self.data['available_in']

def _get_remote_full_table_name(self, user_project, user_dataset, public_project):
project, dataset, table = self.id.split('.')

Expand Down Expand Up @@ -185,9 +175,6 @@ def to_dataframe(self):
"""
df = pd.DataFrame([item.data for item in self])

if 'available_in' in df:
del df['available_in']

if 'summary_json' in df:
del df['summary_json']

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ def _map_row(self, row):
'update_frequency': self._normalize_field(row, 'update_frequency'),
'version': self._normalize_field(row, 'version'),
'is_public_data': self._normalize_field(row, 'is_public_data'),
'summary_json': self._normalize_field(row, 'summary_json'),
'available_in': self._normalize_field(row, 'available_in')
'summary_json': self._normalize_field(row, 'summary_json')
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ def _map_row(self, row):
'update_frequency': self._normalize_field(row, 'update_frequency'),
'version': self._normalize_field(row, 'version'),
'is_public_data': self._normalize_field(row, 'is_public_data'),
'summary_json': self._normalize_field(row, 'summary_json'),
'available_in': self._normalize_field(row, 'available_in')
'summary_json': self._normalize_field(row, 'summary_json')
}

def get_geographies_gdf(self):
Expand Down
12 changes: 4 additions & 8 deletions tests/unit/data/observatory/catalog/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@
'update_frequency': 'monthly',
'version': '20190203',
'is_public_data': True,
'summary_json': {},
'available_in': ['bq']
'summary_json': {}
}
db_geography2 = {
'id': 'carto-do-public.tiger.geography_esp_municipalities_2019',
Expand All @@ -56,8 +55,7 @@
'update_frequency': 'monthly',
'version': '20190203',
'is_public_data': False,
'summary_json': {},
'available_in': []
'summary_json': {}
}
test_geography1 = Geography(db_geography1)
test_geography2 = Geography(db_geography2)
Expand All @@ -83,8 +81,7 @@
'update_frequency': 'monthly',
'version': '20190203',
'is_public_data': True,
'summary_json': None,
'available_in': ['bq']
'summary_json': None
}
db_dataset2 = {
'id': 'carto-do-public.project.basicstats-municipalities',
Expand Down Expand Up @@ -122,8 +119,7 @@
'string': 1,
'integer': 1
}
},
'available_in': []
}
}
test_dataset1 = Dataset(db_dataset1)
test_dataset2 = Dataset(db_dataset2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,7 @@ def test_missing_fields_are_mapped_as_None(self, mocked_repo):
'update_frequency': None,
'version': None,
'is_public_data': None,
'summary_json': None,
'available_in': None
'summary_json': None
})])

# When
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,7 @@ def test_missing_fields_are_mapped_as_None(self, mocked_repo):
'update_frequency': None,
'version': None,
'is_public_data': None,
'summary_json': None,
'available_in': None
'summary_json': None
})])

# When
Expand Down
21 changes: 1 addition & 20 deletions tests/unit/data/observatory/catalog/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def test_dataset_is_exported_as_series(self):
def test_dataset_is_exported_as_dict(self):
# Given
dataset = Dataset(db_dataset1)
excluded_fields = ['summary_json', 'available_in']
excluded_fields = ['summary_json']
expected_dict = {key: value for key, value in db_dataset1.items() if key not in excluded_fields}

# When
Expand Down Expand Up @@ -281,7 +281,6 @@ def test_datasets_are_exported_as_dataframe(self):
datasets = test_datasets
dataset = datasets[0]
expected_dataset_df = dataset.to_series()
del expected_dataset_df['available_in']
del expected_dataset_df['summary_json']

# When
Expand Down Expand Up @@ -521,21 +520,3 @@ def raise_exception(a, b, c):
'We are sorry, the Data Observatory is not enabled for your account yet. '
'Please contact your customer success manager or send an email to '
'sales@carto.com to request access to it.')

def test_dataset_is_available_in(self):
# Given
dataset_in_bq = Dataset(db_dataset1)
dataset_not_in_bq = Dataset(db_dataset2)

# Then
assert dataset_in_bq._is_available_in('bq')
assert not dataset_not_in_bq._is_available_in('bq')

def test_dataset_is_available_in_with_empty_field(self):
# Given
db_dataset = dict(db_dataset1)
db_dataset['available_in'] = None
dataset_null = Dataset(db_dataset)

# Then
assert not dataset_null._is_available_in('bq')
39 changes: 1 addition & 38 deletions tests/unit/data/observatory/catalog/test_geography.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def test_geography_is_exported_as_series(self):
def test_geography_is_exported_as_dict(self):
# Given
geography = Geography(db_geography1)
excluded_fields = ['summary_json', 'available_in', 'geom_coverage']
excluded_fields = ['summary_json', 'geom_coverage']
expected_dict = {key: value for key, value in db_geography1.items() if key not in excluded_fields}

# When
Expand Down Expand Up @@ -207,7 +207,6 @@ def test_geographies_are_exported_as_dataframe(self):
geographies = test_geographies
geography = geographies[0]
expected_geography_df = geography.to_series()
del expected_geography_df['available_in']
del expected_geography_df['summary_json']

# When
Expand All @@ -219,29 +218,6 @@ def test_geographies_are_exported_as_dataframe(self):
assert isinstance(sliced_geography, pd.Series)
assert sliced_geography.equals(expected_geography_df)

@patch.object(GeographyRepository, 'get_all')
@patch.object(GeographyRepository, 'get_by_id')
@patch.object(DODataset, 'download_stream')
def test_geography_not_available_in_bq_download_fails(self, download_stream_mock, get_by_id_mock, get_all_mock):
# mock geography
get_by_id_mock.return_value = test_geography2
geography = Geography.get(test_geography2.id)

# mock subscriptions
get_all_mock.return_value = [geography]

# mock big query client
download_stream_mock.return_value = []

# test
credentials = Credentials('fake_user', '1234')

with pytest.raises(Exception) as e:
geography.to_csv('fake_path', credentials)

error = '{} is not ready for Download. Please, contact us for more information.'.format(geography)
assert str(e.value) == error

@patch.object(GeographyRepository, 'get_all')
@patch.object(GeographyRepository, 'get_by_id')
@patch.object(DODataset, 'download_stream')
Expand Down Expand Up @@ -473,16 +449,3 @@ def raise_exception(a, b, c):
'We are sorry, the Data Observatory is not enabled for your account yet. '
'Please contact your customer success manager or send an email to '
'sales@carto.com to request access to it.')

def test_geography_is_available_in(self):
geography_in_bq = Geography(db_geography1)
geography_not_in_bq = Geography(db_geography2)

assert geography_in_bq._is_available_in('bq')
assert not geography_not_in_bq._is_available_in('bq')

def test_geography_is_available_in_with_empty_field(self):
db_geography = dict(db_geography1)
db_geography['available_in'] = None
geography_null = Geography(db_geography)
assert not geography_null._is_available_in('bq')

0 comments on commit c96383a

Please sign in to comment.