Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove bq check #1606

Merged
merged 5 commits into from
Apr 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 1 addition & 14 deletions cartoframes/data/observatory/catalog/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@

from carto.do_dataset import DODataset
from ....utils.logger import log
from ....exceptions import DOError


_PLATFORM_BQ = 'bq'

_DATASET_READ_MSG = '''To load it as a DataFrame you can do:

Expand Down Expand Up @@ -37,7 +33,7 @@ class CatalogEntity(ABC):
"""
id_field = 'id'
_entity_repo = None
export_excluded_fields = ['summary_json', 'available_in', 'geom_coverage']
export_excluded_fields = ['summary_json', 'geom_coverage']

def __init__(self, data):
self.data = data
Expand Down Expand Up @@ -123,9 +119,6 @@ def _get_print_id(self):
return self.id

def _download(self, credentials, file_path=None, limit=None, order_by=None):
if not self._is_available_in('bq'):
raise DOError('{} is not ready for Download. Please, contact us for more information.'.format(self))

auth_client = credentials.get_api_key_auth_client()
rows = DODataset(auth_client=auth_client).name(self.id).download_stream(limit=limit, order_by=order_by)
if file_path:
Expand All @@ -142,9 +135,6 @@ def _download(self, credentials, file_path=None, limit=None, order_by=None):
dataframe = pd.read_csv(rows)
return dataframe

def _is_available_in(self, platform=_PLATFORM_BQ):
return self.data['available_in'] and platform in self.data['available_in']

def _get_remote_full_table_name(self, user_project, user_dataset, public_project):
project, dataset, table = self.id.split('.')

Expand Down Expand Up @@ -185,9 +175,6 @@ def to_dataframe(self):
"""
df = pd.DataFrame([item.data for item in self])

if 'available_in' in df:
del df['available_in']

if 'summary_json' in df:
del df['summary_json']

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ def _map_row(self, row):
'update_frequency': self._normalize_field(row, 'update_frequency'),
'version': self._normalize_field(row, 'version'),
'is_public_data': self._normalize_field(row, 'is_public_data'),
'summary_json': self._normalize_field(row, 'summary_json'),
'available_in': self._normalize_field(row, 'available_in')
'summary_json': self._normalize_field(row, 'summary_json')
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ def _map_row(self, row):
'update_frequency': self._normalize_field(row, 'update_frequency'),
'version': self._normalize_field(row, 'version'),
'is_public_data': self._normalize_field(row, 'is_public_data'),
'summary_json': self._normalize_field(row, 'summary_json'),
'available_in': self._normalize_field(row, 'available_in')
'summary_json': self._normalize_field(row, 'summary_json')
}

def get_geographies_gdf(self):
Expand Down
12 changes: 4 additions & 8 deletions tests/unit/data/observatory/catalog/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@
'update_frequency': 'monthly',
'version': '20190203',
'is_public_data': True,
'summary_json': {},
'available_in': ['bq']
'summary_json': {}
}
db_geography2 = {
'id': 'carto-do-public.tiger.geography_esp_municipalities_2019',
Expand All @@ -56,8 +55,7 @@
'update_frequency': 'monthly',
'version': '20190203',
'is_public_data': False,
'summary_json': {},
'available_in': []
'summary_json': {}
}
test_geography1 = Geography(db_geography1)
test_geography2 = Geography(db_geography2)
Expand All @@ -83,8 +81,7 @@
'update_frequency': 'monthly',
'version': '20190203',
'is_public_data': True,
'summary_json': None,
'available_in': ['bq']
'summary_json': None
}
db_dataset2 = {
'id': 'carto-do-public.project.basicstats-municipalities',
Expand Down Expand Up @@ -122,8 +119,7 @@
'string': 1,
'integer': 1
}
},
'available_in': []
}
}
test_dataset1 = Dataset(db_dataset1)
test_dataset2 = Dataset(db_dataset2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,7 @@ def test_missing_fields_are_mapped_as_None(self, mocked_repo):
'update_frequency': None,
'version': None,
'is_public_data': None,
'summary_json': None,
'available_in': None
'summary_json': None
})])

# When
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,7 @@ def test_missing_fields_are_mapped_as_None(self, mocked_repo):
'update_frequency': None,
'version': None,
'is_public_data': None,
'summary_json': None,
'available_in': None
'summary_json': None
})])

# When
Expand Down
21 changes: 1 addition & 20 deletions tests/unit/data/observatory/catalog/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def test_dataset_is_exported_as_series(self):
def test_dataset_is_exported_as_dict(self):
# Given
dataset = Dataset(db_dataset1)
excluded_fields = ['summary_json', 'available_in']
excluded_fields = ['summary_json']
expected_dict = {key: value for key, value in db_dataset1.items() if key not in excluded_fields}

# When
Expand Down Expand Up @@ -281,7 +281,6 @@ def test_datasets_are_exported_as_dataframe(self):
datasets = test_datasets
dataset = datasets[0]
expected_dataset_df = dataset.to_series()
del expected_dataset_df['available_in']
del expected_dataset_df['summary_json']

# When
Expand Down Expand Up @@ -521,21 +520,3 @@ def raise_exception(a, b, c):
'We are sorry, the Data Observatory is not enabled for your account yet. '
'Please contact your customer success manager or send an email to '
'sales@carto.com to request access to it.')

def test_dataset_is_available_in(self):
    # Given one dataset listed for BigQuery and one that is not
    available = Dataset(db_dataset1)
    unavailable = Dataset(db_dataset2)

    # Then only the first one reports BigQuery availability
    assert available._is_available_in('bq')
    assert not unavailable._is_available_in('bq')

def test_dataset_is_available_in_with_empty_field(self):
    # Given a dataset whose 'available_in' field is null
    data = {**db_dataset1, 'available_in': None}
    dataset_null = Dataset(data)

    # Then it is not reported as available in any platform
    assert not dataset_null._is_available_in('bq')
39 changes: 1 addition & 38 deletions tests/unit/data/observatory/catalog/test_geography.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def test_geography_is_exported_as_series(self):
def test_geography_is_exported_as_dict(self):
# Given
geography = Geography(db_geography1)
excluded_fields = ['summary_json', 'available_in', 'geom_coverage']
excluded_fields = ['summary_json', 'geom_coverage']
expected_dict = {key: value for key, value in db_geography1.items() if key not in excluded_fields}

# When
Expand Down Expand Up @@ -207,7 +207,6 @@ def test_geographies_are_exported_as_dataframe(self):
geographies = test_geographies
geography = geographies[0]
expected_geography_df = geography.to_series()
del expected_geography_df['available_in']
del expected_geography_df['summary_json']

# When
Expand All @@ -219,29 +218,6 @@ def test_geographies_are_exported_as_dataframe(self):
assert isinstance(sliced_geography, pd.Series)
assert sliced_geography.equals(expected_geography_df)

@patch.object(GeographyRepository, 'get_all')
@patch.object(GeographyRepository, 'get_by_id')
@patch.object(DODataset, 'download_stream')
def test_geography_not_available_in_bq_download_fails(self, download_stream_mock, get_by_id_mock, get_all_mock):
    # The repository returns a geography that is not available in BigQuery
    get_by_id_mock.return_value = test_geography2
    geography = Geography.get(test_geography2.id)

    # The user appears to be subscribed to that geography
    get_all_mock.return_value = [geography]

    # The download stream would yield nothing either way
    download_stream_mock.return_value = []

    credentials = Credentials('fake_user', '1234')

    # Attempting the download must raise an explanatory error
    with pytest.raises(Exception) as e:
        geography.to_csv('fake_path', credentials)

    expected = '{} is not ready for Download. Please, contact us for more information.'.format(geography)
    assert str(e.value) == expected

@patch.object(GeographyRepository, 'get_all')
@patch.object(GeographyRepository, 'get_by_id')
@patch.object(DODataset, 'download_stream')
Expand Down Expand Up @@ -473,16 +449,3 @@ def raise_exception(a, b, c):
'We are sorry, the Data Observatory is not enabled for your account yet. '
'Please contact your customer success manager or send an email to '
'sales@carto.com to request access to it.')

def test_geography_is_available_in(self):
    # A geography flagged for 'bq' reports availability; one without the flag does not
    bq_geography = Geography(db_geography1)
    other_geography = Geography(db_geography2)

    assert bq_geography._is_available_in('bq')
    assert not other_geography._is_available_in('bq')

def test_geography_is_available_in_with_empty_field(self):
    # A null 'available_in' field means the geography is available nowhere
    data = dict(db_geography1, available_in=None)
    geography_null = Geography(data)

    assert not geography_null._is_available_in('bq')