Skip to content

Commit

Permalink
Merge pull request #1014 from CartoDB/feature/962-pandas-integration
Browse files Browse the repository at this point in the history
Feature/962 pandas integration
  • Loading branch information
oleurud committed Sep 23, 2019
2 parents fa5e057 + 3a6a774 commit affad11
Show file tree
Hide file tree
Showing 25 changed files with 379 additions and 53 deletions.
15 changes: 14 additions & 1 deletion cartoframes/data/observatory/category.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd

from cartoframes.exceptions import DiscoveryException
from .repository.category_repo import get_category_repo
from .repository.dataset_repo import get_dataset_repo

Expand All @@ -22,7 +23,15 @@ def get_by_id(category_id):
return get_category_repo().get_by_id(category_id)

def datasets(self):
return get_dataset_repo().get_by_category(self[_CATEGORY_ID_FIELD])
return get_dataset_repo().get_by_category(self._get_id())

def _get_id(self):
    """Return the value stored under ``_CATEGORY_ID_FIELD`` in this Series.

    Raises:
        DiscoveryException: if looking up ``_CATEGORY_ID_FIELD`` raises
            ``KeyError``, i.e. this Series has no such entry.
    """
    try:
        category_id = self[_CATEGORY_ID_FIELD]
    except KeyError:
        message = ('Unsupported function: this instance actually represents a subset of Categories '
                   'class. You should use `Categories.get_by_id("category_id")` to obtain a valid '
                   'instance of the Category class and then attempt this function on it.')
        raise DiscoveryException(message)
    return category_id

def __eq__(self, other):
return self.equals(other)
Expand All @@ -41,6 +50,10 @@ def _constructor(self):
def _constructor_sliced(self):
return Category

def __init__(self, data, *args, **kwargs):
    """Build the frame and, when possible, index it by category id.

    Args:
        data: anything accepted as the first argument of ``pandas.DataFrame``.
        *args, **kwargs: forwarded untouched to ``pandas.DataFrame``. pandas
            can call a subclass constructor with extra arguments such as
            ``index=`` / ``columns=`` / ``copy=`` when deriving new frames,
            so a ``data``-only signature is too narrow.
    """
    super(Categories, self).__init__(data, *args, **kwargs)
    # Frames derived from this one (e.g. a column subset) may lack the id
    # column; index by it only when present rather than raising KeyError.
    if _CATEGORY_ID_FIELD in self.columns:
        self.set_index(_CATEGORY_ID_FIELD, inplace=True, drop=False)

@staticmethod
def get_all():
return get_category_repo().get_all()
Expand Down
17 changes: 15 additions & 2 deletions cartoframes/data/observatory/country.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd

from cartoframes.exceptions import DiscoveryException
from .repository.geography_repo import get_geography_repo
from .repository.country_repo import get_country_repo
from .repository.dataset_repo import get_dataset_repo
Expand All @@ -22,10 +23,18 @@ def get_by_id(iso_code3):
return get_country_repo().get_by_id(iso_code3)

def datasets(self):
return get_dataset_repo().get_by_country(self[_COUNTRY_ID_FIELD])
return get_dataset_repo().get_by_country(self._get_id())

def geographies(self):
return get_geography_repo().get_by_country(self[_COUNTRY_ID_FIELD])
return get_geography_repo().get_by_country(self._get_id())

def _get_id(self):
    """Return the value stored under ``_COUNTRY_ID_FIELD`` in this Series.

    Raises:
        DiscoveryException: if looking up ``_COUNTRY_ID_FIELD`` raises
            ``KeyError``, i.e. this Series has no such entry.
    """
    try:
        country_id = self[_COUNTRY_ID_FIELD]
    except KeyError:
        message = ('Unsupported function: this instance actually represents a subset of Countries '
                   'class. You should use `Countries.get_by_id("country_id")` to obtain a valid '
                   'instance of the Country class and then attempt this function on it.')
        raise DiscoveryException(message)
    return country_id

def __eq__(self, other):
return self.equals(other)
Expand All @@ -44,6 +53,10 @@ def _constructor(self):
def _constructor_sliced(self):
return Country

def __init__(self, data, *args, **kwargs):
    """Build the frame and, when possible, index it by country id.

    Args:
        data: anything accepted as the first argument of ``pandas.DataFrame``.
        *args, **kwargs: forwarded untouched to ``pandas.DataFrame``. pandas
            can call a subclass constructor with extra arguments such as
            ``index=`` / ``columns=`` / ``copy=`` when deriving new frames,
            so a ``data``-only signature is too narrow.
    """
    super(Countries, self).__init__(data, *args, **kwargs)
    # Frames derived from this one (e.g. a column subset) may lack the id
    # column; index by it only when present rather than raising KeyError.
    if _COUNTRY_ID_FIELD in self.columns:
        self.set_index(_COUNTRY_ID_FIELD, inplace=True, drop=False)

@staticmethod
def get_all():
return get_country_repo().get_all()
Expand Down
15 changes: 14 additions & 1 deletion cartoframes/data/observatory/dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd

from cartoframes.exceptions import DiscoveryException
from .repository.dataset_repo import get_dataset_repo
from .repository.variable_repo import get_variable_repo

Expand All @@ -21,7 +22,15 @@ def get_by_id(dataset_id):
return get_dataset_repo().get_by_id(dataset_id)

def variables(self):
return get_variable_repo().get_by_dataset(self[_DATASET_ID_FIELD])
return get_variable_repo().get_by_dataset(self._get_id())

def _get_id(self):
    """Return the value stored under ``_DATASET_ID_FIELD`` in this Series.

    Raises:
        DiscoveryException: if looking up ``_DATASET_ID_FIELD`` raises
            ``KeyError``, i.e. this Series has no such entry.
    """
    try:
        dataset_id = self[_DATASET_ID_FIELD]
    except KeyError:
        message = ('Unsupported function: this instance actually represents a subset of Datasets '
                   'class. You should use `Datasets.get_by_id("dataset_id")` to obtain a valid '
                   'instance of the Dataset class and then attempt this function on it.')
        raise DiscoveryException(message)
    return dataset_id

def __eq__(self, other):
return self.equals(other)
Expand All @@ -40,6 +49,10 @@ def _constructor(self):
def _constructor_sliced(self):
return Dataset

def __init__(self, data, *args, **kwargs):
    """Build the frame and, when possible, index it by dataset id.

    Args:
        data: anything accepted as the first argument of ``pandas.DataFrame``.
        *args, **kwargs: forwarded untouched to ``pandas.DataFrame``. pandas
            can call a subclass constructor with extra arguments such as
            ``index=`` / ``columns=`` / ``copy=`` when deriving new frames,
            so a ``data``-only signature is too narrow.
    """
    super(Datasets, self).__init__(data, *args, **kwargs)
    # Frames derived from this one (e.g. a column subset) may lack the id
    # column; index by it only when present rather than raising KeyError.
    if _DATASET_ID_FIELD in self.columns:
        self.set_index(_DATASET_ID_FIELD, inplace=True, drop=False)

@staticmethod
def get_all():
return get_dataset_repo().get_all()
Expand Down
17 changes: 15 additions & 2 deletions cartoframes/data/observatory/geography.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import pandas as pd

from cartoframes.exceptions import DiscoveryException
from .repository.dataset_repo import get_dataset_repo
from .repository.geography_repo import get_geography_repo

_GEOGRAPHY_FIELD_ID = 'id'
_GEOGRAPHY_ID_FIELD = 'id'


class Geography(pd.Series):
Expand All @@ -21,7 +22,15 @@ def get_by_id(geography_id):
return get_geography_repo().get_by_id(geography_id)

def datasets(self):
return get_dataset_repo().get_by_geography(self[_GEOGRAPHY_FIELD_ID])
return get_dataset_repo().get_by_geography(self._get_id())

def _get_id(self):
    """Return the value stored under ``_GEOGRAPHY_ID_FIELD`` in this Series.

    Raises:
        DiscoveryException: if looking up ``_GEOGRAPHY_ID_FIELD`` raises
            ``KeyError``, i.e. this Series has no such entry.
    """
    try:
        geography_id = self[_GEOGRAPHY_ID_FIELD]
    except KeyError:
        message = ('Unsupported function: this instance actually represents a subset of Geographies '
                   'class. You should use `Geographies.get_by_id("geography_id")` to obtain a valid '
                   'instance of the Geography class and then attempt this function on it.')
        raise DiscoveryException(message)
    return geography_id

def __eq__(self, other):
return self.equals(other)
Expand All @@ -40,6 +49,10 @@ def _constructor(self):
def _constructor_sliced(self):
return Geography

def __init__(self, data, *args, **kwargs):
    """Build the frame and, when possible, index it by geography id.

    Args:
        data: anything accepted as the first argument of ``pandas.DataFrame``.
        *args, **kwargs: forwarded untouched to ``pandas.DataFrame``. pandas
            can call a subclass constructor with extra arguments such as
            ``index=`` / ``columns=`` / ``copy=`` when deriving new frames,
            so a ``data``-only signature is too narrow.
    """
    super(Geographies, self).__init__(data, *args, **kwargs)
    # Frames derived from this one (e.g. a column subset) may lack the id
    # column; index by it only when present rather than raising KeyError.
    if _GEOGRAPHY_ID_FIELD in self.columns:
        self.set_index(_GEOGRAPHY_ID_FIELD, inplace=True, drop=False)

@staticmethod
def get_all():
return get_geography_repo().get_all()
Expand Down
15 changes: 14 additions & 1 deletion cartoframes/data/observatory/provider.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd

from cartoframes.exceptions import DiscoveryException
from .repository.provider_repo import get_provider_repo
from .repository.dataset_repo import get_dataset_repo

Expand All @@ -22,7 +23,15 @@ def get_by_id(provider_id):
return get_provider_repo().get_by_id(provider_id)

def datasets(self):
return get_dataset_repo().get_by_provider(self[_PROVIDER_ID_FIELD])
return get_dataset_repo().get_by_provider(self._get_id())

def _get_id(self):
    """Return the value stored under ``_PROVIDER_ID_FIELD`` in this Series.

    Raises:
        DiscoveryException: if looking up ``_PROVIDER_ID_FIELD`` raises
            ``KeyError``, i.e. this Series has no such entry.
    """
    try:
        return self[_PROVIDER_ID_FIELD]
    except KeyError:
        # The hint previously said "category_id" (copy-paste slip); providers
        # are looked up by provider id.
        raise DiscoveryException('Unsupported function: this instance actually represents a subset of Providers '
                                 'class. You should use `Providers.get_by_id("provider_id")` to obtain a valid '
                                 'instance of the Provider class and then attempt this function on it.')

def __eq__(self, other):
return self.equals(other)
Expand All @@ -41,6 +50,10 @@ def _constructor(self):
def _constructor_sliced(self):
return Provider

def __init__(self, data, *args, **kwargs):
    """Build the frame and, when possible, index it by provider id.

    Args:
        data: anything accepted as the first argument of ``pandas.DataFrame``.
        *args, **kwargs: forwarded untouched to ``pandas.DataFrame``. pandas
            can call a subclass constructor with extra arguments such as
            ``index=`` / ``columns=`` / ``copy=`` when deriving new frames,
            so a ``data``-only signature is too narrow.
    """
    super(Providers, self).__init__(data, *args, **kwargs)
    # Frames derived from this one (e.g. a column subset) may lack the id
    # column; index by it only when present rather than raising KeyError.
    if _PROVIDER_ID_FIELD in self.columns:
        self.set_index(_PROVIDER_ID_FIELD, inplace=True, drop=False)

@staticmethod
def get_all():
return get_provider_repo().get_all()
Expand Down
3 changes: 3 additions & 0 deletions cartoframes/data/observatory/repository/category_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def _to_category(result):

@staticmethod
def _to_categories(results):
    """Wrap ``results`` in a ``Categories`` collection, or return ``None`` when empty.

    Each element of ``results`` is converted with
    ``CategoryRepository._to_category`` before being collected.
    """
    if len(results) > 0:
        # Function-scope import kept as in the original; presumably avoids a
        # circular import with the category module -- confirm.
        from cartoframes.data.observatory.category import Categories

        converted = [CategoryRepository._to_category(row) for row in results]
        return Categories(converted)

    return None
Expand Down
3 changes: 3 additions & 0 deletions cartoframes/data/observatory/repository/country_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def _to_country(result):

@staticmethod
def _to_countries(results):
    """Wrap ``results`` in a ``Countries`` collection, or return ``None`` when empty.

    Each element of ``results`` is converted with
    ``CountryRepository._to_country`` before being collected.
    """
    if len(results) > 0:
        # Function-scope import kept as in the original; presumably avoids a
        # circular import with the country module -- confirm.
        from cartoframes.data.observatory.country import Countries

        converted = [CountryRepository._to_country(row) for row in results]
        return Countries(converted)

    return None
Expand Down
3 changes: 3 additions & 0 deletions cartoframes/data/observatory/repository/dataset_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def _to_dataset(result):

@staticmethod
def _to_datasets(results):
    """Wrap ``results`` in a ``Datasets`` collection, or return ``None`` when empty.

    Each element of ``results`` is converted with
    ``DatasetRepository._to_dataset``; the conversions are handed to
    ``Datasets`` as a generator, not a list.
    """
    if len(results) > 0:
        # Function-scope import kept as in the original; presumably avoids a
        # circular import with the dataset module -- confirm.
        from cartoframes.data.observatory.dataset import Datasets

        converted = (DatasetRepository._to_dataset(row) for row in results)
        return Datasets(converted)

    return None
Expand Down
3 changes: 3 additions & 0 deletions cartoframes/data/observatory/repository/geography_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def _to_geography(result):

@staticmethod
def _to_geographies(results):
    """Wrap ``results`` in a ``Geographies`` collection, or return ``None`` when empty.

    Each element of ``results`` is converted with
    ``GeographyRepository._to_geography``; the conversions are handed to
    ``Geographies`` as a generator, not a list.
    """
    if len(results) > 0:
        # Function-scope import kept as in the original; presumably avoids a
        # circular import with the geography module -- confirm.
        from cartoframes.data.observatory.geography import Geographies

        converted = (GeographyRepository._to_geography(row) for row in results)
        return Geographies(converted)

    return None
Expand Down
3 changes: 3 additions & 0 deletions cartoframes/data/observatory/repository/provider_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def _to_provider(result):

@staticmethod
def _to_providers(results):
    """Wrap ``results`` in a ``Providers`` collection, or return ``None`` when empty.

    Each element of ``results`` is converted with
    ``ProviderRepository._to_provider`` before being collected.
    """
    if len(results) > 0:
        # Function-scope import kept as in the original; presumably avoids a
        # circular import with the provider module -- confirm.
        from cartoframes.data.observatory.provider import Providers

        converted = [ProviderRepository._to_provider(row) for row in results]
        return Providers(converted)

    return None
Expand Down
3 changes: 3 additions & 0 deletions cartoframes/data/observatory/repository/variable_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def _to_variable(result):

@staticmethod
def _to_variables(results):
    """Wrap ``results`` in a ``Variables`` collection, or return ``None`` when empty.

    Each element of ``results`` is converted with
    ``VariableRepository._to_variable`` before being collected.
    """
    if len(results) > 0:
        # Function-scope import kept as in the original; presumably avoids a
        # circular import with the variable module -- confirm.
        from cartoframes.data.observatory.variable import Variables

        converted = [VariableRepository._to_variable(row) for row in results]
        return Variables(converted)

    return None
Expand Down
17 changes: 15 additions & 2 deletions cartoframes/data/observatory/variable.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import pandas as pd

from cartoframes.exceptions import DiscoveryException
from .repository.dataset_repo import get_dataset_repo
from .repository.variable_repo import get_variable_repo

_VARIABLE_FIELD_ID = 'id'
_VARIABLE_ID_FIELD = 'id'


class Variable(pd.Series):
Expand All @@ -21,7 +22,15 @@ def get_by_id(variable_id):
return get_variable_repo().get_by_id(variable_id)

def datasets(self):
return get_dataset_repo().get_by_variable(self[_VARIABLE_FIELD_ID])
return get_dataset_repo().get_by_variable(self._get_id())

def _get_id(self):
    """Return the value stored under ``_VARIABLE_ID_FIELD`` in this Series.

    Raises:
        DiscoveryException: if looking up ``_VARIABLE_ID_FIELD`` raises
            ``KeyError``, i.e. this Series has no such entry.
    """
    try:
        variable_id = self[_VARIABLE_ID_FIELD]
    except KeyError:
        message = ('Unsupported function: this instance actually represents a subset of Variables '
                   'class. You should use `Variables.get_by_id("variable_id")` to obtain a valid '
                   'instance of the Variable class and then attempt this function on it.')
        raise DiscoveryException(message)
    return variable_id

def __eq__(self, other):
return self.equals(other)
Expand All @@ -40,6 +49,10 @@ def _constructor(self):
def _constructor_sliced(self):
return Variable

def __init__(self, data, *args, **kwargs):
    """Build the frame and, when possible, index it by variable id.

    Args:
        data: anything accepted as the first argument of ``pandas.DataFrame``.
        *args, **kwargs: forwarded untouched to ``pandas.DataFrame``. pandas
            can call a subclass constructor with extra arguments such as
            ``index=`` / ``columns=`` / ``copy=`` when deriving new frames,
            so a ``data``-only signature is too narrow.
    """
    super(Variables, self).__init__(data, *args, **kwargs)
    # Frames derived from this one (e.g. a column subset) may lack the id
    # column; index by it only when present rather than raising KeyError.
    if _VARIABLE_ID_FIELD in self.columns:
        self.set_index(_VARIABLE_ID_FIELD, inplace=True, drop=False)

@staticmethod
def get_all():
return get_variable_repo().get_all()
Expand Down
16 changes: 16 additions & 0 deletions examples/07_catalog/discovery.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,22 @@
"isinstance(filtered_country, pd.Series)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also use the id to access with loc, since the id corresponds to the DataFrame's index:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"countries.loc['spain']"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
6 changes: 4 additions & 2 deletions test/data/observatory/repository/test_category_repo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import unittest

from cartoframes.exceptions import DiscoveryException
from cartoframes.data.observatory.category import Categories

from cartoframes.data.observatory.repository.category_repo import CategoryRepository
from cartoframes.data.observatory.repository.repo_client import RepoClient
Expand All @@ -28,6 +27,9 @@ def test_get_all(self, mocked_repo):
mocked_repo.assert_called_once_with()
assert categories == test_categories

id1 = db_category1['id']
assert categories.loc[id1] == test_category1

@patch.object(RepoClient, 'get_categories')
def test_get_all_when_empty(self, mocked_repo):
# Given
Expand All @@ -39,7 +41,7 @@ def test_get_all_when_empty(self, mocked_repo):

# Then
mocked_repo.assert_called_once_with()
assert categories == Categories([])
assert categories is None

@patch.object(RepoClient, 'get_categories')
def test_get_by_id(self, mocked_repo):
Expand Down
3 changes: 1 addition & 2 deletions test/data/observatory/repository/test_country_repo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import unittest

from cartoframes.exceptions import DiscoveryException
from cartoframes.data.observatory.country import Countries

from cartoframes.data.observatory.repository.country_repo import CountryRepository
from cartoframes.data.observatory.repository.repo_client import RepoClient
Expand Down Expand Up @@ -37,7 +36,7 @@ def test_get_all_when_empty(self, mocked_repo):
countries = repo.get_all()

# Then
assert countries == Countries([])
assert countries is None

@patch.object(RepoClient, 'get_countries')
def test_get_by_id(self, mocked_repo):
Expand Down
3 changes: 1 addition & 2 deletions test/data/observatory/repository/test_dataset_repo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import unittest

from cartoframes.exceptions import DiscoveryException
from cartoframes.data.observatory.dataset import Datasets

from cartoframes.data.observatory.repository.dataset_repo import DatasetRepository
from cartoframes.data.observatory.repository.repo_client import RepoClient
Expand Down Expand Up @@ -39,7 +38,7 @@ def test_get_all_when_empty(self, mocked_repo):

# Then
mocked_repo.assert_called_once_with()
assert datasets == Datasets([])
assert datasets is None

@patch.object(RepoClient, 'get_datasets')
def test_get_by_id(self, mocked_repo):
Expand Down
3 changes: 1 addition & 2 deletions test/data/observatory/repository/test_geography_repo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import unittest

from cartoframes.exceptions import DiscoveryException
from cartoframes.data.observatory.geography import Geographies

from cartoframes.data.observatory.repository.geography_repo import GeographyRepository
from cartoframes.data.observatory.repository.repo_client import RepoClient
Expand Down Expand Up @@ -39,7 +38,7 @@ def test_get_all_when_empty(self, mocked_repo):

# Then
mocked_repo.assert_called_once_with()
assert geographies == Geographies([])
assert geographies is None

@patch.object(RepoClient, 'get_geographies')
def test_get_by_id(self, mocked_repo):
Expand Down
Loading

0 comments on commit affad11

Please sign in to comment.