Merge pull request #1112 from CartoDB/alasarr/feature-1031-datasets-by-geom

Alasarr/feature 1031 datasets by geom
Jesus89 committed Oct 23, 2019
2 parents 35a0439 + 3ccf8f4 commit a66a09f
Showing 6 changed files with 71 additions and 2 deletions.
8 changes: 8 additions & 0 deletions cartoframes/data/observatory/catalog.py
@@ -141,3 +141,11 @@ def subscriptions(self, credentials=None):
            CatalogDataset.get_all(_no_filters, _credentials),
            Geography.get_all(_no_filters, _credentials)
        )

    def datasets_filter(self, filter_dataset):
        """Get all the datasets in the Catalog filtered by the given spatial filter.

        Returns:
            :py:class:`Datasets <cartoframes.data.observatory.Datasets>`
        """
        return CatalogDataset.get_datasets_spatial_filtered(filter_dataset)
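
A minimal usage sketch of the new entry point, assuming the public Catalog class and illustrative coordinates (a WKT string is one of the accepted filter types; a GeoDataFrame or a cartoframes Dataset also work, as the dataset.py change below shows):

from cartoframes.data.observatory import Catalog

# Hypothetical area of interest as a WKT polygon
area = 'POLYGON((-74.1 40.6, -73.7 40.6, -73.7 40.9, -74.1 40.9, -74.1 40.6))'

# Datasets whose geography coverage intersects the area of interest
datasets = Catalog().datasets_filter(area)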
40 changes: 40 additions & 0 deletions cartoframes/data/observatory/dataset.py
@@ -1,8 +1,13 @@
from __future__ import absolute_import

import pandas as pd
import geopandas as gpd
from shapely import wkt
from cartoframes.data import Dataset as CFDataset

from .entity import CatalogEntity
from .repository.dataset_repo import get_dataset_repo
from .repository.geography_repo import get_geography_repo
from .repository.variable_repo import get_variable_repo
from .repository.variable_group_repo import get_variable_group_repo
from .repository.constants import DATASET_FILTER
@@ -118,6 +123,41 @@ def download(self, credentials=None):

        return self._download(credentials)

    @classmethod
    def get_datasets_spatial_filtered(cls, filter_dataset):
        user_gdf = cls._get_user_geodataframe(filter_dataset)

        # TODO: raise an exception if the dataframe has no geometry column
        # Keep only the geometry column to save memory
        user_gdf = user_gdf[[user_gdf.geometry.name]]
        catalog_geographies_gdf = get_geography_repo().get_geographies_gdf()
        matched_geographies_ids = cls._join_geographies_geodataframes(catalog_geographies_gdf, user_gdf)

        # Get Dataset objects for the geographies matched by the spatial join
        return get_dataset_repo().get_all({'geography_id': matched_geographies_ids})

    @staticmethod
    def _get_user_geodataframe(filter_dataset):
        if isinstance(filter_dataset, gpd.GeoDataFrame):
            # GeoPandas GeoDataFrame
            return filter_dataset

        if isinstance(filter_dataset, CFDataset):
            # CARTOframes Dataset
            user_df = filter_dataset.download(decode_geom=True)
            return gpd.GeoDataFrame(user_df, geometry='geometry')

        if isinstance(filter_dataset, str):
            # WKT string
            df = pd.DataFrame([{'geometry': filter_dataset}])
            df['geometry'] = df['geometry'].apply(wkt.loads)
            return gpd.GeoDataFrame(df)

    @staticmethod
    def _join_geographies_geodataframes(geographies_gdf1, geographies_gdf2):
        join_gdf = gpd.sjoin(geographies_gdf1, geographies_gdf2, how='inner', op='intersects')
        return join_gdf['id'].unique()

    def subscribe(self, credentials=None):
        """Subscribe to a Dataset.
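For reference, a minimal sketch of the spatial join used above, with toy geometries (gpd.sjoin relies on the optional Rtree/libspatialindex dependency referenced in setup.py below):

import geopandas as gpd
from shapely.geometry import box

# Left frame: catalog geographies, each with an 'id' and a coverage geometry
geographies = gpd.GeoDataFrame(
    {'id': ['geo_a', 'geo_b'], 'geometry': [box(0, 0, 1, 1), box(10, 10, 11, 11)]},
    geometry='geometry')

# Right frame: the user's area of interest
user_area = gpd.GeoDataFrame({'geometry': [box(0.5, 0.5, 2, 2)]}, geometry='geometry')

# The inner join on intersection keeps only the geographies overlapping the user area
joined = gpd.sjoin(geographies, user_area, how='inner', op='intersects')
print(joined['id'].unique())  # ['geo_a']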
7 changes: 7 additions & 0 deletions cartoframes/data/observatory/repository/dataset_repo.py
@@ -3,6 +3,8 @@

from .constants import CATEGORY_FILTER, COUNTRY_FILTER, GEOGRAPHY_FILTER, PROVIDER_FILTER, VARIABLE_FILTER
from .entity_repo import EntityRepository
from ..entity import CatalogList



_DATASET_ID_FIELD = 'id'
@@ -53,5 +55,10 @@ def _map_row(self, row):
            'summary_json': self._normalize_field(row, 'summary_json')
        }

    def get_datasets_for_geographies(self, geographies):
        rows = self.client.get_datasets_for_geographies(geographies)
        normalized_data = [self._get_entity_class()(self._map_row(row)) for row in rows]
        return CatalogList(normalized_data)


_REPO = DatasetRepository()
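
A small sketch of how this repository method would be called, with hypothetical geography ids (the client method it delegates to is outside this diff):

# Hypothetical geography ids, e.g. the ones matched by the spatial join in dataset.py
ids = ['geography_a', 'geography_b']

# Each metadata row goes through _map_row and the result is wrapped into a CatalogList
datasets = get_dataset_repo().get_datasets_for_geographies(ids)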
12 changes: 12 additions & 0 deletions cartoframes/data/observatory/repository/geography_repo.py
@@ -1,5 +1,9 @@
from __future__ import absolute_import

import geopandas as gpd
from cartoframes.auth import Credentials

from cartoframes.data import Dataset
from .constants import COUNTRY_FILTER, CATEGORY_FILTER
from .entity_repo import EntityRepository

@@ -8,6 +12,8 @@
_GEOGRAPHY_SLUG_FIELD = 'slug'
_ALLOWED_FILTERS = [COUNTRY_FILTER, CATEGORY_FILTER]

_DO_CREDENTIALS = Credentials('do-metadata', 'default_public')


def get_geography_repo():
    return _REPO
@@ -51,5 +57,11 @@ def _map_row(self, row):
            'summary_json': self._normalize_field(row, 'summary_json')
        }

    def get_geographies_gdf(self):
        query = 'select id, geom_coverage as the_geom from geographies_public where geom_coverage is not null'
        df = Dataset(query, credentials=_DO_CREDENTIALS).download(decode_geom=True)

        return gpd.GeoDataFrame(df, geometry='geometry')


_REPO = GeographyRepository()
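
A sketch of what the new repository method returns, assuming the public 'do-metadata' account above is reachable (column names are approximate):

from cartoframes.data.observatory.repository.geography_repo import get_geography_repo

# One row per public geography: its 'id' plus its coverage geometry decoded into
# a 'geometry' column, ready to be used as the left side of gpd.sjoin
coverages = get_geography_repo().get_geographies_gdf()
print(coverages.columns)  # e.g. Index(['id', 'geometry'], dtype='object')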
3 changes: 2 additions & 1 deletion cartoframes/data/observatory/summary.py
@@ -1,6 +1,5 @@
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

from shapely import wkb

@@ -73,6 +72,8 @@ def top_values(data):
    if not data:
        return

    import matplotlib.pyplot as plt

    top_values = pd.DataFrame(data['top_values'])

    position = list(reversed(range(top_values.shape[0])))
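The matplotlib import moves from module level into top_values, so importing the observatory summary module no longer requires matplotlib unless a plot is actually drawn. A minimal sketch of the pattern (illustrative function name):

def plot_histogram(values):
    # Imported lazily: matplotlib only needs to be installed when plotting is requested
    import matplotlib.pyplot as plt

    plt.hist(values)
    plt.show()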
3 changes: 2 additions & 1 deletion setup.py
@@ -27,7 +27,8 @@ def walk_subpkg(name):
    'unidecode>=1.1.0,<2.0',
    'pyarrow>=0.14.1,<1.0',
    'google-cloud-bigquery>=1.19.0,<2.0',
    'geojson>=2.5.0,<3.0',
    # 'Rtree>=0.8.3,<1.0'
]

PACKAGE_DATA = {
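The Rtree requirement stays commented out, but geopandas.sjoin (used by the spatial filter above) needs Rtree and the libspatialindex system library at runtime. A small optional guard one could run before filtering (hypothetical helper, not part of this change):

def has_spatial_index():
    # Rtree is not declared as a dependency, so check for it before calling gpd.sjoin
    try:
        import rtree  # noqa: F401
        return True
    except ImportError:
        return False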
