Skip to content

Commit

Permalink
Merge pull request #1718 from CartoDB/develop
Browse files Browse the repository at this point in the history
Release/1.1.1
  • Loading branch information
simon-contreras-deel committed Feb 15, 2021
2 parents a5f98f5 + a8578c5 commit 5f49363
Show file tree
Hide file tree
Showing 39 changed files with 32,896 additions and 12,081 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.1.1] - 2021-02-12

### Fixed
- Fix geocoding status columns when using cached parameter (#1717)

## [1.1.0] - 2020-12-04

### Added
Expand Down
8 changes: 4 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ CARTOframes

.. image:: https://travis-ci.org/CartoDB/cartoframes.svg?branch=develop
:target: https://travis-ci.org/CartoDB/cartoframes
.. image:: https://img.shields.io/badge/pypi-v1.1.0-orange
:target: https://pypi.org/project/cartoframes/1.1.0
.. image:: https://img.shields.io/badge/pypi-v1.1.1-orange
:target: https://pypi.org/project/cartoframes/1.1.1

A Python package for integrating `CARTO <https://carto.com/>`__ maps, analysis, and data services into data science workflows.

Expand All @@ -14,11 +14,11 @@ Python data analysis workflows often rely on the de facto standards `pandas <htt
Try it Out
==========

* Stable (1.1.0): |stable|
* Stable (1.1.1): |stable|
* Latest (develop branch): |develop|

.. |stable| image:: https://mybinder.org/badge_logo.svg
:target: https://mybinder.org/v2/gh/cartodb/cartoframes/v1.1.0?filepath=examples
:target: https://mybinder.org/v2/gh/cartodb/cartoframes/v1.1.1?filepath=examples

.. |develop| image:: https://mybinder.org/badge_logo.svg
:target: https://mybinder.org/v2/gh/cartodb/cartoframes/develop?filepath=examples
Expand Down
2 changes: 1 addition & 1 deletion binder/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cartoframes==1.1.0
cartoframes==1.1.1
# Additional dependencies from examples
matplotlib
dask
Expand Down
2 changes: 1 addition & 1 deletion cartoframes/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.1.0'
__version__ = '1.1.1'
30 changes: 21 additions & 9 deletions cartoframes/data/services/geocoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def geocode(self, source, street,
if not table_name:
raise ValueError('There is no "table_name" to cache the data')
return self._cached_geocode(source, table_name, street, city=city, state=state, country=country,
dry_run=dry_run)
dry_run=dry_run, status=status)

city, state, country = [
geocoding_utils.column_or_value_arg(arg, self.columns) for arg in [city, state, country]
Expand All @@ -201,14 +201,15 @@ def geocode(self, source, street,

return result

def _cached_geocode(self, source, table_name, street, city, state, country, dry_run):
def _cached_geocode(self, source, table_name, street, city, state, country, status, dry_run):
"""Geocode a dataframe caching results into a table.
If the same dataframe is geocoded repeatedly, no credits will be spent.
But note there is a time overhead related to uploading the dataframe to a
temporary table for checking for changes.
"""
has_cache = has_table(table_name, self._credentials)
cache_columns = []

if has_cache:
cache_source_manager = SourceManager(table_name, self._credentials)
Expand All @@ -218,7 +219,7 @@ def _cached_geocode(self, source, table_name, street, city, state, country, dry_

if geocoding_constants.HASH_COLUMN in self.columns or not has_cache:
return self.geocode(
source, street=street, city=city, state=state,
source, street=street, city=city, state=state, status=status,
country=country, table_name=table_name, dry_run=dry_run, if_exists='replace')

tmp_table_name = self._new_temporary_table_name()
Expand All @@ -227,22 +228,33 @@ def _cached_geocode(self, source, table_name, street, city, state, country, dry_

to_carto(source, tmp_table_name, self._credentials, log_enabled=False)

self._execute_query(
"""
ALTER TABLE {tmp_table} ADD COLUMN IF NOT EXISTS {hash} text
""".format(tmp_table=tmp_table_name, hash=geocoding_constants.HASH_COLUMN))
_, status_columns = geocoding_utils.status_assignment_columns(status)
add_columns = [c for c in status_columns if c[0] in cache_columns]
add_columns += [(geocoding_constants.HASH_COLUMN, 'text')]

log.debug("Adding columns %s if needed", ', '.join([c[0] for c in add_columns]))
alter_sql = "ALTER TABLE {tmp_table} {add_columns};".format(
tmp_table=tmp_table_name,
add_columns=','.join([
'ADD COLUMN IF NOT EXISTS {} {}'.format(name, type) for name, type in add_columns]))
self._execute_query(alter_sql)

hcity, hstate, hcountry = [
geocoding_utils.column_or_value_arg(arg, self.columns) for arg in [city, state, country]
]

hash_expr = geocoding_utils.hash_expr(street, hcity, hstate, hcountry, table_prefix=tmp_table_name)
columns_to_update = [c[0] for c in add_columns]
columns_to_update.append('the_geom')
columns_expr = ','.join(["""{c} = {t}.{c} """.format(t=table_name, c=c) for c in columns_to_update])
self._execute_query(
"""
UPDATE {tmp_table} SET {hash}={table}.{hash}, the_geom={table}.the_geom
UPDATE {tmp_table}
SET {columns_to_update}
FROM {table} WHERE {hash_expr}={table}.{hash}
""".format(
tmp_table=tmp_table_name,
columns_to_update=columns_expr,
table=table_name,
hash=geocoding_constants.HASH_COLUMN,
hash_expr=hash_expr
Expand All @@ -260,7 +272,7 @@ def _cached_geocode(self, source, table_name, street, city, state, country, dry_
# TODO: refactor to share code with geocode() and call self._geocode() here instead
# actually to keep hashing knowledge encapsulated (AFW) this should be handled by
# _geocode using an additional parameter for an input table
gdf, metadata = self.geocode(table_name, street=street, city=city,
gdf, metadata = self.geocode(table_name, street=street, city=city, status=status,
state=state, country=country, dry_run=dry_run)
return self.result(data=gdf, metadata=metadata)

Expand Down
3 changes: 3 additions & 0 deletions docs/RELEASING.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ Then, a docs branch must be created. This is used by the Developer Center, and i
```
git checkout -b docs/vM.m.u
git push origin docs/vM.m.u
git checkout -b docs-vM.m.u
git push origin docs-vM.m.u
```

## The developer center "party" (production)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"\n",
"In this notebook, we will build a dashboard combining different data from CARTO's Data Observatory to help identify the locations with specific characteristics described below.\n",
"\n",
"**Note** this use case leverages premium datasets from [CARTO Data Observatory](https://carto.com/spatial-data-catalog/).\n",
"**Note:** This use case leverages premium datasets from [CARTO's Data Observatory](https://carto.com/spatial-data-catalog/).\n",
"\n",
"\n",
"### Use case description\n",
Expand Down Expand Up @@ -140,7 +140,7 @@
"source": [
"We can get the pharmacies from [Pitney Bowes' Consumer Points of Interest](https://carto.com/spatial-data-catalog/browser/dataset/pb_consumer_po_62cddc04/) dataset. This is a premium dataset, so we first need to check that we are subscribed to it.\n",
"\n",
"Take a look at <a href='#example-access-premium-data' target='_blank'>this template</a> for more details on how to access and download a premium dataset."
"Take a look at <a href='#example-access-premium-data-from-the-data-observatory' target='_blank'>this template</a> for more details on how to access and download a premium dataset."
]
},
{
Expand Down Expand Up @@ -10900,7 +10900,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.7.3"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"id": "IkfOIflwHsJN"
},
"source": [
"## Combining two datasets. Using geosocial media data to understand retail performance\n",
"## Combining geosocial and financial data to understand retail performance\n",
"\n",
"Geosocial data is location-based social media data that can be interpreted and analyzed as part of any location-oriented business decision. \n",
"\n",
Expand Down Expand Up @@ -105,7 +105,7 @@
"\n",
"In this section, we'll download the two datasets we're interested in and combine them into a single dataframe.\n",
"\n",
"For more information on how to access Data Observatory datasets using CARTOframes visit the [Guides](https://carto.com/developers/cartoframes/guides/) or take a look at the <a href='https://carto.com/developers/cartoframes/examples/#example-access-premium-data' target='_blank'>Access Premium Data</a> template."
"For more information on how to access Data Observatory datasets using CARTOframes visit the [Guides](https://carto.com/developers/cartoframes/guides/) or take a look at the <a href='https://carto.com/developers/cartoframes/examples/#example-access-premium-data-from-the-data-observatory' target='_blank'>Access Premium Data</a> template."
]
},
{
Expand Down
Loading

0 comments on commit 5f49363

Please sign in to comment.