Skip to content

Commit

Permalink
Merge pull request #766 from CartoDB/hotfix/0.10.1
Browse files Browse the repository at this point in the history
Hotfix 0.10.1
  • Loading branch information
Andy Eschbacher committed Jun 12, 2019
2 parents 42f4121 + 0beb9ee commit d57134f
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 16 deletions.
11 changes: 11 additions & 0 deletions NEWS.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
Changelog
=========

0.10.1
------

Release 2019-06-12

Updates

- Fix schema not always properly set in write operations (#734)
- Fix error in Dataset.upload related to array data (#754)
- Fix Dataset.download error when reading boolean column with nulls (#732)

0.10.0
------

Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ CARTOframes
.. image:: https://coveralls.io/repos/github/CartoDB/cartoframes/badge.svg?branch=master
:target: https://coveralls.io/github/CartoDB/cartoframes?branch=master
.. image:: https://mybinder.org/badge_logo.svg
:target: https://mybinder.org/v2/gh/cartodb/cartoframes/v0.9.2?filepath=examples
:target: https://mybinder.org/v2/gh/cartodb/cartoframes/v0.10.1?filepath=examples

A Python package for integrating `CARTO <https://carto.com/>`__ maps, analysis, and data services into data science workflows.

Expand Down Expand Up @@ -86,7 +86,7 @@ Then create a new notebook and try the example code snippets below with tables t
Using `pipenv`
^^^^^^^^^^^^^^

Alternatively, `pipenv <https://pipenv.readthedocs.io/en/latest/>`__ provides an easy way to manage virtual environments. The steps below are:
Alternatively, `pipenv <https://pipenv.readthedocs.io/en/latest/>`__ provides an easy way to manage virtual environments. The steps below are:

1. Create a virtual environment with Python 3.4+ (recommended instead of Python 2.7)
2. Install cartoframes and Jupyter (optional) into the virtual environment
Expand Down
2 changes: 1 addition & 1 deletion cartoframes/__version__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""cartoframes version number"""
__title__ = 'cartoframes'
__description__ = 'CARTO Python package for data scientists'
__version__ = '0.10.0'
__version__ = '0.10.1'
__url__ = 'https://github.com/CartoDB/cartoframes'
__author__ = 'Andy Eschbacher'
__email__ = 'andy@carto.com'
Expand Down
9 changes: 7 additions & 2 deletions cartoframes/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,21 @@ def normalize_name(column_name):
return normalize_names([column_name])[0]


def dtypes(columns, exclude_dates=False, exclude_the_geom=False, exclude_bools=False):
    """Map column names to pandas dtypes for DataFrame construction.

    `cartodb_id` is always forced to 'int64'. Columns can be filtered out
    of the mapping:
      - exclude_dates: drop datetime columns (they are parsed separately
        via `parse_dates`)
      - exclude_the_geom: drop the geometry column
      - exclude_bools: drop boolean columns (handled with converters so
        NULL values survive as None instead of being coerced)
    """
    return {x.name: x.dtype if not x.name == 'cartodb_id' else 'int64'
            for x in columns if not (exclude_dates is True and x.dtype in Column.DATETIME_DTYPES)
            and not(exclude_the_geom is True and x.name in Column.SUPPORTED_GEOM_COL_NAMES)
            and not(exclude_bools is True and x.dtype == 'bool')}


def date_columns_names(columns):
    """Return the names of all date/datetime columns."""
    return [column.name for column in columns
            if column.dtype in Column.DATETIME_DTYPES]


def bool_columns_names(columns):
    """Return the names of all boolean columns."""
    return [column.name for column in columns if column.dtype == 'bool']


def pg2dtypes(pgtype):
"""Returns equivalent dtype for input `pgtype`."""
mapping = {
Expand Down
22 changes: 19 additions & 3 deletions cartoframes/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
get_map_template, top_basemap_layer_url)
from .analysis import Table
from .__version__ import __version__
from .columns import dtypes, date_columns_names
from .columns import dtypes, date_columns_names, bool_columns_names
from .dataset import Dataset, recursive_read, _decode_geom, get_columns

if sys.version_info >= (3, 0):
Expand Down Expand Up @@ -516,15 +516,20 @@ def fetch(self, query, decode_geom=False):
result = recursive_read(self, copy_query)

query_columns = get_columns(self, query)
df_types = dtypes(query_columns, exclude_dates=True, exclude_the_geom=True)
df_types = dtypes(query_columns, exclude_dates=True, exclude_the_geom=True, exclude_bools=True)
date_column_names = date_columns_names(query_columns)
bool_column_names = bool_columns_names(query_columns)

converters = {'the_geom': lambda x: _decode_geom(x) if decode_geom else x}
for bool_column_name in bool_column_names:
converters[bool_column_name] = lambda x: _convert_bool(x)

df = pd.read_csv(result, dtype=df_types,
parse_dates=date_column_names,
true_values=['t'],
false_values=['f'],
index_col='cartodb_id' if 'cartodb_id' in df_types else False,
converters={'the_geom': lambda x: _decode_geom(x) if decode_geom else x})
converters=converters)

if decode_geom:
df.rename({'the_geom': 'geometry'}, axis='columns', inplace=True)
Expand Down Expand Up @@ -1759,3 +1764,14 @@ def _debug_print(self, **kwargs):
str_value[-50:])
print('{key}: {value}'.format(key=key,
value=str_value))


def _convert_bool(x):
if x:
if x == 't':
return True
if x == 'f':
return False
return bool(x)
else:
return None
26 changes: 18 additions & 8 deletions cartoframes/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,11 @@ def from_geojson(cls, geojson):
def upload(self, with_lnglat=None, if_exists=FAIL, table_name=None, schema=None, context=None):
if table_name:
self.table_name = normalize_name(table_name)
if schema:
self.schema = schema
if context:
self.cc = context
self.schema = context.get_default_schema()
if schema:
self.schema = schema

if self.table_name is None or self.cc is None:
raise ValueError('You should provide a table_name and context to upload data.')
Expand Down Expand Up @@ -175,7 +176,7 @@ def _validate_init(self):

def _cartodbfy_query(self):
return "SELECT CDB_CartodbfyTable('{schema}', '{table_name}')" \
.format(schema=self.schema or self.cc.get_default_schema(), table_name=self.table_name)
.format(schema=self.schema or self._get_schema(), table_name=self.table_name)

def _copyfrom(self, with_lnglat=None):
geom_col = _get_geom_col_name(self.df)
Expand All @@ -197,7 +198,7 @@ def _rows(self, df, cols, with_lnglat, geom_col):
if with_lnglat and col in Column.SUPPORTED_GEOM_COL_NAMES:
continue
val = row[col]
if pd.isnull(val) or val is None:
if self._is_null(val):
val = ''
if with_lnglat:
if col == with_lnglat[0]:
Expand All @@ -219,6 +220,13 @@ def _rows(self, df, cols, with_lnglat, geom_col):
csv_row += '\n'
yield csv_row.encode()

def _is_null(self, val):
vnull = pd.isnull(val)
if isinstance(vnull, bool):
return vnull
else:
return vnull.all()

def _drop_table_query(self, if_exists=True):
return '''DROP TABLE {if_exists} {table_name}'''.format(
table_name=self.table_name,
Expand Down Expand Up @@ -348,8 +356,8 @@ def _map_geom_type(self, geom_type):
def _get_schema(self):
if self.cc:
return self.cc.get_default_schema()
else:
return 'public'

return None


def recursive_read(context, query, retry_times=Dataset.DEFAULT_RETRY_TIMES):
Expand Down Expand Up @@ -380,8 +388,10 @@ def get_query(dataset):


def _default_query(dataset):
if dataset.table_name and dataset.schema:
return 'SELECT * FROM "{0}"."{1}"'.format(dataset.schema, dataset.table_name)
if dataset.table_name:
return 'SELECT * FROM "{schema}"."{table}"'.format(
schema=dataset.schema or dataset._get_schema() or 'public',
table=dataset.table_name)


def _save_index_as_column(df):
Expand Down
34 changes: 34 additions & 0 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import sys
import json
import warnings
import pandas as pd

from carto.exceptions import CartoException

Expand Down Expand Up @@ -265,6 +266,21 @@ def test_dataset_download_and_upload(self):
dataset.download()
dataset.upload(table_name=self.test_write_table, if_exists=Dataset.REPLACE)

def test_dataset_download_bool_null(self):
    """Boolean columns round-trip through upload/download, keeping NULLs."""
    self.assertNotExistsTable(self.test_write_table)

    bool_query = 'SELECT * FROM (values (true, true), (false, false), (false, null)) as x(fakec_bool, fakec_bool_null)'
    source = Dataset.from_query(query=bool_query, context=self.cc)
    source.upload(table_name=self.test_write_table)

    stored = Dataset.from_table(table_name=self.test_write_table, context=self.cc)
    result = stored.download()

    # A bool column without NULLs keeps dtype 'bool'; one with NULLs
    # becomes 'object' so None can be represented.
    self.assertEqual(result['fakec_bool'].dtype, 'bool')
    self.assertEqual(result['fakec_bool_null'].dtype, 'object')
    self.assertEqual(list(result['fakec_bool']), [True, False, False])
    self.assertEqual(list(result['fakec_bool_null']), [True, False, None])

@unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
def test_dataset_write_points_dataset(self):
self.assertNotExistsTable(self.test_write_table)
Expand Down Expand Up @@ -511,3 +527,21 @@ def assertNotExistsTable(self, table_name):
'''.format(table=table_name))
except CartoException as e:
self.assertTrue('relation "{}" does not exist'.format(table_name) in str(e))


class TestDatasetUnit(unittest.TestCase, _UserUrlLoader):
    """Unit tests for cartoframes.Dataset"""

    def test_rows(self):
        """_rows serializes scalar and list values as pipe-terminated CSV."""
        frame = pd.DataFrame.from_dict({'test': [True, [1, 2]]})
        dataset = Dataset.from_dataframe(frame)
        generated = dataset._rows(dataset.df, ['test'], None, '')

        self.assertEqual(list(generated), [b'True|\n', b'[1, 2]|\n'])

    def test_rows_null(self):
        """A null scalar and an all-null list both serialize to empty fields."""
        frame = pd.DataFrame.from_dict({'test': [None, [None, None]]})
        dataset = Dataset.from_dataframe(frame)
        generated = dataset._rows(dataset.df, ['test'], None, '')

        self.assertEqual(list(generated), [b'|\n', b'|\n'])

0 comments on commit d57134f

Please sign in to comment.