Skip to content

Commit

Permalink
Merge 0317005 into 19f8fa3
Browse files Browse the repository at this point in the history
  • Loading branch information
michellemho committed Sep 20, 2017
2 parents 19f8fa3 + 0317005 commit b25293d
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 15 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
language: python
env:
- MPLBACKEND='agg'
-
- GEOPANDAS='True'; MPLBACKEND='agg'
python:
- '2.7'
- '3.4'
Expand All @@ -11,6 +11,7 @@ install:
- pip install . # install package
- pip install -r requirements.txt # install requires
- if [[ -n $MPLBACKEND ]]; then pip install matplotlib; fi
- if [[ -n $GEOPANDAS ]]; then pip install geopandas; fi
- pip install shapely coveralls # only for testing
script:
- nosetests --verbose --with-coverage --cover-package=cartoframes
Expand Down
53 changes: 43 additions & 10 deletions cartoframes/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@
mpi = None
plt = None
HAS_MATPLOTLIB = plt is not None
try:
import geopandas
from shapely.geometry import Point
except ImportError:
HAS_GEOPANDAS = False
else:
HAS_GEOPANDAS = True

# Choose a constant that avoids overview generation, which is triggered at
# half a million rows
Expand Down Expand Up @@ -115,6 +122,11 @@ def read(self, table_name, limit=None, index='cartodb_id',
limit (int, optional): Read only ``limit`` lines from
``table_name``. Defaults to `None`, which reads the full table.
index (str, optional): Not currently in use.
decode_geom (bool, optional): Defaults to `False`, which reads the
table into a pandas DataFrame as is. If `True`, reads table into
a pandas DataFrame with wkb geometries found in column
`the_geom` decoded as shapely geometries in new column named
`geometry`.
Returns:
pandas.DataFrame: DataFrame representation of `table_name` from
Expand Down Expand Up @@ -152,21 +164,29 @@ def write(self, df, table_name, temp_dir='/tmp', overwrite=False,
CARTO's `Import API
<https://carto.com/docs/carto-engine/import-api/standard-tables>`__
for more information.
encode_geom (bool, optional): Whether to write `geom_col` to CARTO
as `the_geom`.
encode_geom (bool, optional): Whether to encode `geom_col` as
hex-encoded wkb and write it to CARTO as a new column named
`the_geom`.
geom_col (str, optional): The name of the column where geometry
information is stored. Used in conjunction with `encode_geom`.
Returns:
None
"""
if encode_geom:
_add_encoded_geom(df, geom_col)

pgcolnames = normalize_colnames(df.columns)

if not overwrite:
# error if table exists and user does not want to overwrite
self._table_exists(table_name)
pgcolnames = normalize_colnames(df.columns)

if encode_geom:
if not HAS_GEOPANDAS:
raise RuntimeError('geopandas and shapely needs to be installed to use this option')
geom_col = _add_encoded_geom(df, geom_col)
pgcolnames.append('the_geom')
pgcolnames.remove(geom_col)

if df.shape[0] > MAX_IMPORT_ROWS:
# NOTE: schema is set using different method than in _set_schema
final_table_name = self._send_batches(df, table_name, temp_dir,
Expand All @@ -192,10 +212,13 @@ def write(self, df, table_name, temp_dir='/tmp', overwrite=False,
lat=lnglat[1]))

tqdm.write('Table successfully written to CARTO: '
'{base_url}dataset/{table_name}'.format(
'{base_url}/dataset/{table_name}'.format(
base_url=self.creds.base_url(),
table_name=final_table_name))

# If geometries were encoded, drop the_geom column in dataframe
df.drop('the_geom', axis=1, errors='ignore', inplace=True)

def delete(self, table_name):
"""Delete a table in user's CARTO account.
Expand Down Expand Up @@ -322,6 +345,7 @@ def _send_dataframe(self, df, table_name, temp_dir, geom_col, pgcolnames):
temp_dir (str): Name of directory used for temporarily storing the
DataFrame file to sent to CARTO
geom_col (str): Name of geometry column
pgcolnames (list of str): List of SQL-normalized column names
Returns:
final_table_name (str): Name of final table. This method will
Expand Down Expand Up @@ -468,11 +492,14 @@ def query(self, query, table_name=None, decode_geom=False):
table_name (str, optional): If set, this will create a new
table in the user's CARTO account that is the result of the
query. Defaults to None (no table created).
decode_geom (bool, optional): Defaults to `False`, which does not
decode geometries. If set to `True`, this will decode wkb
geometries into shapely geometries.
Returns:
pandas.DataFrame: DataFrame representation of query supplied.
Pandas data types are inferred from PostgreSQL data types.
In the case of PostgreSQL date types, the data type 'object' is
used.
In the case of invalid PostgreSQL date types, the data type 'object'
is used.
"""
self._debug_print(query=query)
if table_name:
Expand Down Expand Up @@ -993,7 +1020,7 @@ def _add_encoded_geom(df, geom_col):
raise KeyError('Geometries were requested to be encoded '
'but a geometry column was not found in the '
'DataFrame.'.format(geom_col=geom_col))
elif is_geopandas and geom_col:
elif is_geopandas and geom_col and (is_geopandas != geom_col):
warn('Geometry column of the input DataFrame does not '
'match the geometry column supplied. Using user-supplied '
'column...\n'
Expand All @@ -1002,9 +1029,15 @@ def _add_encoded_geom(df, geom_col):
geom_col))
elif is_geopandas and geom_col is None:
geom_col = is_geopandas

# check that geodataframe CRS is 4326
if is_geopandas and df.crs != {'init':'epsg:4326'}:
warn('Geodataframe does not have an assigned coordinate system. '
'Assign WGS84 latitude-longitude projection to '
'your geodataframe `crs` to ensure proper projection')
# updates in place
df['the_geom'] = df[geom_col].apply(_encode_geom)
return None
return geom_col


def _encode_decode_decorator(func):
Expand Down
132 changes: 128 additions & 4 deletions test/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def setUp(self):
'table_{ver}_{mpl}'.format(
ver=pyver,
mpl=has_mpl))

self.test_delete_table = 'cartoframes_test_delete_table_{ver}_{mpl}'.format(
ver=pyver,
mpl=has_mpl)
Expand All @@ -79,13 +80,16 @@ def tearDown(self):
"""restore to original state"""
tables = (self.test_write_table,
self.test_write_batch_table,
self.test_query_table)
self.test_query_table,
self.test_delete_table)

if self.apikey and self.baseurl:
cc = cartoframes.CartoContext(base_url=self.baseurl,
api_key=self.apikey)
for table in tables:
cc.delete(table)
self.sql_client.send('''
DROP TABLE IF EXISTS "{}"
'''.format(table))
# TODO: remove the named map templates

def add_map_template(self):
Expand Down Expand Up @@ -173,6 +177,7 @@ def test_cartocontext_write(self):
'lat': float,
'long': float}
df = pd.DataFrame(data).astype(schema)

cc.write(df, self.test_write_table)

# check if table exists
Expand All @@ -182,7 +187,6 @@ def test_cartocontext_write(self):
LIMIT 0
'''.format(table=self.test_write_table))
self.assertTrue(resp is not None)

# check that table has same number of rows
resp = self.sql_client.send('''
SELECT count(*)
Expand All @@ -197,11 +201,11 @@ def test_cartocontext_write(self):
cc.write(df, self.test_write_table,
overwrite=True,
lnglat=('long', 'lat'))

resp = self.sql_client.send('''
SELECT count(*) AS num_rows, count(the_geom) AS num_geoms
FROM {table}
'''.format(table=self.test_write_table))

# number of geoms should equal number of rows
self.assertEqual(resp['rows'][0]['num_rows'],
resp['rows'][0]['num_geoms'])
Expand Down Expand Up @@ -654,3 +658,123 @@ def test_pg2dtypes(self):
for i in results:
result = _pg2dtypes(i)
self.assertEqual (result, results[i])

def test_cartocontext_write_geopandas(self):
"""CartoContext.write__with__geopandas

With geopandas and shapely importable, writes a GeoDataFrame using
``encode_geom`` in several configurations and checks the resulting
``the_geom`` column, the warnings emitted, and the errors raised for
invalid geometry columns. Otherwise, checks that ``encode_geom=True``
raises a RuntimeError.
"""
# geopandas and shapely are optional; detect them here so the test can
# branch on their availability
try:
import geopandas as gpd
from shapely.geometry import Point
except ImportError:
HAS_GEOPANDAS = False
else:
HAS_GEOPANDAS = True

cc = cartoframes.CartoContext(base_url=self.baseurl,
api_key=self.apikey)
# 100 rows; row i has lat = 0.01 * i and long = -0.01 * i
data = {'nums': list(range(100, 0, -1)),
'category': [random.choice('abcdefghijklmnop')
for _ in range(100)],
'lat': [0.01 * i for i in range(100)],
'long': [-0.01 * i for i in range(100)]}
schema = {'nums': int,
'category': 'object',
'lat': float,
'long': float}
df = pd.DataFrame(data).astype(schema)

if HAS_GEOPANDAS:
# Create a geodataframe whose geometry column is 'lat_long'
geometry = [Point(xy) for xy in zip(df.long, df.lat)]
df['lat_long'] = geometry
geo_df = gpd.GeoDataFrame(df, geometry='lat_long',crs={'init':'epsg:4326'})

# try writing geodataframe with encoding and geom_col specified
cc.write(geo_df, self.test_write_table, overwrite=True,
encode_geom=True, geom_col='lat_long')
resp = self.sql_client.send('''
SELECT count(*) AS num_rows, count(the_geom) AS num_geoms
FROM {table}
'''.format(table=self.test_write_table))

# number of geoms should equal number of rows
self.assertEqual(resp['rows'][0]['num_rows'],
resp['rows'][0]['num_geoms'])

# try writing geodataframe without encoding
cc.write(geo_df, self.test_write_table, overwrite=True)
resp = self.sql_client.send('''
SELECT count(*) AS num_rows, count(the_geom) AS num_geoms
FROM {table}
'''.format(table=self.test_write_table))
# number of geoms should be zero
self.assertEqual(resp['rows'][0]['num_geoms'], 0)

# test writing geodataframe with multiple geometry columns, specifying
# geom_col different from geometry of geodataframe
null_islands = [0 for i in range(100)]
null_island_points = [Point(xy) for xy in zip(null_islands, null_islands)]
geo_df['null_islands'] = null_island_points
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
# Trigger warning
cc.write(geo_df, self.test_write_table, overwrite=True,
encode_geom=True, geom_col='null_islands')
# expect exactly one UserWarning, mentioning the user-supplied column
assert len(w) == 1
assert issubclass(w[-1].category, UserWarning)
assert "user-supplied" in str(w[-1].message)

# test writing geodataframe with multiple geometry columns, without
# specifying geom_col
cc.write(geo_df, self.test_write_table, overwrite=True,
encode_geom=True)
# private geopandas attribute naming the GeoDataFrame's geometry column
is_geopandas = getattr(geo_df, '_geometry_column_name', None)
# NOTE(review): the query has no ORDER BY, so comparing against
# geo_df.iloc[0] assumes the first returned row is the first DataFrame
# row -- confirm
resp = self.sql_client.send('''
SELECT the_geom
FROM {table}
LIMIT 1
'''.format(table=self.test_write_table))
self.assertEqual(cartoframes.context._decode_geom(resp['rows'][0]['the_geom']),
(geo_df.iloc[0][is_geopandas]))

# test encoding geometry AND specifying lnglat pair
# lnglat pair will override encoded geometry as "the_geom" in CARTO
# NOTE(review): at row 0 both the long/lat point and the null_islands
# point are at the origin, so this assertion may not distinguish which
# geometry was actually written -- confirm intent
cc.write(geo_df, self.test_write_table, overwrite=True,
lnglat=('long', 'lat'), encode_geom=True, geom_col='null_islands')
is_geopandas = getattr(geo_df, '_geometry_column_name', None)
resp = self.sql_client.send('''
SELECT the_geom
FROM {table}
LIMIT 1
'''.format(table=self.test_write_table))
self.assertEqual(cartoframes.context._decode_geom(resp['rows'][0]['the_geom']),
(geo_df.iloc[0][is_geopandas]))

# try encoding geometries without a geometry column or geodataframe
with self.assertRaisesRegexp(KeyError, 'Geometries were requested'):
cc.write(df, self.test_write_table, overwrite=True,
encode_geom=True)

# try writing encoded geometries with a non-geometry 'geometry' column
# NOTE(review): the column assignment is inside the assertRaises block;
# presumably only cc.write is expected to raise -- confirm
with self.assertRaises(AttributeError):
df['geometry'] = df['nums']
cc.write(df, self.test_write_table, overwrite=True,
encode_geom=True)

# test writing geodataframe with different coordinate reference system
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world_mercator = world.to_crs({'init': 'epsg:3395'})

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
# Trigger warning
cc.write(world_mercator, self.test_write_table, overwrite=True,
encode_geom=True)
# expect exactly one UserWarning, mentioning the projection
assert len(w) == 1
assert issubclass(w[-1].category, UserWarning)
assert "projection" in str(w[-1].message)

# try to encode_geom when geopandas/shapely could not be imported
elif not HAS_GEOPANDAS:
with self.assertRaisesRegexp(RuntimeError, 'geopandas and shapely'):
cc.write(df, self.test_write_table, overwrite=True,
encode_geom=True)

0 comments on commit b25293d

Please sign in to comment.