Skip to content

Commit

Permalink
Merge 6035330 into 3df5419
Browse files — browse the repository at this point in the history
  • Loading branch information
Jesus89 authored Jun 28, 2019
2 parents 3df5419 + 6035330 commit 484f616
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 60 deletions.
4 changes: 2 additions & 2 deletions cartoframes/auth/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from ..__version__ import __version__
from ..columns import dtypes, date_columns_names, bool_columns_names
from ..data import Dataset
from ..data.utils import decode_geometry, recursive_read, get_columns
from ..data.utils import decode_geometry, ENC_WKB_HEX, recursive_read, get_columns

if sys.version_info >= (3, 0):
from urllib.parse import urlparse, urlencode
Expand Down Expand Up @@ -521,7 +521,7 @@ def fetch(self, query, decode_geom=False):
date_column_names = date_columns_names(query_columns)
bool_column_names = bool_columns_names(query_columns)

converters = {'the_geom': lambda x: decode_geometry(x) if decode_geom else x}
converters = {'the_geom': lambda x: decode_geometry(x, ENC_WKB_HEX) if decode_geom else x}
for bool_column_name in bool_column_names:
converters[bool_column_name] = lambda x: _convert_bool(x)

Expand Down
16 changes: 10 additions & 6 deletions cartoframes/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

from carto.exceptions import CartoException

from .utils import decode_geometry, compute_query, compute_geodataframe, get_columns, DEFAULT_RETRY_TIMES, \
get_public_context
from .utils import decode_geometry, detect_encoding_type, compute_query, compute_geodataframe, \
get_columns, get_public_context, DEFAULT_RETRY_TIMES
from .dataset_info import DatasetInfo
from ..columns import Column, normalize_names, normalize_name
from ..geojson import load_geojson
Expand Down Expand Up @@ -578,7 +578,8 @@ def _rows(self, df, cols, with_lnglat, geom_col):
csv_row += '{val}|'.format(val=val)

if the_geom_val is not None:
geom = decode_geometry(the_geom_val)
enc_type = detect_encoding_type(the_geom_val)
geom = decode_geometry(the_geom_val, enc_type)
if geom:
csv_row += 'SRID=4326;{geom}'.format(geom=geom.wkt)
if with_lnglat is not None and lng_val is not None and lat_val is not None:
Expand Down Expand Up @@ -806,9 +807,12 @@ def _get_geom_col_name(df):
def _get_geom_col_type(df):
geom_col = _get_geom_col_name(df)
if geom_col is not None:
geom = decode_geometry(_first_value(df[geom_col]))
if geom is not None:
return geom.geom_type
first_geom = _first_value(df[geom_col])
if first_geom:
enc_type = detect_encoding_type(first_geom)
geom = decode_geometry(first_geom, enc_type)
if geom is not None:
return geom.geom_type


def _first_value(array):
Expand Down
92 changes: 70 additions & 22 deletions cartoframes/data/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import time
import binascii as ba
from warnings import warn
Expand Down Expand Up @@ -38,6 +39,14 @@
'long'
]

ENC_SHAPELY = 'shapely'
ENC_WKB = 'wkb'
ENC_WKB_HEX = 'wkb-hex'
ENC_WKB_HEX_ASCII = 'wkb-hex-ascii'
ENC_EWKB_HEX_ASCII = 'ewkb-hex-ascii'
ENC_WKT = 'wkt'
ENC_EWKT = 'ewkt'


def compute_query(dataset):
if dataset.table_name:
Expand Down Expand Up @@ -84,7 +93,17 @@ def _warn_new_geometry_column(df):


def _compute_geometry_from_geom(geom):
return geom.apply(decode_geometry)
first_geom = next(item for item in geom if item is not None)
enc_type = detect_encoding_type(first_geom)
return geom.apply(lambda g: decode_geometry(g, enc_type))


def _first_value(array):
    """Return the first non-null element of ``array``.

    ``array`` is filtered with ``isnull()`` and indexed through ``.loc`` /
    ``.iloc``. When no non-null element remains, a warning is emitted and
    ``None`` is returned.
    """
    non_null = array.loc[~array.isnull()]  # keep only the non-null entries
    if len(non_null) == 0:
        warn('Dataset with null geometries')
        return None
    return non_null.iloc[0]


def _compute_geometry_from_latlng(lat, lng):
Expand All @@ -107,31 +126,60 @@ def wrapper(*args):


@_encode_decode_decorator
def decode_geometry(ewkb):
"""Decode encoded wkb into a shapely geometry"""
# it's already a shapely object
if hasattr(ewkb, 'geom_type'):
return ewkb

def decode_geometry(geom, enc_type):
"""Decode any geometry into a shapely geometry"""
from shapely import wkb
from shapely import wkt
if ewkb:

func = {
ENC_SHAPELY: lambda: geom,
ENC_WKB: lambda: wkb.loads(geom),
ENC_WKB_HEX: lambda: wkb.loads(ba.unhexlify(geom)),
ENC_WKB_HEX_ASCII: lambda: wkb.loads(geom, hex=True),
ENC_EWKB_HEX_ASCII: lambda: wkb.loads(_remove_srid(geom), hex=True),
ENC_WKT: lambda: wkt.loads(geom),
ENC_EWKT: lambda: wkt.loads(_remove_srid(geom))
}.get(enc_type)

return func() if func else geom


def detect_encoding_type(input_geom):
"""
Detect geometry encoding type:
- ENC_WKB: b'\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00H\x93@\x00\x00\x00\x00\x00\x9d\xb6@'
- ENC_WKB_HEX: b'0101000000000000000048934000000000009db640'
- ENC_WKB_HEX_ASCII: '0101000000000000000048934000000000009db640'
- ENC_EWKB_HEX_ASCII: 'SRID=4326;0101000000000000000048934000000000009db640'
- ENC_WKT: 'POINT (1234 5789)'
- ENC_EWKT: 'SRID=4326;POINT (1234 5789)'
"""
from shapely.geometry.base import BaseGeometry

if isinstance(input_geom, BaseGeometry):
return ENC_SHAPELY

if isinstance(input_geom, bytes):
try:
return wkb.loads(ba.unhexlify(ewkb))
ba.unhexlify(input_geom)
return ENC_WKB_HEX
except Exception:
try:
return wkb.loads(ba.unhexlify(ewkb), hex=True)
except Exception:
try:
return wkb.loads(ewkb, hex=True)
except Exception:
try:
return wkb.loads(ewkb)
except Exception:
try:
return wkt.loads(ewkb)
except Exception:
pass
return ENC_WKB

if isinstance(input_geom, str):
result = re.match(r'^SRID=\d+;(.*)$', input_geom)
prefix = 'e' if result else ''
geom = result.group(1) if result else input_geom

if re.match(r'^[0-9a-fA-F]+$', geom):
return prefix + ENC_WKB_HEX_ASCII
elif geom != '':
return prefix + ENC_WKT


def _remove_srid(text):
    """Strip a leading ``SRID=<digits>;`` prefix from a geometry string.

    Args:
        text (str): geometry text, optionally prefixed with ``SRID=<n>;``.

    Returns:
        str: everything after the prefix, or ``text`` unchanged when it does
        not start with such a prefix.
    """
    # DOTALL lets ``.`` span line breaks, so a prefixed geometry whose text is
    # wrapped over several lines is still recognised and stripped.
    match = re.match(r'^SRID=\d+;(.*)$', text, flags=re.DOTALL)
    return match.group(1) if match else text


def recursive_read(context, query, retry_times=DEFAULT_RETRY_TIMES):
Expand Down
66 changes: 65 additions & 1 deletion test/data/test_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from geopandas.geoseries import GeoSeries

from cartoframes.data import Dataset
from cartoframes.data.utils import compute_query, compute_geodataframe
from cartoframes.data.utils import compute_query, compute_geodataframe, \
decode_geometry, detect_encoding_type

from mocks.context_mock import ContextMock

Expand Down Expand Up @@ -116,3 +117,66 @@ def test_compute_geodataframe_only_longitude(self):
ds = Dataset.from_dataframe(pd.DataFrame({'longitude': self.lng}))
with self.assertRaises(ValueError, msg=self.msg):
compute_geodataframe(ds)

def test_detect_encoding_type_shapely(self):
    """A shapely geometry object is reported as the 'shapely' encoding."""
    shapely_point = Point(1234, 5789)
    self.assertEqual(detect_encoding_type(shapely_point), 'shapely')

def test_detect_encoding_type_wkb(self):
    """Raw binary WKB bytes are reported as the 'wkb' encoding."""
    raw_wkb = b'\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00H\x93@\x00\x00\x00\x00\x00\x9d\xb6@'
    self.assertEqual(detect_encoding_type(raw_wkb), 'wkb')

def test_detect_encoding_type_wkb_hex(self):
    """Hex-encoded WKB given as bytes is reported as the 'wkb-hex' encoding."""
    hex_bytes = b'0101000000000000000048934000000000009db640'
    self.assertEqual(detect_encoding_type(hex_bytes), 'wkb-hex')

def test_detect_encoding_type_wkb_hex_ascii(self):
    """Hex-encoded WKB given as text is reported as 'wkb-hex-ascii'."""
    hex_text = '0101000000000000000048934000000000009db640'
    self.assertEqual(detect_encoding_type(hex_text), 'wkb-hex-ascii')

def test_detect_encoding_type_ewkb_hex_ascii(self):
    """SRID-prefixed hex WKB text is reported as 'ewkb-hex-ascii'."""
    prefixed_hex = 'SRID=4326;0101000000000000000048934000000000009db640'
    self.assertEqual(detect_encoding_type(prefixed_hex), 'ewkb-hex-ascii')

def test_detect_encoding_type_wkt(self):
    """Plain WKT text is reported as the 'wkt' encoding."""
    wkt_text = 'POINT (1234 5789)'
    self.assertEqual(detect_encoding_type(wkt_text), 'wkt')

def test_detect_encoding_type_ewkt(self):
    """SRID-prefixed WKT text is reported as the 'ewkt' encoding."""
    ewkt_text = 'SRID=4326;POINT (1234 5789)'
    self.assertEqual(detect_encoding_type(ewkt_text), 'ewkt')

def test_decode_geometry_shapely(self):
    """Decoding a shapely point as 'shapely' gives an equal-looking point."""
    decoded = decode_geometry(Point(1234, 5789), 'shapely')
    self.assertEqual(str(decoded), str(Point(1234, 5789)))

def test_decode_geometry_wkb(self):
    """Decoding raw WKB bytes as 'wkb' gives the point (1234, 5789)."""
    raw_wkb = b'\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00H\x93@\x00\x00\x00\x00\x00\x9d\xb6@'
    decoded = decode_geometry(raw_wkb, 'wkb')
    self.assertEqual(str(decoded), str(Point(1234, 5789)))

def test_decode_geometry_wkb_hex(self):
    """Decoding hex WKB bytes as 'wkb-hex' gives the point (1234, 5789)."""
    hex_bytes = b'0101000000000000000048934000000000009db640'
    decoded = decode_geometry(hex_bytes, 'wkb-hex')
    self.assertEqual(str(decoded), str(Point(1234, 5789)))

def test_decode_geometry_wkb_hex_ascii(self):
    """Decoding hex WKB text as 'wkb-hex-ascii' gives the point (1234, 5789)."""
    hex_text = '0101000000000000000048934000000000009db640'
    decoded = decode_geometry(hex_text, 'wkb-hex-ascii')
    self.assertEqual(str(decoded), str(Point(1234, 5789)))

def test_decode_geometry_ewkb_hex_ascii(self):
    """Decoding SRID-prefixed hex WKB text gives the point (1234, 5789)."""
    prefixed_hex = 'SRID=4326;0101000000000000000048934000000000009db640'
    decoded = decode_geometry(prefixed_hex, 'ewkb-hex-ascii')
    self.assertEqual(str(decoded), str(Point(1234, 5789)))

def test_decode_geometry_wkt(self):
    """Decoding WKT text as 'wkt' gives the point (1234, 5789)."""
    decoded = decode_geometry('POINT (1234 5789)', 'wkt')
    self.assertEqual(str(decoded), str(Point(1234, 5789)))

def test_decode_geometry_ewkt(self):
    """Decoding SRID-prefixed WKT text as 'ewkt' gives the point (1234, 5789)."""
    decoded = decode_geometry('SRID=4326;POINT (1234 5789)', 'ewkt')
    self.assertEqual(str(decoded), str(Point(1234, 5789)))
9 changes: 1 addition & 8 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from cartoframes.auth import Context
from cartoframes.data import Dataset
from cartoframes.data.utils import decode_geometry, setting_value_exception
from cartoframes.data.utils import setting_value_exception
from cartoframes.columns import normalize_name
from cartoframes.geojson import load_geojson
from mocks.dataset_mock import DatasetMock
Expand Down Expand Up @@ -496,13 +496,6 @@ def get_default_schema(self):
dataset = Dataset.from_table(table_name='fake_table', context=FakeContext())
self.assertEqual(dataset._schema, username)

def test_decode_geometry(self):
# Point (0, 0) without SRID
ewkb = '010100000000000000000000000000000000000000'
decoded_geom = decode_geometry(ewkb)
self.assertEqual(decoded_geom.wkt, 'POINT (0 0)')
self.assertIsNone(decode_geometry(None))

# FIXME does not work in python 2.7 (COPY stucks and blocks the table, fix after
# https://github.com/CartoDB/CartoDB-SQL-API/issues/579 is fixed)
# @unittest.skipIf(WILL_SKIP, 'no carto credentials, skipping this test')
Expand Down
42 changes: 21 additions & 21 deletions test/viz/test_widget_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ def test_widget_list_init_with_a_list_of_widgets(self):
"""WidgetList should be properly initialized"""

widget_list = WidgetList([
Widget(widget_a),
Widget(widget_b)
Widget(widget_a),
Widget(widget_b)
])

self.assertTrue(isinstance(widget_list.widgets[0], Widget))
Expand All @@ -74,8 +74,8 @@ def test_widget_list_init_with_a_list_of_widgets(self):
def test_widget_list_variables(self):
"""Widget List should return a proper variables object"""
widget_list = WidgetList([
Widget(widget_a),
Widget(widget_b)
Widget(widget_a),
Widget(widget_b)
])

variables = widget_list.get_variables()
Expand All @@ -94,24 +94,24 @@ def test_widget_list_get_widgets_info(self):
"""Widget List should return a proper widgets info object"""

widget_list = WidgetList([
Widget(widget_a),
Widget(widget_b)
Widget(widget_a),
Widget(widget_b)
])

widgets_info = widget_list.get_widgets_info()
self.assertEqual(widgets_info, [
{
'type': 'formula',
'name': 'vb6dbcf',
'value': 'viewportSum($amount)',
'title': '[TITLE]',
'description': '[description]',
'footer': '[footer]'
}, {
'type': 'default',
'name': 'v76441e',
'value': '"Custom Info"',
'title': '',
'description': '',
'footer': ''
}])
{
'type': 'formula',
'name': 'vb6dbcf',
'value': 'viewportSum($amount)',
'title': '[TITLE]',
'description': '[description]',
'footer': '[footer]'
}, {
'type': 'default',
'name': 'v76441e',
'value': '"Custom Info"',
'title': '',
'description': '',
'footer': ''
}])

0 comments on commit 484f616

Please sign in to comment.