Skip to content

Commit

Permalink
Merge 3f9d979 into 4806f04
Browse files Browse the repository at this point in the history
  • Loading branch information
jgoizueta committed Oct 10, 2019
2 parents 4806f04 + 3f9d979 commit 3abbb0f
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 38 deletions.
27 changes: 24 additions & 3 deletions cartoframes/data/dataset/registry/dataframe_dataset.py
Expand Up @@ -53,6 +53,8 @@ def download(self, limit, decode_geom, retry_times):
def upload(self, if_exists, with_lnglat):
self._is_ready_for_upload_validation()

self._rename_index_for_upload()

dataframe_columns_info = DataframeColumnsInfo(self._df, with_lnglat)

if if_exists == BaseDataset.IF_EXISTS_REPLACE or not self.exists():
Expand Down Expand Up @@ -120,15 +122,34 @@ def _get_geom_type(self):
if geometry and geometry.geom_type:
return map_geom_type(geometry.geom_type)

def _rename_index_for_upload(self):
if self._df.index.name != 'cartodb_id':
if 'cartodb_id' not in self._df:
if _is_valid_index_for_cartodb_id(self._df.index):
# rename a integer unnamed index to cartodb_id
self._df.index.rename('cartodb_id', inplace=True)
else:
if self._df.index.name is None:
# replace an unnamed index by a cartodb_id column
self._df.set_index('cartodb_id')


def _is_valid_index_for_cartodb_id(index):
return index.name is None and index.nlevels == 1 and index.dtype == 'int' and index.is_unique


def _rows(df, dataframe_columns_info, with_lnglat):
for i, row in df.iterrows():
row_data = []
for c in dataframe_columns_info.columns:
col = c.dataframe
if col not in df.columns: # we could have filtered columns in the df. See DataframeColumnsInfo
continue
val = row[col]
if col not in df.columns:
if col == df.index.name:
val = i
else: # we could have filtered columns in the df. See DataframeColumnsInfo
continue
else:
val = row[col]

if _is_null(val):
val = ''
Expand Down
6 changes: 3 additions & 3 deletions cartoframes/data/services/isolines.py
Expand Up @@ -136,7 +136,6 @@ def _iso_areas(self,
return self.result(data=None, metadata=metadata)

source_columns = source.get_column_names()
source_has_id = 'cartodb_id' in source_columns

temporary_table_name = False

Expand All @@ -149,6 +148,9 @@ def _iso_areas(self,
temporary_table_name = self._new_temporary_table_name()
source.upload(table_name=temporary_table_name, credentials=self._credentials)
source_query = 'SELECT * FROM {table}'.format(table=temporary_table_name)
source_columns = source.get_column_names()

source_has_id = 'cartodb_id' in source_columns

iso_function = '_cdb_{function}_exception_safe'.format(function=function)
# TODO: use **options argument?
Expand Down Expand Up @@ -177,8 +179,6 @@ def _iso_areas(self,
result = result.download()
else:
result = dataset.download()
if not dry_run and not source_has_id:
remove_column_from_dataframe(result, 'cartodb_id')
if input_dataframe is None:
result = Dataset(result, credentials=self._credentials)

Expand Down
9 changes: 7 additions & 2 deletions cartoframes/utils/columns.py
Expand Up @@ -137,12 +137,17 @@ def __init__(self, df, with_lnglat=None):
self.columns = self._get_columns_info()

def _get_columns_info(self):
df_columns = [(name, self.df.dtypes[name]) for name in self.df.columns]
if self.df.index.name is not None and self.df.index.name not in self.df:
df_columns.append((self.df.index.name, self.df.index.dtype))

columns = []
for c in self.df.columns:

for c, dtype in df_columns:
if self._filter_column(c):
continue

columns.append(DataframeColumnInfo(c, self.geom_column, self.geom_type, self.df.dtypes[c]))
columns.append(DataframeColumnInfo(c, self.geom_column, self.geom_type, dtype))

if self.with_lnglat:
columns.append(DataframeColumnInfo(None))
Expand Down
32 changes: 16 additions & 16 deletions test/data/dataset/test_dataset.py
Expand Up @@ -734,8 +734,8 @@ def test_dataset_upload_one_geometry_that_is_not_the_geom_uses_the_geom(self):

ds.upload(table_name=table, credentials=credentials)

expected_query = "COPY {}(the_geom) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'SRID=4326;POINT (1 1)\n']
expected_query = "COPY {}(the_geom,cartodb_id) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'SRID=4326;POINT (1 1)|0\n']

self.assertEqual(ds._strategy._context.query, expected_query)
self.assertEqual(list(ds._strategy._context.response), expected_data)
Expand All @@ -750,8 +750,8 @@ def test_dataset_upload_one_geometry_that_is_the_geom_uses_the_geom(self):

ds.upload(table_name=table, credentials=credentials)

expected_query = "COPY {}(the_geom) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'SRID=4326;POINT (1 1)\n']
expected_query = "COPY {}(the_geom,cartodb_id) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'SRID=4326;POINT (1 1)|0\n']

self.assertEqual(ds._strategy._context.query, expected_query)
self.assertEqual(list(ds._strategy._context.response), expected_data)
Expand All @@ -766,8 +766,8 @@ def test_dataset_upload_with_several_geometry_columns_prioritize_the_geom(self):

ds.upload(table_name=table, credentials=credentials)

expected_query = "COPY {}(geom,the_geom,geometry) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'POINT (0 0)|SRID=4326;POINT (1 1)|POINT (2 2)\n']
expected_query = "COPY {}(geom,the_geom,geometry,cartodb_id) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'POINT (0 0)|SRID=4326;POINT (1 1)|POINT (2 2)|0\n']

self.assertEqual(ds._strategy._context.query, expected_query)
self.assertEqual(list(ds._strategy._context.response), expected_data)
Expand Down Expand Up @@ -798,8 +798,8 @@ def test_dataset_upload_with_lng_lat(self):

ds.upload(table_name=table, credentials=credentials, with_lnglat=('lng', 'lat'))

expected_query = "COPY {}(lng,lat,the_geom) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'1|1|SRID=4326;POINT (1 1)\n']
expected_query = "COPY {}(lng,lat,cartodb_id,the_geom) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'1|1|0|SRID=4326;POINT (1 1)\n']

self.assertEqual(ds._strategy._context.query, expected_query)
self.assertEqual(list(ds._strategy._context.response), expected_data)
Expand All @@ -814,9 +814,9 @@ def test_dataset_upload_prioritizing_with_lng_lat_over_the_geom(self):

ds.upload(table_name=table, credentials=credentials, with_lnglat=('lng', 'lat'))

expected_query = "COPY {}(lng,lat,the_geom) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(
expected_query = "COPY {}(lng,lat,cartodb_id,the_geom) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(
table)
expected_data = [b'1|1|SRID=4326;POINT (1 1)\n']
expected_data = [b'1|1|0|SRID=4326;POINT (1 1)\n']

self.assertEqual(ds._strategy._context.query, expected_query)
self.assertEqual(list(ds._strategy._context.response), expected_data)
Expand All @@ -831,9 +831,9 @@ def test_dataset_upload_prioritizing_with_lng_lat_over_other_geom_names(self):

ds.upload(table_name=table, credentials=credentials, with_lnglat=('lng', 'lat'))

expected_query = "COPY {}(lng,lat,the_geom) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(
expected_query = "COPY {}(lng,lat,cartodb_id,the_geom) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(
table)
expected_data = [b'1|1|SRID=4326;POINT (1 1)\n']
expected_data = [b'1|1|0|SRID=4326;POINT (1 1)\n']

self.assertEqual(ds._strategy._context.query, expected_query)
self.assertEqual(list(ds._strategy._context.response), expected_data)
Expand All @@ -848,8 +848,8 @@ def test_dataset_upload_without_geom(self):

ds.upload(table_name=table, credentials=credentials)

expected_query = "COPY {}(col1,col2,col3) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'1|True|text\n']
expected_query = "COPY {}(col1,col2,col3,cartodb_id) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'1|True|text|0\n']

self.assertEqual(ds._strategy._context.query, expected_query)
self.assertEqual(list(ds._strategy._context.response), expected_data)
Expand All @@ -864,8 +864,8 @@ def test_dataset_upload_null_values(self):

ds.upload(table_name=table, credentials=credentials)

expected_query = "COPY {}(test) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'\n', b'\n']
expected_query = "COPY {}(test,cartodb_id) FROM stdin WITH (FORMAT csv, DELIMITER '|');".format(table)
expected_data = [b'|0\n', b'|1\n']

self.assertEqual(ds._strategy._context.query, expected_query)
self.assertEqual(list(ds._strategy._context.response), expected_data)
Expand Down
33 changes: 19 additions & 14 deletions test/data/services/test_isolines.py
Expand Up @@ -136,10 +136,12 @@ def test_isochrones_from_dataframe_dataset(self):
self.assertTrue('the_geom' in result_columns)
self.assertTrue('data_range' in result_columns)
self.assertEqual(result.get_num_rows(), 6)
self.assertFalse('cartodb_id' in result_columns)
self.assertFalse('cartodb_id' in result.dataframe)
self.assertFalse('source_id' in result_columns)
self.assertFalse('source_id' in result.dataframe)
self.assertTrue('cartodb_id' in result_columns)
self.assertTrue('cartodb_id' in result.dataframe)
self.assertTrue('source_id' in result_columns)
self.assertTrue('source_id' in result.dataframe)
self.assertEqual(result.dataframe['source_id'].min(), df.index.min())
self.assertEqual(result.dataframe['source_id'].max(), df.index.max())

def test_isochrones_from_dataframe_dataset_as_new_table(self):
self.skip(if_no_credits=True, if_no_credentials=True)
Expand Down Expand Up @@ -167,7 +169,7 @@ def test_isochrones_from_dataframe_dataset_as_new_table(self):
self.assertTrue('the_geom' in result_columns)
self.assertTrue('data_range' in result_columns)
self.assertEqual(result.get_num_rows(), 6)
self.assertFalse('source_id' in result_columns)
self.assertTrue('source_id' in result_columns)

def test_isochrones_from_dataframe(self):
self.skip(if_no_credits=True, if_no_credentials=True)
Expand All @@ -190,8 +192,11 @@ def test_isochrones_from_dataframe(self):
self.assertTrue('the_geom' in result)
self.assertTrue('data_range' in result)
self.assertEqual(len(result.index), 6)
self.assertFalse('cartodb_id' in result)
self.assertFalse('source_id' in result)
result_columns = Dataset(result).get_column_names()
self.assertTrue('cartodb_id' in result_columns)
self.assertTrue('source_id' in result_columns)
self.assertEqual(result['source_id'].min(), df.index.min())
self.assertEqual(result['source_id'].max(), df.index.max())

def test_isochrones_from_dataframe_as_new_table(self):
self.skip(if_no_credits=True, if_no_credentials=True)
Expand Down Expand Up @@ -222,7 +227,7 @@ def test_isochrones_from_dataframe_as_new_table(self):
self.assertTrue('the_geom' in result_columns)
self.assertTrue('data_range' in result_columns)
self.assertEqual(ds.get_num_rows(), 6)
self.assertFalse('source_id' in result_columns)
self.assertTrue('source_id' in result_columns)

def test_isochrones_from_table_dataset(self):
self.skip(if_no_credits=True, if_no_credentials=True)
Expand Down Expand Up @@ -309,8 +314,8 @@ def test_isochrones_from_query_dataset(self):
self.assertTrue('the_geom' in result_columns)
self.assertTrue('data_range' in result_columns)
self.assertEqual(result.get_num_rows(), 6)
self.assertFalse('cartodb_id' in result_columns)
self.assertFalse('cartodb_id' in result.dataframe)
self.assertTrue('cartodb_id' in result_columns)
self.assertTrue('cartodb_id' in result.dataframe)
self.assertFalse('source_id' in result_columns)
self.assertFalse('source_id' in result.dataframe)

Expand Down Expand Up @@ -388,7 +393,7 @@ def test_isochrones_from_dataframe_dataset_with_isoline_options(self):
self.assertTrue('the_geom' in result_columns)
self.assertTrue('data_range' in result_columns)
self.assertEqual(result.get_num_rows(), 6)
self.assertFalse('cartodb_id' in result_columns)
self.assertFalse('cartodb_id' in result.dataframe)
self.assertFalse('source_id' in result_columns)
self.assertFalse('source_id' in result.dataframe)
self.assertTrue('cartodb_id' in result_columns)
self.assertTrue('cartodb_id' in result.dataframe)
self.assertTrue('source_id' in result_columns)
self.assertTrue('source_id' in result.dataframe)

0 comments on commit 3abbb0f

Please sign in to comment.