Merge 5be6f74 into fa2db45

CartoDB · Oct 8, 2019 · 2a5db6c · 2a5db6c
2 parents fa2db45 + 5be6f74
commit 2a5db6c
Show file tree

Hide file tree

Showing 8 changed files with 570 additions and 4 deletions.
diff --git a/cartoframes/data/clients/bigquery_client.py b/cartoframes/data/clients/bigquery_client.py
@@ -1,5 +1,10 @@
 from __future__ import absolute_import
 
+import os
+import appdirs
+import csv
+import tqdm
+
 from google.cloud import bigquery
 from google.oauth2.credentials import Credentials as GoogleCredentials
 from google.auth.exceptions import RefreshError
@@ -8,6 +13,8 @@
 
 from ...auth import get_default_credentials
 
+# Per-user cartoframes configuration directory (resolved through appdirs);
+# `download_to_file` saves its CSV files here when no `file_path` is given.
+_USER_CONFIG_DIR = appdirs.user_config_dir('cartoframes')
+
 
 def refresh_client(func):
     def wrapper(self, *args, **kwargs):
@@ -52,6 +59,40 @@ def upload_dataframe(self, dataframe, schema, tablename, project, dataset):
 
     @refresh_client
     def query(self, query, **kwargs):
+        """Run `query` on the wrapped BigQuery client.
+
+        Extra keyword arguments are forwarded unchanged to `self.client.query`,
+        and whatever that call returns is handed back to the caller.
+        """
-        response = self.client.query(query, **kwargs)
+        return self.client.query(query, **kwargs)
+
+    def download_to_file(self, project, dataset, table, limit=None, offset=None,
+                         file_path=None, fail_if_exists=False, progress_bar=True):
+        """Download the rows of a BigQuery table into a local CSV file.
+
+        Only the row values are written: the file has no header row.
+
+        Args:
+            project: first component of the fully qualified table name.
+            dataset: second component of the fully qualified table name.
+            table: third component of the fully qualified table name.
+            limit: optional LIMIT forwarded to the download query.
+            offset: optional OFFSET forwarded to the download query.
+            file_path: destination file. When empty, the file is written to
+                the cartoframes user config directory as
+                `<project>.<dataset>.<table>.csv`.
+            fail_if_exists: when True, refuse to overwrite an existing file.
+            progress_bar: when True, show a tqdm notebook progress bar sized
+                with the `total_rows` of the query result.
+
+        Returns:
+            The path of the written CSV file.
+
+        Raises:
+            CartoException: if `fail_if_exists` is set and the file exists.
+        """
+        use_default_dir = not file_path
+        if use_default_dir:
+            file_name = '{}.{}.{}.csv'.format(project, dataset, table)
+            file_path = os.path.join(_USER_CONFIG_DIR, file_name)
+
+        if fail_if_exists and os.path.isfile(file_path):
+            raise CartoException('The file `{}` already exists.'.format(file_path))
+
+        query = _download_query(project, dataset, table, limit, offset)
+        rows_iter = self.query(query).result()
+
+        # The user config directory is not guaranteed to exist yet (e.g. on a
+        # fresh install); without it `open` below fails for the default path.
+        if use_default_dir and not os.path.isdir(_USER_CONFIG_DIR):
+            os.makedirs(_USER_CONFIG_DIR)
+
+        pb = tqdm.tqdm_notebook(total=rows_iter.total_rows) if progress_bar else None
+
+        try:
+            with open(file_path, 'w') as csvfile:
+                csvwriter = csv.writer(csvfile)
+                for row in rows_iter:
+                    csvwriter.writerow(row.values())
+                    if pb is not None:
+                        pb.update(1)
+        finally:
+            # Always release the progress bar, even if writing fails midway.
+            if pb is not None:
+                pb.close()
+
+        return file_path
+
+
+def _download_query(project, dataset, table, limit=None, offset=None):
+    """Build the `SELECT *` statement used to download a table.
+
+    Args:
+        project, dataset, table: components of the backtick-quoted, fully
+            qualified table name `project.dataset.table`.
+        limit: optional row limit; `None` means no LIMIT clause, while `0`
+            is emitted as a literal `LIMIT 0`.
+        offset: optional number of rows to skip; a falsy value adds nothing.
+
+    Returns:
+        The query string.
+
+    Raises:
+        ValueError: if `offset` is given without `limit`, because BigQuery
+            only accepts OFFSET as part of a LIMIT clause.
+    """
+    if offset and limit is None:
+        raise ValueError('`offset` requires `limit`: BigQuery only accepts OFFSET after a LIMIT clause.')
+
+    full_table_name = '`{}.{}.{}`'.format(project, dataset, table)
+    query = 'SELECT * FROM {}'.format(full_table_name)
+
+    if limit is not None:
+        query += ' LIMIT {}'.format(limit)
+        if offset:
+            query += ' OFFSET {}'.format(offset)
 
-        return response
+    return query
diff --git a/cartoframes/data/observatory/dataset.py b/cartoframes/data/observatory/dataset.py
@@ -1,9 +1,19 @@
 from __future__ import absolute_import
 
+from warnings import warn
+
+from google.api_core.exceptions import NotFound
+
+from carto.exceptions import CartoException
+
 from .entity import CatalogEntity
 from .repository.dataset_repo import get_dataset_repo
 from .repository.variable_repo import get_variable_repo
 from .repository.variable_group_repo import get_variable_group_repo
+from ..clients.bigquery_client import BigQueryClient
+from ...auth import get_default_credentials
+
+# BigQuery project handed to the BigQuery client and used as the project of
+# the per-user views queried by `Dataset.download`.
+_WORKING_PROJECT = 'carto-do-customers'
 
 
 class Dataset(CatalogEntity):
@@ -72,3 +82,29 @@ def is_public_data(self):
     @property
     def summary(self):
         return self.data['summary_jsonb']
+
+    def download(self, credentials=None):
+        """Download this dataset to a local CSV file and return its path.
+
+        The data is read from a view in the `_WORKING_PROJECT` project, inside
+        a BigQuery dataset named after the credentials' username (hyphens
+        replaced by underscores). The view name is `view_<second>_<third>`,
+        built from the second and third dot-separated parts of `self.id`.
+
+        Args:
+            credentials: credentials to use; falls back to the default
+                credentials when not given.
+
+        Returns:
+            The path of the downloaded file.
+
+        Raises:
+            CartoException: if BigQuery reports the view as not found.
+        """
+        creds = _get_credentials(credentials)
+        customer_dataset = creds.username.replace('-', '_')
+        client = _get_bigquery_client(_WORKING_PROJECT, creds)
+
+        # The first part of the id (its own project) is not needed here.
+        _, dataset_part, table_part = self.id.split('.')
+        normalized_dataset = dataset_part.replace('-', '_')
+        view_name = 'view_{}_{}'.format(normalized_dataset, table_part)
+
+        try:
+            saved_path = client.download_to_file(_WORKING_PROJECT, customer_dataset, view_name)
+        except NotFound:
+            raise CartoException('You have not purchased the dataset `{}` yet'.format(self.id))
+
+        saved_message = 'Data saved: {}.'.format(saved_path)
+        warn(saved_message)
+        read_hint = "Read it by: `pandas.read_csv('{}')`.".format(saved_path)
+        warn(read_hint)
+
+        return saved_path
+
+
+def _get_credentials(credentials=None):
+    """Return `credentials` when truthy, otherwise the default credentials."""
+    if credentials:
+        return credentials
+    return get_default_credentials()
+
+
+def _get_bigquery_client(project, credentials):
+    """Create a `BigQueryClient` for `project` using `credentials`."""
+    client = BigQueryClient(project, credentials)
+    return client
diff --git a/cartoframes/data/observatory/geography.py b/cartoframes/data/observatory/geography.py
@@ -1,8 +1,18 @@
 from __future__ import absolute_import
 
+from warnings import warn
+
+from google.api_core.exceptions import NotFound
+
+from carto.exceptions import CartoException
+
 from .entity import CatalogEntity
 from .repository.dataset_repo import get_dataset_repo
 from .repository.geography_repo import get_geography_repo
+from ..clients.bigquery_client import BigQueryClient
+from ...auth import get_default_credentials
+
+# BigQuery project handed to the BigQuery client and used as the project of
+# the per-user views queried by `Geography.download`.
+_WORKING_PROJECT = 'carto-do-customers'
 
 
 class Geography(CatalogEntity):
@@ -52,3 +62,29 @@ def is_public_data(self):
     @property
     def summary(self):
         return self.data['summary_jsonb']
+
+    def download(self, credentials=None):
+        """Download this geography to a local CSV file and return its path.
+
+        The data is read from a view in the `_WORKING_PROJECT` project, inside
+        a BigQuery dataset named after the credentials' username (hyphens
+        replaced by underscores). The view name is `view_<second>_<third>`,
+        built from the second and third dot-separated parts of `self.id`.
+
+        Args:
+            credentials: credentials to use; falls back to the default
+                credentials when not given.
+
+        Returns:
+            The path of the downloaded file.
+
+        Raises:
+            CartoException: if BigQuery reports the view as not found.
+        """
+        creds = _get_credentials(credentials)
+        customer_dataset = creds.username.replace('-', '_')
+        client = _get_bigquery_client(_WORKING_PROJECT, creds)
+
+        # The first part of the id (its own project) is not needed here.
+        _, dataset_part, table_part = self.id.split('.')
+        normalized_dataset = dataset_part.replace('-', '_')
+        view_name = 'view_{}_{}'.format(normalized_dataset, table_part)
+
+        try:
+            saved_path = client.download_to_file(_WORKING_PROJECT, customer_dataset, view_name)
+        except NotFound:
+            raise CartoException('You have not purchased the dataset `{}` yet'.format(self.id))
+
+        saved_message = 'Data saved: {}.'.format(saved_path)
+        warn(saved_message)
+        read_hint = "Read it by: `pandas.read_csv('{}')`.".format(saved_path)
+        warn(read_hint)
+
+        return saved_path
+
+
+def _get_credentials(credentials=None):
+    """Return `credentials` when truthy, otherwise the default credentials."""
+    if credentials:
+        return credentials
+    return get_default_credentials()
+
+
+def _get_bigquery_client(project, credentials):
+    """Create a `BigQueryClient` for `project` using `credentials`."""
+    client = BigQueryClient(project, credentials)
+    return client
diff --git a/examples/08_data_observatory/download.ipynb b/examples/08_data_observatory/download.ipynb
@@ -0,0 +1,230 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### BigQuery Client"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cartoframes.auth import Credentials, set_default_credentials\n",
+    "credentials = Credentials.from_file()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cartoframes.data.clients.bigquery_client import BigQueryClient\n",
+    "client = BigQueryClient('carto-do-customers', credentials)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "project = 'carto-do-customers'\n",
+    "dataset = credentials.username.replace('-', '_')\n",
+    "table = 'view_mastercard_financial_mrli_usa_block_2019_monthly_2019'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client.download_to_file(project, dataset, table)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Catalog"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "id                      carto-do.bbva.financial_origindistributionages...\n",
+       "name                                Destination Distribution Ages Genders\n",
+       "description                                                          None\n",
+       "provider_id                                                          bbva\n",
+       "category_id                                                     financial\n",
+       "data_source_id                              origindistributionagesgenders\n",
+       "country_iso_code3                                                   spain\n",
+       "language_iso_code3                                                   None\n",
+       "geography_id              carto-do.bbva.geography_spain_censustracts_2011\n",
+       "temporal_aggregation                                              monthly\n",
+       "time_coverage                                                        None\n",
+       "update_frequency                                                     None\n",
+       "version                                                              2017\n",
+       "is_public_data                                                       None\n",
+       "summary_jsonb                                                        None\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from cartoframes.data.observatory.catalog import Catalog\n",
+    "dataset = Catalog().categories.get('financial').datasets.get('carto-do.bbva.financial_origindistributionagesgenders_spain_censustracts_2011_monthly_2017')\n",
+    "dataset.to_series()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cartoframes.auth import Credentials, set_default_credentials\n",
+    "credentials = Credentials.from_file()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset.download(credentials)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "id                    carto-do.bbva.geography_spain_censustracts_2011\n",
+       "name                                              Spain Census tracts\n",
+       "description                                                      None\n",
+       "provider_id                                                      bbva\n",
+       "country_iso_code3                                               spain\n",
+       "language_iso_code3                                               None\n",
+       "geom_coverage                                                    None\n",
+       "update_frequency                                                 None\n",
+       "version                                                          2011\n",
+       "is_public_data                                                   None\n",
+       "summary_jsonb                                                    None\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from cartoframes.data.observatory.geography import Geography\n",
+    "geography = Geography.get(dataset.geography)\n",
+    "geography.to_series()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/simon/dev/cartoframes/env/src/carto/carto/resources.py:90: FutureWarning: This is part of a non-public CARTO API and may change in the future. Take this into account if you are using this in a production environment\n",
+      "  warnings.warn('This is part of a non-public CARTO API and may change in the future. Take this into account if you are using this in a production environment', FutureWarning)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "988381ccb2a145008c0ae58b21d98427",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=35960), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/simon/dev/cartoframes/cartoframes/data/observatory/geography.py:79: UserWarning: Data saved: /home/simon/.config/cartoframes/carto-do-customers.simon_carto.view_bbva_geography_spain_censustracts_2011.csv.\n",
+      "  warn('Data saved: {}.'.format(file_path))\n",
+      "/home/simon/dev/cartoframes/cartoframes/data/observatory/geography.py:80: UserWarning: Read it by: `pandas.read_csv('/home/simon/.config/cartoframes/carto-do-customers.simon_carto.view_bbva_geography_spain_censustracts_2011.csv')`.\n",
+      "  warn(\"Read it by: `pandas.read_csv('{}')`.\".format(file_path))\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'/home/simon/.config/cartoframes/carto-do-customers.simon_carto.view_bbva_geography_spain_censustracts_2011.csv'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "geography.download(credentials)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset_no_permissions = Catalog().categories.get('financial').datasets.get('carto-do.mastercard.financial_mrli_usa_zipcode_2019_monthly_2019')\n",
+    "dataset_no_permissions.download(credentials)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}