Skip to content

Commit

Permalink
Merge 5be6f74 into fa2db45
Browse files Browse the repository at this point in the history
  • Loading branch information
oleurud committed Oct 8, 2019
2 parents fa2db45 + 5be6f74 commit 2a5db6c
Show file tree
Hide file tree
Showing 8 changed files with 570 additions and 4 deletions.
45 changes: 43 additions & 2 deletions cartoframes/data/clients/bigquery_client.py
@@ -1,5 +1,10 @@
from __future__ import absolute_import

import os
import appdirs
import csv
import tqdm

from google.cloud import bigquery
from google.oauth2.credentials import Credentials as GoogleCredentials
from google.auth.exceptions import RefreshError
Expand All @@ -8,6 +13,8 @@

from ...auth import get_default_credentials

# Per-user configuration directory for 'cartoframes', as reported by appdirs.
# download_to_file() writes its CSV here when the caller gives no file_path.
_USER_CONFIG_DIR = appdirs.user_config_dir('cartoframes')


def refresh_client(func):
def wrapper(self, *args, **kwargs):
Expand Down Expand Up @@ -52,6 +59,40 @@ def upload_dataframe(self, dataframe, schema, tablename, project, dataset):

@refresh_client
def query(self, query, **kwargs):
    """Submit ``query`` through the wrapped BigQuery client.

    Args:
        query: SQL text handed to ``self.client.query``.
        **kwargs: forwarded unchanged to ``self.client.query``.

    Returns:
        Whatever ``self.client.query`` returns.
    """
    # Submit exactly once: binding the result to a throwaway local and then
    # calling ``self.client.query`` again would run the same query twice.
    return self.client.query(query, **kwargs)

def download_to_file(self, project, dataset, table, limit=None, offset=None,
                     file_path=None, fail_if_exists=False, progress_bar=True):
    """Query a whole table (``SELECT *``) and write its rows to a CSV file.

    Args:
        project, dataset, table: parts of the fully qualified table name.
        limit, offset: optional values forwarded to ``_download_query``.
        file_path: destination file. When falsy, the file is written to
            ``_USER_CONFIG_DIR`` as ``<project>.<dataset>.<table>.csv``.
        fail_if_exists: when true, refuse to overwrite an existing file.
        progress_bar: when true, show a ``tqdm`` notebook progress bar that
            advances once per written row.

    Returns:
        The path of the written file.

    Raises:
        CartoException: if ``fail_if_exists`` is true and the file exists.
    """
    use_default_location = not file_path
    if use_default_location:
        file_name = '{}.{}.{}.csv'.format(project, dataset, table)
        file_path = os.path.join(_USER_CONFIG_DIR, file_name)

    if fail_if_exists and os.path.isfile(file_path):
        raise CartoException('The file `{}` already exists.'.format(file_path))

    query = _download_query(project, dataset, table, limit, offset)
    rows_iter = self.query(query).result()

    # appdirs only computes the directory name; it does not create it. Make
    # sure the default location exists before opening the file for writing.
    if use_default_location and not os.path.isdir(_USER_CONFIG_DIR):
        os.makedirs(_USER_CONFIG_DIR)

    pb = tqdm.tqdm_notebook(total=rows_iter.total_rows) if progress_bar else None

    try:
        with open(file_path, 'w') as csvfile:
            csvwriter = csv.writer(csvfile)
            # NOTE(review): only row values are written, no header row with
            # column names -- confirm that readers of the file expect this.
            for row in rows_iter:
                csvwriter.writerow(row.values())
                if pb is not None:
                    pb.update(1)
    finally:
        # Always release the progress bar, even if fetching or writing fails.
        if pb is not None:
            pb.close()

    return file_path


def _download_query(project, dataset, table, limit=None, offset=None):
full_table_name = '`{}.{}.{}`'.format(project, dataset, table)
query = 'SELECT * FROM {}'.format(full_table_name)

if limit:
query += ' LIMIT {}'.format(limit)
if offset:
query += ' OFFSET {}'.format(offset)

return response
return query
36 changes: 36 additions & 0 deletions cartoframes/data/observatory/dataset.py
@@ -1,9 +1,19 @@
from __future__ import absolute_import

from warnings import warn

from google.api_core.exceptions import NotFound

from carto.exceptions import CartoException

from .entity import CatalogEntity
from .repository.dataset_repo import get_dataset_repo
from .repository.variable_repo import get_variable_repo
from .repository.variable_group_repo import get_variable_group_repo
from ..clients.bigquery_client import BigQueryClient
from ...auth import get_default_credentials

# BigQuery project name that download() passes both to the BigQueryClient
# it builds and to download_to_file().
_WORKING_PROJECT = 'carto-do-customers'


class Dataset(CatalogEntity):
Expand Down Expand Up @@ -72,3 +82,29 @@ def is_public_data(self):
@property
def summary(self):
    """Return the ``summary_jsonb`` entry of ``self.data``."""
    summary_value = self.data['summary_jsonb']
    return summary_value

def download(self, credentials=None):
    """Download this dataset's view to a local CSV file.

    The view is named ``view_<dataset>_<table>``, built from the second and
    third dot-separated parts of ``self.id`` (dashes in the dataset part
    become underscores). It is read from the BigQuery dataset named after
    the credentials' username (dashes replaced by underscores) inside
    ``_WORKING_PROJECT``.

    Args:
        credentials: credentials to use; resolved through
            ``_get_credentials`` when omitted.

    Returns:
        The path returned by ``download_to_file``.

    Raises:
        CartoException: if BigQuery reports ``NotFound`` for the view.
    """
    resolved = _get_credentials(credentials)
    user_dataset = resolved.username.replace('-', '_')
    client = _get_bigquery_client(_WORKING_PROJECT, resolved)

    # The first part of the id is not needed to name the view.
    _, dataset_part, table_part = self.id.split('.')
    view_name = 'view_{}_{}'.format(dataset_part.replace('-', '_'), table_part)

    try:
        saved_path = client.download_to_file(_WORKING_PROJECT, user_dataset, view_name)
    except NotFound:
        raise CartoException('You have not purchased the dataset `{}` yet'.format(self.id))

    saved_message = 'Data saved: {}.'.format(saved_path)
    warn(saved_message)
    hint_message = "Read it by: `pandas.read_csv('{}')`.".format(saved_path)
    warn(hint_message)

    return saved_path


def _get_credentials(credentials=None):
return credentials or get_default_credentials()


def _get_bigquery_client(project, credentials):
    """Build a ``BigQueryClient`` from ``project`` and ``credentials``."""
    client = BigQueryClient(project, credentials)
    return client
36 changes: 36 additions & 0 deletions cartoframes/data/observatory/geography.py
@@ -1,8 +1,18 @@
from __future__ import absolute_import

from warnings import warn

from google.api_core.exceptions import NotFound

from carto.exceptions import CartoException

from .entity import CatalogEntity
from .repository.dataset_repo import get_dataset_repo
from .repository.geography_repo import get_geography_repo
from ..clients.bigquery_client import BigQueryClient
from ...auth import get_default_credentials

# BigQuery project name that download() passes both to the BigQueryClient
# it builds and to download_to_file().
_WORKING_PROJECT = 'carto-do-customers'


class Geography(CatalogEntity):
Expand Down Expand Up @@ -52,3 +62,29 @@ def is_public_data(self):
@property
def summary(self):
    """Return the ``summary_jsonb`` entry of ``self.data``."""
    summary_value = self.data['summary_jsonb']
    return summary_value

def download(self, credentials=None):
    """Download this geography's view to a local CSV file.

    The view is named ``view_<dataset>_<table>``, built from the second and
    third dot-separated parts of ``self.id`` (dashes in the dataset part
    become underscores). It is read from the BigQuery dataset named after
    the credentials' username (dashes replaced by underscores) inside
    ``_WORKING_PROJECT``.

    Args:
        credentials: credentials to use; resolved through
            ``_get_credentials`` when omitted.

    Returns:
        The path returned by ``download_to_file``.

    Raises:
        CartoException: if BigQuery reports ``NotFound`` for the view.
    """
    resolved = _get_credentials(credentials)
    user_dataset = resolved.username.replace('-', '_')
    client = _get_bigquery_client(_WORKING_PROJECT, resolved)

    # The first part of the id is not needed to name the view.
    _, dataset_part, table_part = self.id.split('.')
    view_name = 'view_{}_{}'.format(dataset_part.replace('-', '_'), table_part)

    try:
        saved_path = client.download_to_file(_WORKING_PROJECT, user_dataset, view_name)
    except NotFound:
        raise CartoException('You have not purchased the dataset `{}` yet'.format(self.id))

    saved_message = 'Data saved: {}.'.format(saved_path)
    warn(saved_message)
    hint_message = "Read it by: `pandas.read_csv('{}')`.".format(saved_path)
    warn(hint_message)

    return saved_path


def _get_credentials(credentials=None):
return credentials or get_default_credentials()


def _get_bigquery_client(project, credentials):
    """Build a ``BigQueryClient`` from ``project`` and ``credentials``."""
    client = BigQueryClient(project, credentials)
    return client
230 changes: 230 additions & 0 deletions examples/08_data_observatory/download.ipynb
@@ -0,0 +1,230 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Big Query Client"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.auth import Credentials, set_default_credentials\n",
"credentials = Credentials.from_file()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.data.clients.bigquery_client import BigQueryClient\n",
"client = BigQueryClient('carto-do-customers', credentials)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"project = 'carto-do-customers'\n",
"dataset = credentials.username.replace('-', '_')\n",
"table = 'view_mastercard_financial_mrli_usa_block_2019_monthly_2019'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client.download_to_file(project, dataset, table)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Catalog"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id carto-do.bbva.financial_origindistributionages...\n",
"name Destination Distribution Ages Genders\n",
"description None\n",
"provider_id bbva\n",
"category_id financial\n",
"data_source_id origindistributionagesgenders\n",
"country_iso_code3 spain\n",
"language_iso_code3 None\n",
"geography_id carto-do.bbva.geography_spain_censustracts_2011\n",
"temporal_aggregation monthly\n",
"time_coverage None\n",
"update_frequency None\n",
"version 2017\n",
"is_public_data None\n",
"summary_jsonb None\n",
"dtype: object"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from cartoframes.data.observatory.catalog import Catalog\n",
"dataset = Catalog().categories.get('financial').datasets.get('carto-do.bbva.financial_origindistributionagesgenders_spain_censustracts_2011_monthly_2017')\n",
"dataset.to_series()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from cartoframes.auth import Credentials, set_default_credentials\n",
"credentials = Credentials.from_file()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset.download(credentials)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id carto-do.bbva.geography_spain_censustracts_2011\n",
"name Spain Census tracts\n",
"description None\n",
"provider_id bbva\n",
"country_iso_code3 spain\n",
"language_iso_code3 None\n",
"geom_coverage None\n",
"update_frequency None\n",
"version 2011\n",
"is_public_data None\n",
"summary_jsonb None\n",
"dtype: object"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from cartoframes.data.observatory.geography import Geography\n",
"geography = Geography.get(dataset.geography)\n",
"geography.to_series()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/simon/dev/cartoframes/env/src/carto/carto/resources.py:90: FutureWarning: This is part of a non-public CARTO API and may change in the future. Take this into account if you are using this in a production environment\n",
" warnings.warn('This is part of a non-public CARTO API and may change in the future. Take this into account if you are using this in a production environment', FutureWarning)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "988381ccb2a145008c0ae58b21d98427",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=35960), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/simon/dev/cartoframes/cartoframes/data/observatory/geography.py:79: UserWarning: Data saved: /home/simon/.config/cartoframes/carto-do-customers.simon_carto.view_bbva_geography_spain_censustracts_2011.csv.\n",
" warn('Data saved: {}.'.format(file_path))\n",
"/home/simon/dev/cartoframes/cartoframes/data/observatory/geography.py:80: UserWarning: Read it by: `pandas.read_csv('/home/simon/.config/cartoframes/carto-do-customers.simon_carto.view_bbva_geography_spain_censustracts_2011.csv')`.\n",
" warn(\"Read it by: `pandas.read_csv('{}')`.\".format(file_path))\n"
]
},
{
"data": {
"text/plain": [
"'/home/simon/.config/cartoframes/carto-do-customers.simon_carto.view_bbva_geography_spain_censustracts_2011.csv'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"geography.download(credentials)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset_no_permissions = Catalog().categories.get('financial').datasets.get('carto-do.mastercard.financial_mrli_usa_zipcode_2019_monthly_2019')\n",
"dataset_no_permissions.download(credentials)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 2a5db6c

Please sign in to comment.