In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import os
from os.path import join
import re
import math
import basedosdados as bd
from google.cloud import bigquery

In [None]:
def download(
    savepath,
    query=None,
    dataset_id=None,
    table_id=None,
    billing_project_id=None,
    query_project_id="basedosdados",
    limit=None,
    from_file=False,
    reauth=False,
    compression="GZIP",
):
    """Download table or query result from basedosdados BigQuery (or other).

    * Using a **query**:

        `download('select * from `basedosdados.br_suporte.diretorio_municipios` limit 10')`

    * Using **dataset_id & table_id**:

        `download(dataset_id='br_suporte', table_id='diretorio_municipios')`

    You can also add arguments to modify save parameters:

    `download(dataset_id='br_suporte', table_id='diretorio_municipios', index=False, sep='|')`


    Args:
        savepath (str, pathlib.PosixPath):
            savepath must be a file path. Only supports `.csv`.
        query (str): Optional.
            Valid SQL Standard Query to basedosdados. If query is available,
            dataset_id and table_id are not required.
        dataset_id (str): Optional.
            Dataset id available in basedosdados. It should always come with table_id.
        table_id (str): Optional.
            Table id available in basedosdados.dataset_id.
            It should always come with dataset_id.
        billing_project_id (str): Optional.
            Project that will be billed. Find your Project ID here https://console.cloud.google.com/projectselector2/home/dashboard
        query_project_id (str): Optional.
            Which project the table lives. You can change this you want to query different projects.
        limit (int): Optional
            Number of rows.
        from_file (boolean): Optional.
            Uses the credentials from file, located in `~/.basedosdados/credentials/
        reauth (boolean): Optional.
            Re-authorize Google Cloud Project in case you need to change user or reset configurations.
        compression (str): Optional.
            Compression type. Only `GZIP` is available for now.
    Raises:
        Exception: If either table_id, dataset_id or query are empty.
    """

    billing_project_id, from_file = _set_config_variables(
        billing_project_id=billing_project_id, from_file=from_file
    )

    if (query is None) and ((table_id is None) or (dataset_id is None)):
        raise BaseDosDadosException(
            "Either table_id, dataset_id or query should be filled."
        )

    client = _google_client(billing_project_id, from_file, reauth)

    # makes sure that savepath is a filepath and not a folder
    savepath = _sets_savepath(savepath)

    # if query is not defined (so it won't be overwritten) and if
    # table is a view or external or if limit is specified,
    # convert it to a query.
    if not query and (
        not _is_table(client, dataset_id, table_id, query_project_id) or limit
    ):
        query = f"""
        SELECT *
          FROM {query_project_id}.{dataset_id}.{table_id}
        """

        if limit is not None:
            query += f" limit {limit}"

    if query:
        # sql queries produces anonymous tables, whose names
        # can be found within `job._properties`
        job = client["bigquery"].query(query)

        # views may take longer: wait for job to finish.
        _wait_for(job)

        dest_table = job._properties["configuration"]["query"]["destinationTable"]

        project_id = dest_table["projectId"]
        dataset_id = dest_table["datasetId"]
        table_id = dest_table["tableId"]

    _direct_download(client, dataset_id, table_id, savepath, project_id, compression)

In [2]:
MODULE_PATH = Path().absolute().parent
DATA_PATH = join(MODULE_PATH, "sources/raw")
CONFIG_PATH = join(MODULE_PATH, "config")

In [3]:
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = join(CONFIG_PATH, "optimum-vine-316721-5d37568eb4e0.json")

In [4]:
client = bigquery.Client()
print("Autenticação bem-sucedida! Projeto:", client.project)

Autenticação bem-sucedida! Projeto: optimum-vine-316721


In [5]:
df = bd.read_table('br_ibge_pib', 'municipio', billing_project_id="optimum-vine-316721")

Downloading: 100%|[32m██████████[0m|


In [8]:
bd.download(savepath=join(DATA_PATH, "test.csv"), dataset_id="br_ibge_pib", table_id="municipio", billing_project_id="optimum-vine-316721")

UnboundLocalError: local variable 'project_id' referenced before assignment

In [None]:
basedosdados.world_imdb_movies.top_movies_per_year

In [24]:
bd.download(savepath=join(DATA_PATH, "test.csv"), dataset_id="world_imdb_movies", table_id="top_movies_per_year")

UnboundLocalError: local variable 'project_id' referenced before assignment

In [13]:
bd.download(dataset_id="br_inmet_bdmep", table_id="microdados", savepath=DATA_PATH)

DefaultCredentialsError: Could not automatically determine credentials. Please set GOOGLE_APPLICATION_CREDENTIALS or explicitly create credentials and re-run the application. For more information, please see https://cloud.google.com/docs/authentication/getting-started