# Carga de dados de Covid-19 e do censo no BigQuery (BQ)

## Carregar módulos

In [None]:
from google.colab import auth
from google.cloud import bigquery

In [None]:
import numpy as np
import pandas as pd

## Autenticar projeto

In [None]:
# Run the Colab authentication flow for the current user; this comes before
# the BigQuery client is created further down.
auth.authenticate_user()

In [None]:
# Google Cloud project that owns the BigQuery datasets used in this notebook.
project_id = 'dadosbrasil-471617'

In [None]:
# BigQuery client bound to `project_id`; every dataset, load and query call
# in this notebook goes through it.
client = bigquery.Client(project=project_id)

## Carregar dados

- Dados Covid Brasil

In [None]:
! wget --no-check-certificate --content-disposition 'https://github.com/wcota/covid19br/blob/master/cases-brazil-cities-time.csv.gz?raw=true'

--2025-09-10 12:10:11--  https://github.com/wcota/covid19br/blob/master/cases-brazil-cities-time.csv.gz?raw=true
Resolving github.com (github.com)... 140.82.112.3
Connecting to github.com (github.com)|140.82.112.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/wcota/covid19br/raw/refs/heads/master/cases-brazil-cities-time.csv.gz [following]
--2025-09-10 12:10:11--  https://github.com/wcota/covid19br/raw/refs/heads/master/cases-brazil-cities-time.csv.gz
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/wcota/covid19br/refs/heads/master/cases-brazil-cities-time.csv.gz [following]
--2025-09-10 12:10:12--  https://raw.githubusercontent.com/wcota/covid19br/refs/heads/master/cases-brazil-cities-time.csv.gz
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubus

In [None]:
! gunzip cases-brazil-cities-time.csv.gz

In [None]:
# Load the decompressed Covid CSV into a DataFrame.
dados_brasil = pd.read_csv('cases-brazil-cities-time.csv')

- Dados censo

In [None]:
# Mount Google Drive at /content/drive; the census CSV is read from there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the municipality census file on the mounted Drive.
censo_path = '/content/drive/MyDrive/Colab Notebooks/Datasets/pos-graduacao/inteligencia de negocios/dados_municipios_2010.csv'
# Parse it as semicolon-separated, with comma as the decimal mark, decoded as Latin-1.
dados_censo = pd.read_csv(
    censo_path,
    sep=';',
    decimal=',',
    encoding='latin1',
)

## Ajustar dados

In [None]:
# Drop the rows whose `state` is the literal 'TOTAL'.
dados_brasil = dados_brasil.loc[dados_brasil['state'].ne('TOTAL')]

In [None]:
# Columns kept from the Covid data before it is loaded into BigQuery.
cols = ['ibgeID', 'date', 'state', 'city', 'totalCases', 'deaths']

In [None]:
# Keep only the selected columns and renumber the index from 0 after the row filter.
dados_brasil = dados_brasil.loc[:, cols].reset_index(drop=True)

## Adicionar dados covid no BQ

In [None]:
# # use this instead if the dataset was previously created in the console
# dataset_ref = client.dataset('dados_brasil')

In [None]:
# Show the IDs of the datasets that already exist in the project. The raw
# DatasetListItem objects only display as memory addresses, which does not
# tell whether `dados_brasil` has been created yet.
[ds.dataset_id for ds in client.list_datasets(project=project_id)]

[<google.cloud.bigquery.dataset.DatasetListItem at 0x7e72b32595b0>,
 <google.cloud.bigquery.dataset.DatasetListItem at 0x7e72b891af90>]

In [None]:
# Create the dataset from Python. exists_ok=True makes the cell idempotent:
# when the dataset already exists (earlier run, or created in the console)
# the existing dataset is returned instead of raising a Conflict error.
dataset_ref = client.create_dataset(
    bigquery.Dataset(project_id + '.dados_brasil'),
    exists_ok=True,
)

In [None]:
# Reference to the destination table for the Covid data inside the dataset.
table_ref = dataset_ref.table("dados_brasil_covid")

In [None]:
# Upload the Covid DataFrame to the table referenced by `table_ref`.
# The load job is configured with the WRITE_TRUNCATE write disposition.
job = client.load_table_from_dataframe(
    dados_brasil,
    table_ref,
    job_config=bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE"),
)
# Block until the load job finishes (the job object is the cell output).
job.result()

LoadJob<project=dadosbrasil-471617, location=US, id=f58d1c40-b327-4941-81b5-1a0ca41bc38c>

## Adicionar dados do censo no BQ

In [None]:
# Rebind `table_ref` to the destination table for the census data.
table_ref = dataset_ref.table("dados_brasil_censo")

In [None]:
# Upload the census DataFrame to the table referenced by `table_ref`.
# The load job is configured with the WRITE_TRUNCATE write disposition.
job = client.load_table_from_dataframe(
    dados_censo,
    table_ref,
    job_config=bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE"),
)
# Block until the load job finishes (the job object is the cell output).
job.result()

LoadJob<project=dadosbrasil-471617, location=US, id=384766fa-8e2b-4804-96b3-aa4e9316eb1c>

In [None]:
# # use when building the update process
# # NOTE(review): `players_view` is not defined anywhere in the visible notebook —
# # presumably a leftover name from another project; confirm which DataFrame to load.
# job = client.load_table_from_dataframe(players_view, table_ref)
# job.result()

In [None]:
# Sanity-check queries: fetch up to 10 rows from each loaded table.
# There is no ORDER BY, so which rows come back is not guaranteed.
# The table names carry no project prefix.
# The results are turned into DataFrames with to_dataframe() in a later cell.
query_dados_censo = client.query(
    """
    SELECT *
    FROM `dados_brasil.dados_brasil_censo`
    LIMIT 10
    """
)

query_dados_covid = client.query(
    """
    SELECT *
    FROM `dados_brasil.dados_brasil_covid`
    LIMIT 10
    """
)

In [None]:
# Materialize the preview query results as DataFrames.
# NOTE(review): this rebinds `dados_censo`, replacing the full census DataFrame
# read from Drive with the 10-row preview. Re-running the census load cell
# after this one would overwrite the BigQuery table with only the preview
# (WRITE_TRUNCATE) — consider using a separate name for the preview.
dados_censo = query_dados_censo.to_dataframe()
dados_covid = query_dados_covid.to_dataframe()

In [None]:
# Display the census preview ordered by the `ibgeID` column.
dados_censo.sort_values(by='ibgeID')

Unnamed: 0,ibgeID,Municipio,ESPVIDA,E_ANOSESTUDO,T_ANALF18M,RDPC,IDHM
6,1200328,JORDÃO,68.83,5.73,29.87,178.03,0.469
2,1300201,ATALAIA DO NORTE,68.95,4.63,37.39,159.21,0.45
9,1301951,ITAMARATI,71.32,5.84,40.71,214.95,0.477
4,1400704,UIRAMUTÃ,70.95,4.73,31.63,123.16,0.453
7,1501105,BAGRE,71.59,5.57,26.08,159.41,0.471
8,1501956,CACHOEIRA DO PIRIÁ,71.73,6.67,30.96,130.4,0.473
5,1502509,CHAVES,71.15,4.95,28.92,198.83,0.453
0,1504505,MELGAÇO,71.57,5.51,39.7,135.21,0.418
1,2104081,FERNANDO FALCÃO,68.67,5.75,41.64,106.99,0.443
3,2106359,MARAJÁ DO SENA,71.44,6.62,43.53,96.25,0.452


In [None]:
# Display the Covid preview ordered by the `ibgeID` column.
dados_covid.sort_values(by='ibgeID')

Unnamed: 0,ibgeID,date,state,city,totalCases,deaths
0,11,2023-03-02,RO,CASO SEM LOCALIZAÇÃO DEFINIDA/RO,-106,-1
1,12,2023-03-18,AC,CASO SEM LOCALIZAÇÃO DEFINIDA/AC,-226,-1
2,13,2023-02-25,AM,CASO SEM LOCALIZAÇÃO DEFINIDA/AM,-497,-2
3,13,2023-02-26,AM,CASO SEM LOCALIZAÇÃO DEFINIDA/AM,-641,-2
4,13,2023-03-02,AM,CASO SEM LOCALIZAÇÃO DEFINIDA/AM,-401,-2
5,13,2023-03-11,AM,CASO SEM LOCALIZAÇÃO DEFINIDA/AM,-1757,-10
6,13,2023-03-12,AM,CASO SEM LOCALIZAÇÃO DEFINIDA/AM,-1757,-10
7,13,2023-03-13,AM,CASO SEM LOCALIZAÇÃO DEFINIDA/AM,-1757,-10
8,13,2023-03-14,AM,CASO SEM LOCALIZAÇÃO DEFINIDA/AM,-1757,-10
9,13,2023-03-15,AM,CASO SEM LOCALIZAÇÃO DEFINIDA/AM,-1757,-10
