In [184]:
import requests
import pandas as pd
import basedosdados as bd
import datetime
import json
from pathlib import Path

In [169]:
# Scratch check: doubled braces inside an f-string are an escape, so `a` holds
# the literal text '{mode}' (the placeholder style used by the path template).
a = f'{{mode}}'

In [189]:
# Dataset/table identifiers: used to build the local directory layout and
# passed to basedosdados as the target dataset and table.
dataset_id = 'br_rj_riodejaneiro_brt_gps'
table_id = 'registros'
# Endpoint requested by get_raw(); pre_treatment() reads the 'veiculos' key of
# its JSON response.
url = "http://webapibrt.rio.rj.gov.br/api/v1/brt"

def get_file_path_and_partitions(capture_time=None):
    """Build the local file-path template and partition string for a capture.

    Args:
        capture_time: ``datetime.datetime`` used to stamp the capture.
            Defaults to ``datetime.datetime.now()`` (naive local time).

    Returns:
        tuple[str, str]: ``(file_path, partitions)``. ``file_path`` still
        contains the literal ``{mode}`` and ``{filetype}`` placeholders, to be
        filled in later with ``str.format``; ``partitions`` has the form
        ``data=YYYY-MM-DD/hora=HH``.
    """
    if capture_time is None:
        capture_time = datetime.datetime.now()

    date = capture_time.strftime('%Y-%m-%d')
    hour = capture_time.strftime('%H')
    # '%M' is minutes. The previous '%m' repeated the month here, so captures
    # taken in the same hour at the same second shared one filename and
    # overwrote each other.
    filename = capture_time.strftime('%Y-%m-%d-%H-%M-%S')

    partitions = f'data={date}/hora={hour}'

    # Doubled braces keep {mode} and {filetype} as unfilled placeholders.
    file_path = f"../data/{{mode}}/{dataset_id}/{table_id}/{partitions}/{filename}.{{filetype}}"

    return file_path, partitions

def get_raw(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the response object.

    Args:
        url: Address to request.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a stalled connection can block indefinitely.

    Returns:
        The ``requests`` response, returned only when its ``ok`` flag is true.

    Raises:
        Exception: If the response is not ``ok``; the message carries the
            HTTP status code.
    """
    response = requests.get(url, timeout=timeout)

    if not response.ok:
        # The f-prefix was missing before, so the message showed the literal
        # text '{data.status_code}' instead of the actual status code.
        raise Exception(f'Requests failed with error {response.status_code}')

    return response
        
def pre_treatment(data):
    """Convert a raw API response into a DataFrame of vehicle records.

    Args:
        data: Response-like object whose ``json()`` returns a mapping with a
            ``'veiculos'`` key holding the records.

    Returns:
        pandas.DataFrame: One row per record, with ``dataHora`` converted to
        ``datetime`` and an added ``timestamp_captura`` column holding the
        time this function ran.
    """
    # The original indexed an undefined name (`content`), which raised a
    # NameError; the records live in the decoded JSON payload.
    payload = data.json()
    df = pd.DataFrame(payload['veiculos'])
    df['timestamp_captura'] = datetime.datetime.now()
    # dataHora is treated as epoch milliseconds; fromtimestamp expects seconds
    # and yields naive local-time datetimes.
    df['dataHora'] = df['dataHora'].apply(
        lambda ms: datetime.datetime.fromtimestamp(ms / 1000.0)
    )

    return df

def save_raw_local(data, file_path, mode='raw'):
    """Dump the JSON body of ``data`` to a local ``.json`` file.

    Args:
        data: Response-like object exposing ``json()``.
        file_path: Path template containing ``{mode}`` and ``{filetype}``
            placeholders.
        mode: Value substituted for ``{mode}`` in the template.

    Returns:
        None. Missing parent directories are created as needed.
    """
    target = Path(file_path.format(mode=mode, filetype='json'))
    target.parent.mkdir(parents=True, exist_ok=True)
    # The with-block closes (and therefore flushes) the handle; the original
    # passed an open file to json.dump and never closed it.
    with target.open('w') as raw_file:
        json.dump(data.json(), raw_file)

def save_treated_local(df, file_path, mode='staging'):
    """Write ``df`` to a local CSV file, without the index column.

    Args:
        df: DataFrame to persist.
        file_path: Path template containing ``{mode}`` and ``{filetype}``
            placeholders.
        mode: Value substituted for ``{mode}`` in the template.

    Returns:
        None. Missing parent directories are created as needed.
    """
    csv_path = file_path.format(filetype='csv', mode=mode)
    target_dir = Path(csv_path).parent
    target_dir.mkdir(exist_ok=True, parents=True)
    df.to_csv(csv_path, index=False)
    
def upload_to_bigquery(dataset_id, table_id, partitions, mode='staging'):
    """Upload the locally saved CSV for the current capture via basedosdados.

    NOTE: the path template is read from the notebook-level ``file_path``
    variable, not from an argument, so ``get_file_path_and_partitions()`` must
    have been assigned to ``file_path`` before this is called.

    Args:
        dataset_id: Target dataset identifier.
        table_id: Target table identifier.
        partitions: Partition string, e.g. ``data=YYYY-MM-DD/hora=HH``.
        mode: Value substituted for ``{mode}`` in the path template and passed
            on to ``Storage.upload``.

    Returns:
        None.
    """
    _file_path = file_path.format(mode=mode, filetype='csv')

    st = bd.Storage(dataset_id=dataset_id, table_id=table_id)
    # Forward the caller's mode; it used to be hard-coded to 'staging', so a
    # non-default mode picked one local path but uploaded under another mode.
    st.upload(_file_path, partitions=partitions, mode=mode)

In [190]:
# CAPTURE
# One full capture cycle: build paths, fetch, save raw, treat, save treated,
# upload. Each step depends on the names assigned by the steps above it.

# Path template (with {mode}/{filetype} placeholders) and partition string.
file_path, partitions = get_file_path_and_partitions()

# Raw HTTP response from the endpoint in `url`.
data = get_raw(url)

# Persist the untouched JSON payload (default mode='raw').
save_raw_local(data, file_path)

# Build the DataFrame from the same response.
treated_data = pre_treatment(data)

# Persist the treated data as CSV (default mode='staging').
save_treated_local(treated_data, file_path)

# upload_to_bigquery reads the notebook-level `file_path` assigned above.
upload_to_bigquery(dataset_id, table_id, partitions)

Uploading files: 100%|██████████| 1/1 [00:05<00:00,  5.09s/it]


In [207]:
# Inspect the HTTP status code of the response currently held in `data`.
data.status_code

200

In [None]:
# FIRST TIME (initial table creation)

In [210]:
# First-run variant of the capture cycle: same fetch/save steps, but it ends
# by creating the table instead of uploading to an existing one.
# NOTE: create_table_bq is defined in a later cell of this notebook; that cell
# must be executed before this one.
file_path, partitions = get_file_path_and_partitions()

data = get_raw(url)

save_raw_local(data, file_path)

treated_data = pre_treatment(data)

save_treated_local(treated_data, file_path)

create_table_bq(dataset_id, table_id, file_path)

Uploading files: 100%|██████████| 3/3 [00:02<00:00,  1.19it/s]


In [200]:
def create_table_bq(dataset_id, table_id, file_path, mode='staging'):
    """Create and publish the table through basedosdados from local CSV data.

    Args:
        dataset_id: Target dataset identifier.
        table_id: Target table identifier.
        file_path: Path template containing ``{mode}`` and ``{filetype}``
            placeholders.
        mode: Value substituted for ``{mode}`` in the template.

    Returns:
        None.
    """
    csv_path = Path(file_path.format(filetype='csv', mode=mode))

    # Climb three levels from the CSV file (hour partition -> date partition
    # -> table directory); that directory is what gets handed to create().
    table_root = csv_path
    for _ in range(3):
        table_root = table_root.parent

    table = bd.Table(table_id=table_id, dataset_id=dataset_id)
    create_options = {
        'partitioned': True,
        'if_table_exists': 'replace',
        'if_storage_data_exists': 'replace',
        'if_table_config_exists': 'pass',
    }
    table.create(path=table_root, **create_options)
    table.publish()

In [204]:
# `tb` is otherwise only a local variable inside create_table_bq, so build the
# table handle here; this keeps the cell runnable on a fresh kernel instead of
# relying on a leftover kernel variable.
tb = bd.Table(dataset_id=dataset_id, table_id=table_id)
tb.update()

In [208]:
# Storage handle for the same dataset/table used throughout the notebook.
st = bd.Storage(dataset_id=dataset_id, table_id=table_id)
# NOTE(review): presumably removes this table's uploaded files from storage —
# destructive; confirm against the basedosdados docs before running.
st.delete_table()

In [209]:
# Display the current path template; {mode} and {filetype} are still unfilled.
file_path

'../data/{mode}/br_rj_riodejaneiro_brt_gps/registros/data=2021-02-24/hora=15/2021-02-24-15-02-56.{filetype}'

In [None]:
# Re-run table creation using the path template currently held in `file_path`.
create_table_bq(dataset_id, table_id, file_path)