### Imports

In [4]:
import requests
import json
import os
import pandas as pd
from time import sleep
import re
from google.cloud import storage
from datetime import datetime

### Brasil API configuration and data extraction

In [None]:
def buscar_cnpj(cnpj, timeout=30):
    """Fetch one company's registration data from BrasilAPI and save it as JSON.

    The CNPJ is reduced to its digits, looked up at
    ``https://brasilapi.com.br/api/cnpj/v1/{cnpj}`` and, on HTTP 200, the
    response body is written to ``../data/raw/{cnpj}_{YYYY-MM-DD}.json``.
    Failures are reported with ``print`` instead of being raised, so a batch
    loop can move on to the next CNPJ.

    Args:
        cnpj: CNPJ as a string (punctuation allowed) or as an integer.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. The outcome is reported on stdout.
    """
    digits = re.sub(r'\D', '', str(cnpj))
    if not digits:
        # Nothing to look up; avoid a pointless request to the API.
        print(f"No digits found in cnpj {cnpj!r}")
        return

    # A CNPJ has 14 digits; integer inputs lose their leading zeros, so pad
    # them back before building the URL and the file name.
    cnpj = digits.zfill(14)
    url = f"https://brasilapi.com.br/api/cnpj/v1/{cnpj}"

    try:
        # Without a timeout a stalled connection would block the whole batch.
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Connection failed {e}")
        return

    if response.status_code != 200:
        print(f"{response.status_code} for cnpj {cnpj}")
        return

    try:
        data = response.json()

        # Build the path component by component (same layout data_injection
        # reads from) and make sure the folder exists on a fresh checkout.
        raw_dir = os.path.join('..', 'data', 'raw')
        os.makedirs(raw_dir, exist_ok=True)
        file_path = os.path.join(raw_dir, f"{cnpj}_{datetime.now().strftime('%Y-%m-%d')}.json")

        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4, ensure_ascii=False)
    except (ValueError, OSError) as e:
        # ValueError: body was not valid JSON; OSError: the file could not be written.
        print(f"Could not save response for cnpj {cnpj}: {e}")
        return

    print(f"File saved successfully at: {file_path}")

In [11]:
# Single-CNPJ smoke test before running the full batch.
buscar_cnpj("16788289000100")

400 para o cnpj 16788289000100


In [3]:
# Read the CNPJ column as text so leading zeros are preserved.
df_input = pd.read_csv(r"../data/input/cnpjs.csv", dtype={'CNPJ': str})

# Normalise to digits only, then skip blank cells and repeated CNPJs so no
# rate-limited API call is spent on an empty or duplicate lookup.
cnpjs_to_fetch = (
    df_input['CNPJ']
    .dropna()
    .str.replace(r'\D', '', regex=True)
    .loc[lambda s: s.ne('')]
    .drop_duplicates()
)

for cnpj in cnpjs_to_fetch:
    buscar_cnpj(cnpj)
    sleep(1)  # pause between requests to stay gentle on the public API

Arquivo salvo com sucesso em: ..\data/raw\01027058000191_2026-02-02.json
Arquivo salvo com sucesso em: ..\data/raw\16501555000157_2026-02-02.json
Arquivo salvo com sucesso em: ..\data/raw\10440482000154_2026-02-02.json
Arquivo salvo com sucesso em: ..\data/raw\08561701000101_2026-02-02.json
400 para o cnpj 03145247000139
400 para o cnpj 11152062000122
Arquivo salvo com sucesso em: ..\data/raw\03017677000120_2026-02-02.json
400 para o cnpj 11822453000184
400 para o cnpj 13264426000140
Arquivo salvo com sucesso em: ..\data/raw\04740876000125_2026-02-02.json


### GCP Connection
 1. Defining the service-account key path
 2. Creating the connection client
 3. Referencing the bucket
 4. Testing the connection (listing what the bucket has inside)

In [4]:
# Authentication (only needs to run once)
credentials_path = '../key-google.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path

# Storage client and a handle to the raw-data bucket
bucket_name = 'credit-guard-raw-sa-east1'
client = storage.Client()
bucket = client.bucket(bucket_name)

def data_injection(file_name):
    """Upload one file from ``../data/raw`` to the module-level ``bucket``.

    The object is stored under
    ``raw/cnpj/ingestion_date=<today>/<file_name>``, where ``<today>`` is the
    current date formatted as ``YYYY-MM-DD``.

    Args:
        file_name: Name of the file inside ``../data/raw`` (no directory part).
    """
    # Where the file lives on disk
    source_path = os.path.join('..', 'data', 'raw', file_name)

    # Where it goes in the bucket, partitioned by the upload date
    ingestion_date = datetime.now().strftime('%Y-%m-%d')
    target_key = f"raw/cnpj/ingestion_date={ingestion_date}/{file_name}"

    # Create the blob handle and push the local file in one step
    bucket.blob(target_key).upload_from_filename(source_path)

    print(f'file {file_name} successfully uploaded')

In [5]:
# Upload every JSON file found in the raw folder; anything else is ignored.
local_files = os.listdir('../data/raw')
json_files = [name for name in local_files if name.endswith('.json')]
for file in json_files:
    data_injection(file)

file 01027058000191_2026-02-02.json successfully uploaded
file 03017677000120_2026-02-02.json successfully uploaded
file 04740876000125_2026-02-02.json successfully uploaded
file 08561701000101_2026-02-02.json successfully uploaded
file 10440482000154_2026-02-02.json successfully uploaded
file 16501555000157_2026-02-02.json successfully uploaded
