### Códigos de Agentes Regulados
Notebook para extração da tabela SIMP e sua implementação no BigQuery.

In [5]:
from bs4 import BeautifulSoup
from io import BytesIO
from google.cloud import bigquery
import zipfile
import re
import os
import requests
import zipfile
import pandas as pd

In [6]:
# Address of the ISIMP ZIP bundle on the ANP download host. The download cell
# fetches it and reads "T001-Codigos_agentes_regulados.xlsx" out of it.
BASE_URL = "https://csa.anp.gov.br/downloads/manuais-isimp/Geracao-de-tabelas-ISIMP.zip"

In [7]:
# Download the ZIP bundle into memory.
#
# NOTE(review): verify=False turns off TLS certificate validation, so the
# payload is not authenticated. It is kept here to preserve the current
# behaviour -- confirm whether the host still requires it, or pass a CA
# bundle path through `verify=` instead.
#
# requests applies no timeout by default; the (connect, read) timeout below
# makes a stalled server raise instead of hanging the cell indefinitely.
response = requests.get(BASE_URL, verify=False, timeout=(10, 120))
if response.status_code != 200:
    # Anything other than a plain 200 is treated as a failed download.
    raise Exception(f"Failed to download file: {response.status_code}")

# Read the spreadsheet directly from the in-memory ZIP payload. header=1 makes
# pandas take the column titles from the second row of the sheet.
with zipfile.ZipFile(BytesIO(response.content)) as archive, archive.open(
    "T001-Codigos_agentes_regulados.xlsx"
) as workbook:
    df = pd.read_excel(workbook, header=1)

# Map the spreadsheet headings onto the column names used for the table.
column_names = {
    "Raiz CNPJ": "agnt_num_base_cnpj",
    "Razão Social": "agnt_nom_razao_social",
}
df = df.rename(columns=column_names)

# Render the CNPJ root as text, left-padded with zeros to eight characters.
cnpj_text = df["agnt_num_base_cnpj"].astype(str)
df["agnt_num_base_cnpj"] = cnpj_text.str.zfill(8)

# Keep only the two columns of interest, company name first.
df = df.loc[:, ["agnt_nom_razao_social", "agnt_num_base_cnpj"]]

# Hard-coded agents that are appended to the spreadsheet data.
# Each record is (company name, 8-digit CNPJ root kept as text so that
# leading zeros survive).
_additional_agents = [
    ("ALESAT COMBUSTÍVEIS S. A.", "23314594"),
    ("ASTER PETRÓLEO LTDA.", "02377759"),
    ("AUTO POSTO HAACKE LTDA", "00134794"),
    ("DINAMICA TERMINAIS DE COMBUSTIVEIS E DISTRIBUIDORA S/A", "41189488"),
    ("FLEX DISTRIBUIDORA DE PETRÓLEO LTDA.", "10918655"),
    ("MIDAS DISTRIBUIDORA DE COMBUSTIVEIS LTDA.", "19700983"),
    ("NOROESTE DISTIBUIDORA DE COMBUSTÍVEIS LTDA.", "01966325"),
    ("PETROGOIÁS DISTRIBUIDORA DE PETRÓLEO LTDA.", "05470445"),
    ("PETRORIENTE DISTRIBUIDORA DE COMBUSTÍVEIS LTDA", "37020090"),
    ("PODIUM DISTRIBUIDORA DE PETRÓLEO LTDA.", "07253302"),
    ("REJAILE DISTRIBUIDORA DE PETRÓLEO LTDA", "00209895"),
    ("SETTA COMBUSTÍVEIS S/A.", "55483564"),
    ("START PETROLEO S.A.", "39334434"),
    ("SUL COMBUSTÍVEIS LTDA.", "06278750"),
    ("THEX COMBUSTIVEIS LTDA", "39476085"),
]

additional_data = pd.DataFrame(
    _additional_agents,
    columns=["agnt_nom_razao_social", "agnt_num_base_cnpj"],
)

# Append the hard-coded agents after the spreadsheet rows. ignore_index=True
# renumbers the combined frame from 0 instead of keeping both source indexes.
df = pd.concat([df, additional_data], ignore_index=True)



In [8]:
"""
Inserting data into BigQuery
"""
bq_client = bigquery.Client()
# Fall back to the client's own project when GOOGLE_CLOUD_PROJECT is unset;
# otherwise the table id below would start with the literal text "None".
project_id = os.getenv("GOOGLE_CLOUD_PROJECT") or bq_client.project
bq_dataset = "rf_ext_anp"
table_name = "cnpj_agentes"
table_id = f"{project_id}.{bq_dataset}.{table_name}"

# WRITE_TRUNCATE: every run replaces whatever the table held before.
job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

try:
    job = bq_client.load_table_from_dataframe(
        df, table_id, job_config=job_config
    )
    # Wait for the load job to finish; a failed job raises here.
    job.result()
except Exception as e:
    print(f"  Error inserting data for {table_id}: {str(e)}")
    # Re-raise so a failed load fails the cell instead of looking like success.
    raise
else:
    # Success messages are only printed when the load actually went through.
    print(f"  Data for {table_id} inserted successfully.")
    print("Data insertion completed!")

  Data for labs-vibra-final.rf_ext_anp.cnpj_agentes inserted successfully.
Data insertion completed!
