### Imports

In [None]:
import requests
import json
import os
import pandas as pd
from time import sleep
import re
from google.cloud import storage
from datetime import datetime

### Brasil API configuration and data extraction

In [None]:
def buscar_cnpj(cnpj, timeout=30):
    """Fetch one company's registration data from BrasilAPI and save it as JSON.

    The CNPJ is reduced to its digits, looked up at
    ``https://brasilapi.com.br/api/cnpj/v1/{cnpj}`` and, on HTTP 200, the
    payload is written to ``../data/raw/{cnpj}_{YYYY-MM-DD}.json``.

    Problems are reported with ``print`` rather than raised, so a caller that
    loops over many CNPJs keeps going after a bad entry.

    Args:
        cnpj: CNPJ as a string or number; every non-digit character is dropped.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        None. The result of a successful call is the file written to disk.
    """
    raw_value = cnpj
    cnpj = re.sub(r'\D', '', str(cnpj))
    if not cnpj:
        # Nothing left to look up (e.g. an empty cell or free text).
        print(f"Invalid cnpj {raw_value!r}: no digits found")
        return

    url = f"https://brasilapi.com.br/api/cnpj/v1/{cnpj}"

    try:
        # Without a timeout, requests waits forever on a stalled connection.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"{response.status_code} for cnpj {cnpj}")
            return
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a 200 response whose body is not valid JSON.
        print(f"Connection failed {e}")
        return

    file_path = os.path.join('..', 'data/raw', f"{cnpj}_{datetime.now().strftime('%Y-%m-%d')}.json")

    try:
        # Make sure the target folder exists before writing into it.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4, ensure_ascii=False)
    except OSError as e:
        # Keep disk problems distinct from network problems in the output.
        print(f"Could not save {file_path}: {e}")
        return

    print(f"File saved successfully at: {file_path}")

In [None]:
# Single lookup; the CNPJ is given as text, the same type the CSV column is read as.
buscar_cnpj("16788289000100")

In [None]:
# Fetch every CNPJ listed in the input CSV. The column is read as text so
# leading zeros are kept; blank cells are dropped because they would otherwise
# reach buscar_cnpj as NaN and trigger a request for an empty CNPJ.
df_input = pd.read_csv(r"../data/input/cnpjs.csv", dtype={'CNPJ': str})
for cnpj in df_input['CNPJ'].dropna():
    buscar_cnpj(cnpj)
    sleep(1)  # one-second pause between calls (presumably to respect the API's rate limit)

### GCP Connection
 1. Defining the API key path
 2. Creating the connection client
 3. Referencing the bucket
 4. Testing the connection (listing the bucket's contents)

In [None]:
# Authentication (only needs to run once): point the Google client library at
# the key file. This must happen before storage.Client() is created.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../key-google.json'
client = storage.Client()
# Handle to the bucket that data_injection uploads into.
bucket = client.bucket('credit-guard-raw-sa-east1')

def data_injection(file_name):
    """Upload one file from ``../data/raw`` to the module-level ``bucket``.

    The object is stored as
    ``raw/cnpj/ingestion_date=<today>/<file_name>``, where ``<today>`` is the
    date on which this function runs.

    Args:
        file_name: Name of a file inside ``../data/raw``; also used as the last
            component of the object name in the bucket.
    """
    # Where the file lives on disk.
    source_path = os.path.join('..', 'data', 'raw', file_name)

    # Object name in the bucket, grouped by the date of this run.
    ingestion_date = datetime.now().strftime('%Y-%m-%d')
    target_name = f"raw/cnpj/ingestion_date={ingestion_date}/{file_name}"

    bucket.blob(target_name).upload_from_filename(source_path)

    print(f'file {file_name} successfully uploaded')

In [None]:
# Upload every JSON file found in the local raw-data folder; anything else is skipped.
local_files = os.listdir('../data/raw')
for file in local_files:
    if not file.endswith('.json'):
        continue
    data_injection(file)

In [None]:
# Install python-dotenv, which provides the `load_dotenv` import used below.
!pip install python-dotenv

In [None]:
from dotenv import load_dotenv
import os
import requests
import json
from pathlib import Path


# Load variables from a .env file (if present) into the process environment.
load_dotenv()

# Token placed in the Authorization header of the GitHub API calls; None when unset.
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')
# Folder that receives the JSON file written by get_github_workflow.
local_path = os.path.join('..', 'data', 'github')

#github api call
def get_github_workflow(owner='ANIBEserra', repo='Portifolio', workflow_id='231418862', timeout=30):
    """Download the jobs of a workflow's most recent run and save them as JSON.

    Two GitHub REST calls are made: the first lists the workflow's runs with
    ``per_page=1`` and takes the first entry, the second fetches the jobs of
    that run. The jobs payload is written to
    ``<local_path>/github_workflow_run.json`` and returned.

    Problems are reported with ``print`` and signalled by a ``None`` return
    value; no exception is raised to the caller for network, payload or disk
    errors.

    Args:
        owner: Account that owns the repository.
        repo: Repository name.
        workflow_id: Workflow identifier used in the runs URL.
        timeout: Seconds to wait for each GitHub request.

    Returns:
        The decoded jobs payload on success, otherwise ``None``.
    """
    headers = {'Accept': 'application/vnd.github.v3+json'}
    if GITHUB_TOKEN:
        # Only authenticate when a token is configured; sending the literal
        # text "token None" would be rejected as bad credentials.
        headers['Authorization'] = f'token {GITHUB_TOKEN}'

    base_url = f"https://api.github.com/repos/{owner}/{repo}/actions"
    url_runs = f"{base_url}/workflows/{workflow_id}/runs?per_page=1"

    try:
        # Get the last run ID
        response = requests.get(url_runs, headers=headers, timeout=timeout)
        if response.status_code != 200:
            # Stop here: without a run id the jobs URL cannot be built.
            print(f"Error: {response.status_code} while listing runs of workflow {workflow_id}")
            return None

        runs = response.json().get('workflow_runs') or []
        if not runs:
            print(f"Error: workflow {workflow_id} has no runs")
            return None
        run_id = runs[0]['id']

        # Job Details
        url_jobs = f"{base_url}/runs/{run_id}/jobs"
        response = requests.get(url_jobs, headers=headers, timeout=timeout)
        if response.status_code != 200:
            print(f"Error: {response.status_code} while fetching jobs of run {run_id}")
            return None
        jobs_data = response.json()

        # Make sure the output folder exists before writing into it.
        os.makedirs(local_path, exist_ok=True)
        file_path = os.path.join(local_path, "github_workflow_run.json")
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(jobs_data, f, indent=4, ensure_ascii=False)
        return jobs_data

    except (requests.RequestException, ValueError, KeyError, AttributeError, OSError) as e:
        # Network failure, non-JSON or unexpected payload, or a disk error.
        print(f"Error: {e}")
        return None

In [None]:
get_github_workflow()