In [1]:
import itertools
import json
import logging
import os
import time
from datetime import datetime
from typing import Any, List, Optional

import requests
from pydantic import BaseModel, Field
from tenacity import (
    after_log,
    before_sleep_log,
    retry,
    retry_if_exception_type,
    wait_exponential,
)

# Module-level logger; the retry decorator further down reports each attempt through it.
logger = logging.getLogger(__name__)
# INFO level on the root logger so those retry messages are actually shown.
logging.basicConfig(level=logging.INFO)

In [None]:
# Custom callback function that runs before each retry
def my_custom_callback(retry_state):
    """Print a short report about a failed attempt.

    Passed to tenacity as the ``before_sleep`` hook of ``get_response``.

    Args:
        retry_state: Object exposing ``attempt_number`` and ``outcome``; when
            ``outcome`` is set, its ``exception()`` result is reported.

    Returns:
        None. The function only prints.
    """
    attempt_number = retry_state.attempt_number
    # `outcome` may be unset; report None instead of failing inside the hook.
    exception = retry_state.outcome.exception() if retry_state.outcome else None

    # U+1F504 is written as an escape so the source stays pure ASCII; the literal
    # emoji had been corrupted into mojibake by an encoding round-trip.
    print(f"\U0001F504 Retry attempt #{attempt_number}")
    print(f"Exception: {exception}")

    # You can do anything here:
    # - Send a notification
    # - Log to a file
    # - Update a progress bar
    # - Send metrics to monitoring system
    # etc.

## Load Input Data

In [2]:
def _weekly_fridays(year: int) -> List[str]:
    """Return every Friday of ``year`` formatted as ``YYYY-MM-DDT00:00:00.000Z``.

    The hand-typed lists this replaces contained exactly these values
    (52 entries per year for 2023, 2024 and 2025).
    """
    friday = 4  # datetime.weekday(): Monday == 0 ... Sunday == 6
    jan_first = datetime(year, 1, 1)
    # Ordinal (day number) of the first Friday on or after January 1st.
    ordinal = jan_first.toordinal() + (friday - jan_first.weekday()) % 7

    fridays = []
    while True:
        day = datetime.fromordinal(ordinal)
        if day.year != year:
            break
        fridays.append(day.strftime("%Y-%m-%dT00:00:00.000Z"))
        ordinal += 7  # step one week at a time
    return fridays


dates_2023 = _weekly_fridays(2023)

dates_2024 = _weekly_fridays(2024)

dates_2025 = _weekly_fridays(2025)

# One list per year; the main loop issues one request per entry.
list_of_dates = [dates_2023, dates_2024, dates_2025]

In [3]:
def _load_json(path: str):
    """Read and parse one JSON payload file.

    The file is opened explicitly as UTF-8 (the encoding JSON mandates) so the
    result does not depend on the platform's default encoding.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


file_apis = "payloads/apis.json"
apis = _load_json(file_apis)

file_endpoints = "payloads/endpoints.json"
endpoints = _load_json(file_endpoints)

file_organizations = "payloads/organizations.json"
organizations = _load_json(file_organizations)

# Every ordered pair of organizations, including each organization paired with
# itself. Order matters: element 0 is used as the receiver and element 1 as the
# transmitter when requests are built.
organizations_pairs = list(itertools.product(organizations, organizations))

# # Examples
# print(type(endpoints))
# print(endpoints[0]['value'])
#
# print(apis[0])
# print(organizations[0])

In [4]:
# Values sent as the "status" filter of each request:
# 200 = success, 500 = fail.
statuses = [
    500,
    200,
]

## Model

In [5]:
class APIRequestDataPoint(BaseModel):
    """One element of the list returned by the api-requests endpoint."""

    # Pydantic treats attribute names with a leading underscore as private
    # attributes rather than fields, so the previous `_id: str` declaration was
    # never filled from the response and never serialized. The value is now read
    # from the "_id" key via an alias and written out under the name "id".
    # NOTE(review): typed loosely and optional because this key was never
    # validated before - tighten to `str` once the real payload shape is confirmed.
    id: Optional[Any] = Field(default=None, alias="_id")
    total: int
    date: str

class APIRequestCombination(BaseModel):
    """Result for one (receiver, transmitter, api, endpoint, status) combination.

    Every field defaults to None, so the annotations are declared Optional to
    match (previously e.g. `str = None`, which contradicted its own type).
    """

    receiver: Optional[str] = None
    transmitter: Optional[str] = None
    api: Optional[str] = None
    endpoint: Optional[str] = None
    status: Optional[int] = None

    # HTTP status code of each per-year request; 3 values are expected, in the
    # order [2023, 2024, 2025].
    response_status_codes: Optional[List[int]] = None
    # Data points of all per-year responses concatenated in that same order.
    data_points: Optional[List[APIRequestDataPoint]] = None

class APIRequestAllCombinations(BaseModel):
    """Top-level container that is serialized to the temp and output JSON files."""

    # Optional because the default is None (the annotation previously claimed a
    # plain List while defaulting to None).
    api_requests: Optional[List[APIRequestCombination]] = None

In [6]:
# Fetches a single year of dates per call; the main loop calls it once for each list in list_of_dates.

@retry(
    wait=wait_exponential(multiplier=1, min=1, max=60),
    # Timeout, ConnectionError and HTTPError are all subclasses of
    # RequestException, so the base class alone covers every type that was listed.
    retry=retry_if_exception_type(requests.exceptions.RequestException),
    before_sleep=my_custom_callback,  # Custom hook, runs before each retry sleep
    after=after_log(logger, logging.INFO),  # Also log after each attempt
    # NOTE(review): no `stop=` condition is configured, so a persistent failure
    # is retried indefinitely. Kept as-is; add one if the run must give up.
)
def get_response(pairs, api, endpoint, status, dates: List[str], timeout: float = 30.0):
    """POST one query to the Open Finance Brasil dashboard API.

    Args:
        pairs: Two-item sequence; item 0 is the receiver and item 1 the
            transmitter. Each item must provide a 'value' key.
        api: Mapping whose 'value' is sent as the single "apis" filter.
        endpoint: Mapping whose 'value' is sent as the single "endpoints" filter.
        status: Value sent as the "status" field of the payload.
        dates: Date strings sent as the "dates" field of the payload.
        timeout: Seconds passed to ``requests.post``. Without a timeout a
            stalled connection would block forever and the retry logic could
            never run.

    Returns:
        The ``requests.Response``. Status codes other than 429/500/502/503/504
        are returned to the caller without raising.

    Raises:
        requests.exceptions.RequestException: on timeouts, connection problems
            or one of the status codes above; the decorator retries these.
    """
    url = "https://dashboard.openfinancebrasil.org.br/api/api-requests"

    payload = {
        "axis": "date",
        "dates": dates,
        "phase": "transactional-data",

        "receivers": [pairs[0]['value']],
        "transmitters": [pairs[1]['value']],
        "apis": [api['value']],
        "endpoints": [endpoint['value']],
        "status": status
    }

    headers = {
        "Content-Type": "application/json",
        "Origin": "https://dashboard.openfinancebrasil.org.br",
        "User-Agent": "Mozilla/5.0"
    }

    response = requests.post(url, json=payload, headers=headers, timeout=timeout)

    # Raise for rate limiting or server errors so the decorator retries them.
    if response.status_code in [429, 500, 502, 503, 504]:
        response.raise_for_status()

    return response

In [7]:
def _send_ntfy(message):
    """Best-effort POST of ``message`` to the ntfy topic used by this notebook.

    A failed notification is logged and swallowed so that it cannot abort the
    scrape itself.
    """
    # NOTE(review): the topic name is hard-coded; anyone who knows it can read
    # or post to it. Consider moving it to configuration.
    try:
        requests.post("https://ntfy.sh/openfinancebrazil374628", data=message, timeout=10)
    except requests.exceptions.RequestException as exc:
        logger.warning("Could not send ntfy notification: %s", exc)


def send_webhook_start(time):
    """Notify that scraping started.

    Args:
        time: Text inserted into the message. The parameter name shadows the
            ``time`` module inside this function; it is kept for compatibility.
    """
    _send_ntfy(f"Started Scraping Open Finance Brazil Data at {time}")


def send_webhook_finish(time):
    """Notify that scraping finished.

    Args:
        time: Text inserted into the message. The parameter name shadows the
            ``time`` module inside this function; it is kept for compatibility.
    """
    _send_ntfy(f"Finished Scraping Open Finance Brazil Data at {time}")

## Main Loop

In [8]:
start_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Create the target folders up front so a missing directory cannot abort the
# run at the first checkpoint write or at the final save.
os.makedirs("temp", exist_ok=True)
os.makedirs("output", exist_ok=True)

send_webhook_start(start_str)

requests_list = []
try:
    for pair, api, endpoint, status in itertools.product(
        organizations_pairs,
        apis,
        endpoints,
        statuses
    ):
        values = []
        response_codes = []
        # One request per year; the results are concatenated in list_of_dates order.
        for dates in list_of_dates:
            response = get_response(pair, api, endpoint, status, dates)

            response_codes.append(response.status_code)

            batch = response.json()
            # A non-list body cannot be concatenated; fail loudly instead of
            # silently merging e.g. the keys of an error object.
            if not isinstance(batch, list):
                raise TypeError(
                    f"Expected a JSON list from the API, got {type(batch).__name__}"
                )
            values.extend(batch)

            time.sleep(0.05)

        api_request = APIRequestCombination(
            receiver = pair[0]['label'],
            transmitter = pair[1]['label'],
            api = api['label'],
            endpoint = endpoint['label'],
            status = status,

            response_status_codes = response_codes,
            data_points = values,
        )

        requests_list.append(api_request)

        # Checkpoint: rewrite the temp file with everything collected so far.
        output_temp = APIRequestAllCombinations(
            api_requests = requests_list)
        with open(f"temp/open_finance_brazil_temp-{start_str}.json", "w") as f:
            f.write(output_temp.model_dump_json(indent=2))

        time.sleep(0.1)

except Exception as e:
    # Keep going to the final save below, but record the full traceback
    # instead of only the message.
    logger.exception("Error: %s", e)

finally:
    # Runs on normal completion, on errors and on KeyboardInterrupt (which is
    # not an Exception and previously skipped the final save entirely).
    output = APIRequestAllCombinations(
        api_requests = requests_list)

    finish_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    with open(f"output/open_finance_brazil-{finish_time}.json", "w") as f:
        f.write(output.model_dump_json(indent=2))

    send_webhook_finish(finish_time)

KeyboardInterrupt: 

## Debugging (ignore)

In [None]:
# Manual fallback: write whatever is currently in `requests_list` to a
# timestamped file in output/ (same format as the final save of the main loop).
output = APIRequestAllCombinations(api_requests=requests_list)

now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
output_path = f"output/open_finance_brazil-{now_str}.json"
with open(output_path, "w") as out_file:
    out_file.write(output.model_dump_json(indent=2))

In [None]:
# One-off request for inspecting the raw API response by hand.
url = "https://dashboard.openfinancebrasil.org.br/api/api-requests"

payload = {
    "axis": "date",
    "dates": dates_2023,
    "phase": "transactional-data",

    "receivers": [organizations[0]['value']],
    "transmitters": [organizations[0]['value']],
    "apis": [apis[1]['value']],
    "endpoints": [endpoints[2]['value']],
    "status": 500
}

headers = {
    "Content-Type": "application/json",
    "Origin": "https://dashboard.openfinancebrasil.org.br",
    "User-Agent": "Mozilla/5.0"
}

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the status first so it is visible even if the body is not valid JSON.
print("Status:", response.status_code)

# Parse the body once and reuse it (it was previously parsed three times).
haha = response.json()
haha_pretty = json.dumps(haha, indent=2)
print(type(haha_pretty))
print(haha_pretty)

In [None]:
# Show the parsed JSON body kept from the debug request cell.
print(haha)

In [None]:
# Scratch sample: the first two elements of the parsed response.
hihi = [haha[position] for position in (0, 1)]

In [None]:
# Scratch check of list concatenation: all of `haha` followed by all of `hihi`.
huhu = [*haha, *hihi]

In [None]:
# Show the concatenated scratch list built in the previous cell.
print(huhu)