In [5]:
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Set

import pandas as pd


@dataclass
class B3Service:
    """
    B3Service: A service for obtaining B3 tickers from portfolio composition files.

    Args:
    - portfolio_composition_path (Path): The path to the directory containing portfolio composition files.
    - tickers (Set[str]): Optional initial set of tickers. Defaults to a new empty set per instance.

    Attributes:
    - portfolio_composition_path (Path): The path to the directory containing portfolio composition files.
    - tickers (Set[str]): Valid tickers accumulated so far. get_b3_tickers() adds to this set and returns it.
    """

    portfolio_composition_path: Path
    tickers: Set[str] = field(default_factory=set)

    def _is_valid_ticker(self, ticker: object) -> bool:
        """
        Checks if a ticker is valid.

        A valid ticker is a string made of exactly four uppercase ASCII letters
        followed by one or two ASCII digits (e.g. "PETR4", "ALUP11").

        Args:
        - ticker (object): Value to be validated. Non-string values are accepted and rejected as invalid.

        Returns:
        - bool: True if the ticker is valid, False otherwise.
        """
        # pandas represents empty CSV cells as NaN (a float); passing that to the
        # regex engine would raise TypeError, so non-strings are rejected up front.
        if not isinstance(ticker, str):
            return False
        # fullmatch must consume the entire string. match() with a "$" anchor would
        # also accept a trailing newline ("PETR4\n"). [0-9] is used instead of \d so
        # that non-ASCII Unicode digits are not accepted.
        return re.fullmatch(r"[A-Z]{4}[0-9]{1,2}", ticker) is not None

    def _read_tickers_from_file(self, file: Path) -> pd.Series:
        """
        Reads the first column of a given file.

        The file is parsed as ';'-delimited text encoded in ISO-8859-1. Values are
        returned unfiltered; validation happens in get_b3_tickers().

        Args:
        - file (Path): The file from which tickers need to be read.

        Returns:
        - pd.Series: Series containing the raw values of the first column.
        """
        return pd.read_csv(
            str(file.absolute()), encoding="iso-8859-1", delimiter=";", index_col=False, usecols=[0]
        ).iloc[:, 0]

    def _update_tickers(self, new_tickers: Set[str]) -> None:
        """
        Updates the set of tickers.

        Args:
        - new_tickers (Set[str]): New tickers to be added.
        """
        self.tickers.update(new_tickers)

    def get_b3_tickers(self) -> Set[str]:
        """
        Gets B3 tickers from portfolio composition files.

        Every regular file with a ".csv" suffix directly inside
        portfolio_composition_path is read. Values in its first column that pass
        _is_valid_ticker() are added to self.tickers.

        Returns:
        - Set[str]: Set of B3 tickers (the same object as self.tickers).
        """
        for file in self.portfolio_composition_path.iterdir():
            # Skip sub-directories and anything that is not a CSV file.
            if not file.is_file() or file.suffix != ".csv":
                continue

            raw_tickers = self._read_tickers_from_file(file)

            valid_tickers = {ticker for ticker in raw_tickers if self._is_valid_ticker(ticker)}
            self._update_tickers(valid_tickers)

        return self.tickers

    def get_b3_tickers_in_yahoo_format(self) -> Set[str]:
        """
        Gets B3 tickers with the ".SA" suffix appended to each one.

        Returns:
        - Set[str]: Set of tickers from get_b3_tickers(), each formatted as "<ticker>.SA".
        """
        return {f"{ticker}.SA" for ticker in self.get_b3_tickers()}


In [6]:
# Directory with the portfolio composition CSV files, relative to the working directory.
portfolio_composition_path = Path("data/B3/portfolio_composition")
b3_service = B3Service(portfolio_composition_path=portfolio_composition_path)

# Collect the valid tickers found across every CSV file in that directory.
b3_tickers = b3_service.get_b3_tickers()

b3_tickers

{'AALR3',
 'ABCB4',
 'ABEV3',
 'AERI3',
 'AESB3',
 'AGRO3',
 'ALOS3',
 'ALPA4',
 'ALUP11',
 'AMBP3',
 'ANIM3',
 'ARML3',
 'ARZZ3',
 'ASAI3',
 'AURE3',
 'AZUL4',
 'BBAS3',
 'BBDC3',
 'BBDC4',
 'BBSE3',
 'BEEF3',
 'BHIA3',
 'BLAU3',
 'BMOB3',
 'BPAC11',
 'BPAN4',
 'BRAP4',
 'BRFS3',
 'BRKM5',
 'BRSR6',
 'CAML3',
 'CASH3',
 'CBAV3',
 'CCRO3',
 'CEAB3',
 'CIEL3',
 'CLSA3',
 'CMIG3',
 'CMIG4',
 'CMIN3',
 'COGN3',
 'CPFE3',
 'CPLE3',
 'CPLE6',
 'CRFB3',
 'CSAN3',
 'CSMG3',
 'CSNA3',
 'CURY3',
 'CVCB3',
 'CXSE3',
 'CYRE3',
 'DASA3',
 'DIRR3',
 'DXCO3',
 'ECOR3',
 'EGIE3',
 'ELET3',
 'ELET6',
 'EMBR3',
 'ENAT3',
 'ENEV3',
 'ENGI11',
 'EQTL3',
 'ESPA3',
 'EVEN3',
 'EZTC3',
 'FESA4',
 'FLRY3',
 'FRAS3',
 'GFSA3',
 'GGBR4',
 'GGPS3',
 'GOAU4',
 'GOLL4',
 'GRND3',
 'GUAR3',
 'HAPV3',
 'HBSA3',
 'HYPE3',
 'IFCM3',
 'IGTI11',
 'INTB3',
 'IRBR3',
 'ITSA4',
 'ITUB4',
 'JALL3',
 'JBSS3',
 'JHSF3',
 'KEPL3',
 'KLBN11',
 'LAVV3',
 'LEVE3',
 'LJQQ3',
 'LOGG3',
 'LOGN3',
 'LREN3',
 'LUPA3',
 'LWSA3',
 'MAT

In [7]:
# Number of distinct tickers in the set returned by get_b3_tickers().
len(b3_tickers)

174