# Imports

In [None]:
import pandas as pd
from datetime import datetime
import re
import configparser

# Notebook display limits: show up to 100 rows / 50 columns before truncating.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 50)

# Date on which the notebook is executed.
now = datetime.now().date()

# Create the config object and load the [paths] section.
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed; without this check a missing config.ini only surfaces
# later as an opaque KeyError on 'paths'.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini was not found (or could not be read) in the working directory"
    )
paths = config['paths']

# General Functions

In [None]:
def replace_m_k_values(row):
    """
    Expand the 'M' (million) / 'K' (thousand) shorthand in 'Negociacao diaria'.

    The value is expected in Brazilian notation ('.' as thousands separator,
    ',' as decimal mark), e.g. '1,23 M', '123,45 K' or '1.234,56'. The numeric
    part is parsed and multiplied by the suffix magnitude, so any number of
    decimal digits (including none) is handled, with or without a space before
    the suffix.

    Args:
        row (pd.Series): A row (or any mapping) holding a string under the
            'Negociacao diaria' key.

    Returns:
        pd.Series: The same row object, with 'Negociacao diaria' replaced by a
        string that ``float()`` can parse.

    Raises:
        ValueError: If a value carrying an 'M'/'K' suffix has no parseable
            numeric part.
    """
    # Local import keeps this function self-contained; Decimal avoids binary
    # floating-point artefacts when scaling the parsed amount.
    from decimal import Decimal, InvalidOperation

    column = 'Negociacao diaria'
    raw = row[column]

    # 'M' is checked before 'K', mirroring the original precedence.
    if 'M' in raw:
        suffix, factor = 'M', 10 ** 6
    elif 'K' in raw:
        suffix, factor = 'K', 10 ** 3
    else:
        # Plain number: drop thousands separators, use '.' as decimal mark.
        row[column] = raw.replace('.', '').replace(',', '.')
        return row

    number_text = (
        raw.replace(suffix, '')
        .strip()
        .replace('.', '')
        .replace(',', '.')
    )
    try:
        amount = Decimal(number_text) * factor
    except InvalidOperation as err:
        raise ValueError(
            f"Cannot parse '{column}' value: {raw!r}"
        ) from err

    # Whole amounts are emitted without a fractional part (e.g. '1230000').
    if amount == amount.to_integral_value():
        row[column] = str(int(amount))
    else:
        row[column] = format(amount, 'f')

    return row


def convert_to_scientific(value):
    """
    Convert a monetary string with a Portuguese magnitude word to scientific notation.

    The numeric part is extracted from the string and scaled by the magnitude
    keyword found in it ('Bilhões'/'Bilhão' -> 1e9, 'Milhões'/'Milhão' -> 1e6,
    'Mil' -> 1e3; no keyword -> unscaled).

    Number parsing:
        * exactly one comma -> Brazilian notation: dots are thousands
          separators and the comma is the decimal mark ('2,34', '1.234,5');
        * otherwise commas are discarded and a dot is the decimal mark
          ('2.34', '1,234,567').

    Args:
        value (str): The string containing the monetary value, e.g.
            '2,34 Bilhões'.

    Returns:
        str: The value formatted with ``'{:e}'`` (e.g. '2.340000e+09').

    Raises:
        ValueError: If no parseable number is left after stripping the text.
    """
    digits = re.sub(r'[^0-9.,]', '', value)

    if digits.count(',') == 1:
        # Decimal comma: without this branch '2,34' would be read as 234.
        digits = digits.replace('.', '').replace(',', '.')
    else:
        digits = digits.replace(',', '')

    numeric_value = float(digits)

    # Order matters: 'Mil' is a prefix of 'Milhões'/'Milhão'.
    if 'Bilhões' in value or 'Bilhão' in value:
        numeric_value *= 1e9
    elif 'Milhões' in value or 'Milhão' in value:
        numeric_value *= 1e6
    elif 'Mil' in value:
        numeric_value *= 1e3

    return f"{numeric_value:e}"


def taxa_gerencia(text):
    """
    Parse a management-fee string into a number.

    Everything from the first '%', 'a.a.' or 'de' onwards is discarded, the
    decimal comma is converted to a dot and the remainder is parsed as a float.

    Args:
        text (str): The string containing the management fee information,
            e.g. '1,20%a.a.'.

    Returns:
        float: The extracted fee. Returns 0.0 when the input is empty/falsy,
        cannot be parsed, or parses to a non-finite value (a missing cell
        arrives as NaN, which would otherwise leak through as NaN).
    """
    # Local import keeps the function self-contained.
    import math

    if not text:
        return 0.0

    cleaned = str(text).strip()
    # Keep only what precedes each marker, applied in this order.
    for marker in ('%', 'a.a.', 'de'):
        cleaned = cleaned.split(marker)[0]

    try:
        fee = float(cleaned.replace(',', '.'))
    except ValueError:
        return 0.0

    # float() happily parses 'nan' / 'inf'; treat those as invalid input.
    return fee if math.isfinite(fee) else 0.0

def display_asset_info(df, asset_name=None):
    """
    Print the score details of one asset, or of the first 20 rows of the table.

    The ranking printed for each asset is its 1-based row position in ``df``,
    so ``df`` should already be sorted by score. The index labels of ``df``
    are ignored.

    Args:
        df (DataFrame): The dataset to read from. Must contain the columns
            'Ativo', 'Score Padrao', 'Score Extra', 'Score Final',
            'Score Padrao Detalhado' and 'Score Extra Detalhado'.
        asset_name (str): The asset to display. When omitted (or empty), the
            first 20 rows are displayed instead.

    Returns:
        None. Output is written to stdout; a short message is printed when
        there is nothing to show.
    """
    columns = ['Score Padrao', 'Score Extra', 'Score Final', 'Ativo', 'Score Padrao Detalhado', 'Score Extra Detalhado']

    # Work on a positionally indexed view so that both the "top 20" selection
    # and the ranking are independent of whatever index labels `df` carries.
    ranked = df.reset_index(drop=True)

    if asset_name:
        filtered_df = ranked.loc[ranked['Ativo'] == asset_name, columns]
    else:
        filtered_df = ranked[columns].head(20)

    if filtered_df.empty:
        if asset_name:
            print(f"Asset {asset_name} not found.")
        else:
            print("No assets to display.")
        return

    for position, row in filtered_df.iterrows():
        print(f"Asset: {row['Ativo']}")
        print(f"Ranking: {position + 1}")
        print(f"Score Padrao: {row['Score Padrao']}")
        print(f"Score Final: {row['Score Final']}")

        # The detail columns hold ';'-separated labels, one per criterion met.
        print("Score Padrao Detalhado:")
        for item in row['Score Padrao Detalhado'].split(';'):
            print(f"\t{item.strip()}")

        print("Score Extra Detalhado:")
        for item in row['Score Extra Detalhado'].split(';'):
            print(f"\t{item.strip()}")

        print('-' * 40 + '\n')



# Treatments

In [None]:
# Load the per-fund details dataset.
df_details = pd.read_csv(
    paths["path_investidor10_fiis_details"],
    sep=",")

# Select the columns used by the scoring and rename them in a single pass
# (source column -> notebook column). Dict order defines the column order.
column_names = {
    "fii_name": "Ativo",
    "quote": "Preco Atual",
    "dividend_yield": "DY",
    "price_book_ratio": "P/VP",
    "liquidity": "Negociacao diaria",
    "appreciation_12months": "Variacao 12M",
    "number_unit_holders": "N/Cotistas",
    "vacancy": "Vacancia",
    "fund_type": "Tipo Fundo",
    "net_asset_value": "Valor Patrimonial",
    "management_fee": "Taxa Gerencia",
    "issued_units": "Cotas Emitidas",
    "market_sector": "Setor",
    "net_asset_value_per_unit": "VP/C",
}
df_details = df_details[list(column_names)].rename(columns=column_names)

# Remove rows with missing core data (FIIs whose data was not found on the website).
df_details = df_details.dropna(subset=['Preco Atual', 'DY', 'Negociacao diaria'], axis=0)

# Turn the '-' placeholder into zero. Only a hyphen that is NOT followed by a
# digit is replaced, so the minus sign of negative values (e.g. a negative
# 12-month variation) is preserved instead of being rewritten to '0'.
placeholder_columns = [
    'Preco Atual', 'DY', 'P/VP', 'Negociacao diaria',
    'Variacao 12M', 'N/Cotistas', 'Vacancia', 'Valor Patrimonial',
]
for column in placeholder_columns:
    df_details[column] = df_details[column].str.replace(r"-(?!\s*\d)", "0", regex=True)

# Strip currency/percent symbols and convert Brazilian number notation
# ('.' thousands, ',' decimal). regex=False is explicit because '.' and '$'
# are regex metacharacters and the default of `regex` differs between pandas
# versions.
df_details["Ativo"] = df_details["Ativo"].str.upper()
df_details["Preco Atual"] = (
    df_details["Preco Atual"]
    .str.replace("R$ ", "", regex=False)
    .str.replace(".", "", regex=False)
    .str.replace(",", ".", regex=False)
)
df_details["DY"] = (
    df_details["DY"]
    .str.replace("%", "", regex=False)
    .str.replace(",", ".", regex=False)
)
df_details["P/VP"] = (
    df_details["P/VP"]
    .str.replace(".", "", regex=False)
    .str.replace(",", ".", regex=False)
)
df_details["Negociacao diaria"] = df_details["Negociacao diaria"].str.replace("R$ ", "", regex=False)
df_details["Variacao 12M"] = (
    df_details["Variacao 12M"]
    .str.replace("%", "", regex=False)
    .str.replace(".", "", regex=False)
    .str.replace(",", ".", regex=False)
)
df_details["N/Cotistas"] = df_details["N/Cotistas"].astype("str").str.replace(".", "", regex=False)
df_details['Vacancia'] = (
    df_details['Vacancia']
    .astype("str")
    .str.replace("%", "", regex=False)
    .str.replace(".", "", regex=False)
    .str.replace(",", ".", regex=False)
)
df_details['Valor Patrimonial'] = df_details['Valor Patrimonial'].str.replace("R$ ", "", regex=False)
df_details["Taxa Gerencia"] = df_details["Taxa Gerencia"].str.replace(" ", "", regex=False)

# Apply the helper functions defined earlier in the notebook.
df_details = df_details.apply(replace_m_k_values, axis=1)
df_details['Valor Patrimonial'] = df_details['Valor Patrimonial'].apply(convert_to_scientific)
df_details['Taxa Gerencia'] = df_details['Taxa Gerencia'].apply(taxa_gerencia)

# Cast the cleaned text columns to their numeric types.
df_details = df_details.astype({
    "Preco Atual": "float",
    "P/VP": "float",
    "DY": "float",
    "Negociacao diaria": "float",
    "Variacao 12M": "float",
    "N/Cotistas": "int",
    "Vacancia": "float",
    "Valor Patrimonial": "float",
    "Taxa Gerencia": "float",
})

In [None]:
# Load the properties dataset (one row per property owned by a fund).
df_properties = pd.read_csv(
    paths["path_investidor10_fiis_properties"],
    sep=",")

# Rename the key column and normalise it the same way as df_details['Ativo']
# (upper case), so the join below does not silently miss funds because of
# letter case.
df_properties = df_properties.rename(columns={"fii_name": "Ativo"})
df_properties["Ativo"] = df_properties["Ativo"].str.upper()

# Number of properties per fund. groupby().size().reset_index(name=...) always
# yields the columns ['Ativo', 'Quant. Imoveis'], whereas the column names
# produced by value_counts().reset_index() depend on the pandas version.
df_count = df_properties.groupby('Ativo').size().reset_index(name='Quant. Imoveis')

# Left join: keep every fund from df_details, even those without properties.
df_fiis = df_details.merge(df_count, on='Ativo', how='left')

# Funds without a match have no properties listed: count them as 0.
df_fiis['Quant. Imoveis'] = df_fiis['Quant. Imoveis'].fillna(0).astype('int')

# Score Padrao

In [None]:
# Each criterion below is a (boolean Series, label) pair. A fund earns one
# point per criterion it satisfies, and the label is appended to its
# ';'-separated detail string.
df_fiis['Score Padrao'] = 0
df_fiis['Score Padrao Detalhado'] = ""

# Fund type compared case-insensitively, so 'Fundo de Papel' and
# 'Fundo de papel' (and likewise for Tijolo) are treated the same.
tipo_fundo = df_fiis['Tipo Fundo'].astype(str).str.strip().str.casefold()

condicoes = [
    # 1. P/VP: Scores FIIs whose P/VP is in the ideal range (between 0.90 and 1.10)
    ((df_fiis['P/VP'] <= 1.10) & (df_fiis['P/VP'] >= 0.90), "P/VP adequado (0.90-1.10)"),

    # 2. Daily trading: Scores FIIs with high average daily trading volume (>= 600 thousand BRL)
    (df_fiis['Negociacao diaria'] >= 600000, "Alta negociação diária (>= 600k)"),

    # 3. Dividend Yield (DY): Scores FIIs with an attractive DY (between 8% and 13%)
    ((df_fiis['DY'] >= 8) & (df_fiis['DY'] <= 13), "DY atraente (8-13%)"),

    # 4. Number of shareholders: Scores FIIs with a large number of shareholders (>= 50 thousand)
    (df_fiis['N/Cotistas'] >= 50000, "Muitos cotistas (>= 50k)"),

    # 5. Vacancy: Scores FIIs with low vacancy rates (<= 10%)
    (df_fiis['Vacancia'] <= 10, "Baixa vacância (<= 10%)"),

    # 6. Net asset value: Scores FIIs with high patrimonial value (>= 1 million BRL)
    (df_fiis['Valor Patrimonial'] >= 1e6, "Valor patrimonial alto (>= 1M)"),

    # 7. Management fee: Scores FIIs with a low management fee (<= 1%)
    (df_fiis['Taxa Gerencia'] <= 1, "Taxa de gestão baixa (<= 1%)"),

    # 8. Fund type: Scores FIIs that are of type 'Fundo de Tijolo' or 'Fundo de Papel'
    (tipo_fundo.isin(['fundo de tijolo', 'fundo de papel']), "Tipo de fundo adequado (Tijolo, Papel)"),

    # 9. 12-month variation: Scores FIIs with price variation in the range of -10% to 2%
    ((df_fiis['Variacao 12M'] >= -10) & (df_fiis['Variacao 12M'] <= 2), "variação 12M Oportunidade (-10% a 2%)"),

    # 10. Property diversification: Scores FIIs that own 5 or more properties
    (df_fiis['Quant. Imoveis'] >= 5, "Diversificação imobiliária (>= 5 imóveis)")
]

for condicao, descricao in condicoes:
    df_fiis['Score Padrao'] += condicao.astype(int)
    # Vectorised: label (plus separator) where the criterion holds, '' elsewhere.
    df_fiis['Score Padrao Detalhado'] += condicao.map({True: descricao + ";", False: ""})
# Drop the trailing separator left by the last appended label.
df_fiis['Score Padrao Detalhado'] = df_fiis['Score Padrao Detalhado'].str.rstrip(";")


# Score Extra

In [None]:
# Extra score: each criterion below selects a set of index labels; every
# selected fund earns one point and gets the criterion label appended to its
# ';'-separated detail string.
df_fiis['Score Extra'] = 0
df_fiis['Score Extra Detalhado'] = ""

# Funds with DY inside 8-10%, ordered by absolute distance between their DY
# and the mean DY of the whole table; the 5 closest are kept.
dy_band = df_fiis[(df_fiis['DY'] >= 8) & (df_fiis['DY'] <= 10)]
dy_distance = (dy_band['DY'] - df_fiis['DY'].mean()).abs()
dy_closest_positions = dy_distance.argsort()[:5]

# Funds with net asset value >= 1M and P/VP between 0.95 and 1.00.
pvp_band = df_fiis[
    (df_fiis['Valor Patrimonial'] >= 1e6)
    & (df_fiis['P/VP'] >= 0.95)
    & (df_fiis['P/VP'] <= 1.00)
]

# Funds whose 12-month variation lies between -5% and 5%.
variation_band = df_fiis[(df_fiis['Variacao 12M'] >= -5) & (df_fiis['Variacao 12M'] <= 5)]

condicoes = [
    # 1. Vacancy: the 10 smallest values
    (df_fiis['Vacancia'].nsmallest(10).index, 'Menor Vacancia (Top10)'),

    # 2. Daily trading: the 10 largest values
    (df_fiis['Negociacao diaria'].nlargest(10).index, 'Maior Negociacao Diaria (Top10)'),

    # 3. Number of shareholders: the 10 largest values
    (df_fiis['N/Cotistas'].nlargest(10).index, 'Maior N Cotistas (Top10)'),

    # 4. Dividend Yield: the 5 funds of the 8-10% band closest to the mean DY
    (dy_band.iloc[dy_closest_positions].index, 'DY Seguro (8-10%)'),

    # 5. P/VP: the 10 largest net asset values inside the P/VP band
    (pvp_band.nlargest(10, 'Valor Patrimonial').index, 'P/VP Extra (0.95 a 1.00)'),

    # 6. Property diversification: funds with 10 or more properties
    (df_fiis[df_fiis['Quant. Imoveis'] >= 10].index, 'Quant Imoveis (>= 10)'),

    # 7. 12-month variation: the 20 largest values inside the -5% to 5% band
    (variation_band.nlargest(20, 'Variacao 12M').index, 'Variacao Equilibrada (12M entre -5 e 5%)'),
]

for selected_labels, label in condicoes:
    selected = df_fiis.index.isin(selected_labels)
    df_fiis.loc[selected, 'Score Extra'] += 1
    df_fiis.loc[selected, 'Score Extra Detalhado'] += label + ";"
# Drop the trailing separator left by the last appended label.
df_fiis['Score Extra Detalhado'] = df_fiis['Score Extra Detalhado'].str.rstrip(";")

# Score Final

In [None]:
# Final score: element-wise sum of the standard and the extra score.
df_fiis['Score Final'] = df_fiis['Score Padrao'].add(df_fiis['Score Extra'])

# Conclusão

In [None]:
# Rank the funds: highest 'Score Padrao' first, ties broken by 'Score Final'.
# reset_index() (without drop) renumbers the rows 0..n-1 and keeps the previous
# index as a column.
ranking_keys = ['Score Padrao', 'Score Final']
df_fiis = df_fiis.sort_values(by=ranking_keys, ascending=False)
df_fiis = df_fiis.reset_index()

# Preview of the 20 best-ranked funds.
preview_columns = ['Score Padrao', 'Score Extra', 'Score Final', 'Ativo', 'Preco Atual', 'VP/C', 'P/VP']
df_fiis[preview_columns].head(20)

In [None]:
# Print the score breakdown of a single fund.
display_asset_info(df_fiis, asset_name='MXRF11')

In [None]:
# No asset name given: print the score breakdown of the top of the ranking.
display_asset_info(df=df_fiis)