# Portfolio Analysis Script

This script processes financial data on investments in real estate investment funds (FIIs) and stocks, using pandas for data manipulation, Matplotlib for charts, and Tkinter for possible GUI integration.

The steps include:
- Reading and cleaning financial data.
- Merging various data sources (investment data and market data).
- Calculating portfolio statistics.
- Generating basic reports on investment status.

In [None]:
import pandas as pd
import os
import getpass
from datetime import datetime
import tkinter as tk
from tkinter import ttk
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import numpy as np
import configparser

# Current calendar date (the time-of-day part is discarded)
now = datetime.now().date()

# Pandas display limits: up to 100 rows and 50 columns are rendered
# before a DataFrame is truncated in the output
for _option, _limit in (('display.max_rows', 100), ('display.max_columns', 50)):
    pd.set_option(_option, _limit)

## Configuration and Data Loading

This section loads the configuration file (`config.ini`) for paths and reads the data from CSV files containing investment reports and market quotes.


In [None]:
# Load configuration file.
# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed, so an empty result means config.ini was
# not found/readable. Fail here with a clear message instead of a bare
# KeyError on the section lookup below.
config = configparser.ConfigParser()
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini was not found or could not be read from the current working directory"
    )
if not config.has_section('paths'):
    raise KeyError("config.ini has no [paths] section")

# Section holding the input/output file paths used throughout the notebook
paths = config['paths']

# Load investment report
df = pd.read_csv(paths['path_treated_b3_report'], sep=",")

# Filter relevant columns. The explicit copy makes the column assignments
# below operate on an independent frame (no SettingWithCopyWarning).
mask = [
    "Tipo Ativo", "Ativo", "Data Negociacao", "Quantidade Atual",
    "Preco Medio Atual", "Investimento Atual", "Lucro Real", "Prejuizo"
]
df = df[mask].copy()

# Convert "Data Negociacao" to datetime format
# NOTE(review): relies on pd.to_datetime inferring the report's date format
# correctly -- confirm, and pass format=/dayfirst= if dates are dd/mm/yyyy.
df['Data Negociacao'] = pd.to_datetime(df['Data Negociacao'])

# Rename columns for consistency
df = df.rename(columns={
    "Investimento Atual": "Investimento",
    "Quantidade Atual": "Quantidade Total",
    "Preco Medio Atual": "Preco Medio",
    "Lucro Real": "Lucro"
})

# Total realised "Lucro" and "Prejuizo" per asset, over every trade row
group_lucro_prej = df.groupby("Ativo")[['Lucro', 'Prejuizo']].sum()

# Keep the latest entry for each "Ativo".
# The rows are ordered by trade date first so "last" really is the most
# recent trade even when the file is not in chronological order. The sort is
# stable, so trades on the same date keep their file order, and rows with a
# missing date are never preferred over dated ones. sort_index() then
# restores the original file order of the surviving rows.
df = (
    df.sort_values('Data Negociacao', kind='stable', na_position='first')
    .drop_duplicates(subset='Ativo', keep='last')
    .sort_index()
)

# Keep only the position columns (the trade date is no longer needed)
df = df.loc[:, ["Tipo Ativo", "Ativo", "Quantidade Total", "Preco Medio", "Investimento"]]

# Attach the per-asset "Lucro" and "Prejuizo" totals
df = df.merge(group_lucro_prej, on="Ativo", how="left")

# Round financial values to two decimal places
df[['Investimento', 'Preco Medio']] = df[['Investimento', 'Preco Medio']].round(2)


## Market Data Cleaning

Here we load, clean, and preprocess the market data related to FIIs, such as price, dividend yield, and other financial indicators. The data is then merged with the investment data.


In [None]:
# Load FII market data
df_quote = pd.read_csv(paths['path_investidor10_fiis_details'], sep=",")

# Select relevant columns
df_quote = df_quote[[
    "fii_name", "quote", "dividend_yield", "price_book_ratio",
    "liquidity", "appreciation_12months", "number_unit_holders",
    "vacancy", "fund_type"
]]

# Rename columns for consistency
df_quote = df_quote.rename(columns={
    "fii_name": "Ativo",
    "quote": "Preco Atual",
    "dividend_yield": "Dividend Yield",
    "price_book_ratio": "P/VP",
    "liquidity": "Negociacao diaria",
    "appreciation_12months": "Variacao 12M",
    "number_unit_holders": "N/Cotistas",
    "vacancy": "Vacancia",
    "fund_type": "Tipo Fundo"
})

# Remove rows with null values in relevant columns
df_quote = df_quote.dropna(subset=['Preco Atual', 'Dividend Yield', 'Negociacao diaria'], axis=0)

# A lone dash is the placeholder for a missing value: turn it into "0".
# Only cells that consist solely of a dash are replaced; replacing every
# dash character would also strip the minus sign from negative figures
# (e.g. a negative 12-month variation would silently become positive).
cols = ['Preco Atual', 'Dividend Yield', 'P/VP', 'Negociacao diaria', 'Variacao 12M', 'N/Cotistas', 'Vacancia']
df_quote[cols] = df_quote[cols].apply(lambda x: x.str.strip().replace("-", "0"))

# Clean and format columns (Brazilian notation: "." thousands, "," decimals).
# regex=False is explicit because "R$ " and "." are meant literally; as
# regular expressions "$" is an anchor and "." matches any character, and
# the default of `regex` differs between pandas versions.
df_quote["Preco Atual"] = (
    df_quote["Preco Atual"]
    .str.replace("R$ ", "", regex=False)
    .str.replace(".", "", regex=False)
    .str.replace(",", ".", regex=False)
)
df_quote["Dividend Yield"] = (
    df_quote["Dividend Yield"]
    .str.replace("%", "", regex=False)
    .str.replace(",", ".", regex=False)
)
df_quote["P/VP"] = (
    df_quote["P/VP"]
    .str.replace(".", "", regex=False)
    .str.replace(",", ".", regex=False)
)
# Only the currency prefix is removed here; the K/M magnitude suffix is
# expanded later by replace_m_k_values.
df_quote["Negociacao diaria"] = df_quote["Negociacao diaria"].str.replace("R$ ", "", regex=False)
df_quote["Variacao 12M"] = (
    df_quote["Variacao 12M"]
    .str.replace("%", "", regex=False)
    .str.replace(".", "", regex=False)
    .str.replace(",", ".", regex=False)
)
df_quote["N/Cotistas"] = df_quote["N/Cotistas"].astype("str").str.replace(".", "", regex=False)
df_quote['Vacancia'] = (
    df_quote['Vacancia']
    .astype("str")
    .str.replace("%", "", regex=False)
    .str.replace(".", "", regex=False)
    .str.replace(",", ".", regex=False)
)

# Function to handle values in thousands (K), millions (M) or billions (B)
def replace_m_k_values(row):
    """Expand the magnitude suffix of the "Negociacao diaria" value of *row*.

    The value is expected in Brazilian notation ("." thousands separator,
    "," decimal separator), optionally followed by a K/M/B suffix, e.g.
    "1,5 M" or "845,3 K". It is replaced by a plain decimal string that
    ``float()`` can parse ("1500000", "845300").

    The scaling is done with exact decimal arithmetic. Appending a fixed
    number of zeros is only correct when the value has exactly two decimal
    digits ("1,5 M" would become 150000 instead of 1500000).

    Args:
        row: Mapping-like object (pandas Series or dict) with a
            "Negociacao diaria" entry.

    Returns:
        A copy of *row* with the normalised value, or *row* itself,
        unchanged, when the value is not a string (e.g. NaN).

    Raises:
        ValueError: If a suffixed value has no parseable numeric part.
    """
    from decimal import Decimal, InvalidOperation

    multipliers = {"K": 10 ** 3, "M": 10 ** 6, "B": 10 ** 9}

    raw = row["Negociacao diaria"]
    if not isinstance(raw, str):
        # Missing/non-text cells are left for the caller to deal with.
        return row

    # Work on a copy so the object handed in by DataFrame.apply is not mutated.
    row = row.copy()

    # Tolerate a leftover currency prefix and surrounding whitespace.
    text = raw.replace("R$", "").strip()
    suffix = text[-1:].upper()

    if suffix in multipliers:
        number = text[:-1].strip().replace(".", "").replace(",", ".")
        try:
            scaled = Decimal(number) * multipliers[suffix]
        except InvalidOperation as err:
            raise ValueError(f"Cannot parse traded volume {raw!r}") from err
        plain = format(scaled, "f")
        if "." in plain:
            # Drop a purely zero fractional part: "1230000.00" -> "1230000".
            plain = plain.rstrip("0").rstrip(".")
        row["Negociacao diaria"] = plain
    else:
        # No suffix: just convert Brazilian notation to a float-parseable one.
        row["Negociacao diaria"] = text.replace(".", "").replace(",", ".")
    return row

# Expand the K/M magnitude suffixes of "Negociacao diaria", one row at a time
df_quote = df_quote.apply(replace_m_k_values, axis=1)

# Cast the cleaned text columns to numeric dtypes in a single pass
quote_dtypes = {
    "Preco Atual": "float",
    "P/VP": "float",
    "Dividend Yield": "float",
    "Negociacao diaria": "float",
    "Variacao 12M": "float",
    "N/Cotistas": "int",
    "Vacancia": "float",
}
df_quote = df_quote.astype(quote_dtypes)

## Merging Investment and Market Data

Here we merge the processed investment data and market data for a comprehensive view of the portfolio.


In [None]:

# Left-join the positions with the market data: every position is kept, and
# assets without a quote row get NaN in the market columns
df_investiments = df.merge(df_quote, how="left", on="Ativo")

# Balance per asset: current market value minus the amount invested
market_value = df_investiments['Quantidade Total'] * df_investiments['Preco Atual']
df_investiments['Saldo'] = (market_value - df_investiments['Investimento']).round(2)

# Total invested within each asset class ("Tipo Ativo"), repeated on every row
invested_by_class = df_investiments.groupby('Tipo Ativo')['Investimento']
df_investiments["V/TA"] = invested_by_class.transform('sum')

# Share (in %) of each asset relative to the total of its own asset class
class_share = df_investiments['Investimento'] * 100 / df_investiments['V/TA']
df_investiments["% Carteira"] = class_share.round(2)

# Keep the report columns, in presentation order (drops the helper "V/TA")
report_columns = [
    'Tipo Ativo', 'Ativo', 'Investimento', 'Quantidade Total', 'Preco Medio',
    'Preco Atual', 'Variacao 12M', 'Saldo', 'Lucro', 'Prejuizo', '% Carteira',
    'Dividend Yield', 'P/VP', 'Negociacao diaria', 'N/Cotistas', 'Vacancia', 'Tipo Fundo',
]
df_investiments = df_investiments[report_columns]


## Portfolio Summary

This section calculates the total investments and displays a summary of the portfolio, including details of each asset and the allocation percentage.


In [None]:
# Amount invested across every asset in the merged table
total_investimentos = df_investiments.loc[:, "Investimento"].sum()

# Summary header, then the grand total formatted with two decimals
print("Resumo de carteira\n")
print(f"Total de Investimentos: R$ {total_investimentos:.2f}")

In [None]:
# Columns shown in both the "in portfolio" and "out of portfolio" tables
carteira_cols = [
    'Tipo Ativo', 'Ativo', 'Investimento', 'Quantidade Total', 'Preco Medio',
    'Preco Atual', 'Variacao 12M', 'Saldo', 'Lucro', 'Prejuizo', '% Carteira'
]

# Assets currently held (Investimento > 0), sorted by asset type and then by
# share, both descending
df_carteira = df_investiments.loc[df_investiments["Investimento"] > 0, carteira_cols]
df_carteira = df_carteira.sort_values(['Tipo Ativo', '% Carteira'], ascending=False)

# Show top 50 assets in the portfolio.
# A bare expression is only rendered when it is the last statement of a
# notebook cell (and never in a script), so the tables are printed explicitly.
print(df_carteira.head(50))

# Assets that are no longer in the portfolio (Investimento == 0)
df_fora_carteira = df_investiments.loc[df_investiments["Investimento"] == 0, carteira_cols]

# Display assets outside the portfolio
print(df_fora_carteira.head(50))

# Group by asset type to summarize total investment and balance
df_general = df_carteira.groupby(['Tipo Ativo'])[['Investimento', 'Saldo']].sum()

# Percentage of the total invested amount held in each asset type
df_general["% Carteira"] = ((df_general['Investimento'] * 100) / total_investimentos).round(2)

# Display summary for each asset type
print(df_general.head())


## Tax Consideration for FIIs

Here we check the number of unit holders (`N/Cotistas`) of each FII and, based on that number, flag whether the fund is subject to tax. An FII with fewer than 100 unit holders (cotistas) is reported as "NOT SAFE" (subject to tax); otherwise it is reported as "SAFE".


In [None]:
# Check for tax consideration based on number of unit holders (N/Cotistas) for FIIs.
# An FII with fewer than `min_cotistas` unit holders is reported as NOT SAFE;
# a fund with exactly the minimum is SAFE.
# NOTE(review): the threshold of 100 comes from this notebook's own
# description -- confirm it against the tax rules currently in force.
min_cotistas = 100

fii_rows = df_investiments.loc[df_investiments['Tipo Ativo'] == 'Fii', ['Ativo', 'N/Cotistas']]
for ativo, n_cotistas in fii_rows.itertuples(index=False, name=None):
    if pd.isna(n_cotistas):
        # No market data was merged for this FII: the holder count is unknown,
        # so it must not be reported as SAFE (NaN fails every comparison).
        print(f"UNKNOWN - FII {ativo} sem dados de cotistas")
        continue

    # The column becomes float when the merge introduces NaN; show a whole number.
    n_cotistas = int(n_cotistas)
    if n_cotistas < min_cotistas:
        print(f"NOT SAFE - FII {ativo} paga Imposto, tem {n_cotistas} cotistas")
    else:
        print(f"SAFE - FII {ativo} não paga imposto, tem {n_cotistas} cotistas")


## Visualizing Portfolio Data

In this section we generate two visualizations: a pie chart of the portfolio distribution by asset type, and a stacked bar chart of the amount invested (`Investimento`) and the balance (`Saldo`) of each asset.


In [None]:
# Keep only the assets that are currently held (Investimento > 0)
df_investiments = df_investiments.loc[df_investiments["Investimento"] > 0]

# Plot portfolio distribution by asset type
fig_pie, ax_pie = plt.subplots(figsize=(10, 6))
df_general['% Carteira'].plot(kind='pie', ax=ax_pie, autopct='%1.1f%%', startangle=90, legend=False)
ax_pie.set_title("Distribuição da Carteira por Tipo de Ativo")
ax_pie.set_ylabel('')
plt.show()

# Plot individual investments and their performance (Investimento vs Saldo).
# The axes are created once and handed to DataFrame.plot: calling
# plt.figure() first and then DataFrame.plot(figsize=...) opens a second
# figure and leaves the first one empty.
fig_bar, ax = plt.subplots(figsize=(12, 8))
df_investiments.plot(kind='bar', x='Ativo', y=['Investimento', 'Saldo'], stacked=True, ax=ax)
ax.set_title("Investimentos e Saldo por Ativo")
ax.set_ylabel('Valor (R$)')
plt.xticks(rotation=90)

# Y ticks every 100, covering the full height of the stacked bars.
# In a stacked bar chart positive values pile up above zero and negative
# ones below it, so the extremes come from the stacked totals rather than
# from the largest single column value. The lower bound never rises above
# the previous fixed value of -500.
tick_step = 100
saldo = df_investiments['Saldo'].fillna(0)
stack_top = (df_investiments['Investimento'] + saldo.clip(lower=0)).max()
stack_bottom = saldo.clip(upper=0).min()
lowest_tick = min(-500, int(np.floor(stack_bottom / tick_step)) * tick_step)
highest_tick = int(np.ceil(stack_top / tick_step)) * tick_step
ax.set_yticks(range(lowest_tick, highest_tick + tick_step, tick_step))

plt.show()

# Optional: Create more plots


In [None]:
# Columns written to the portfolio analysis file, in output order
cols = [
    'Tipo Ativo', 'Ativo', 'Investimento', 'Quantidade Total', 'Preco Medio',
    'Preco Atual', 'Variacao 12M', 'Saldo', 'Lucro', 'Prejuizo', '% Carteira',
]

# Order by asset type, then by share, both descending
df_investiments = df_investiments.sort_values(by=['Tipo Ativo', '% Carteira'], ascending=False)

# Persist the selected columns as CSV, without the index column
my_wallet = df_investiments.loc[:, cols]
my_wallet.to_csv(paths['path_portfolio_analysis'], index=False)

In [None]:
# Preview the first 30 rows of the exported portfolio
my_wallet.iloc[:30]

## Conclusion

This script provides an analysis of your investment portfolio, including:
- Total investment amount.
- Distribution of the portfolio by asset class.
- Detailed information on individual assets.
- Tax considerations for FIIs based on the number of unit holders.
- Visualization of portfolio performance.