In [21]:
# Cell 1: imports and setup
import os
from typing import Optional
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt

# display and plotting setup
pd.set_option("display.max_columns", 50)
pd.set_option("display.width", 160)
plt.rcParams["figure.figsize"] = (10, 5)

# default paths (adjust if needed)
COMPANIES_PATH = r"../data/raw/acoes-listadas-b3.csv"
OUTPUT_DIR = r"../data/processed"

In [22]:
# Cell 2: load B3-listed companies (CSV validation included)
def load_listed_companies(path: str = COMPANIES_PATH) -> Optional[pd.DataFrame]:
    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        print(f"❌ File not found: {path}")
        return None
    except Exception as e:
        print(f"❌ Error while reading CSV: {e}")
        return None

    # expected column: 'Ticker'
    if "Ticker" not in df.columns:
        possible = [c for c in df.columns if "tick" in c.lower() or "code" in c.lower()]
        if possible:
            print("⚠️ Missing 'Ticker' column — maybe you meant:", possible)
        else:
            print("⚠️ 'Ticker' column not found in CSV.")
        return None

    print(f"✅ Loaded {len(df)} tickers.")
    return df

In [23]:
# Cell 3: load and preview the first few rows
df_listed = load_listed_companies()
if df_listed is not None:
    display(df_listed.head(10))
    print("\nColumns:", df_listed.columns.tolist())

✅ Loaded 406 tickers.


Unnamed: 0,Ticker,Nome,Negócios,Última (R$),Variação
0,PDGR3,PDG Realty,34.615.600,7,"-46,15%"
1,MGLU3,Magazine Luiza,32.721.500,915,"-0,11%"
2,AMBP3,Ambipar,27.231.800,140,"-49,09%"
3,ASAI3,Assaí,24.871.500,906,"-2,16%"
4,ITSA4,Itaúsa,22.795.700,1122,"+0,54%"
5,BBDC4,Banco Bradesco,22.193.300,1710,"+0,06%"
6,ABEV3,Ambev,21.785.000,1187,"-0,50%"
7,COGN3,Cogna,21.087.500,321,"+1,58%"
8,PETR4,Petrobras,19.756.800,3100,"-0,26%"
9,VALE3,Vale,19.240.200,5859,"-0,19%"



Columns: ['Ticker', 'Nome', 'Negócios', 'Última (R$)', 'Variação']


In [24]:
# Cell 4: select a ticker (user input or auto-pick)
def choose_ticker(df: pd.DataFrame, interactive: bool = True) -> Optional[str]:
    tickers = df["Ticker"].astype(str).str.upper().unique()

    # auto mode: pick the first one
    if not interactive:
        print(f"Non-interactive mode: using {tickers[0]}")
        return tickers[0] + ".SA"

    # interactive mode
    print("Example tickers (first 20):")
    print(", ".join(tickers[:20]))
    ticker = input("Enter ticker (no .SA) or index (e.g. 0): ").strip().upper()

    if ticker.isdigit():
        idx = int(ticker)
        if 0 <= idx < len(tickers):
            return tickers[idx] + ".SA"
        else:
            print("Index out of range.")
            return None

    if ticker in tickers:
        return ticker + ".SA"

    print("Ticker not found.")
    return None

# usage:
ticker = choose_ticker(df_listed, interactive=True)


Example tickers (first 20):
PDGR3, MGLU3, AMBP3, ASAI3, ITSA4, BBDC4, ABEV3, COGN3, PETR4, VALE3, ITUB4, RAIZ4, BBAS3, CSAN3, MOTV3, B3SA3, MBRF3, RENT3, GGBR4, PCAR3


In [25]:
# Cell 5: baixar histórico completo para 1 ticker
def download_full_history(ticker: str, timeout: int = 20) -> Optional[pd.DataFrame]:
    try:
        print(f"⬇️ Baixando {ticker} ...")
        data = yf.download(
            tickers=ticker,
            period="max",
            interval="1d",
            auto_adjust=True,
            progress=True,
            threads=True,
            group_by="column",
            timeout=timeout
        )
        if data.empty:
            print("⚠️ Nenhum dado retornado.")
            return None
        # se houver multiindex de colunas (quando várias ações), normaliza:
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.droplevel(0)
        data.index = pd.to_datetime(data.index)
        print(f"✅ {len(data)} registros de {data.index.min().date()} a {data.index.max().date()}")
        return data
    except Exception as e:
        print(f"❌ Erro no download: {type(e).__name__} - {e}")
        return None

# exemplo de uso:
df_hist = download_full_history("PETR4.SA")


⬇️ Baixando PETR4.SA ...


[*********************100%***********************]  1 of 1 completed

✅ 6473 registros de 2000-01-03 a 2025-10-10





In [28]:
df_hist.head()

Ticker,PETR4.SA,PETR4.SA,PETR4.SA,PETR4.SA,PETR4.SA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-03,1.156394,1.156394,1.156394,1.156394,35389440000
2000-01-04,1.092424,1.092424,1.092424,1.092424,28861440000
2000-01-05,1.081401,1.081401,1.081401,1.081401,43033600000
2000-01-06,1.077661,1.077661,1.077661,1.077661,34055680000
2000-01-07,1.082582,1.082582,1.082582,1.082582,20912640000


In [None]:
# Cell 6: preview data and plot Close price
def show_history_preview(data: pd.DataFrame, n: int = 5, title: Optional[str] = None):
    display(data.head(n))
    if "Close" in data.columns:
        data["Close"].plot(title=(title or "Close Price"))
        plt.xlabel("Date")
        plt.ylabel("Close (adjusted)")
        plt.show()
    else:
        print("No 'Close' column found — nothing to plot.")

# usage:
show_history_preview(df_hist, title="PETR4.SA")


Ticker,PETR4.SA,PETR4.SA,PETR4.SA,PETR4.SA,PETR4.SA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-03,1.156394,1.156394,1.156394,1.156394,35389440000
2000-01-04,1.092424,1.092424,1.092424,1.092424,28861440000
2000-01-05,1.081401,1.081401,1.081401,1.081401,43033600000
2000-01-06,1.077661,1.077661,1.077661,1.077661,34055680000
2000-01-07,1.082582,1.082582,1.082582,1.082582,20912640000
2000-01-10,1.104824,1.104824,1.104824,1.104824,19563520000
2000-01-11,1.077661,1.077661,1.077661,1.077661,23987200000
2000-01-12,1.079236,1.079236,1.079236,1.079236,23301120000


No 'Close' column found — nothing to plot.


In [27]:
# Cell 7: export to CSV with fixed name "df.csv"
def export_data(data: pd.DataFrame, output_dir: str = OUTPUT_DIR):
    """
    Export the DataFrame to a CSV file with a fixed name 'df.csv'.
    Overwrites existing files without asking (because, why not?).

    Parameters
    ----------
    data : pd.DataFrame
        The data to export.
    output_dir : str
        Directory where the file will be saved.
    """
    try:
        os.makedirs(output_dir, exist_ok=True)
        filepath = os.path.join(output_dir, "df.csv")

        # flatten MultiIndex columns (if any)
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.droplevel(0)

        data.to_csv(filepath, index=True)
        print(f"💾 Data successfully saved at: {filepath}")
        return filepath

    except Exception as e:
        print(f"❌ Failed to export data: {e}")
        return None


# usage example:
export_data(df_hist)



💾 Data successfully saved at: ../data/processed\df.csv


'../data/processed\\df.csv'