In [None]:
# --- 1. SETUP AND IMPORTS ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot appearance and numeric display: whitegrid theme, seaborn 'deep' palette,
# and fixed four-decimal floats in pandas output instead of scientific notation.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('deep')
pd.set_option('display.float_format', '{:.4f}'.format)

# --- 2. ESTABLISH PATHS ---
# The project root is the parent of the current working directory.
# NOTE(review): presumably the notebook is run from a folder one level below
# the project root -- confirm, since the paths below depend on it.
PROJECT_ROOT = Path().resolve().parent
PROCESSED_DATA_PATH = PROJECT_ROOT.joinpath('data', 'processed')

print(f"Project Root: {PROJECT_ROOT}")
print(f"Attempting to load data from: {PROCESSED_DATA_PATH}")

# --- 3. LOAD ALL DATASETS ---
# Read the four processed parquet files that the rest of the notebook audits.
# A missing file is reported together with a remediation hint and then
# re-raised: swallowing it would only postpone the failure to a confusing
# NameError in the first cell that touches one of these frames.
try:
    prices_df = pd.read_parquet(PROCESSED_DATA_PATH / 'prices_equity_daily.parquet')
    index_df = pd.read_parquet(PROCESSED_DATA_PATH / 'index_ibov_daily.parquet')
    rf_df = pd.read_parquet(PROCESSED_DATA_PATH / 'risk_free_daily.parquet')
    fundamentals_df = pd.read_parquet(PROCESSED_DATA_PATH / 'fundamentals_quarterly.parquet')
except FileNotFoundError as e:
    print(f"\nERROR: Could not load data file: {e}")
    print("Please ensure you have run 'scripts/update_data.py' successfully before proceeding.")
    # Stop the notebook here instead of continuing with undefined frames.
    raise
else:
    # Reached only when every read above succeeded.
    print("\nSUCCESS: All required data files loaded successfully.")

In [None]:
#
# Step 2: Price Data Audit (Equities & Index)
#
# Perform sanity checks on the core price series and visualize key assets.
#

# Absolute one-day return above which an observation is flagged for review.
EXTREME_RETURN_THRESHOLD = 0.80

print("--- Equity Price Data Sanity Checks ---")
# The condition is `<= 0`, so the label names zero as well as negative prices.
print(f"Any zero or negative adjusted close prices? {'Yes' if (prices_df['adj_close'] <= 0).any() else 'No'}")

# Count missing values in the loaded columns only. This cell adds a derived
# 'daily_return' column (NaN on each ticker's first row) further down; leaving
# it in would inflate the count every time the cell is re-run.
missing_row_count = (
    prices_df.drop(columns='daily_return', errors='ignore')
    .isnull()
    .any(axis=1)
    .sum()
)
print(f"Number of rows with any missing data: {missing_row_count}")

# Check for extreme one-day returns, which may indicate data errors or splits.
# NOTE(review): pct_change follows row order within each ticker; this assumes
# the rows are already in chronological order per ticker -- confirm upstream.
prices_df['daily_return'] = prices_df.groupby('ticker')['adj_close'].pct_change()
# The filter is on the absolute return, so large drops are flagged as well.
extreme_returns = prices_df[prices_df['daily_return'].abs() > EXTREME_RETURN_THRESHOLD]
print(f"\nFound {len(extreme_returns)} instances of absolute daily returns > {EXTREME_RETURN_THRESHOLD:.0%}.")
if not extreme_returns.empty:
    print("Instances of extreme returns to investigate:")
    print(extreme_returns[['ticker', 'date', 'daily_return']])

# --- Visual Inspection ---
# Two stacked panels sharing the x-axis: selected equities on top, the IBOV
# index underneath.
fig, ax = plt.subplots(2, 1, figsize=(15, 10), sharex=True)
fig.suptitle('Visual Audit of Price Series', fontsize=16)

# Plot a selection of key tickers
tickers_to_plot = ['PETR4.SA', 'AAPL34.SA'] # A domestic stock and a BDR
for ticker in tickers_to_plot:
    asset_data = prices_df[prices_df['ticker'] == ticker].set_index('date')
    if not asset_data.empty:
        ax[0].plot(asset_data.index, asset_data['adj_close'], label=ticker, alpha=0.9)
    else:
        # Make a missing or misspelled ticker visible instead of skipping it silently.
        print(f"NOTE: no price rows found for {ticker}; skipped in the plot.")
ax[0].set_title('Selected Equity Price Series')
ax[0].set_ylabel('Adjusted Close (R$)')
# Draw the legend only if at least one ticker was plotted; calling legend()
# on an axes without labelled artists just emits a warning.
if ax[0].get_legend_handles_labels()[0]:
    ax[0].legend()
ax[0].grid(True, which='both', linestyle='--', linewidth=0.5)

# Plot the IBOV index
# NOTE(review): uses index_df's index as the x-axis, whereas prices_df keeps
# dates in a 'date' column -- presumably index_df is date-indexed; confirm.
ax[1].plot(index_df.index, index_df['adj_close'], label='IBOV Index (^BVSP)', color='black')
ax[1].set_title('IBOV Index Price Series')
ax[1].set_ylabel('Index Level')
ax[1].legend()
ax[1].grid(True, which='both', linestyle='--', linewidth=0.5)

# Reserve the top 2% of the figure for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.98])
plt.show()

In [None]:
#
# Step 3: Risk-Free & Fundamentals Audit
#
# Audit the SELIC rate for plausibility and inspect the structure of the
# quarterly fundamental data.
#

print("--- Risk-Free (SELIC) Data Audit ---")
print(rf_df.describe())

# --- Visual Inspection of SELIC and Fundamentals ---
# Two independent panels (x-axes are not shared): SELIC on top, one ticker's
# book value per share underneath.
fig, axes = plt.subplots(2, 1, figsize=(15, 10))
fig.suptitle('Visual Audit of Non-Price Data', fontsize=16)

# Plot SELIC Rate
axes[0].plot(rf_df.index, rf_df['selic_annualized'], label='SELIC (Annualized)')
axes[0].set_title('Annualized SELIC Rate Over Time')
axes[0].set_ylabel('Annualized Rate')
# Render tick values as percentages with one decimal.
# NOTE(review): presumably 'selic_annualized' is a decimal fraction
# (0.10 == 10%) -- confirm, otherwise the tick labels are off by 100x.
axes[0].yaxis.set_major_formatter(plt.FuncFormatter('{:.1%}'.format))
# The line carries a label, so show it, matching the price-audit panels.
axes[0].legend()
axes[0].grid(True, which='both', linestyle='--', linewidth=0.5)

# Plot Book Value Per Share for a sample ticker to confirm quarterly steps.
# Sorting by period end keeps the connecting line chronological even if the
# source rows are not stored in date order.
vale_fundamentals = (
    fundamentals_df[fundamentals_df['ticker'] == 'VALE3.SA']
    .set_index('fiscal_period_end')
    .sort_index()
)
axes[1].set_title('Book Value Per Share (VPA) for VALE3.SA')
if not vale_fundamentals.empty:
    axes[1].plot(vale_fundamentals.index, vale_fundamentals['book_per_share'], marker='o', linestyle='-')
    axes[1].set_ylabel('VPA (R$)')
    axes[1].grid(True, which='both', linestyle='--', linewidth=0.5)
else:
    # Say why the panel is empty instead of leaving a blank, untitled axes.
    axes[1].text(
        0.5, 0.5, 'No fundamentals rows found for VALE3.SA',
        ha='center', va='center', transform=axes[1].transAxes,
    )

# Reserve the top 2% of the figure for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.98])
plt.show()

# --- Fundamentals Sanity Checks ---
# Two yes/no flags over the whole fundamentals table: any strictly negative
# book equity, and any share count that is zero or below.
print("\n--- Fundamentals Sanity Checks ---")
print(
    "Any negative book equity? "
    + ('Yes' if fundamentals_df['book_equity'].lt(0).any() else 'No')
)
print(
    "Any zero/negative shares outstanding? "
    + ('Yes' if fundamentals_df['shares_outstanding'].le(0).any() else 'No')
)