In [None]:

import pandas as pd
from pathlib import Path

# Get the project root directory (go up 3 folders from this file)
BASE_DIR = Path(__file__).resolve().parents[3]

DATA_RAW = BASE_DIR / "data" / "raw"
DATA_PROCESSED = BASE_DIR / "data" / "processed"

print(f"Looking for data in: {DATA_RAW}")
print(f"Data raw exists: {DATA_RAW.exists()}")


def load_ea(path=None):
    """Load the EA adjusted-close price series as a tidy DataFrame.

    The CSV is read with its first two rows skipped (the "Price, Adj Close"
    and "Ticker, EA" metadata rows), so the third row is used as the header.
    Rows whose date cannot be parsed or whose price is not numeric are
    dropped.

    Args:
        path: Optional location of the CSV file. Defaults to
            ``DATA_RAW / "EA_2010_2025.csv"`` when omitted.

    Returns:
        A DataFrame with the columns ``date`` (datetime), ``ticker``
        (always ``"EA"``) and ``adj_close`` (numeric), containing no missing
        values. Surviving rows keep the index labels assigned by
        ``pd.read_csv``.

    Raises:
        KeyError: If the header row does not provide both a ``Date`` and an
            ``Adj Close`` column.
    """
    if path is None:
        path = DATA_RAW / "EA_2010_2025.csv"

    # Skip the two metadata rows; the header is taken from the third row.
    raw = pd.read_csv(path, skiprows=2)

    # Fail early with a descriptive message instead of an opaque KeyError
    # further down when the header row is not what this loader expects.
    missing = [col for col in ("Date", "Adj Close") if col not in raw.columns]
    if missing:
        raise KeyError(
            f"{path}: missing expected column(s) {missing}; "
            f"found {list(raw.columns)}"
        )

    # errors="coerce" turns blank/invalid dates and non-numeric prices into
    # NaT/NaN, so a single dropna() removes every unusable row.
    tidy = pd.DataFrame(
        {
            "date": pd.to_datetime(raw["Date"], errors="coerce"),
            "ticker": "EA",
            "adj_close": pd.to_numeric(raw["Adj Close"], errors="coerce"),
        }
    )
    return tidy.dropna()
