In [1]:
# Load raw.prices_daily from Postgres.
import os

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL


def _require_env(name):
    """Return environment variable `name`, raising RuntimeError if it is unset or empty."""
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Required environment variable {name} is not set")
    return value


# Build the URL from its components instead of interpolating them into a
# string: a password containing characters such as "@", "/" or ":" would
# otherwise corrupt the connection string, and an unset variable would be
# silently rendered as the literal text "None".
engine = create_engine(
    URL.create(
        drivername="postgresql",
        username=_require_env("PG_USER"),
        # Optional: when unset, no password is placed in the URL.
        password=os.getenv("PG_PASSWORD"),
        host=_require_env("PG_HOST"),
        port=int(_require_env("PG_PORT")),
        database=_require_env("PG_DB"),
    )
)

# The schema name is read from the environment and interpolated as an
# identifier; it must not be sourced from untrusted input.
query = f"SELECT * FROM {_require_env('PG_SCHEMA_RAW')}.prices_daily;"
df = pd.read_sql(query, engine)
# Order rows by ticker, then by date within each ticker.
df = df.sort_values(["ticker", "date"])
df.head()


Unnamed: 0,date,ticker,open,high,low,close,adj_close,volume,run_id,ingested_at_utc,source_name
0,2019-01-02,AAPL,38.7225,39.712502,38.557499,39.48,37.538822,148158800,initial_ingest,2025-11-29 18:04:16.128999,yfinance
1,2019-01-03,AAPL,35.994999,36.43,35.5,35.547501,33.799671,365248800,initial_ingest,2025-11-29 18:04:16.128999,yfinance
2,2019-01-04,AAPL,36.1325,37.137501,35.950001,37.064999,35.242554,234428400,initial_ingest,2025-11-29 18:04:16.128999,yfinance
3,2019-01-07,AAPL,37.174999,37.2075,36.474998,36.982498,35.164116,219111200,initial_ingest,2025-11-29 18:04:16.128999,yfinance
4,2019-01-08,AAPL,37.389999,37.955002,37.130001,37.6875,35.83445,164101200,initial_ingest,2025-11-29 18:04:16.128999,yfinance


In [5]:
# 2. Compute market features.
# Make sure `date` is a datetime so the `.dt` accessors below work.
df["date"] = pd.to_datetime(df["date"])

# The previous-close return and the rolling volatility below are computed
# row-over-row within each ticker, so rows must be in chronological order per
# ticker. Sort here so this cell does not depend on ordering done elsewhere.
df = df.sort_values(["ticker", "date"])

# Calendar features
df["year"] = df["date"].dt.year
df["month"] = df["date"].dt.month
df["day_of_week"] = df["date"].dt.dayofweek  # Monday=0 ... Sunday=6

# Intraday return: open -> close of the same row
df["return_close_open"] = (df["close"] - df["open"]) / df["open"]

# Return versus the previous row's close, per ticker.
# fill_method=None disables the implicit forward-fill of missing closes
# (deprecated in pandas); a missing close now yields a NaN return instead of
# a fabricated 0% return that would bias the volatility downwards.
df["return_prev_close"] = df.groupby("ticker")["close"].pct_change(fill_method=None)

# 5-row rolling standard deviation of the daily returns, per ticker.
# groupby().rolling() returns a (ticker, original index) MultiIndex; dropping
# the ticker level lets the result align back onto df by its original index.
df["volatility_5d"] = (
    df.groupby("ticker")["return_prev_close"]
      .rolling(5)
      .std()
      .reset_index(level=0, drop=True)
)


In [7]:
# 3. Keep only the final set of columns, grouped by role for readability.
key_columns = ["date", "ticker"]
calendar_columns = ["year", "month", "day_of_week"]
price_columns = ["open", "high", "low", "close", "volume"]
derived_columns = ["return_close_open", "return_prev_close", "volatility_5d"]
ingest_columns = ["run_id", "ingested_at_utc"]

# Column order in the result follows the concatenation order below.
features_df = df[
    key_columns
    + calendar_columns
    + price_columns
    + derived_columns
    + ingest_columns
]


In [8]:
#4. insertar en analytics.daily_features
# Resolve the target schema up front: with schema=None, to_sql would fall back
# to the connection's default schema, and if_exists="replace" would then drop
# and recreate a `daily_features` table there without any warning.
analytics_schema = os.getenv("PG_SCHEMA_ANALYTICS")
if not analytics_schema:
    raise RuntimeError(
        "PG_SCHEMA_ANALYTICS is not set; refusing to replace "
        "daily_features in the default schema"
    )

features_df.to_sql(
    "daily_features",
    engine,
    schema=analytics_schema,
    if_exists="replace",   # during testing: drops and recreates the table on every run
    index=False
)


157