In [12]:
# ============================================
# SML (Security Market Line) interactiva (Plotly)
# Snowflake → IBEX + activos → β mensual → SML anual (geom.)
# ============================================

import pandas as pd
import numpy as np
import snowflake.connector
import statsmodels.api as sm
import plotly.graph_objects as go

# -----------------------------
# Configuración
# -----------------------------
# Inclusive date window applied to every price query (ISO date strings).
start_date, end_date = '2022-01-01', '2024-12-31'

# Ticker value that selects the market index rows.
ticker_ibex = '^IBEX'

# Constant annual risk-free rate (3%).
rf_anual = 0.03

# Frequency label shown in the chart title.
freq_label = 'mensual'

# Editable list of assets to place on the chart.
tickers = [
    "IBE.MC",
    "ITX.MC",
    "TEF.MC",
    "BBVA.MC",
    "SAN.MC",
    "REP.MC",
    "AENA.MC",
    "IAG.MC",
    "ENG.MC",
    "ACS.MC",
    "FER.MC",
    "CABK.MC",
    "ELE.MC",
    "MAP.MC",
]

# -----------------------------
# Conexión Snowflake
# -----------------------------
import getpass
import os

# The password must not live in the notebook source: it is read from the
# SNOWFLAKE_PASSWORD environment variable, falling back to an interactive
# prompt. The remaining settings keep their previous values as defaults and
# can be overridden through the matching SNOWFLAKE_* variables.
# NOTE(review): ACCOUNTADMIN is far more privilege than read-only queries
# need; prefer a restricted role via SNOWFLAKE_ROLE.
_sf_password = os.environ.get('SNOWFLAKE_PASSWORD') or getpass.getpass('Snowflake password: ')

conn = snowflake.connector.connect(
    user=os.environ.get('SNOWFLAKE_USER', 'TFMGRUPO4'),
    password=_sf_password,
    account=os.environ.get('SNOWFLAKE_ACCOUNT', 'WYNIFVB-YE01854'),
    warehouse=os.environ.get('SNOWFLAKE_WAREHOUSE', 'COMPUTE_WH'),
    database=os.environ.get('SNOWFLAKE_DATABASE', 'YAHOO_FINANCE'),
    schema=os.environ.get('SNOWFLAKE_SCHEMA', 'MACHINE_LEARNING'),
    role=os.environ.get('SNOWFLAKE_ROLE', 'ACCOUNTADMIN'),
)
cursor = conn.cursor()

# -----------------------------
# IBEX (mercado)
# -----------------------------
def _fetch_close_series(cur, sql, date_col, value_col):
    """Run ``sql`` and return its two result columns as a date-indexed frame.

    Args:
        cur: Open database cursor exposing ``execute`` and ``fetchall``.
        sql: Query whose rows are (date, close price) pairs.
        date_col: Name given to the first column; it becomes the index.
        value_col: Name given to the second (price) column.

    Returns:
        DataFrame indexed by ``date_col`` (parsed as datetimes, sorted
        ascending) with one float column ``value_col``; values that cannot
        be parsed as numbers become NaN.
    """
    cur.execute(sql)
    frame = pd.DataFrame(cur.fetchall(), columns=[date_col, value_col])
    frame[date_col] = pd.to_datetime(frame[date_col])
    frame[value_col] = pd.to_numeric(frame[value_col], errors='coerce').astype(float)
    return frame.set_index(date_col).sort_index()


# Market index query. The interpolated values are the notebook constants
# defined above; switch to bind parameters if they ever become user input.
q_ibex = f"""
    SELECT DATE, STOCKCLOSE
    FROM INDEX_TOTALES
    WHERE TICKER = '{ticker_ibex}'
      AND DATE BETWEEN '{start_date}' AND '{end_date}'
    ORDER BY DATE
"""

# try/finally guarantees that the cursor and the connection are released even
# when a query or a conversion raises (they used to leak on any error).
try:
    df_ibex = _fetch_close_series(cursor, q_ibex, 'DATE', 'IBEX')

    # One price series per key; 'IBEX' is the market proxy.
    dfs = {'IBEX': df_ibex['IBEX']}
    for tk in tickers:
        q = f"""
            SELECT FECHA, CLOSE
            FROM TICKERS_INDEX
            WHERE TICKER = '{tk}'
              AND FECHA BETWEEN '{start_date}' AND '{end_date}'
            ORDER BY FECHA
        """
        dfs[tk] = _fetch_close_series(cursor, q, 'FECHA', tk)[tk]
finally:
    try:
        cursor.close()
    finally:
        conn.close()

# -----------------------------
# Panel OUTER y mensual robusto
# -----------------------------
# Outer-join every price series on its date index (one column per dict key).
panel = pd.concat(dfs, axis=1, join='outer')
panel = panel.sort_index()

# Month-end buckets holding the mean price of each month: a month gets a value
# whenever at least one price exists in it.
panel_m = panel.resample('ME').mean()

# Simple month-over-month returns; fill_method=None disables forward filling.
rets = panel_m.pct_change(fill_method=None)

# Per-period risk-free rate equivalent to rf_anual compounded over k periods
# per year; used to form excess returns.
k = 12
rf_p = pow(1 + rf_anual, 1 / k) - 1

# -----------------------------
# Utilidades
# -----------------------------
def geometric_annualized(r, periods_per_year=12):
    """Annualize a series of periodic simple returns geometrically.

    Args:
        r: pandas Series of per-period simple returns; NaN entries are ignored.
        periods_per_year: Number of return periods that make up one year.

    Returns:
        ``prod(1 + r) ** (periods_per_year / n) - 1`` over the ``n`` non-NaN
        returns, or NaN when no valid return remains.
    """
    valid = r.dropna()
    count = len(valid)
    if not count:
        return np.nan
    growth = (valid + 1).prod()
    exponent = periods_per_year / count
    return growth ** exponent - 1

def estimate_beta_alpha_r2(ri, rm, rf_period):
    """Fit the excess-return regression (Ri - Rf) = alpha + beta (Rm - Rf) by OLS.

    Args:
        ri: Asset returns (pandas Series).
        rm: Market returns (pandas Series).
        rf_period: Risk-free rate per return period, subtracted from both.

    Returns:
        Tuple ``(beta, alpha, r2, nobs)``. Only index labels present in both
        series with no NaN are used. With fewer than 3 such observations the
        first three entries are None and ``nobs`` is the count found.
    """
    aligned = pd.concat([rm, ri], axis=1, join='inner').dropna()
    aligned.columns = ['Rm', 'Ri']
    n_used = len(aligned)
    if n_used < 3:
        return None, None, None, n_used

    excess = aligned - rf_period
    design = sm.add_constant(excess['Rm'])
    fit = sm.OLS(excess['Ri'], design).fit()
    coefs = fit.params
    # .get falls back to NaN if a coefficient is absent from the fit.
    return (
        float(coefs.get('Rm', np.nan)),
        float(coefs.get('const', np.nan)),
        float(fit.rsquared),
        int(n_used),
    )

# -----------------------------
# E[Rm] anual geométrico y prima
# -----------------------------
# Annualized geometric market return and the resulting market premium.
market_returns = rets['IBEX']
E_Rm_ann = geometric_annualized(market_returns, periods_per_year=12)
market_premium = E_Rm_ann - rf_anual

# One row per asset with a usable regression: beta, alpha, R², realized
# annualized return, CAPM-implied return and their difference.
rows = []
for tk in tickers:
    asset_returns = rets[tk]
    beta, alpha, r2, nobs = estimate_beta_alpha_r2(asset_returns, market_returns, rf_p)
    if beta is None:
        print(f"[AVISO] {tk}: datos insuficientes tras alinear (n={nobs}). Se omite.")
        continue

    # Annualize the asset over the same months that entered the regression.
    common = pd.concat([market_returns, asset_returns], axis=1, join='inner').dropna()
    E_Ri_ann = geometric_annualized(common[tk], periods_per_year=12)
    E_Ri_capm = rf_anual + beta * market_premium  # CAPM prediction
    rows.append({
        'Ticker': tk,
        'Beta': beta,
        'Alpha_excesos': alpha,
        'R2': r2,
        'N_obs': nobs,
        'E_Ri_ann_geom': E_Ri_ann,
        'E_Ri_CAPM': E_Ri_capm,
        'Mispricing': E_Ri_ann - E_Ri_capm,  # realized minus CAPM
    })

df_points = pd.DataFrame(rows)
if df_points.empty:
    raise RuntimeError("No hay puntos para graficar. Revisa cobertura mensual de los activos.")
df_points = df_points.sort_values('Beta').reset_index(drop=True)

# -----------------------------
# SML y rangos de ejes
# -----------------------------
# X range: always contains beta = 0 and at least reaches beta = 2, with a
# small margin around the most extreme estimated betas.
asset_betas = df_points['Beta']
beta_min = min(0.0, asset_betas.min() - 0.1)
beta_max = max(2.0, asset_betas.max() + 0.2)

# SML sampled on 200 evenly spaced betas.
betas_line = np.linspace(beta_min, beta_max, num=200)
E_R_line = rf_anual + market_premium * betas_line

# Y range: cover every plotted value (NaNs ignored) plus a fixed pad.
y_pad = 0.03
sml_extremes = [rf_anual, E_Rm_ann, E_R_line.min(), E_R_line.max()]
y_vals = np.concatenate([df_points['E_Ri_ann_geom'].to_numpy(), sml_extremes])
y_min = float(np.nanmin(y_vals)) - y_pad
y_max = float(np.nanmax(y_vals)) + y_pad

# -----------------------------
# Plotly chart — monthly-beta SML (no legend; market-mean label on the right)
# -----------------------------
fig = go.Figure()

# The Security Market Line itself.
sml_line = go.Scatter(
    x=betas_line,
    y=E_R_line,
    mode='lines',
    line={'width': 3},
    name='SML',
    showlegend=False,
)
fig.add_trace(sml_line)

# Dashed horizontal reference at the market's annualized return, labelled at
# the top-right end of the line.
market_level_label = f"Ȳ = E[Rm] {E_Rm_ann:.2%}"
fig.add_hline(
    y=E_Rm_ann,
    line_dash="dash",
    line_width=2,
    annotation_text=market_level_label,
    annotation_position="top right",
    annotation_yshift=6,
)

# Risk-free asset marker at beta = 0.
rf_marker = go.Scatter(
    x=[0],
    y=[rf_anual],
    mode='markers',
    marker={'symbol': 'x', 'size': 12, 'line': {'width': 1.5}},
    name='Rf',
    showlegend=False,
    hovertemplate="<b>Activo sin riesgo</b><br>β=0.00<br>Rendimiento= %{y:.2%}<extra></extra>",
)
fig.add_trace(rf_marker)

# Market marker at beta = 1.
market_marker = go.Scatter(
    x=[1],
    y=[E_Rm_ann],
    mode='markers',
    marker={'size': 12, 'line': {'width': 1.5}},
    name='Mercado',
    showlegend=False,
    hovertemplate="<b>Mercado</b><br>β=1.00<br>E[Rm]= %{y:.2%}<extra></extra>",
)
fig.add_trace(market_marker)

# Activos (hover con CAPM y R² por activo)
# Hover text for each asset; the customdata[i] slots follow the column order
# of asset_customdata below.
asset_hover = (
    "<b>%{customdata[0]}</b><br>"
    "n = %{customdata[7]} meses<br>"
    "β = %{x:.3f} | E[Ri] (geom, anual) = %{y:.2%}<br>"
    "E[Rm] = %{customdata[1]:.2%} | Rf = %{customdata[2]:.2%}<br>"
    "<b>CAPM</b>: E[Ri]_CAPM = Rf + β·(E[Rm]−Rf) = "
    "%{customdata[2]:.2%} + %{x:.3f}·(%{customdata[1]:.2%} − %{customdata[2]:.2%}) "
    "= %{customdata[3]:.2%}<br>"
    "α (excesos) = %{customdata[4]:.2%} | R² = %{customdata[5]:.3f}<br>"
    "Mispricing = E[Ri] − E[Ri]_CAPM = %{customdata[6]:.2%}"
    "<extra></extra>"
)

# One row per asset, one column per hover slot.
n_assets = len(df_points)
asset_customdata = np.stack(
    [
        df_points['Ticker'],             # [0] ticker
        np.full(n_assets, E_Rm_ann),     # [1] E[Rm]
        np.full(n_assets, rf_anual),     # [2] Rf
        df_points['E_Ri_CAPM'],          # [3] CAPM-implied return
        df_points['Alpha_excesos'],      # [4] alpha
        df_points['R2'],                 # [5] R²
        df_points['Mispricing'],         # [6] realized minus CAPM
        df_points['N_obs'],              # [7] observations used
    ],
    axis=-1,
)

asset_x = df_points['Beta']
asset_y = df_points['E_Ri_ann_geom']

# Asset markers (beta vs. realized annualized return).
asset_markers = go.Scatter(
    x=asset_x,
    y=asset_y,
    mode='markers',
    marker={'size': 9, 'line': {'width': 1, 'color': 'rgba(0,0,0,0.4)'}},
    name='Activos',
    showlegend=False,
    hovertemplate=asset_hover,
    customdata=asset_customdata,
)
fig.add_trace(asset_markers)

# Ticker text drawn above each marker.
asset_labels = go.Scatter(
    x=asset_x,
    y=asset_y,
    mode='text',
    text=df_points['Ticker'],
    textposition='top center',
    textfont={'size': 10},
    name='Etiquetas',
    showlegend=False,
)
fig.add_trace(asset_labels)

# Caja/nota COMPACTA a la DERECHA (ya la tenías así)
# Compact CAPM summary box anchored to the top-right corner of the plot area.
nota = "<br>".join([
    "CAPM:  E[Ri] = Rf + β·(E[Rm]−Rf)",
    f"Rf = {rf_anual:.2%} · E[Rm] (geom) = {E_Rm_ann:.2%}",
    f"Prima de mercado = {market_premium:.2%}",
])
fig.add_annotation(
    text=nota,
    xref="paper",
    yref="paper",
    x=0.99,
    y=0.98,
    xanchor="right",
    yanchor="top",
    align="right",
    showarrow=False,
    bordercolor="rgba(0,0,0,0.15)",
    borderwidth=1,
    bgcolor="rgba(0,0,0,0.03)",
    font={'size': 11},
)

# Figure-level layout; the legend is disabled.
chart_title = (
    "SML (CAPM):  E[Ri] = Rf + β(E[Rm]−Rf)  ·  "
    f"Periodo: {start_date} → {end_date}  ·  Frecuencia β: {freq_label}"
)
fig.update_layout(
    template='simple_white',
    title=chart_title,
    xaxis_title="Beta (β)",
    yaxis_title="Rendimiento esperado ANUAL",
    margin={'l': 40, 'r': 80, 't': 90, 'b': 50},  # extra room on the right
    hovermode='closest',
    showlegend=False,
)

# Both axes share the zero-line / axis-line styling and use the ranges
# computed earlier.
axis_style = {
    'zeroline': True,
    'zerolinewidth': 1,
    'zerolinecolor': "#B0B0B0",
    'showline': True,
    'linecolor': "#888",
}
fig.update_xaxes(range=[beta_min, beta_max], dtick=0.25, **axis_style)
fig.update_yaxes(range=[y_min, y_max], tickformat=".0%", **axis_style)

fig.show()


[AVISO] SAN.MC: datos insuficientes tras alinear (n=0). Se omite.
[AVISO] IAG.MC: datos insuficientes tras alinear (n=0). Se omite.
[AVISO] ENG.MC: datos insuficientes tras alinear (n=0). Se omite.


In [8]:
# ============================================
# SML (Security Market Line) — VERSIÓN ANUAL (2020–2024)
# Snowflake → IBEX + Activos → β ANUAL (excesos, OLS) → SML anual (geom.)
# ============================================

import pandas as pd
import numpy as np
import snowflake.connector
import statsmodels.api as sm
import plotly.graph_objects as go

# -----------------------------
# Configuración (ANUAL)
# -----------------------------
# Inclusive date window applied to every price query (ISO date strings).
start_date, end_date = '2020-01-01', '2024-12-31'

# Ticker value that selects the market index rows.
ticker_ibex = '^IBEX'

# Constant annual risk-free rate (3%).
rf_anual = 0.03

# Labels used in the chart title and in the hover text.
freq_label = 'anual'
periodo_label = 'años'

# Assets to place on the chart.
tickers = [
    "IBE.MC",
    "ITX.MC",
    "TEF.MC",
    "BBVA.MC",
    "SAN.MC",
    "REP.MC",
    "AENA.MC",
    "IAG.MC",
    "ENG.MC",
    "ACS.MC",
    "FER.MC",
    "CABK.MC",
    "ELE.MC",
    "MAP.MC",
]

# -----------------------------
# Conexión Snowflake
# -----------------------------
import getpass
import os

# The password must not live in the notebook source: it is read from the
# SNOWFLAKE_PASSWORD environment variable, falling back to an interactive
# prompt. The remaining settings keep their previous values as defaults and
# can be overridden through the matching SNOWFLAKE_* variables.
# NOTE(review): ACCOUNTADMIN is far more privilege than read-only queries
# need; prefer a restricted role via SNOWFLAKE_ROLE.
_sf_password = os.environ.get('SNOWFLAKE_PASSWORD') or getpass.getpass('Snowflake password: ')

conn = snowflake.connector.connect(
    user=os.environ.get('SNOWFLAKE_USER', 'TFMGRUPO4'),
    password=_sf_password,
    account=os.environ.get('SNOWFLAKE_ACCOUNT', 'WYNIFVB-YE01854'),
    warehouse=os.environ.get('SNOWFLAKE_WAREHOUSE', 'COMPUTE_WH'),
    database=os.environ.get('SNOWFLAKE_DATABASE', 'YAHOO_FINANCE'),
    schema=os.environ.get('SNOWFLAKE_SCHEMA', 'MACHINE_LEARNING'),
    role=os.environ.get('SNOWFLAKE_ROLE', 'ACCOUNTADMIN'),
)
cursor = conn.cursor()

# -----------------------------
# IBEX (mercado)
# -----------------------------
def _fetch_close_series(cur, sql, date_col, value_col):
    """Run ``sql`` and return its two result columns as a date-indexed frame.

    Args:
        cur: Open database cursor exposing ``execute`` and ``fetchall``.
        sql: Query whose rows are (date, close price) pairs.
        date_col: Name given to the first column; it becomes the index.
        value_col: Name given to the second (price) column.

    Returns:
        DataFrame indexed by ``date_col`` (parsed as datetimes, sorted
        ascending) with one float column ``value_col``; values that cannot
        be parsed as numbers become NaN.
    """
    cur.execute(sql)
    frame = pd.DataFrame(cur.fetchall(), columns=[date_col, value_col])
    frame[date_col] = pd.to_datetime(frame[date_col])
    frame[value_col] = pd.to_numeric(frame[value_col], errors='coerce').astype(float)
    return frame.set_index(date_col).sort_index()


# Market index query. The interpolated values are the notebook constants
# defined above; switch to bind parameters if they ever become user input.
q_ibex = f"""
    SELECT DATE, STOCKCLOSE
    FROM INDEX_TOTALES
    WHERE TICKER = '{ticker_ibex}'
      AND DATE BETWEEN '{start_date}' AND '{end_date}'
    ORDER BY DATE
"""

# try/finally guarantees that the cursor and the connection are released even
# when a query or a conversion raises (they used to leak on any error).
try:
    df_ibex = _fetch_close_series(cursor, q_ibex, 'DATE', 'IBEX')

    # One price series per key; 'IBEX' is the market proxy.
    dfs = {'IBEX': df_ibex['IBEX']}
    for tk in tickers:
        q = f"""
            SELECT FECHA, CLOSE
            FROM TICKERS_INDEX
            WHERE TICKER = '{tk}'
              AND FECHA BETWEEN '{start_date}' AND '{end_date}'
            ORDER BY FECHA
        """
        dfs[tk] = _fetch_close_series(cursor, q, 'FECHA', tk)[tk]
finally:
    try:
        cursor.close()
    finally:
        conn.close()

# -----------------------------
# Panel OUTER y remuestreo ANUAL (último válido del año)
# -----------------------------
# Outer-join every price series on its date index (one column per dict key),
# then order the rows chronologically.
panel = pd.concat(dfs, axis=1, join='outer')
panel = panel.sort_index()

def last_valid(series: pd.Series):
    """Return the last non-NaN value of ``series``, or NaN if there is none."""
    valid = series.dropna()
    if valid.empty:
        return np.nan
    return valid.iloc[-1]

# Fin de año: 'YE' (Year End). Tomamos el último precio válido del año.
# Year-end buckets ('YE'); each column keeps its last valid price of the year.
yearly_buckets = panel.resample('YE')
panel_y = yearly_buckets.apply(last_valid)

# Simple year-over-year returns; fill_method=None disables forward filling.
rets = panel_y.pct_change(fill_method=None)

# Returns are annual, so the per-period risk-free rate is the annual rate.
rf_period = rf_anual

# -----------------------------
# Utilidades
# -----------------------------
def geometric_annualized(r: pd.Series, periods_per_year=1):
    """Annualize a series of periodic simple returns geometrically.

    Args:
        r: Per-period simple returns; NaN entries are ignored.
        periods_per_year: Number of return periods that make up one year
            (1 yields the plain geometric mean of annual returns).

    Returns:
        ``prod(1 + r) ** (periods_per_year / n) - 1`` over the ``n`` non-NaN
        returns, or NaN when no valid return remains.
    """
    valid = r.dropna()
    count = len(valid)
    if not count:
        return np.nan
    growth = (valid + 1).prod()
    exponent = periods_per_year / count
    return growth ** exponent - 1

def estimate_beta_alpha_r2(ri: pd.Series, rm: pd.Series, rf_p: float):
    """Fit the excess-return regression (Ri - Rf) = alpha + beta (Rm - Rf) by OLS.

    Args:
        ri: Asset returns.
        rm: Market returns.
        rf_p: Risk-free rate per return period, subtracted from both series.

    Returns:
        Tuple ``(beta, alpha, r2, nobs)``. Only index labels present in both
        series with no NaN are used. With fewer than 3 such observations the
        first three entries are None and ``nobs`` is the count found.
    """
    aligned = pd.concat([rm, ri], axis=1, join='inner').dropna()
    aligned.columns = ['Rm', 'Ri']
    n_used = len(aligned)
    if n_used < 3:  # at least 3 annual returns are required
        return None, None, None, n_used

    excess = aligned - rf_p
    design = sm.add_constant(excess['Rm'])
    fit = sm.OLS(excess['Ri'], design).fit()
    coefs = fit.params
    # .get falls back to NaN if a coefficient is absent from the fit.
    return (
        float(coefs.get('Rm', np.nan)),
        float(coefs.get('const', np.nan)),
        float(fit.rsquared),
        int(n_used),
    )

# -----------------------------
# E[Rm] (geom) anual y prima de mercado
# -----------------------------
# Geometric mean annual market return; abort early if the index does not
# provide at least 3 annual returns.
market_returns = rets['IBEX']
E_Rm_ann = geometric_annualized(market_returns, periods_per_year=1)
n_market_years = market_returns.dropna().shape[0]
if pd.isna(E_Rm_ann) or n_market_years < 3:
    raise RuntimeError("No hay años suficientes para β anual. Amplía el rango o revisa la cobertura del IBEX.")
market_premium = E_Rm_ann - rf_anual

# Sanity print: the SML evaluated at beta = 0 next to Rf.
print(f"Chequeo SML: y(β=0) = {rf_anual + market_premium*0.0:.2%} (Rf = {rf_anual:.2%})")

# One row per asset with a usable regression: beta, alpha, R², realized
# annualized return, CAPM-implied return and their difference.
rows = []
for tk in tickers:
    asset_returns = rets[tk]
    beta, alpha, r2, nobs = estimate_beta_alpha_r2(asset_returns, market_returns, rf_period)
    if beta is None:
        print(f"[AVISO] {tk}: años insuficientes tras alinear (n={nobs}). Se omite.")
        continue

    # Annualize the asset over the same years that entered the regression.
    common = pd.concat([market_returns, asset_returns], axis=1, join='inner').dropna()
    E_Ri_ann = geometric_annualized(common[tk], periods_per_year=1)
    E_Ri_capm = rf_anual + beta * market_premium  # annual CAPM prediction
    rows.append({
        'Ticker': tk,
        'Beta': beta,
        'Alpha_excesos': alpha,
        'R2': r2,
        'N_obs': nobs,
        'E_Ri_ann_geom': E_Ri_ann,
        'E_Ri_CAPM': E_Ri_capm,
        'Mispricing': E_Ri_ann - E_Ri_capm,  # realized minus CAPM
    })

df_points = pd.DataFrame(rows)
if df_points.empty:
    raise RuntimeError("No hay puntos para graficar (β anual). Amplía el rango o revisa cobertura.")
df_points = df_points.sort_values('Beta').reset_index(drop=True)

# -----------------------------
# SML and axis ranges
# -----------------------------
# X range: anchored at beta = 0 so the SML visibly starts at Rf, but widened
# to the left when an asset has a negative beta. A hard 0.0 floor left such
# assets outside the visible axis range.
lowest_beta = df_points['Beta'].min()
beta_min = lowest_beta - 0.1 if lowest_beta < 0.0 else 0.0
beta_max = max(1.5, df_points['Beta'].max() + 0.2)

# SML sampled on 200 evenly spaced betas across the visible range.
betas_line = np.linspace(beta_min, beta_max, 200)
E_R_line   = rf_anual + market_premium * betas_line

# Y range: cover every plotted value (NaNs ignored) plus a fixed 0.03 pad.
y_vals = np.concatenate([
    df_points['E_Ri_ann_geom'].values,
    [rf_anual, E_Rm_ann, E_R_line.min(), E_R_line.max()]
])
y_min = float(np.nanmin(y_vals)) - 0.03
y_max = float(np.nanmax(y_vals)) + 0.03

# -----------------------------
# GRÁFICO Plotly — SML ANUAL (sin leyenda; Ȳ a la derecha)
# -----------------------------
fig = go.Figure()

# The Security Market Line itself.
sml_line = go.Scatter(
    x=betas_line,
    y=E_R_line,
    mode='lines',
    line={'width': 3},
    name='SML',
    showlegend=False,
)
fig.add_trace(sml_line)

# Dashed horizontal reference at the market's annualized return, labelled at
# the top-right end of the line.
market_level_label = f"Ȳ = E[Rm] {E_Rm_ann:.2%}"
fig.add_hline(
    y=E_Rm_ann,
    line_dash="dash",
    line_width=2,
    annotation_text=market_level_label,
    annotation_position="top right",
    annotation_yshift=6,
)

# Risk-free asset marker at beta = 0.
rf_marker = go.Scatter(
    x=[0],
    y=[rf_anual],
    mode='markers',
    marker={'symbol': 'x', 'size': 12, 'line': {'width': 1.5}},
    name='Rf',
    showlegend=False,
    hovertemplate="<b>Activo sin riesgo</b><br>β=0.00<br>Rendimiento= %{y:.2%}<extra></extra>",
)
fig.add_trace(rf_marker)

# Market marker at beta = 1.
market_marker = go.Scatter(
    x=[1],
    y=[E_Rm_ann],
    mode='markers',
    marker={'size': 12, 'line': {'width': 1.5}},
    name='Mercado',
    showlegend=False,
    hovertemplate="<b>Mercado</b><br>β=1.00<br>E[Rm]= %{y:.2%}<extra></extra>",
)
fig.add_trace(market_marker)

# Activos
# Hover text for each asset; the customdata[i] slots follow the column order
# of asset_customdata below.
asset_hover = (
    "<b>%{customdata[0]}</b><br>"
    f"n = %{{customdata[7]}} {periodo_label}<br>"
    "β = %{x:.3f} | E[Ri] (geom, anual) = %{y:.2%}<br>"
    "E[Rm] = %{customdata[1]:.2%} | Rf = %{customdata[2]:.2%}<br>"
    "<b>CAPM</b>: E[Ri]_CAPM = Rf + β·(E[Rm]−Rf) = "
    "%{customdata[2]:.2%} + %{x:.3f}·(%{customdata[1]:.2%} − %{customdata[2]:.2%}) "
    "= %{customdata[3]:.2%}<br>"
    "α (excesos) = %{customdata[4]:.2%} | R² = %{customdata[5]:.3f}<br>"
    "Mispricing = E[Ri] − E[Ri]_CAPM = %{customdata[6]:.2%}"
    "<extra></extra>"
)

# One row per asset, one column per hover slot.
n_assets = len(df_points)
asset_customdata = np.stack(
    [
        df_points['Ticker'],             # [0] ticker
        np.full(n_assets, E_Rm_ann),     # [1] E[Rm]
        np.full(n_assets, rf_anual),     # [2] Rf
        df_points['E_Ri_CAPM'],          # [3] CAPM-implied return
        df_points['Alpha_excesos'],      # [4] alpha
        df_points['R2'],                 # [5] R²
        df_points['Mispricing'],         # [6] realized minus CAPM
        df_points['N_obs'],              # [7] observations used
    ],
    axis=-1,
)

asset_x = df_points['Beta']
asset_y = df_points['E_Ri_ann_geom']

# Asset markers (beta vs. realized annualized return).
asset_markers = go.Scatter(
    x=asset_x,
    y=asset_y,
    mode='markers',
    marker={'size': 9, 'line': {'width': 1, 'color': 'rgba(0,0,0,0.4)'}},
    name='Activos',
    showlegend=False,
    hovertemplate=asset_hover,
    customdata=asset_customdata,
)
fig.add_trace(asset_markers)

# Ticker text drawn above each marker.
asset_labels = go.Scatter(
    x=asset_x,
    y=asset_y,
    mode='text',
    text=df_points['Ticker'],
    textposition='top center',
    textfont={'size': 10},
    name='Etiquetas',
    showlegend=False,
)
fig.add_trace(asset_labels)

# Caja/nota compacta a la DERECHA
# Compact CAPM summary box anchored to the top-right corner of the plot area.
nota = "<br>".join([
    "CAPM:  E[Ri] = Rf + β·(E[Rm]−Rf)",
    f"Rf = {rf_anual:.2%} · E[Rm] (geom) = {E_Rm_ann:.2%}",
    f"Prima de mercado = {market_premium:.2%}",
])
fig.add_annotation(
    text=nota,
    xref="paper",
    yref="paper",
    x=0.99,
    y=0.98,
    xanchor="right",
    yanchor="top",
    align="right",
    showarrow=False,
    bordercolor="rgba(0,0,0,0.15)",
    borderwidth=1,
    bgcolor="rgba(0,0,0,0.03)",
    font={'size': 11},
)

# Figure-level layout; the legend is disabled.
chart_title = (
    "SML (CAPM):  E[Ri] = Rf + β(E[Rm]−Rf)  ·  "
    f"Periodo: {start_date} → {end_date}  ·  Frecuencia β: {freq_label}"
)
fig.update_layout(
    template='simple_white',
    title=chart_title,
    xaxis_title="Beta (β)",
    yaxis_title="Rendimiento esperado ANUAL",
    margin={'l': 40, 'r': 80, 't': 90, 'b': 50},
    hovermode='closest',
    showlegend=False,
)

# Both axes share the zero-line / axis-line styling and use the ranges
# computed earlier.
axis_style = {
    'zeroline': True,
    'zerolinewidth': 1,
    'zerolinecolor': "#B0B0B0",
    'showline': True,
    'linecolor': "#888",
}
fig.update_xaxes(range=[beta_min, beta_max], dtick=0.25, **axis_style)
fig.update_yaxes(range=[y_min, y_max], tickformat=".0%", **axis_style)

fig.show()


Chequeo SML: y(β=0) = 3.00% (Rf = 3.00%)
[AVISO] SAN.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] IAG.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] ENG.MC: años insuficientes tras alinear (n=0). Se omite.


In [15]:
# ============================================
# SML (Security Market Line) interactiva (Plotly)
# Snowflake → IBEX + activos → β mensual → SML anual (geom.)
# + Clasificación TP / FP / FN / TN
# + Caja de clases ABAJO-DERECHA (dentro del gráfico)
# ============================================

import pandas as pd
import numpy as np
import snowflake.connector
import statsmodels.api as sm
import plotly.graph_objects as go

# -----------------------------
# Configuración
# -----------------------------
# Inclusive date window applied to every price query (ISO date strings).
start_date, end_date = '2022-01-01', '2024-12-31'

# Ticker value that selects the market index rows.
ticker_ibex = '^IBEX'

# Constant annual risk-free rate (3%).
rf_anual = 0.03

# Labels used in the chart title and in the hover text.
freq_label = 'mensual'
periodo_label = 'meses'

# Editable list of assets to place on the chart.
tickers = [
    "IBE.MC",
    "ITX.MC",
    "TEF.MC",
    "BBVA.MC",
    "SAN.MC",
    "REP.MC",
    "AENA.MC",
    "IAG.MC",
    "ENG.MC",
    "ACS.MC",
    "FER.MC",
    "CABK.MC",
    "ELE.MC",
    "MAP.MC",
]

# -----------------------------
# Conexión Snowflake
# -----------------------------
import getpass
import os

# The password must not live in the notebook source: it is read from the
# SNOWFLAKE_PASSWORD environment variable, falling back to an interactive
# prompt. The remaining settings keep their previous values as defaults and
# can be overridden through the matching SNOWFLAKE_* variables.
# NOTE(review): ACCOUNTADMIN is far more privilege than read-only queries
# need; prefer a restricted role via SNOWFLAKE_ROLE.
_sf_password = os.environ.get('SNOWFLAKE_PASSWORD') or getpass.getpass('Snowflake password: ')

conn = snowflake.connector.connect(
    user=os.environ.get('SNOWFLAKE_USER', 'TFMGRUPO4'),
    password=_sf_password,
    account=os.environ.get('SNOWFLAKE_ACCOUNT', 'WYNIFVB-YE01854'),
    warehouse=os.environ.get('SNOWFLAKE_WAREHOUSE', 'COMPUTE_WH'),
    database=os.environ.get('SNOWFLAKE_DATABASE', 'YAHOO_FINANCE'),
    schema=os.environ.get('SNOWFLAKE_SCHEMA', 'MACHINE_LEARNING'),
    role=os.environ.get('SNOWFLAKE_ROLE', 'ACCOUNTADMIN'),
)
cursor = conn.cursor()

# -----------------------------
# IBEX (mercado)
# -----------------------------
def _fetch_close_series(cur, sql, date_col, value_col):
    """Run ``sql`` and return its two result columns as a date-indexed frame.

    Args:
        cur: Open database cursor exposing ``execute`` and ``fetchall``.
        sql: Query whose rows are (date, close price) pairs.
        date_col: Name given to the first column; it becomes the index.
        value_col: Name given to the second (price) column.

    Returns:
        DataFrame indexed by ``date_col`` (parsed as datetimes, sorted
        ascending) with one float column ``value_col``; values that cannot
        be parsed as numbers become NaN.
    """
    cur.execute(sql)
    frame = pd.DataFrame(cur.fetchall(), columns=[date_col, value_col])
    frame[date_col] = pd.to_datetime(frame[date_col])
    frame[value_col] = pd.to_numeric(frame[value_col], errors='coerce').astype(float)
    return frame.set_index(date_col).sort_index()


# Market index query. The interpolated values are the notebook constants
# defined above; switch to bind parameters if they ever become user input.
q_ibex = f"""
    SELECT DATE, STOCKCLOSE
    FROM INDEX_TOTALES
    WHERE TICKER = '{ticker_ibex}'
      AND DATE BETWEEN '{start_date}' AND '{end_date}'
    ORDER BY DATE
"""

# try/finally guarantees that the cursor and the connection are released even
# when a query or a conversion raises (they used to leak on any error).
try:
    df_ibex = _fetch_close_series(cursor, q_ibex, 'DATE', 'IBEX')

    # One price series per key; 'IBEX' is the market proxy.
    dfs = {'IBEX': df_ibex['IBEX']}
    for tk in tickers:
        q = f"""
            SELECT FECHA, CLOSE
            FROM TICKERS_INDEX
            WHERE TICKER = '{tk}'
              AND FECHA BETWEEN '{start_date}' AND '{end_date}'
            ORDER BY FECHA
        """
        dfs[tk] = _fetch_close_series(cursor, q, 'FECHA', tk)[tk]
finally:
    try:
        cursor.close()
    finally:
        conn.close()

# -----------------------------
# Panel OUTER y mensual robusto
# -----------------------------
# Outer-join every price series on its date index (one column per dict key).
panel = pd.concat(dfs, axis=1, join='outer')
panel = panel.sort_index()

# Month-end buckets holding the mean price of each month.
panel_m = panel.resample('ME').mean()

# Simple month-over-month returns; fill_method=None disables forward filling.
rets = panel_m.pct_change(fill_method=None)

# Per-period risk-free rate equivalent to rf_anual compounded over k periods
# per year; used to form excess returns.
k = 12
rf_p = pow(1 + rf_anual, 1 / k) - 1

# -----------------------------
# Utilidades
# -----------------------------
def geometric_annualized(r, periods_per_year=12):
    """Annualize a series of periodic simple returns geometrically.

    Args:
        r: pandas Series of per-period simple returns; NaN entries are ignored.
        periods_per_year: Number of return periods that make up one year.

    Returns:
        ``prod(1 + r) ** (periods_per_year / n) - 1`` over the ``n`` non-NaN
        returns, or NaN when no valid return remains.
    """
    valid = r.dropna()
    count = len(valid)
    if not count:
        return np.nan
    growth = (valid + 1).prod()
    exponent = periods_per_year / count
    return growth ** exponent - 1

def estimate_beta_alpha_r2(ri, rm, rf_period):
    """Fit the excess-return regression (Ri - Rf) = alpha + beta (Rm - Rf) by OLS.

    Args:
        ri: Asset returns (pandas Series).
        rm: Market returns (pandas Series).
        rf_period: Risk-free rate per return period, subtracted from both.

    Returns:
        Tuple ``(beta, alpha, r2, nobs)``. Only index labels present in both
        series with no NaN are used. With fewer than 3 such observations the
        first three entries are None and ``nobs`` is the count found.
    """
    aligned = pd.concat([rm, ri], axis=1, join='inner').dropna()
    aligned.columns = ['Rm', 'Ri']
    n_used = len(aligned)
    if n_used < 3:
        return None, None, None, n_used

    excess = aligned - rf_period
    design = sm.add_constant(excess['Rm'])
    fit = sm.OLS(excess['Ri'], design).fit()
    coefs = fit.params
    # .get falls back to NaN if a coefficient is absent from the fit.
    return (
        float(coefs.get('Rm', np.nan)),
        float(coefs.get('const', np.nan)),
        float(fit.rsquared),
        int(n_used),
    )

# -----------------------------
# E[Rm] anual geométrico y prima
# -----------------------------
# Annualized geometric market return and the resulting market premium.
market_returns = rets['IBEX']
E_Rm_ann = geometric_annualized(market_returns, periods_per_year=12)
market_premium = E_Rm_ann - rf_anual

# One row per asset with a usable regression: beta, alpha, R², realized
# annualized return, CAPM-implied return and their difference.
rows = []
for tk in tickers:
    asset_returns = rets[tk]
    beta, alpha, r2, nobs = estimate_beta_alpha_r2(asset_returns, market_returns, rf_p)
    if beta is None:
        print(f"[AVISO] {tk}: datos insuficientes tras alinear (n={nobs}). Se omite.")
        continue

    # Annualize the asset over the same months that entered the regression.
    common = pd.concat([market_returns, asset_returns], axis=1, join='inner').dropna()
    E_Ri_ann = geometric_annualized(common[tk], periods_per_year=12)
    E_Ri_capm = rf_anual + beta * market_premium  # CAPM prediction
    rows.append({
        'Ticker': tk,
        'Beta': beta,
        'Alpha_excesos': alpha,
        'R2': r2,
        'N_obs': nobs,
        'E_Ri_ann_geom': E_Ri_ann,
        'E_Ri_CAPM': E_Ri_capm,
        'Mispricing': E_Ri_ann - E_Ri_capm,  # realized minus CAPM
    })

df_points = pd.DataFrame(rows)
if df_points.empty:
    raise RuntimeError("No hay puntos para graficar. Revisa cobertura mensual de los activos.")
df_points = df_points.sort_values('Beta').reset_index(drop=True)

# -----------------------------
# Clasificación TP / FP / FN / TN (como en el anual)
# -----------------------------
# Boolean mask per asset: realized annualized return at or above the market
# return, and at or above the asset's own CAPM-implied return.
realized_return = df_points['E_Ri_ann_geom']
above_mean = realized_return.ge(E_Rm_ann)
above_sml = realized_return.ge(df_points['E_Ri_CAPM'])

def _class_row(am, asml):
    if am and asml:        return 'TP'  # arriba media y arriba SML
    if am and not asml:    return 'FP'  # arriba media pero abajo SML
    if (not am) and asml:  return 'FN'  # abajo media pero arriba SML
    return 'TN'                           # abajo media y abajo SML

# Label every asset from its pair of flags.
df_points['Class'] = list(map(_class_row, above_mean, above_sml))

# Marker colour and symbol per class.
class_color = {
    'TP': '#2ca02c',
    'FP': '#ff7f0e',
    'FN': '#1f77b4',
    'TN': '#d62728',
}
class_symbol = {
    'TP': 'circle',
    'FP': 'diamond',
    'FN': 'triangle-up',
    'TN': 'x',
}

# Per-class counts shown in the annotation box (0 when a class is absent).
cnt = df_points['Class'].value_counts().to_dict()
cTP, cFP, cFN, cTN = (cnt.get(label, 0) for label in ('TP', 'FP', 'FN', 'TN'))

# -----------------------------
# SML y rangos de ejes
# -----------------------------
# X range: anchored at beta = 0 so that y(beta=0) = Rf stays visible, but
# widened to the left when an asset has a negative beta. A hard 0.0 floor left
# such assets outside the visible axis range.
lowest_beta = df_points['Beta'].min()
beta_min = lowest_beta - 0.1 if lowest_beta < 0.0 else 0.0
beta_max = max(2.0, df_points['Beta'].max() + 0.2)

# SML sampled on 200 evenly spaced betas across the visible range.
betas_line = np.linspace(beta_min, beta_max, 200)
E_R_line   = rf_anual + market_premium * betas_line

# Y range: cover every plotted value (NaNs ignored) plus a fixed 0.03 pad.
y_vals = np.concatenate([
    df_points['E_Ri_ann_geom'].values,
    [rf_anual, E_Rm_ann, E_R_line.min(), E_R_line.max()]
])
y_min = float(np.nanmin(y_vals)) - 0.03
y_max = float(np.nanmax(y_vals)) + 0.03

# -----------------------------
# GRÁFICO Plotly — SML MENSUAL (sin leyenda; Ȳ a la derecha)
# -----------------------------
fig = go.Figure()

# The Security Market Line itself.
sml_line = go.Scatter(
    x=betas_line,
    y=E_R_line,
    mode='lines',
    line={'width': 3},
    name='SML',
    showlegend=False,
)
fig.add_trace(sml_line)

# Dashed horizontal reference at the market's annualized return, labelled at
# the top-right end of the line.
market_level_label = f"Ȳ = E[Rm] {E_Rm_ann:.2%}"
fig.add_hline(
    y=E_Rm_ann,
    line_dash="dash",
    line_width=2,
    annotation_text=market_level_label,
    annotation_position="top right",
    annotation_yshift=6,
)

# Risk-free asset marker at beta = 0.
rf_marker = go.Scatter(
    x=[0],
    y=[rf_anual],
    mode='markers',
    marker={'symbol': 'x', 'size': 12, 'line': {'width': 1.5}},
    name='Rf',
    showlegend=False,
    hovertemplate="<b>Activo sin riesgo</b><br>β=0.00<br>Rendimiento= %{y:.2%}<extra></extra>",
)
fig.add_trace(rf_marker)

# Market marker at beta = 1.
market_marker = go.Scatter(
    x=[1],
    y=[E_Rm_ann],
    mode='markers',
    marker={'size': 12, 'line': {'width': 1.5}},
    name='Mercado',
    showlegend=False,
    hovertemplate="<b>Mercado</b><br>β=1.00<br>E[Rm]= %{y:.2%}<extra></extra>",
)
fig.add_trace(market_marker)

# Activos por clase (4 trazas): colores/símbolos distintos, sin leyenda
# Human-readable description of each class, shown in the hover as
# customdata[8]. It is looked up from the already-assigned 'Class' label
# instead of being re-derived with nested np.where, so the text can never
# disagree with the class (the old re-derivation could when a value was NaN).
class_desc = {
    'TP': "↑media & ↑SML",
    'FP': "↑media & ↓SML",
    'FN': "↓media & ↑SML",
    'TN': "↓media & ↓SML",
}

# One marker trace per class so each gets its own colour and symbol.
for cls in ['TP', 'FP', 'FN', 'TN']:
    sub = df_points[df_points['Class'] == cls]
    if sub.empty:
        continue
    n_sub = len(sub)
    fig.add_trace(go.Scatter(
        x=sub['Beta'],
        y=sub['E_Ri_ann_geom'],
        mode='markers',
        marker=dict(
            size=10,
            symbol=class_symbol[cls],
            color=class_color[cls],
            line=dict(width=1, color='rgba(0,0,0,0.45)')
        ),
        showlegend=False,
        name=f'Activos {cls}',
        hovertemplate=(
            "<b>%{customdata[0]}</b><br>"
            f"Clase = {cls} "
            "(%{customdata[8]})<br>"
            f"n = %{{customdata[7]}} {periodo_label}<br>"
            "β = %{x:.3f} | E[Ri] (geom, anual) = %{y:.2%}<br>"
            "E[Rm] = %{customdata[1]:.2%} | Rf = %{customdata[2]:.2%}<br>"
            "<b>CAPM</b>: E[Ri]_CAPM = Rf + β·(E[Rm]−Rf) = "
            "%{customdata[2]:.2%} + %{x:.3f}·(%{customdata[1]:.2%} − %{customdata[2]:.2%}) "
            "= %{customdata[3]:.2%}<br>"
            "α (excesos) = %{customdata[4]:.2%} | R² = %{customdata[5]:.3f}<br>"
            "Mispricing = E[Ri] − E[Ri]_CAPM = %{customdata[6]:.2%}"
            "<extra></extra>"
        ),
        customdata=np.stack([
            sub['Ticker'],                      # [0] ticker
            np.full(n_sub, E_Rm_ann),           # [1] E[Rm]
            np.full(n_sub, rf_anual),           # [2] Rf
            sub['E_Ri_CAPM'],                   # [3] CAPM-implied return
            sub['Alpha_excesos'],               # [4] alpha
            sub['R2'],                          # [5] R²
            sub['Mispricing'],                  # [6] realized minus CAPM
            sub['N_obs'],                       # [7] observations used
            np.full(n_sub, class_desc[cls]),    # [8] class description
        ], axis=-1)
    ))

# Etiquetas (texto sobre cada punto)
# Ticker text drawn above each marker.
asset_labels = go.Scatter(
    x=df_points['Beta'],
    y=df_points['E_Ri_ann_geom'],
    mode='text',
    text=df_points['Ticker'],
    textposition='top center',
    textfont={'size': 10},
    name='Etiquetas',
    showlegend=False,
)
fig.add_trace(asset_labels)

# Compact CAPM formula box anchored to the top-right corner of the plot area.
nota_formula = "<br>".join([
    "CAPM:  E[Ri] = Rf + β·(E[Rm]−Rf)",
    f"Rf = {rf_anual:.2%} · E[Rm] (geom) = {E_Rm_ann:.2%}",
    f"Prima de mercado = {market_premium:.2%}",
])
fig.add_annotation(
    text=nota_formula,
    xref="paper",
    yref="paper",
    x=0.99,
    y=0.98,
    xanchor="right",
    yanchor="top",
    align="right",
    showarrow=False,
    bordercolor="rgba(0,0,0,0.15)",
    borderwidth=1,
    bgcolor="rgba(0,0,0,0.03)",
    font={'size': 11},
)

# Class key with per-class counts, anchored to the bottom-right corner inside
# the plot area. Each class name is coloured like its markers.
class_swatches = " ".join(
    f"<span style='color:{class_color[label]}'>■ {label}</span>"
    for label in ('TP', 'FP', 'FN', 'TN')
)
nota_clases = (
    "<b>Clases</b>: "
    + class_swatches
    + f"<br>TP={cTP} · FP={cFP} · FN={cFN} · TN={cTN}"
)
fig.add_annotation(
    text=nota_clases,
    xref="paper",
    yref="paper",
    x=0.98,
    y=0.06,
    xanchor="right",
    yanchor="bottom",
    align="right",
    showarrow=False,
    bordercolor="rgba(0,0,0,0.15)",
    borderwidth=1,
    bgcolor="rgba(255,255,255,0.90)",
    font={'size': 11},
    borderpad=6,
)

# Layout sin leyenda
# Figure-level layout; the legend is disabled.
chart_title = (
    "SML (CAPM):  E[Ri] = Rf + β(E[Rm]−Rf)  ·  "
    f"Periodo: {start_date} → {end_date}  ·  Frecuencia β: {freq_label}"
)
fig.update_layout(
    template='simple_white',
    title=chart_title,
    xaxis_title="Beta (β)",
    yaxis_title="Rendimiento esperado ANUAL",
    margin={'l': 40, 'r': 80, 't': 90, 'b': 80},  # a bit more room at the bottom
    hovermode='closest',
    showlegend=False,
)

# Both axes share the zero-line / axis-line styling and use the ranges
# computed earlier.
axis_style = {
    'zeroline': True,
    'zerolinewidth': 1,
    'zerolinecolor': "#B0B0B0",
    'showline': True,
    'linecolor': "#888",
}
fig.update_xaxes(range=[beta_min, beta_max], dtick=0.25, **axis_style)
fig.update_yaxes(range=[y_min, y_max], tickformat=".0%", **axis_style)

fig.show()


[AVISO] SAN.MC: datos insuficientes tras alinear (n=0). Se omite.
[AVISO] IAG.MC: datos insuficientes tras alinear (n=0). Se omite.
[AVISO] ENG.MC: datos insuficientes tras alinear (n=0). Se omite.


In [1]:
# ============================================
# SML (Security Market Line) — VERSIÓN ANUAL (2020–2024)
# Snowflake → IBEX + Activos → β ANUAL (excesos, OLS) → SML anual (geom.)
# + Clasificación TP / FP / FN / TN
# + Caja de FÓRMULA ARRIBA-DERECHA y caja de CLASES ABAJO-DERECHA (dentro del gráfico)
# ============================================

import pandas as pd
import numpy as np
import snowflake.connector
import statsmodels.api as sm
import plotly.graph_objects as go

# -----------------------------
# Configuration (ANNUAL version)
# -----------------------------
# Date window used in the BETWEEN filter of every price query.
start_date   = '2020-01-01'
end_date     = '2024-12-31'
# Value matched against the TICKER column when the market index is queried.
ticker_ibex  = '^IBEX'
rf_anual     = 0.03                 # 3% annual risk-free rate
# Labels interpolated into the chart title and the hover text.
freq_label   = 'anual'
periodo_label = 'años'

# Editable list of asset tickers; each one is queried and plotted separately.
tickers = [
    "IBE.MC", "ITX.MC", "TEF.MC", "BBVA.MC", "SAN.MC", "REP.MC",
    "AENA.MC", "IAG.MC", "ENG.MC", "ACS.MC", "FER.MC", "CABK.MC",
    "ELE.MC", "MAP.MC"
]

# -----------------------------
# Snowflake connection and price download (market index + assets)
# -----------------------------
# NOTE(review): these credentials are hard-coded in the notebook. They are kept
# unchanged so the cell behaves as before, but they should move to environment
# variables or a secrets manager, and the password should be rotated.
conn = snowflake.connector.connect(
    user='TFMGRUPO4',
    password='TFMgrupo4ucm01_01#',
    account='WYNIFVB-YE01854',
    warehouse='COMPUTE_WH',
    database='YAHOO_FINANCE',
    schema='MACHINE_LEARNING',
    role='ACCOUNTADMIN'
)
# try/finally guarantees that the cursor and the connection are released even
# when a query or a conversion step raises.
try:
    cursor = conn.cursor()
    try:
        # -----------------------------
        # IBEX (market)
        # -----------------------------
        # Values travel as bind parameters (%s placeholders, the connector's
        # default "pyformat" style) instead of being spliced into the SQL text.
        q_ibex = """
            SELECT DATE, STOCKCLOSE
            FROM INDEX_TOTALES
            WHERE TICKER = %s
              AND DATE BETWEEN %s AND %s
            ORDER BY DATE
        """
        cursor.execute(q_ibex, (ticker_ibex, start_date, end_date))
        df_ibex = pd.DataFrame(cursor.fetchall(), columns=['DATE', 'IBEX'])
        df_ibex['DATE'] = pd.to_datetime(df_ibex['DATE'])
        # Unparseable prices become NaN instead of raising.
        df_ibex['IBEX'] = pd.to_numeric(df_ibex['IBEX'], errors='coerce').astype(float)
        df_ibex.set_index('DATE', inplace=True)
        df_ibex = df_ibex.sort_index()

        # -----------------------------
        # Assets (prices)
        # -----------------------------
        # dfs maps a column label to a date-indexed price series; the market
        # index is stored under 'IBEX' and every asset under its ticker.
        dfs = {'IBEX': df_ibex['IBEX']}
        q = """
            SELECT FECHA, CLOSE
            FROM TICKERS_INDEX
            WHERE TICKER = %s
              AND FECHA BETWEEN %s AND %s
            ORDER BY FECHA
        """
        for tk in tickers:
            cursor.execute(q, (tk, start_date, end_date))
            df = pd.DataFrame(cursor.fetchall(), columns=['FECHA', tk])
            df['FECHA'] = pd.to_datetime(df['FECHA'])
            df[tk] = pd.to_numeric(df[tk], errors='coerce').astype(float)
            df.set_index('FECHA', inplace=True)
            df = df.sort_index()
            dfs[tk] = df[tk]
    finally:
        cursor.close()
finally:
    conn.close()

# -----------------------------
# OUTER-joined panel (input of the ANNUAL resampling: last valid price per year)
# -----------------------------
# One column per key of dfs, aligned on the union of all dates; dates missing
# for a given series show up as NaN in its column.
panel = pd.concat(dfs, axis=1, join='outer').sort_index()

def last_valid(series: pd.Series):
    """Return the last non-missing value of ``series``.

    Missing entries are discarded first; when nothing is left (the series is
    empty or contains only NaN) ``np.nan`` is returned instead.
    """
    observed = series.dropna()
    if observed.empty:
        return np.nan
    return observed.iloc[-1]

# Year-end bins: keep the last valid price observed in each calendar year.
# The bins are requested with an explicit offset object instead of the 'Y'
# string alias, which recent pandas releases deprecate in favour of 'YE';
# YearEnd() is the same December year-end rule and is accepted by both old
# and new pandas versions.
panel_y = panel.resample(pd.offsets.YearEnd()).apply(last_valid)
# fill_method=None: gaps are not forward-filled, so a missing year-end price
# yields NaN returns instead of a fabricated one.
rets    = panel_y.pct_change(fill_method=None)     # annual returns

# -----------------------------
# Risk-free rate per period (ANNUAL) used to build excess returns
# -----------------------------
rf_period = rf_anual  # k = 1 (one period per year)

# -----------------------------
# Utilidades
# -----------------------------
def geometric_annualized(r: pd.Series, periods_per_year=1):
    """Return the geometric mean return of ``r`` scaled to one year.

    NaN entries are ignored. With ``n`` remaining returns the result is
    ``prod(1 + r) ** (periods_per_year / n) - 1``; with the default
    ``periods_per_year=1`` this is the plain geometric mean per period.
    ``np.nan`` is returned when no valid return is left.
    """
    clean = r.dropna()
    count = len(clean)
    if not count:
        return np.nan
    exponent = periods_per_year / count
    growth = (1 + clean).prod()   # compounded gross return over the sample
    return growth ** exponent - 1

def estimate_beta_alpha_r2(ri: pd.Series, rm: pd.Series, rf_p: float):
    """Fit OLS on EXCESS returns with intercept: (Ri - Rf) = α + β (Rm - Rf) + ε.

    ``rm`` (market) and ``ri`` (asset) are aligned on their common index and
    rows with a missing value in either series are dropped before fitting.

    Returns a tuple ``(beta, alpha, r2, nobs)``. When fewer than 3 aligned
    observations remain, no regression is run and ``(None, None, None, nobs)``
    is returned so the caller can skip the asset.
    """
    aligned = pd.concat([rm, ri], axis=1, join='inner').dropna()
    aligned.columns = ['Rm', 'Ri']
    n_rows = len(aligned)
    if n_rows < 3:  # at least 3 annual returns are required
        return None, None, None, n_rows

    # Subtract the per-period risk-free rate from both columns at once.
    excess = aligned - rf_p
    design = sm.add_constant(excess['Rm'])
    fit = sm.OLS(excess['Ri'], design).fit()
    coefs = fit.params
    # A coefficient absent from the fitted parameters is reported as NaN.
    return (
        float(coefs.get('Rm', np.nan)),
        float(coefs.get('const', np.nan)),
        float(fit.rsquared),
        n_rows,
    )

# -----------------------------
# Annual E[Rm] (geometric) and market premium
# -----------------------------
E_Rm_ann = geometric_annualized(rets['IBEX'], periods_per_year=1)
# Abort early when the market series has fewer than 3 valid annual returns
# (the same minimum that the per-asset regression requires).
if pd.isna(E_Rm_ann) or rets['IBEX'].dropna().shape[0] < 3:
    raise RuntimeError("No hay años suficientes para β anual. Amplía el rango o revisa la cobertura del IBEX.")
market_premium = E_Rm_ann - rf_anual

# Check: at β=0 the SML must equal Rf
print(f"Chequeo SML: y(β=0) = {rf_anual + market_premium*0.0:.2%} (Rf = {rf_anual:.2%})")

# -----------------------------
# Annual β, α, R² and annual E[Ri] for each asset
# -----------------------------
rows = []
for tk in tickers:
    beta, alpha, r2, nobs = estimate_beta_alpha_r2(rets[tk], rets['IBEX'], rf_period)
    # beta is None when too few aligned observations were available.
    if beta is None:
        print(f"[AVISO] {tk}: años insuficientes tras alinear (n={nobs}). Se omite.")
        continue

    # Realised return is measured only on the years where both the market and
    # the asset have a return.
    pair = pd.concat([rets['IBEX'], rets[tk]], axis=1, join='inner').dropna()
    E_Ri_ann = geometric_annualized(pair[tk], periods_per_year=1)

    E_Ri_capm = rf_anual + beta * (E_Rm_ann - rf_anual)   # annual CAPM prediction
    mispricing = E_Ri_ann - E_Ri_capm                     # realised - CAPM

    rows.append({
        'Ticker': tk,
        'Beta': beta,
        'Alpha_excesos': alpha,
        'R2': r2,
        'N_obs': nobs,
        'E_Ri_ann_geom': E_Ri_ann,
        'E_Ri_CAPM': E_Ri_capm,
        'Mispricing': mispricing
    })

# One row per asset that could be estimated; stop if none survived.
df_points = pd.DataFrame(rows)
if df_points.empty:
    raise RuntimeError("No hay puntos para graficar (β anual). Amplía el rango o revisa cobertura.")
df_points = df_points.sort_values('Beta').reset_index(drop=True)

# -----------------------------
# TP / FP / FN / TN classification
# -----------------------------
# above_mean: realised return is at least the market's E[Rm].
# above_sml:  realised return is at least the asset's own CAPM prediction.
above_mean = df_points['E_Ri_ann_geom'] >= E_Rm_ann
above_sml  = df_points['E_Ri_ann_geom'] >= df_points['E_Ri_CAPM']

def _class_row(am, asml):
    """Map the two flags to a class label.

    ``am``   - truthy when the asset is at or above the market mean return.
    ``asml`` - truthy when the asset is at or above the SML.

    Returns 'TP' (both), 'FP' (above mean only), 'FN' (above SML only) or
    'TN' (neither).
    """
    if am:
        return 'TP' if asml else 'FP'
    return 'FN' if asml else 'TN'

# Label every asset from its pair of flags (same row order as df_points).
df_points['Class'] = [_class_row(am, asml) for am, asml in zip(above_mean, above_sml)]

# Marker colour and symbol for each class
class_color  = {'TP':'#2ca02c', 'FP':'#ff7f0e', 'FN':'#1f77b4', 'TN':'#d62728'}
class_symbol = {'TP':'circle',  'FP':'diamond', 'FN':'triangle-up', 'TN':'x'}

# Per-class counts for the annotation box (0 when a class has no assets)
cnt = df_points['Class'].value_counts().to_dict()
cTP = cnt.get('TP', 0); cFP = cnt.get('FP', 0); cFN = cnt.get('FN', 0); cTN = cnt.get('TN', 0)

# -----------------------------
# SML and axis ranges (the line is forced to start at β=0)
# -----------------------------
# NOTE(review): beta_min is fixed at 0, so an asset with a negative estimated β
# lies to the left of this lower bound - confirm that this is acceptable.
beta_min = 0.0
beta_max = max(1.5, float(df_points['Beta'].max() + 0.2))
betas_line = np.linspace(beta_min, beta_max, 200)
E_R_line   = rf_anual + market_premium * betas_line

# The y-range must contain every asset plus Rf, E[Rm] and both ends of the SML;
# 0.03 (3 percentage points) of padding is added on each side.
y_vals = np.concatenate([
    df_points['E_Ri_ann_geom'].values,
    [rf_anual, E_Rm_ann, E_R_line.min(), E_R_line.max()]
])
y_min = float(np.nanmin(y_vals)) - 0.03
y_max = float(np.nanmax(y_vals)) + 0.03

# -----------------------------
# Plotly CHART - ANNUAL SML (same look as the monthly version)
# -----------------------------
fig = go.Figure()

# SML (starting at β=0)
fig.add_trace(go.Scatter(
    x=betas_line, y=E_R_line, mode='lines',
    line=dict(width=3),
    showlegend=False,
    name='SML'
))

# Market mean return (E[Rm]) as a dashed horizontal line - label on the RIGHT
fig.add_hline(
    y=E_Rm_ann, line_dash="dash", line_width=2,
    annotation_text=f"Ȳ = E[Rm] {E_Rm_ann:.2%}",
    annotation_position="top right", annotation_yshift=6
)

# Risk-free asset marker at (β=0, Rf)
fig.add_trace(go.Scatter(
    x=[0], y=[rf_anual], mode='markers',
    marker=dict(symbol='x', size=12, line=dict(width=1.5)),
    showlegend=False,
    name='Rf',
    hovertemplate="<b>Activo sin riesgo</b><br>β=0.00<br>Rendimiento= %{y:.2%}<extra></extra>"
))

# Market marker at (β=1, E[Rm])
fig.add_trace(go.Scatter(
    x=[1], y=[E_Rm_ann], mode='markers',
    marker=dict(size=12, line=dict(width=1.5)),
    showlegend=False,
    name='Mercado',
    hovertemplate="<b>Mercado</b><br>β=1.00<br>E[Rm]= %{y:.2%}<extra></extra>"
))

# Assets by class (up to 4 traces): distinct colours/symbols, no legend.
# Classes without any asset are skipped, so no empty trace is added.
for cls in ['TP','FP','FN','TN']:
    sub = df_points[df_points['Class'] == cls]
    if sub.empty:
        continue
    fig.add_trace(go.Scatter(
        x=sub['Beta'],
        y=sub['E_Ri_ann_geom'],
        mode='markers',
        marker=dict(
            size=10,
            symbol=class_symbol[cls],
            color=class_color[cls],
            line=dict(width=1, color='rgba(0,0,0,0.45)')
        ),
        showlegend=False,
        name=f'Activos {cls}',
        # The customdata[i] indices below refer to the column order of the
        # np.stack call that follows; keep both in sync when editing.
        hovertemplate=(
            "<b>%{customdata[0]}</b><br>"
            f"Clase = {cls} "
            "(%{customdata[8]})<br>"
            f"n = %{{customdata[7]}} {periodo_label}<br>"
            "β = %{x:.3f} | E[Ri] (geom, anual) = %{y:.2%}<br>"
            "E[Rm] = %{customdata[1]:.2%} | Rf = %{customdata[2]:.2%}<br>"
            "<b>CAPM</b>: E[Ri]_CAPM = Rf + β·(E[Rm]−Rf) = "
            "%{customdata[2]:.2%} + %{x:.3f}·(%{customdata[1]:.2%} − %{customdata[2]:.2%}) "
            "= %{customdata[3]:.2%}<br>"
            "α (excesos) = %{customdata[4]:.2%} | R² = %{customdata[5]:.3f}<br>"
            "Mispricing = E[Ri] − E[Ri]_CAPM = %{customdata[6]:.2%}"
            "<extra></extra>"
        ),
        # Columns: 0 ticker, 1 E[Rm], 2 Rf, 3 CAPM prediction, 4 alpha, 5 R²,
        # 6 mispricing, 7 number of observations, 8 textual description of the
        # class. axis=-1 yields one row per asset.
        customdata=np.stack([
            sub['Ticker'],
            np.full(len(sub), E_Rm_ann),
            np.full(len(sub), rf_anual),
            sub['E_Ri_CAPM'],
            sub['Alpha_excesos'],
            sub['R2'],
            sub['Mispricing'],
            sub['N_obs'],
            # Description rebuilt row by row from the same two comparisons
            # (vs. E[Rm] and vs. the CAPM prediction) used for the class label.
            np.where(
                (sub['E_Ri_ann_geom'] >= E_Rm_ann) & (sub['E_Ri_ann_geom'] >= sub['E_Ri_CAPM']),
                "↑media & ↑SML",
                np.where(
                    (sub['E_Ri_ann_geom'] >= E_Rm_ann) & (sub['E_Ri_ann_geom'] < sub['E_Ri_CAPM']),
                    "↑media & ↓SML",
                    np.where(
                        (sub['E_Ri_ann_geom'] < E_Rm_ann) & (sub['E_Ri_ann_geom'] >= sub['E_Ri_CAPM']),
                        "↓media & ↑SML",
                        "↓media & ↓SML"
                    )
                )
            )
        ], axis=-1)
    ))

# Labels (ticker text above each point), drawn as a separate text-only trace
fig.add_trace(go.Scatter(
    x=df_points['Beta'],
    y=df_points['E_Ri_ann_geom'],
    mode='text',
    text=df_points['Ticker'],
    textposition='top center',
    textfont=dict(size=10),
    showlegend=False,
    name='Etiquetas'
))

# -----------------------------
# FORMULA box - TOP-RIGHT (inside the chart)
# -----------------------------
nota_formula = (
    "CAPM:  E[Ri] = Rf + β·(E[Rm]−Rf)"
    f"<br>Rf = {rf_anual:.2%} · E[Rm] (geom) = {E_Rm_ann:.2%}"
    f"<br>Prima de mercado = {market_premium:.2%}"
)
# "paper" coordinates are fractions of the plotting area.
fig.add_annotation(
    xref="paper", yref="paper",
    x=0.99, y=0.98, xanchor="right", yanchor="top",
    text=nota_formula, showarrow=False, align="right",
    bordercolor="rgba(0,0,0,0.15)", borderwidth=1,
    bgcolor="rgba(255,255,255,0.90)", font=dict(size=11),
    borderpad=6
)

# -----------------------------
# CLASSES box - BOTTOM-RIGHT (inside the chart)
# -----------------------------
# One coloured square per class followed by the per-class counts.
nota_clases = (
    "<b>Clases</b>: "
    f"<span style='color:{class_color['TP']}'>■ TP</span> "
    f"<span style='color:{class_color['FP']}'>■ FP</span> "
    f"<span style='color:{class_color['FN']}'>■ FN</span> "
    f"<span style='color:{class_color['TN']}'>■ TN</span>"
    f"<br>TP={cTP} · FP={cFP} · FN={cFN} · TN={cTN}"
)
fig.add_annotation(
    xref="paper", yref="paper",
    x=0.98, y=0.06,                # bottom-right
    xanchor="right", yanchor="bottom",
    text=nota_clases, showarrow=False, align="right",
    bordercolor="rgba(0,0,0,0.15)", borderwidth=1,
    bgcolor="rgba(255,255,255,0.90)", font=dict(size=11),
    borderpad=6
)

# Layout (no legend) with moderate margins
fig.update_layout(
    template='simple_white',
    title=("SML (CAPM):  E[Ri] = Rf + β(E[Rm]−Rf)  ·  "
           f"Periodo: {start_date} → {end_date}  ·  Frecuencia β: {freq_label}"),
    xaxis_title="Beta (β)",
    yaxis_title="Rendimiento esperado ANUAL",
    margin=dict(l=40, r=60, t=90, b=80),
    hovermode='closest',
    showlegend=False
)

# Axes (β starts at 0 so the point where the line leaves Rf is visible)
fig.update_xaxes(
    range=[beta_min, beta_max],
    zeroline=True, zerolinewidth=1, zerolinecolor="#B0B0B0",
    showline=True, linecolor="#888", dtick=0.25
)
# The y axis is formatted as whole percentages.
fig.update_yaxes(
    range=[y_min, y_max],
    zeroline=True, zerolinewidth=1, zerolinecolor="#B0B0B0",
    showline=True, linecolor="#888", tickformat=".0%"
)

fig.show()


  panel_y = panel.resample('Y').apply(last_valid)


Chequeo SML: y(β=0) = 3.00% (Rf = 3.00%)
[AVISO] IBE.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] ITX.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] TEF.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] BBVA.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] SAN.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] REP.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] AENA.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] IAG.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] ENG.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] ACS.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] FER.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] CABK.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] ELE.MC: años insuficientes tras alinear (n=0). Se omite.
[AVISO] MAP.MC: años insuficientes tras alinear (n=0). Se omite.


RuntimeError: No hay puntos para graficar (β anual). Amplía el rango o revisa cobertura.

In [2]:
# ============================================
# SML (Security Market Line) — VERSIÓN ANUAL (2020–2024)
# Snowflake → IBEX + Activos → β ANUAL (excesos, OLS) → SML anual (geom.)
# + Clasificación TP / FP / FN / TN
# + Caja de FÓRMULA ARRIBA-DERECHA y caja de CLASES ABAJO-DERECHA (dentro del gráfico)
# ============================================

import pandas as pd
import numpy as np
import snowflake.connector
import statsmodels.api as sm
import plotly.graph_objects as go

# -----------------------------
# Configuration (ANNUAL version)
# -----------------------------
# Date window used in the BETWEEN filter of every price query.
start_date   = '2020-01-01'
end_date     = '2024-12-31'
ticker_ibex  = 'IBEX 35'          # index name as stored in INDEX_DAILY (INDEX_NAME column)
rf_anual     = 0.03               # 3% annual risk-free rate
# Labels interpolated into the chart title and the hover text.
freq_label   = 'anual'
periodo_label = 'años'

# Editable list of asset tickers; each one is queried and plotted separately.
tickers = [
    "IBE.MC", "ITX.MC", "TEF.MC", "BBVA.MC", "SAN.MC", "REP.MC",
    "AENA.MC", "IAG.MC", "ENG.MC", "ACS.MC", "FER.MC", "CABK.MC",
    "ELE.MC", "MAP.MC"
]

# -----------------------------
# Snowflake connection (YAHOO_PRUEBA.IBEX) and price download
# -----------------------------
# NOTE(review): these credentials are hard-coded in the notebook. They are kept
# unchanged so the cell behaves as before, but they should move to environment
# variables or a secrets manager, and the password should be rotated.
conn = snowflake.connector.connect(
    user='TFMGRUPO4',
    password='TFMgrupo4ucm01_01#',
    account='WYNIFVB-YE01854',
    warehouse='COMPUTE_WH',
    database='YAHOO_PRUEBA',   # <<<<<<<<<<
    schema='IBEX',             # <<<<<<<<<<
    role='ACCOUNTADMIN'
)
# try/finally guarantees that the cursor and the connection are released even
# when a query or a conversion step raises.
try:
    cursor = conn.cursor()
    try:
        # -----------------------------
        # IBEX (market) - INDEX_DAILY
        # -----------------------------
        # Values travel as bind parameters (%s placeholders, the connector's
        # default "pyformat" style) instead of being spliced into the SQL text.
        q_ibex = """
            SELECT FECHA, CLOSE
            FROM INDEX_DAILY
            WHERE INDEX_NAME = %s
              AND FECHA BETWEEN %s AND %s
            ORDER BY FECHA
        """
        cursor.execute(q_ibex, (ticker_ibex, start_date, end_date))
        df_ibex = pd.DataFrame(cursor.fetchall(), columns=['FECHA', 'IBEX'])
        df_ibex['FECHA'] = pd.to_datetime(df_ibex['FECHA'])
        # Unparseable prices become NaN instead of raising.
        df_ibex['IBEX'] = pd.to_numeric(df_ibex['IBEX'], errors='coerce').astype(float)
        # Keep the market frame indexed by a column named 'DATE'.
        df_ibex.rename(columns={'FECHA': 'DATE'}, inplace=True)
        df_ibex.set_index('DATE', inplace=True)
        df_ibex = df_ibex.sort_index()

        # -----------------------------
        # Assets (prices) - TICKERS_INDEX
        # -----------------------------
        # dfs maps a column label to a date-indexed price series; the market
        # index is stored under 'IBEX' and every asset under its ticker.
        dfs = {'IBEX': df_ibex['IBEX']}
        q = """
            SELECT FECHA, CLOSE
            FROM TICKERS_INDEX
            WHERE TICKER = %s
              AND FECHA BETWEEN %s AND %s
            ORDER BY FECHA
        """
        for tk in tickers:
            cursor.execute(q, (tk, start_date, end_date))
            df = pd.DataFrame(cursor.fetchall(), columns=['FECHA', tk])
            df['FECHA'] = pd.to_datetime(df['FECHA'])
            df[tk] = pd.to_numeric(df[tk], errors='coerce').astype(float)
            df.set_index('FECHA', inplace=True)
            df = df.sort_index()
            dfs[tk] = df[tk]
    finally:
        cursor.close()
finally:
    conn.close()

# -----------------------------
# OUTER-joined panel (input of the ANNUAL resampling: last valid price per year)
# -----------------------------
# One column per key of dfs, aligned on the union of all dates; dates missing
# for a given series show up as NaN in its column.
panel = pd.concat(dfs, axis=1, join='outer').sort_index()

def last_valid(series: pd.Series):
    """Return the last non-missing value of ``series``.

    Missing entries are discarded first; when nothing is left (the series is
    empty or contains only NaN) ``np.nan`` is returned instead.
    """
    observed = series.dropna()
    if observed.empty:
        return np.nan
    return observed.iloc[-1]

# Year-end bins: keep the last valid price observed in each calendar year.
# The bins are requested with an explicit offset object instead of the 'Y'
# string alias, which recent pandas releases deprecate in favour of 'YE';
# YearEnd() is the same December year-end rule and is accepted by both old
# and new pandas versions.
panel_y = panel.resample(pd.offsets.YearEnd()).apply(last_valid)
# fill_method=None: gaps are not forward-filled, so a missing year-end price
# yields NaN returns instead of a fabricated one.
rets    = panel_y.pct_change(fill_method=None)     # annual returns

# -----------------------------
# Risk-free rate per period (ANNUAL) used to build excess returns
# -----------------------------
rf_period = rf_anual  # k = 1 (one period per year)

# -----------------------------
# Utilidades
# -----------------------------
def geometric_annualized(r: pd.Series, periods_per_year=1):
    """Return the geometric mean return of ``r`` scaled to one year.

    NaN entries are ignored. With ``n`` remaining returns the result is
    ``prod(1 + r) ** (periods_per_year / n) - 1``; with the default
    ``periods_per_year=1`` this is the plain geometric mean per period.
    ``np.nan`` is returned when no valid return is left.
    """
    clean = r.dropna()
    count = len(clean)
    if not count:
        return np.nan
    exponent = periods_per_year / count
    growth = (1 + clean).prod()   # compounded gross return over the sample
    return growth ** exponent - 1

def estimate_beta_alpha_r2(ri: pd.Series, rm: pd.Series, rf_p: float):
    """Fit OLS on EXCESS returns with intercept: (Ri - Rf) = α + β (Rm - Rf) + ε.

    ``rm`` (market) and ``ri`` (asset) are aligned on their common index and
    rows with a missing value in either series are dropped before fitting.

    Returns a tuple ``(beta, alpha, r2, nobs)``. When fewer than 3 aligned
    observations remain, no regression is run and ``(None, None, None, nobs)``
    is returned so the caller can skip the asset.
    """
    aligned = pd.concat([rm, ri], axis=1, join='inner').dropna()
    aligned.columns = ['Rm', 'Ri']
    n_rows = len(aligned)
    if n_rows < 3:  # at least 3 annual returns are required
        return None, None, None, n_rows

    # Subtract the per-period risk-free rate from both columns at once.
    excess = aligned - rf_p
    design = sm.add_constant(excess['Rm'])
    fit = sm.OLS(excess['Ri'], design).fit()
    coefs = fit.params
    # A coefficient absent from the fitted parameters is reported as NaN.
    return (
        float(coefs.get('Rm', np.nan)),
        float(coefs.get('const', np.nan)),
        float(fit.rsquared),
        n_rows,
    )

# -----------------------------
# Annual E[Rm] (geometric) and market premium
# -----------------------------
E_Rm_ann = geometric_annualized(rets['IBEX'], periods_per_year=1)
# Abort early when the market series has fewer than 3 valid annual returns
# (the same minimum that the per-asset regression requires).
if pd.isna(E_Rm_ann) or rets['IBEX'].dropna().shape[0] < 3:
    raise RuntimeError("No hay años suficientes para β anual. Amplía el rango o revisa la cobertura del IBEX.")
market_premium = E_Rm_ann - rf_anual

# Check: at β=0 the SML must equal Rf
print(f"Chequeo SML: y(β=0) = {rf_anual + market_premium*0.0:.2%} (Rf = {rf_anual:.2%})")

# -----------------------------
# Annual β, α, R² and annual E[Ri] for each asset
# -----------------------------
rows = []
for tk in tickers:
    beta, alpha, r2, nobs = estimate_beta_alpha_r2(rets[tk], rets['IBEX'], rf_period)
    # beta is None when too few aligned observations were available.
    if beta is None:
        print(f"[AVISO] {tk}: años insuficientes tras alinear (n={nobs}). Se omite.")
        continue

    # Realised return is measured only on the years where both the market and
    # the asset have a return.
    pair = pd.concat([rets['IBEX'], rets[tk]], axis=1, join='inner').dropna()
    E_Ri_ann = geometric_annualized(pair[tk], periods_per_year=1)

    E_Ri_capm = rf_anual + beta * (E_Rm_ann - rf_anual)   # annual CAPM prediction
    mispricing = E_Ri_ann - E_Ri_capm                     # realised - CAPM

    rows.append({
        'Ticker': tk,
        'Beta': beta,
        'Alpha_excesos': alpha,
        'R2': r2,
        'N_obs': nobs,
        'E_Ri_ann_geom': E_Ri_ann,
        'E_Ri_CAPM': E_Ri_capm,
        'Mispricing': mispricing
    })

# One row per asset that could be estimated; stop if none survived.
df_points = pd.DataFrame(rows)
if df_points.empty:
    raise RuntimeError("No hay puntos para graficar (β anual). Amplía el rango o revisa cobertura.")
df_points = df_points.sort_values('Beta').reset_index(drop=True)

# -----------------------------
# TP / FP / FN / TN classification
# -----------------------------
# above_mean: realised return is at least the market's E[Rm].
# above_sml:  realised return is at least the asset's own CAPM prediction.
above_mean = df_points['E_Ri_ann_geom'] >= E_Rm_ann
above_sml  = df_points['E_Ri_ann_geom'] >= df_points['E_Ri_CAPM']

def _class_row(am, asml):
    """Map the two flags to a class label.

    ``am``   - truthy when the asset is at or above the market mean return.
    ``asml`` - truthy when the asset is at or above the SML.

    Returns 'TP' (both), 'FP' (above mean only), 'FN' (above SML only) or
    'TN' (neither).
    """
    if am:
        return 'TP' if asml else 'FP'
    return 'FN' if asml else 'TN'

# Label every asset from its pair of flags (same row order as df_points).
df_points['Class'] = [_class_row(am, asml) for am, asml in zip(above_mean, above_sml)]

# Marker colour and symbol for each class
class_color  = {'TP':'#2ca02c', 'FP':'#ff7f0e', 'FN':'#1f77b4', 'TN':'#d62728'}
class_symbol = {'TP':'circle',  'FP':'diamond', 'FN':'triangle-up', 'TN':'x'}

# Per-class counts for the annotation box (0 when a class has no assets)
cnt = df_points['Class'].value_counts().to_dict()
cTP = cnt.get('TP', 0); cFP = cnt.get('FP', 0); cFN = cnt.get('FN', 0); cTN = cnt.get('TN', 0)

# -----------------------------
# SML and axis ranges (the line is forced to start at β=0)
# -----------------------------
# NOTE(review): beta_min is fixed at 0, so an asset with a negative estimated β
# lies to the left of this lower bound - confirm that this is acceptable.
beta_min = 0.0
beta_max = max(1.5, float(df_points['Beta'].max() + 0.2))
betas_line = np.linspace(beta_min, beta_max, 200)
E_R_line   = rf_anual + market_premium * betas_line

# The y-range must contain every asset plus Rf, E[Rm] and both ends of the SML;
# 0.03 (3 percentage points) of padding is added on each side.
y_vals = np.concatenate([
    df_points['E_Ri_ann_geom'].values,
    [rf_anual, E_Rm_ann, E_R_line.min(), E_R_line.max()]
])
y_min = float(np.nanmin(y_vals)) - 0.03
y_max = float(np.nanmax(y_vals)) + 0.03

# -----------------------------
# Plotly CHART - ANNUAL SML (same look as the monthly version)
# -----------------------------
fig = go.Figure()

# SML (starting at β=0)
fig.add_trace(go.Scatter(
    x=betas_line, y=E_R_line, mode='lines',
    line=dict(width=3),
    showlegend=False,
    name='SML'
))

# Market mean return (E[Rm]) as a dashed horizontal line - label on the RIGHT
fig.add_hline(
    y=E_Rm_ann, line_dash="dash", line_width=2,
    annotation_text=f"Ȳ = E[Rm] {E_Rm_ann:.2%}",
    annotation_position="top right", annotation_yshift=6
)

# Risk-free asset marker at (β=0, Rf)
fig.add_trace(go.Scatter(
    x=[0], y=[rf_anual], mode='markers',
    marker=dict(symbol='x', size=12, line=dict(width=1.5)),
    showlegend=False,
    name='Rf',
    hovertemplate="<b>Activo sin riesgo</b><br>β=0.00<br>Rendimiento= %{y:.2%}<extra></extra>"
))

# Market marker at (β=1, E[Rm])
fig.add_trace(go.Scatter(
    x=[1], y=[E_Rm_ann], mode='markers',
    marker=dict(size=12, line=dict(width=1.5)),
    showlegend=False,
    name='Mercado',
    hovertemplate="<b>Mercado</b><br>β=1.00<br>E[Rm]= %{y:.2%}<extra></extra>"
))

# Assets by class (up to 4 traces).
# Classes without any asset are skipped, so no empty trace is added.
for cls in ['TP','FP','FN','TN']:
    sub = df_points[df_points['Class'] == cls]
    if sub.empty:
        continue
    fig.add_trace(go.Scatter(
        x=sub['Beta'],
        y=sub['E_Ri_ann_geom'],
        mode='markers',
        marker=dict(
            size=10,
            symbol=class_symbol[cls],
            color=class_color[cls],
            line=dict(width=1, color='rgba(0,0,0,0.45)')
        ),
        showlegend=False,
        name=f'Activos {cls}',
        # The customdata[i] indices below refer to the column order of the
        # np.stack call that follows; keep both in sync when editing.
        hovertemplate=(
            "<b>%{customdata[0]}</b><br>"
            f"Clase = {cls} "
            "(%{customdata[8]})<br>"
            f"n = %{{customdata[7]}} {periodo_label}<br>"
            "β = %{x:.3f} | E[Ri] (geom, anual) = %{y:.2%}<br>"
            "E[Rm] = %{customdata[1]:.2%} | Rf = %{customdata[2]:.2%}<br>"
            "<b>CAPM</b>: E[Ri]_CAPM = Rf + β·(E[Rm]−Rf) = "
            "%{customdata[2]:.2%} + %{x:.3f}·(%{customdata[1]:.2%} − %{customdata[2]:.2%}) "
            "= %{customdata[3]:.2%}<br>"
            "α (excesos) = %{customdata[4]:.2%} | R² = %{customdata[5]:.3f}<br>"
            "Mispricing = E[Ri] − E[Ri]_CAPM = %{customdata[6]:.2%}"
            "<extra></extra>"
        ),
        # Columns: 0 ticker, 1 E[Rm], 2 Rf, 3 CAPM prediction, 4 alpha, 5 R²,
        # 6 mispricing, 7 number of observations, 8 textual description of the
        # class. axis=-1 yields one row per asset.
        customdata=np.stack([
            sub['Ticker'],
            np.full(len(sub), E_Rm_ann),
            np.full(len(sub), rf_anual),
            sub['E_Ri_CAPM'],
            sub['Alpha_excesos'],
            sub['R2'],
            sub['Mispricing'],
            sub['N_obs'],
            # Description rebuilt row by row from the same two comparisons
            # (vs. E[Rm] and vs. the CAPM prediction) used for the class label.
            np.where(
                (sub['E_Ri_ann_geom'] >= E_Rm_ann) & (sub['E_Ri_ann_geom'] >= sub['E_Ri_CAPM']),
                "↑media & ↑SML",
                np.where(
                    (sub['E_Ri_ann_geom'] >= E_Rm_ann) & (sub['E_Ri_ann_geom'] < sub['E_Ri_CAPM']),
                    "↑media & ↓SML",
                    np.where(
                        (sub['E_Ri_ann_geom'] < E_Rm_ann) & (sub['E_Ri_ann_geom'] >= sub['E_Ri_CAPM']),
                        "↓media & ↑SML",
                        "↓media & ↓SML"
                    )
                )
            )
        ], axis=-1)
    ))

# Labels (ticker text above each point), drawn as a separate text-only trace
fig.add_trace(go.Scatter(
    x=df_points['Beta'],
    y=df_points['E_Ri_ann_geom'],
    mode='text',
    text=df_points['Ticker'],
    textposition='top center',
    textfont=dict(size=10),
    showlegend=False,
    name='Etiquetas'
))

# FORMULA box, anchored at the top-right of the plotting area
nota_formula = (
    "CAPM:  E[Ri] = Rf + β·(E[Rm]−Rf)"
    f"<br>Rf = {rf_anual:.2%} · E[Rm] (geom) = {E_Rm_ann:.2%}"
    f"<br>Prima de mercado = {market_premium:.2%}"
)
# "paper" coordinates are fractions of the plotting area.
fig.add_annotation(
    xref="paper", yref="paper",
    x=0.99, y=0.98, xanchor="right", yanchor="top",
    text=nota_formula, showarrow=False, align="right",
    bordercolor="rgba(0,0,0,0.15)", borderwidth=1,
    bgcolor="rgba(255,255,255,0.90)", font=dict(size=11),
    borderpad=6
)

# CLASSES box, anchored at the bottom-right of the plotting area.
# class_color is the mapping defined with the classification step earlier in
# this cell; the identical dict that used to be rebuilt here was redundant and
# risked drifting out of sync with the marker colours.
nota_clases = (
    "<b>Clases</b>: "
    f"<span style='color:{class_color['TP']}'>■ TP</span> "
    f"<span style='color:{class_color['FP']}'>■ FP</span> "
    f"<span style='color:{class_color['FN']}'>■ FN</span> "
    f"<span style='color:{class_color['TN']}'>■ TN</span>"
    f"<br>TP={cTP} · FP={cFP} · FN={cFN} · TN={cTN}"
)
# "paper" coordinates are fractions of the plotting area.
fig.add_annotation(
    xref="paper", yref="paper",
    x=0.98, y=0.06,
    xanchor="right", yanchor="bottom",
    text=nota_clases, showarrow=False, align="right",
    bordercolor="rgba(0,0,0,0.15)", borderwidth=1,
    bgcolor="rgba(255,255,255,0.90)", font=dict(size=11),
    borderpad=6
)

# Layout: white template, descriptive title, no legend
fig.update_layout(
    template='simple_white',
    title=("SML (CAPM):  E[Ri] = Rf + β(E[Rm]−Rf)  ·  "
           f"Periodo: {start_date} → {end_date}  ·  Frecuencia β: {freq_label}"),
    xaxis_title="Beta (β)",
    yaxis_title="Rendimiento esperado ANUAL",
    margin=dict(l=40, r=60, t=90, b=80),
    hovermode='closest',
    showlegend=False
)

# Axes
# beta_min/beta_max and y_min/y_max are the bounds already derived together
# with the SML line earlier in this cell. The copy-pasted recomputation that
# used to sit here applied the same formulas to the same inputs, so it has been
# dropped to keep a single source of truth for the plotted ranges.
fig.update_xaxes(range=[beta_min, beta_max], zeroline=True, zerolinewidth=1, zerolinecolor="#B0B0B0",
                 showline=True, linecolor="#888", dtick=0.25)
# The y axis is formatted as whole percentages.
fig.update_yaxes(range=[y_min, y_max], zeroline=True, zerolinewidth=1, zerolinecolor="#B0B0B0",
                 showline=True, linecolor="#888", tickformat=".0%")

fig.show()


  panel_y = panel.resample('Y').apply(last_valid)


Chequeo SML: y(β=0) = 3.00% (Rf = 3.00%)
