In [None]:
# ============================================================
# FF5 Factor Backtesting — FINAL (PANDAS-SAFE)
# ============================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from pathlib import Path

# ------------------------------------------------------------
# Configuration
# ------------------------------------------------------------
# Number of observations in each rolling-regression window.
ROLLING_WINDOW = 36
# Starting portfolio value that the compounded return curves are scaled by.
INITIAL_CAPITAL = 1000
# Periods per year; volatility and Sharpe are scaled by its square root.
ANNUALIZATION = 12

# Column holding the market return series (benchmark and regressor).
MARKET = "MKT"
# Factor columns that each get their own plots, tables and regressions.
FACTORS = ["SMB", "HML", "RMW", "CMA"]

# Line colour per series, keyed by column name.
COLORS = dict(
    MKT="#1f77b4",
    SMB="#ff7f0e",
    HML="#2ca02c",
    RMW="#d62728",
    CMA="#9467bd",
)

# ------------------------------------------------------------
# Paths
# ------------------------------------------------------------
# Root folder that receives every figure and table written by this script.
BASE = Path("/PC/data/processed/Factor Backtesting")

FIGURES = BASE / "figures"
TABLES = BASE / "tables"

# Create the whole output tree up front so later savefig/to_latex calls
# never hit a missing directory; existing folders are left untouched.
for parent, children in (
    (FIGURES, ("pnl", "rolling_beta", "rolling_alpha", "correlations")),
    (TABLES, ("pnl", "regressions", "summary")),
):
    for child in children:
        (parent / child).mkdir(parents=True, exist_ok=True)

# ------------------------------------------------------------
# Styling
# ------------------------------------------------------------
# Plain white seaborn theme as the base look for every figure.
sns.set_theme(style="white")

# Font sizes applied after the theme so they override seaborn's defaults.
plt.rc("font", size=10)
plt.rc("axes", titlesize=11, labelsize=10)

def clean_axes(ax):
    """Declutter *ax* in place: hide the top and right spines and the grid.

    Parameters
    ----------
    ax : object exposing ``grid`` and a ``spines`` mapping
        The axes to tidy; in this script always a matplotlib Axes.
    """
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.grid(False)

# ------------------------------------------------------------
# Load data
# ------------------------------------------------------------
# Return series, one column per factor, indexed by the parsed "Date" column.
df = pd.read_csv(
    "/PC/data/processed/ff5_monthly_clean.csv",
    index_col="Date",
    parse_dates=["Date"],
)

# ------------------------------------------------------------
# Build P&L series
# ------------------------------------------------------------
# Compound (1 + r) down each column and scale by the starting capital, giving
# the value of INITIAL_CAPITAL invested in the market and in every factor.
# NOTE(review): assumes returns are decimal fractions (0.01 == 1%), not
# percentages - confirm against the step that produced the cleaned CSV.
pnl = df[[MARKET, *FACTORS]].add(1).cumprod().mul(INITIAL_CAPITAL)

# ------------------------------------------------------------
# Summary performance table
# ------------------------------------------------------------
summary_rows = []

# sqrt(periods per year): scales per-period volatility and Sharpe to annual.
annual_scale = np.sqrt(ANNUALIZATION)

for name in [MARKET, *FACTORS]:
    returns, equity = df[name], pnl[name]
    sigma = returns.std()

    # Deepest decline of the equity curve relative to its running peak.
    # NOTE(review): the running peak starts at the first compounded value,
    # not at INITIAL_CAPITAL, so a loss in the very first period is not
    # counted as drawdown - confirm this is the intended convention.
    worst_drawdown = (equity / equity.cummax() - 1).min()

    summary_rows.append([
        name,
        round(equity.iloc[-1], 0),
        round(sigma * annual_scale, 2),
        # Mean over volatility of the raw series; no risk-free rate is
        # subtracted here.
        round((returns.mean() / sigma) * annual_scale, 2),
        round(worst_drawdown, 2),
    ])

# One row per series, in the order market first, then the factors.
summary = pd.DataFrame(
    summary_rows,
    columns=["Factor", "Final Value", "Volatility", "Sharpe", "Max Drawdown"],
)

# Written without the integer index; every float is printed with 2 decimals.
summary.to_latex(
    TABLES / "summary/performance_summary.tex",
    float_format="%.2f",
    index=False,
)

# ------------------------------------------------------------
# Per-factor analysis
# ------------------------------------------------------------
def _rolling_market_model(returns, design, window):
    """Fit ``returns ~ const + MARKET`` by OLS on every full trailing window.

    Parameters
    ----------
    returns : pandas.Series
        Dependent variable (one factor's return series).
    design : pandas.DataFrame
        Regressors with a ``"const"`` column and a ``MARKET`` column, aligned
        row for row with ``returns``.
    window : int
        Number of consecutive observations per regression.

    Returns
    -------
    (alphas, betas) : tuple of pandas.Series
        Intercept and market slope of each window, indexed by the date of the
        LAST observation inside that window. Both are empty when ``returns``
        has fewer than ``window`` rows.
    """
    alphas, betas, stamps = [], [], []
    # `end` is an exclusive slice bound, so it must run up to len(returns)
    # inclusive; otherwise the most recent observation never enters a window.
    for end in range(window, len(returns) + 1):
        start = end - window
        fit = sm.OLS(returns.iloc[start:end], design.iloc[start:end]).fit()
        alphas.append(fit.params["const"])
        betas.append(fit.params[MARKET])
        # Stamp the estimate with the window's own last date rather than the
        # date of the following, not-yet-used observation.
        stamps.append(returns.index[end - 1])
    return pd.Series(alphas, index=stamps), pd.Series(betas, index=stamps)


# Loop-invariant pieces, built once instead of once per factor / per window.
# sqrt(periods per year): scales per-period volatility and Sharpe to annual.
annual_scale = np.sqrt(ANNUALIZATION)
# [const, MKT] regressors shared by the rolling and full-sample regressions.
market_design = sm.add_constant(df[MARKET])

for f in FACTORS:

    # =======================
    # P&L Plot
    # =======================
    # Factor equity curve against the market's, both starting from the same
    # capital; the factor's final value is annotated at the right-hand edge.
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(pnl[f], color=COLORS[f], lw=2, label=f)
    ax.plot(pnl[MARKET], color=COLORS[MARKET], ls="--", alpha=0.6, label="Market")
    # Base amount comes from the config so the title cannot drift from it.
    ax.set_title(f"{f} — P&L (${INITIAL_CAPITAL} base)")
    ax.annotate(
        f"${pnl[f].iloc[-1]:.0f}",
        xy=(pnl.index[-1], pnl[f].iloc[-1]),
        xytext=(6, 0),
        textcoords="offset points",
        color="green",
        va="center"
    )
    ax.legend()
    clean_axes(ax)
    # Operate on the explicit figure handle instead of pyplot's implicit
    # "current figure".
    fig.tight_layout()
    fig.savefig(FIGURES / "pnl" / f"pnl_{f}.png", dpi=300)
    plt.close(fig)

    # =======================
    # P&L Table
    # =======================
    # Same four metrics as the summary performance table, for this factor.
    factor_vol = df[f].std()
    pnl_table = pd.DataFrame({
        "Metric": ["Final Value", "Volatility", "Sharpe", "Max Drawdown"],
        "Value": [
            round(pnl[f].iloc[-1], 0),
            round(factor_vol * annual_scale, 2),
            round((df[f].mean() / factor_vol) * annual_scale, 2),
            # Deepest decline of the equity curve relative to its running peak.
            round((pnl[f] / pnl[f].cummax() - 1).min(), 2)
        ]
    })

    pnl_table.to_latex(
        TABLES / "pnl" / f"pnl_{f}.tex",
        index=False
    )

    # =======================
    # Rolling Beta & Alpha
    # =======================
    alphas, betas = _rolling_market_model(df[f], market_design, ROLLING_WINDOW)

    for series, name in [(betas, "beta"), (alphas, "alpha")]:
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.plot(series, color=COLORS[f], lw=2)
        # Window length comes from the config so the title cannot drift from it.
        ax.set_title(f"{f} — Rolling {name.capitalize()} ({ROLLING_WINDOW}m)")
        # Label the latest estimate with the first letter of its name.
        ax.annotate(
            f"{name[0]} = {series.iloc[-1]:.3f}",
            xy=(series.index[-1], series.iloc[-1]),
            xytext=(6, 0),
            textcoords="offset points",
            color="green",
            va="center"
        )
        clean_axes(ax)
        fig.tight_layout()
        fig.savefig(FIGURES / f"rolling_{name}" / f"{f}_rolling_{name}.png", dpi=300)
        plt.close(fig)

    # =======================
    # Regression diagnostics
    # =======================
    # Full-sample regression of the factor on the market.
    model = sm.OLS(df[f], market_design).fit()

    reg = pd.DataFrame({
        "Coefficient": model.params,
        "t-stat": model.tvalues,
        "p-value": model.pvalues
    })

    # Model-level statistics are repeated on every coefficient row.
    reg["R2"] = model.rsquared
    reg["N"] = int(model.nobs)

    reg.to_latex(
        TABLES / "regressions" / f"regression_{f}.tex",
        float_format="%.4f"
    )

# ------------------------------------------------------------
# Correlation matrix
# ------------------------------------------------------------
# Pairwise correlations between the market and every factor return series.
corr = df[[MARKET, *FACTORS]].corr()

fig, ax = plt.subplots(figsize=(6, 5))
# Annotated heatmap with two-decimal cell labels on a diverging colour map.
sns.heatmap(corr, ax=ax, cmap="coolwarm", annot=True, fmt=".2f")
# Keep the column labels along the bottom edge of the matrix.
ax.xaxis.tick_bottom()
clean_axes(ax)
fig.tight_layout()
fig.savefig(FIGURES / "correlations/correlation_matrix.png", dpi=300)
plt.close(fig)

print("✔ Fully fixed factor backtesting complete.")
