In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from pathlib import Path

In [None]:
# Maintenance step: add a lowercase "date" column that mirrors "Date" and
# write the article-count file back in place.
# NOTE(review): absolute, machine-specific path - consider deriving it from the
# project root so the notebook runs on other machines.
articles_csv = r"C:\Users\PC\Desktop\Masterarbeit\AI_narrative_index\data\processed\variables\n_articles_log.csv"

df = pd.read_csv(articles_csv)
df["date"] = df["Date"]

# index=False keeps the cell idempotent: without it the RangeIndex is written
# as an extra unnamed column, and every re-run of this cell would add one more
# "Unnamed: 0"-style column to the file.
df.to_csv(articles_csv, index=False)

In [None]:
# Project root: the parent of the current working directory
root = Path.cwd().parent

# Output folder for generated tables. It is created up front (including any
# missing parents) so that later CSV / LaTeX exports into it do not fail when
# the folder does not exist yet; exist_ok makes re-running the cell harmless.
out_path = root / "reports" / "tables"
out_path.mkdir(parents=True, exist_ok=True)

# Sample window. The start is set to 2023-03-31 so that this first price serves
# as the base for the log-growth computation, i.e. returns are available from
# 2023-04-03 onwards.
start, end = "2023-03-31", "2025-07-16"

# Tickers of the assets to analyse
tickers = [
    "AAPL","AIQ","AMD","AMZN","ARKQ","AVGO","BOTZ",
    "GOOGL","IRBO","META","MSFT","NVDA","ROBO","TSLA","TSM"
]

# Adjusted close prices for all assets plus the market benchmark (^GSPC)
px = yf.download(
    tickers + ["^GSPC"],
    start=start,
    end=end,
    auto_adjust=False,
)["Adj Close"]

# Risk-free proxy: 3M T-Bill (^IRX), quoted as an annualized percent yield
rf = yf.download(
    "^IRX",
    start=start,
    end=end,
    auto_adjust=False,
)["Adj Close"]

# Daily log returns of assets and market: first difference of log prices
logrets = np.log(px).diff()

# Risk-free rate: percent -> fraction, spread over 252 periods per year,
# aligned to the return dates (gaps forward-filled), then turned into a log rate
rf_simple_daily = (rf / 100.0) / 252.0
rf_simple_daily = rf_simple_daily.reindex(logrets.index).ffill()
rf_log_daily = np.log1p(rf_simple_daily)

# Excess log returns: subtract the daily log risk-free rate row by row
rf_log_series = rf_log_daily.squeeze()
excess_mkt_log = logrets["^GSPC"] - rf_log_series
excess_assets_log = logrets[tickers].sub(rf_log_series, axis=0)

# One frame with the asset columns plus the market column "MKT"; infinities are
# treated as missing and every row with any missing value is dropped
df = (
    pd.concat([excess_assets_log, excess_mkt_log.rename("MKT")], axis=1)
    .replace([np.inf, -np.inf], np.nan)
    .dropna(how="any")
)

# CAPM regressions: each asset's daily log excess return on the market's daily
# log excess return, with HAC (Newey-West) standard errors.

# The regressor matrix (constant + MKT) and the sample size are identical for
# every ticker, because df contains no missing rows, so they are built once.
X = sm.add_constant(df["MKT"])
n = df.shape[0]

# Newey-West lag rule: integer part of 4 * (n / 100)^(2/9)
q = int(4 * (n / 100)**(2/9)) if n > 0 else 0

results = []
for ticker in tickers:
    model = sm.OLS(df[ticker], X, missing="drop").fit(
        cov_type="HAC",
        cov_kwds={"maxlags": q, "use_correction": True},
    )

    nobs = int(model.nobs)

    # Daily log excess alpha scaled to the whole sample and converted to a
    # compounded simple return in percent
    alpha_total_comp_pct = np.expm1(model.params["const"] * nobs) * 100.0

    # Tuple order must match the column list used to build the results table:
    # ticker, total alpha (%), beta, t(alpha), t(beta), R^2, number of obs.
    results.append((
        ticker,
        alpha_total_comp_pct,
        model.params["MKT"],
        model.tvalues["const"],
        model.tvalues["MKT"],
        model.rsquared,
        nobs,
    ))


# Column names, in the same order as the tuples collected in `results`
cols = ["Ticker","Alpha_total_comp_%","Beta","t(Alpha)","t(Beta)","R2","N"]

# Build the table indexed by ticker, express R2 in percent, then round each
# reported statistic to its display precision (N is left untouched)
capm_df = (
    pd.DataFrame(results, columns=cols)
    .set_index("Ticker")
    .assign(R2=lambda tbl: tbl["R2"] * 100)
    .round({
        "Alpha_total_comp_%": 2,
        "Beta": 3,
        "t(Alpha)": 2,
        "t(Beta)": 2,
        "R2": 1,
    })
)

# Show the table ordered from highest to lowest beta
print(capm_df.sort_values("Beta", ascending=False))

# Persist the (unsorted) table, keeping the ticker index as first column
capm_df.to_csv(
    out_path / "capm_results_compounded_alpha.csv",
    encoding="utf-8",
    index=True,
)


# LaTeX export: swap the plain column names for math-mode headers and render
# the table; escape=False keeps the LaTeX markup in the headers intact
latex_headers = {
    "Alpha_total_comp_%": "$\\alpha$ (total, \\%, compounded)",
    "Beta": "$\\beta$",
    "t(Alpha)": "$t(\\alpha)$",
    "t(Beta)": "$t(\\beta)$",
    "R2": "$R^2$ (\\%)",
    "N": "$N$"
}

latex = capm_df.rename(columns=latex_headers).to_latex(
    escape=False,
    column_format="lrrrrrr",  # left-aligned index + 6 right-aligned numeric cols
    bold_rows=False,
)

# Write the rendered table next to the CSV output
with (out_path / "capm_results.tex").open("w", encoding="utf-8") as tex_file:
    tex_file.write(latex)



In [None]:
def avg_logret_period(df, start, end):
    """Return the mean of `df` over the label slice start..end, times 100.

    `df` is sliced with `.loc[start:end]`, the mean of the selected rows is
    taken, and the result is scaled by 100 so that daily log returns are
    expressed in percent.
    """
    window = df.loc[slice(start, end)]
    mean_in_pct = window.mean() * 100
    return mean_in_pct

# Sub-periods (label -> (first date, last date)) used for the averages.
# NOTE(review): all periods stop at 2025-06-16 while prices are downloaded with
# an end date of 2025-07-16 - confirm that the earlier cut-off is intended.
periods = {
    "2023": ("2023-03-31", "2023-12-31"),
    "2024": ("2024-01-01", "2024-12-31"),
    "2025": ("2025-01-01", "2025-06-16"),
    "2023-2024": ("2023-03-31", "2024-12-31"),
    "2024-2025": ("2024-01-01", "2025-06-16"),
    "2023-2025": ("2023-03-31", "2025-06-16"),
}

# Mean daily log return (in %) of every column of `logrets`, per sub-period
avg_returns = {}
for label, (period_start, period_end) in periods.items():
    avg_returns[label] = avg_logret_period(logrets, period_start, period_end)

# One row per period, one column per series in `logrets` (tickers + ^GSPC),
# rounded to four decimals
avg_daily_pct = pd.DataFrame(avg_returns).transpose().round(4)

# Store the table alongside the other generated tables
avg_daily_pct.to_csv(
    out_path / "avg_daily_log_returns_by_period_pct.csv",
    encoding="utf-8",
)

# Print the summary table
print("Average daily log returns (%):")
print(avg_daily_pct)