### Q2.1

In [20]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.sandwich_covariance import cov_hac
import numpy as np

In [21]:
names = pd.read_parquet("C:/Users/ASUS/Desktop/7037/crsp_202501.dsenames.parquet")

spmo_names = (names[names["ticker"]=="SPMO"]
              .sort_values("namedt"))

spmo_names[["permno","ticker","comnam","namedt","nameendt"]].tail(20)

Unnamed: 0,permno,ticker,comnam,namedt,nameendt
16595,15725,SPMO,POWERSHARES E T F TRUST II,2015-10-09,2017-12-25
16596,15725,SPMO,POWERSHARES E T F TRUST II,2017-12-26,2018-06-03
16597,15725,SPMO,INVESCO E T F TRUST II,2018-06-04,2019-09-10
16598,15725,SPMO,INVESCO E T F TRUST II,2019-09-11,2024-12-31


In [22]:
permno = 15725

msf = pd.read_parquet("C:/Users/ASUS/Desktop/7037/crsp_202501.msf.parquet")
msf["dt"] = pd.to_datetime(msf["date"]).dt.to_period("M").dt.to_timestamp("M")
spmo = msf.loc[msf["permno"]==permno, ["dt","ret"]].dropna().copy()

ff4 = pd.read_parquet("C:/Users/ASUS/Desktop/7037/ff.four_factor.parquet").copy()
ff4["dt"] = pd.to_datetime(ff4["dt"]).dt.to_period("M").dt.to_timestamp("M")
ff4 = ff4.drop_duplicates(subset=["dt"], keep="last")  

df = spmo.merge(ff4[["dt","rf","mom"]], on="dt", how="inner").dropna()
df["rex"] = df["ret"] - df["rf"]

X = sm.add_constant(df["mom"])
res = sm.OLS(df["rex"], X).fit()
res_hac = res.get_robustcov_results(cov_type="HAC", maxlags=6)  
print(res_hac.summary())

                            OLS Regression Results                            
Dep. Variable:                    rex   R-squared:                       0.059
Model:                            OLS   Adj. R-squared:                  0.051
Method:                 Least Squares   F-statistic:                     4.649
Date:                Tue, 24 Feb 2026   Prob (F-statistic):             0.0333
Time:                        20:16:18   Log-Likelihood:                 186.21
No. Observations:                 110   AIC:                            -368.4
Df Residuals:                     108   BIC:                            -363.0
Df Model:                           1                                         
Covariance Type:                  HAC                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0133      0.004      3.617      0.0

### Q2.3

In [23]:
cr = msf[["permno", "dt", "ret", "prc", "shrout", "hexcd"]].copy()
cr = cr.dropna(subset=["ret"]).copy()
cr["mktcap"] = cr["prc"].abs() * cr["shrout"]
cr = cr.sort_values(["permno", "dt"])
cr["mktcap_lag1"] = cr.groupby("permno")["mktcap"].shift(1)
cr["logret"] = np.log1p(cr["ret"])
cr.loc[~np.isfinite(cr["logret"]), "logret"] = np.nan
cr["mom_signal"] = (
    cr.groupby("permno")["logret"]
      .transform(lambda s: s.rolling(11, min_periods=11).sum().shift(2))
)
cr["mom_signal"] = np.expm1(cr["mom_signal"])
sig = cr.dropna(subset=["mom_signal", "mktcap_lag1", "hexcd"]).copy()

In [24]:
# Fama-French implementation: assign 4 portfolios
nyse_hexcd = 1
nyse = sig[sig["hexcd"] == nyse_hexcd].copy()
min_n_nyse = 30
    
def quantile(s: pd.Series, q: float) -> float:
    """
    return NaN if too few observations.
    """
    s = s.dropna()
    if len(s) < min_n_nyse:
        return np.nan
    return float(s.quantile(q))

bp = (
    nyse.groupby("dt")
        .agg(
            mom30=("mom_signal", lambda s: quantile(s, 0.30)),
            mom70=("mom_signal", lambda s: quantile(s, 0.70)),
            size50=("mktcap_lag1", lambda s: quantile(s, 0.50)),
        )
        .reset_index()
)

sig = sig.merge(bp, on="dt", how="left")
sig = sig.dropna(subset=["mom30", "mom70", "size50"]).copy()

sig["mom_grp"] = pd.Series(pd.NA, index=sig.index, dtype="object")
sig.loc[sig["mom_signal"] >= sig["mom70"], "mom_grp"] = "MOMUP"
sig.loc[sig["mom_signal"] <= sig["mom30"], "mom_grp"] = "MOMDOWN"
sig = sig.dropna(subset=["mom_grp"]).copy()
sig["size_grp"] = np.where(sig["mktcap_lag1"] <= sig["size50"], "SMALL", "BIG")

In [25]:
def vw_ret(g: pd.DataFrame) -> float: 
    g = g.dropna(subset=["ret", "mktcap_lag1"])
    if g.empty:
        return np.nan
    w = g["mktcap_lag1"].clip(lower=0)
    s = w.sum()
    if s <= 0:
        return np.nan
    return float(np.average(g["ret"], weights=w))

port_ret = (
    sig.groupby(["dt", "size_grp", "mom_grp"])
        .apply(vw_ret)
        .rename("port_ret")
        .reset_index()
)

port_piv = (
    port_ret.assign(port=lambda d: d["size_grp"] + "_" + d["mom_grp"])
            .pivot(index="dt", columns="port", values="port_ret")
            .reset_index()
)

port_piv["momup_ret"]   = 0.5 * (port_piv["SMALL_MOMUP"]   + port_piv["BIG_MOMUP"])
winners_ret = port_piv.merge(ff4[["dt","rf"]], on="dt", how="inner").dropna(subset=["rf", "momup_ret"])
winners_ret["winners_rex"] = winners_ret["momup_ret"] - winners_ret["rf"]

In [26]:
df1 = df.copy()
df1= df1.merge(winners_ret[["dt","winners_rex"]], on="dt", how="inner").dropna()

X = sm.add_constant(df1["winners_rex"])
res_long = sm.OLS(df1["rex"], X).fit()
res_long_hac = res_long.get_robustcov_results(cov_type="HAC", maxlags=6)

print(res_long_hac.summary())

                            OLS Regression Results                            
Dep. Variable:                    rex   R-squared:                       0.728
Model:                            OLS   Adj. R-squared:                  0.725
Method:                 Least Squares   F-statistic:                     172.2
Date:                Tue, 24 Feb 2026   Prob (F-statistic):           4.29e-24
Time:                        20:16:48   Log-Likelihood:                 254.41
No. Observations:                 110   AIC:                            -504.8
Df Residuals:                     108   BIC:                            -499.4
Df Model:                           1                                         
Covariance Type:                  HAC                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.0058      0.002      2.421      

In [27]:
corr_umd = df1["rex"].corr(df1["mom"])
corr_win = df1["rex"].corr(df1["winners_rex"])

te_umd = (df1["rex"] - df1["mom"]).std() * np.sqrt(12)
te_win = (df1["rex"] - df1["winners_rex"]).std() * np.sqrt(12)

print("\n===== Tracking comparison =====")
print(f"Corr(SPMO, UMD)     = {corr_umd:.4f} | TE = {te_umd:.4f}")
print(f"Corr(SPMO, Winners) = {corr_win:.4f} | TE = {te_win:.4f}")

summary = pd.DataFrame({
    "Model": ["SPMO ~ UMD", "SPMO ~ Winners (Long Leg)"],
    "Beta":  [res_hac.params[1],  res_long_hac.params[1]],
    "Beta t-stat": [res_hac.tvalues[1], res_long_hac.tvalues[1]],
    "Alpha (ann.)": [res_hac.params[0]*12, res_long_hac.params[0]*12],
    "Alpha t-stat": [res_hac.tvalues[0],   res_long_hac.tvalues[0]],
    "R²":    [res_hac.rsquared,   res_long_hac.rsquared],
    "Corr w/ SPMO": [corr_umd, corr_win],
    "Tracking Error (ann.)": [te_umd, te_win],
})
summary = summary.set_index("Model").T
print("\n========== Regression Comparison ==========")
print(summary.to_string(float_format="{:.4f}".format))


===== Tracking comparison =====
Corr(SPMO, UMD)     = 0.2439 | TE = 0.1553
Corr(SPMO, Winners) = 0.8531 | TE = 0.0888

Model                  SPMO ~ UMD  SPMO ~ Winners (Long Leg)
Beta                       1.3706                     0.8169
Beta t-stat                2.1562                    13.1212
Alpha (ann.)               0.1601                     0.0697
Alpha t-stat               3.6175                     2.4215
R²                         0.0595                     0.7279
Corr w/ SPMO               0.2439                     0.8531
Tracking Error (ann.)      0.1553                     0.0888


### Q2.4

In [28]:
ff5 = pd.read_parquet("C:/Users/ASUS/Desktop/7037/ff.five_factor.parquet").copy()
ff5["dt"] = pd.to_datetime(ff5["dt"]).dt.to_period("M").dt.to_timestamp("M")
ff5 = ff5.drop_duplicates(subset=["dt"], keep="last")
df2 = df.merge(ff5[["dt","mkt_rf","smb","hml","rmw","cma"]], on="dt", how="inner").dropna()

factors = ["mkt_rf", "smb", "hml", "mom","rmw","cma"]

X = sm.add_constant(df2[factors])
res2 = sm.OLS(df2["rex"], X).fit()
res2_hac = res2.get_robustcov_results(cov_type="HAC", maxlags=6)
print(res2_hac.summary())

                            OLS Regression Results                            
Dep. Variable:                    rex   R-squared:                       0.186
Model:                            OLS   Adj. R-squared:                  0.139
Method:                 Least Squares   F-statistic:                     4.412
Date:                Tue, 24 Feb 2026   Prob (F-statistic):           0.000522
Time:                        20:16:48   Log-Likelihood:                 194.18
No. Observations:                 110   AIC:                            -374.4
Df Residuals:                     103   BIC:                            -355.5
Df Model:                           6                                         
Covariance Type:                  HAC                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0154      0.004      3.511      0.0

### Q2.5

In [29]:
def mom_etf(permno):

    etf = msf.loc[msf["permno"]==permno, ["dt","ret"]].dropna().copy()

    df_etf = (
        etf
        .merge(ff4[["dt","rf","mom"]], on="dt", how="inner")
        .merge(ff5[["dt","mkt_rf","smb","hml","rmw","cma"]], on="dt", how="inner")
        .dropna()
    )
    df_etf["rex"] = df_etf["ret"] - df_etf["rf"]
    factors = ["mkt_rf", "smb", "hml", "mom","rmw","cma"]

    X = sm.add_constant(df_etf[factors])
    res_etf = sm.OLS(df_etf["rex"], X).fit()
    res_etf_hac = res_etf.get_robustcov_results(cov_type="HAC", maxlags=6)

    return res_etf_hac

In [30]:
mtum_names = (names[names["ticker"]=="MTUM"]
              .sort_values("namedt"))

mtum_names[["permno","ticker","comnam","namedt","nameendt"]].tail(20)

Unnamed: 0,permno,ticker,comnam,namedt,nameendt
11683,13851,MTUM,ISHARES TRUST,2013-04-18,2017-08-01
11684,13851,MTUM,ISHARES TRUST,2017-08-02,2019-07-02
11685,13851,MTUM,ISHARES TRUST,2019-07-03,2020-09-03
11686,13851,MTUM,ISHARES TRUST,2020-09-04,2024-12-31


In [31]:
print(mom_etf(13851).summary())

                            OLS Regression Results                            
Dep. Variable:                    rex   R-squared:                       0.186
Model:                            OLS   Adj. R-squared:                  0.149
Method:                 Least Squares   F-statistic:                     4.265
Date:                Tue, 24 Feb 2026   Prob (F-statistic):           0.000581
Time:                        20:16:49   Log-Likelihood:                 252.05
No. Observations:                 140   AIC:                            -490.1
Df Residuals:                     133   BIC:                            -469.5
Df Model:                           6                                         
Covariance Type:                  HAC                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0131      0.003      4.298      0.0

In [32]:
vfmo_names = (names[names["ticker"]=="VFMO"]
              .sort_values("namedt"))

vfmo_names[["permno","ticker","comnam","namedt","nameendt"]].tail(20)

Unnamed: 0,permno,ticker,comnam,namedt,nameendt
20786,17622,VFMO,VANGUARD WELLINGTON FUND,2018-02-15,2019-09-11
20787,17622,VFMO,VANGUARD WELLINGTON FUND,2019-09-12,2020-02-10
20788,17622,VFMO,VANGUARD WELLINGTON FUND,2020-02-11,2024-12-31


In [33]:
print(mom_etf(17622).summary())

                            OLS Regression Results                            
Dep. Variable:                    rex   R-squared:                       0.275
Model:                            OLS   Adj. R-squared:                  0.217
Method:                 Least Squares   F-statistic:                     8.973
Date:                Tue, 24 Feb 2026   Prob (F-statistic):           2.20e-07
Time:                        20:16:49   Log-Likelihood:                 129.16
No. Observations:                  82   AIC:                            -244.3
Df Residuals:                      75   BIC:                            -227.5
Df Model:                           6                                         
Covariance Type:                  HAC                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0117      0.005      2.302      0.0