# Quantitative Finance Final Project  
## Barry Cox Price Momentum Long–Short Portfolio

Fully self-contained: run cells from top to bottom.

In [None]:
# Install required packages if missing
import sys, subprocess

# Each name is used both as the import name and as the pip package name.
packages = ["yfinance", "pandas", "numpy", "scipy", "matplotlib"]
for pkg in packages:
    try:
        # Probe by importing; only ImportError triggers an install.
        __import__(pkg)
    except ImportError:
        # Run pip through the current interpreter so the package lands in the
        # environment this notebook is executing in. check_call raises
        # CalledProcessError if pip exits with a non-zero status.
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import datetime as dt
from scipy import stats
import matplotlib.pyplot as plt

# Plot with matplotlib's default style and cap how many rows/columns pandas
# prints when a DataFrame is displayed.
plt.style.use("default")
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", 20)

In [None]:
# Benchmark ETF and the amount of price history to request.
ETF_TICKER = "SPY"
YEARS_LOOKBACK = 5
TRADING_DAYS_PER_YEAR = 252
CALENDAR_DAYS_PER_YEAR = 365.25
DAYS_LOOKBACK = YEARS_LOOKBACK * TRADING_DAYS_PER_YEAR  # trading days

# Rebalancing frequency (pandas offset alias), basket sizes, and the number of
# daily returns used when estimating volatility for inverse-vol weights.
REBALANCE_FREQ = "M"
N_LONG = 15
N_SHORT = 15
VOL_LOOKBACK_DAYS = 60

end_date = dt.date.today()
# DAYS_LOOKBACK is measured in trading days, but timedelta counts calendar
# days. Convert first, then pad by 10% so the window really spans
# YEARS_LOOKBACK years (the unconverted value covered only ~3.8 years).
calendar_days_lookback = DAYS_LOOKBACK * CALENDAR_DAYS_PER_YEAR / TRADING_DAYS_PER_YEAR
start_date = end_date - dt.timedelta(days=int(calendar_days_lookback * 1.1))

# Universe of 120 large-cap US stocks (acts as ETF constituents)
# NOTE(review): this is a hand-maintained list, not the ETF's actual holdings;
# both GOOGL and GOOG are included.
tickers = [
"AAPL","MSFT","AMZN","GOOGL","GOOG","META","NVDA","TSLA","BRK-B","JNJ",
"JPM","V","PG","XOM","HD","CVX","MA","LLY","ABBV","PFE",
"KO","PEP","BAC","AVGO","COST","MCD","WMT","ACN","CSCO","ADBE",
"CRM","NFLX","INTC","T","VZ","ABT","CMCSA","WFC","TXN","LIN",
"MS","DIS","IBM","ORCL","NKE","PM","UPS","UNH","MDT","BA",
"GE","CAT","AMGN","HON","LOW","CVS","SBUX","GS","BLK","BKNG",
"NOW","ISRG","LMT","AMD","DE","AMAT","SPGI","SYK","GILD","CI",
"CB","MMC","ADI","PNC","ADP","MDLZ","USB","DUK","SO","SCHW",
"MO","TGT","PLD","CL","FIS","AON","CCI","BDX","ZTS","NSC",
"HUM","AEP","EQIX","ICE","REGN","MAR","EL","ETN","PSA","KMB",
"FDX","HCA","GM","DAL","ROST","EXC","EMR","APD","EOG","SLB",
"OXY","COP","MPC","PSX","ADM","AFL","ALL","AIG","DOW","WY"
]

# Keep the requested universe in a one-column frame; `tickers` itself may be
# narrowed later once downloads are known.
holdings = pd.DataFrame({"Symbol": tickers})
print("Number of ETF constituents:", len(tickers))
holdings.head()

In [None]:
# Download daily price/volume history for the universe plus the benchmark ETF.
all_tickers = tickers + [ETF_TICKER]

data = yf.download(
    tickers=all_tickers,
    start=start_date,
    end=end_date,
    auto_adjust=False,
    progress=True,
    group_by="ticker",
)

# Normalize columns to (ticker, field) multi-index
if not isinstance(data.columns, pd.MultiIndex):
    raise ValueError("Unexpected data format from yfinance.")
# If the outer level holds field names, the layout is (field, ticker): swap it.
if set(data.columns.get_level_values(0)) & {"Adj Close", "Close", "Open", "High", "Low"}:
    data = data.swaplevel(axis=1).sort_index(axis=1)

data = data.sort_index()

# Judge availability by actual Close observations, not by column labels:
# a column that exists but is entirely NaN carries no usable history.
# NOTE(review): yfinance appears to keep failed tickers as all-NaN columns;
# confirm for the installed version.
close_all = data.xs("Close", axis=1, level=1)
has_close = close_all.notna().any()
available_tickers = sorted(t for t in has_close.index if has_close[t])
available_set = set(available_tickers)
tickers = [t for t in tickers if t in available_set]

print("Tickers requested:", len(holdings))
print("Tickers successfully downloaded:", len(tickers))

# Per-field wide tables (dates x tickers), benchmark ETF as the last column.
close = close_all[tickers + [ETF_TICKER]]
volume = data.xs("Volume", axis=1, level=1)[tickers + [ETF_TICKER]]

close.tail()

In [None]:
def rolling_slope(log_prices: pd.Series, window: int) -> pd.Series:
    """Return the trailing OLS slope of ``log_prices`` against row position.

    For every row that has ``window`` observations ending at it, the values in
    that window are regressed on their integer positions with
    ``scipy.stats.linregress`` and the slope is stored at the window's last
    row. Earlier rows are NaN. The result shares the input's index.
    """
    values = log_prices.to_numpy()
    total = len(values)
    positions = np.arange(total)
    result = np.full(total, np.nan, dtype=float)

    # `stop` is the exclusive end of the window; the slope belongs to row stop - 1.
    for stop in range(window, total + 1):
        start = stop - window
        fit = stats.linregress(positions[start:stop], values[start:stop])
        result[stop - 1] = fit[0]

    return pd.Series(result, index=log_prices.index)


def compute_price_momentum_factors(close: pd.DataFrame,
                                   volume: pd.DataFrame,
                                   lag_days: int = 20) -> pd.DataFrame:
    """Build the daily price-momentum factor panel.

    Every factor is measured as of ``lag_days`` rows before each date:

    * ``slope_52w``: rolling regression slope of log price over 252 rows.
    * ``pct_above_260d_low``: lagged price relative to the lagged 260-row
      rolling minimum (at least 150 observations required), minus 1.
    * ``osc_4w_52w``: 20-row return minus 252-row return, both ending at the
      lagged price.
    * ``ret_39w``: 195-row return ending at the lagged price.
    * ``vpt_51w``: 255-row change in the cumulative sum of
      ``volume * daily price change ratio``.

    Parameters
    ----------
    close, volume
        Wide frames (rows = dates, columns = tickers). ``volume`` is
        reindexed to the rows and columns of ``close``.
    lag_days
        Number of rows by which every factor is lagged.

    Returns
    -------
    pd.DataFrame
        Same rows as ``close``; columns are a sorted ``(factor, ticker)``
        MultiIndex.
    """
    prices = close.sort_index()
    prices = prices[sorted(prices.columns)]
    vols = volume.sort_index().reindex(prices.index)[prices.columns]

    log_prices = np.log(prices)

    window_52w = 252
    window_39w = 195
    window_51w = 255
    window_4w = 20
    window_260d = 260

    factor_panels = {}

    slope_df = pd.DataFrame(index=prices.index, columns=prices.columns, dtype=float)
    for ticker in prices.columns:
        slope_df[ticker] = rolling_slope(log_prices[ticker], window_52w)
    factor_panels["slope_52w"] = slope_df.shift(lag_days)

    rolling_min_260 = prices.rolling(window_260d, min_periods=150).min()
    ref_price = prices.shift(lag_days)
    factor_panels["pct_above_260d_low"] = ref_price / rolling_min_260.shift(lag_days) - 1.0

    ret_4w = ref_price / prices.shift(lag_days + window_4w) - 1.0
    ret_52w = ref_price / prices.shift(lag_days + window_52w) - 1.0
    factor_panels["osc_4w_52w"] = ret_4w - ret_52w

    factor_panels["ret_39w"] = ref_price / prices.shift(lag_days + window_39w) - 1.0

    # Volume-price trend, vectorized: each day's increment is
    # volume * (price change / previous price), with a zero previous price
    # treated as missing. The first row starts the running total at 0.
    prev_prices = prices.shift(1)
    change_ratio = (prices - prev_prices) / prev_prices.replace(0, np.nan)
    vpt_increment = vols * change_ratio
    if len(vpt_increment) > 0:
        vpt_increment.iloc[0] = 0.0
    # skipna=False keeps the row-by-row recurrence's behavior: once a ticker
    # has a missing increment its running total stays NaN from then on.
    vpt = vpt_increment.cumsum(skipna=False)
    factor_panels["vpt_51w"] = (vpt - vpt.shift(window_51w)).shift(lag_days)

    # Dict keys become the outer column level: (factor, ticker).
    return pd.concat(factor_panels, axis=1).sort_index(axis=1)

In [None]:
# Drop the benchmark ETF column: factors are computed for the stock universe only.
asset_close = close[tickers]
asset_volume = volume[tickers]

# Daily (factor, ticker) panel; lag_days is left at the function's default.
factors = compute_price_momentum_factors(asset_close, asset_volume)
factors.tail()

In [None]:
def cross_sectional_zscore(row: pd.Series) -> pd.Series:
    """Standardize one cross-section of factor values.

    Infinite values are treated as missing. The mean and population standard
    deviation (``ddof=0``) are taken over the remaining finite values.

    Parameters
    ----------
    row
        Factor values for a single date, indexed by ticker.

    Returns
    -------
    pd.Series
        Z-scores on ``row``'s index. Missing or infinite inputs give NaN. The
        whole result is NaN when fewer than 5 finite values exist or when
        their standard deviation is zero or NaN.
    """
    # Clean once and reuse: dividing the raw row would let +/-inf through as an
    # infinite z-score even though it was excluded from the mean and std.
    clean = row.replace([np.inf, -np.inf], np.nan)
    valid = clean.dropna()
    if len(valid) < 5:
        return pd.Series(np.nan, index=row.index, dtype=float)

    mean = valid.mean()
    std = valid.std(ddof=0)
    if std == 0 or np.isnan(std):
        return pd.Series(np.nan, index=row.index, dtype=float)

    return (clean - mean) / std


def compute_monthly_scores(factors: pd.DataFrame,
                           tickers: list,
                           freq: str = "M") -> pd.DataFrame:
    """Turn the daily factor panel into one composite score per period.

    ``factors`` is resampled at ``freq`` keeping the last value in each
    period. For each period every factor is z-scored across tickers with
    ``cross_sectional_zscore``, and the composite is the mean of those
    z-scores per ticker.

    Returns a frame with one row per period and columns ordered as
    ``tickers``.
    """
    names = sorted(set(factors.columns.get_level_values(0)))
    period_end = factors.resample(freq).last()

    composites = []
    for stamp, snapshot in period_end.iterrows():
        # snapshot[name] is that factor's cross-section, indexed by ticker.
        z_columns = [cross_sectional_zscore(snapshot[name]) for name in names]
        if not z_columns:
            continue
        blended = pd.concat(z_columns, axis=1).mean(axis=1)
        blended.name = stamp
        composites.append(blended)

    return pd.DataFrame(composites)[tickers]


# Composite score per rebalance period (rows) and ticker (columns).
monthly_scores = compute_monthly_scores(factors, tickers, freq=REBALANCE_FREQ)
monthly_scores.tail()

In [None]:
def pick_long_short_baskets(scores: pd.Series, n_long: int, n_short: int):
    """Select the long and short baskets from one period's scores.

    Parameters
    ----------
    scores
        Composite score per ticker; NaN entries are ignored.
    n_long, n_short
        Maximum number of names in each basket.

    Returns
    -------
    tuple[list, list]
        ``(long, short)``: the highest-scoring and lowest-scoring tickers,
        each listed from highest to lowest score. The baskets never share a
        ticker; when fewer than ``n_long + n_short`` names have a score, the
        long basket is filled first and the short basket gets what is left.
    """
    ranked = scores.dropna().sort_values(ascending=False)
    long = list(ranked.head(n_long).index)
    # Take shorts only from names not already held long, so a thin
    # cross-section cannot put the same ticker on both sides.
    short = list(ranked.iloc[len(long):].tail(n_short).index)
    return long, short

In [None]:
def backtest_long_short_equal(close: pd.DataFrame,
                              etf_close: pd.Series,
                              monthly_scores: pd.DataFrame,
                              n_long: int,
                              n_short: int) -> dict:
    """Backtest an equal-weight long-short portfolio rebalanced on score dates.

    At each rebalance date (a row of ``monthly_scores`` with at least one
    score) baskets are chosen with ``pick_long_short_baskets`` and held over
    the trading days after that date up to and including the next rebalance
    date. The long leg earns the daily mean return of its basket, the short
    leg earns the negated mean of its basket, and ``long_short`` is their sum.

    Parameters
    ----------
    close
        Daily prices (rows = dates); must contain every column of
        ``monthly_scores``.
    etf_close
        Daily benchmark prices, aligned to ``close`` and forward-filled.
    monthly_scores
        Composite scores, one row per rebalance date, one column per ticker.
    n_long, n_short
        Basket sizes.

    Returns
    -------
    dict
        ``"daily"`` and ``"monthly"`` return frames with columns ``long``,
        ``short``, ``long_short``, ``etf``, and ``"baskets"`` indexed by
        rebalance date. Returns cover only the span from the first to the
        last held day, so the strategy and benchmark are measured over the
        same window.

    Raises
    ------
    ValueError
        If no holding period contains any trading day.
    """
    # Use the scored universe from the argument, not a notebook-level global.
    universe = list(monthly_scores.columns)
    prices = close[universe].copy().sort_index()
    etf_prices = etf_close.sort_index().reindex(prices.index).ffill()

    daily_ret = prices.pct_change()
    etf_daily_ret = etf_prices.pct_change()

    rebalance_dates = monthly_scores.dropna(how="all").index

    basket_records = []
    long_ret_series = pd.Series(0.0, index=prices.index)
    short_ret_series = pd.Series(0.0, index=prices.index)
    first_day = None
    last_day = None

    for date, next_date in zip(rebalance_dates[:-1], rebalance_dates[1:]):
        long, short = pick_long_short_baskets(monthly_scores.loc[date], n_long, n_short)

        period_mask = (daily_ret.index > date) & (daily_ret.index <= next_date)
        period_idx = daily_ret.index[period_mask]
        if len(period_idx) == 0:
            continue

        # Equal weights summing to +1 / -1 per leg reduce to the basket mean.
        # An empty basket leaves that leg at 0 instead of producing NaN.
        if long:
            long_ret_series.loc[period_idx] = daily_ret.loc[period_idx, long].mean(axis=1)
        if short:
            short_ret_series.loc[period_idx] = -daily_ret.loc[period_idx, short].mean(axis=1)

        if first_day is None:
            first_day = period_idx[0]
        last_day = period_idx[-1]

        basket_records.append({
            "rebalance_date": date,
            "next_date": next_date,
            "long": long,
            "short": short
        })

    if first_day is None:
        raise ValueError("No rebalance period contains trading days; nothing to backtest.")

    long_short_ret = long_ret_series + short_ret_series

    daily = pd.DataFrame({
        "long": long_ret_series,
        "short": short_ret_series,
        "long_short": long_short_ret,
        "etf": etf_daily_ret
    }).dropna(how="all")

    # Days outside the held span have no positions. Keeping them as 0% returns
    # would dilute the strategy's mean, volatility and compounding relative to
    # the benchmark, so restrict both to the held window.
    daily = daily.loc[first_day:last_day]

    monthly = daily.resample("M").apply(lambda x: (1 + x).prod() - 1)
    baskets = pd.DataFrame(basket_records).set_index("rebalance_date")

    return {"daily": daily, "monthly": monthly, "baskets": baskets}


# Benchmark price series used by both backtests.
etf_close = close[ETF_TICKER]
results_equal = backtest_long_short_equal(close, etf_close, monthly_scores, N_LONG, N_SHORT)

# Unpack the result dict for the plotting and summary cells.
daily_equal = results_equal["daily"]
monthly_equal = results_equal["monthly"]
baskets_equal = results_equal["baskets"]

monthly_equal.tail()

In [None]:
def compute_inverse_vol_weights(ret_window: pd.DataFrame) -> pd.Series:
    """Return weights proportional to 1 / volatility, normalized to sum to 1.

    Volatility is the per-column standard deviation of ``ret_window``.
    Columns whose volatility is zero or undefined are left out of the result.
    An empty float Series is returned when no column qualifies.
    """
    sigma = ret_window.std().replace(0, np.nan)
    inverse = (1.0 / sigma).replace([np.inf, -np.inf], np.nan).dropna()
    if len(inverse) > 0:
        return inverse / inverse.sum()
    return pd.Series(dtype=float)


def backtest_long_short_optimized(close: pd.DataFrame,
                                  etf_close: pd.Series,
                                  monthly_scores: pd.DataFrame,
                                  n_long: int,
                                  n_short: int,
                                  vol_lookback: int) -> dict:
    """Backtest a long-short portfolio with inverse-volatility weights.

    Baskets are chosen at each rebalance date with
    ``pick_long_short_baskets``. Within each basket, weights come from
    ``compute_inverse_vol_weights`` applied to the last ``vol_lookback`` daily
    returns on or before the rebalance date. Long weights sum to +1 and short
    weights to -1. Positions are held over the trading days after the
    rebalance date up to and including the next one. A period in which either
    basket gets no weights is left uninvested (0% return).

    Parameters
    ----------
    close
        Daily prices (rows = dates); must contain every column of
        ``monthly_scores``.
    etf_close
        Daily benchmark prices, aligned to ``close`` and forward-filled.
    monthly_scores
        Composite scores, one row per rebalance date, one column per ticker.
    n_long, n_short
        Basket sizes.
    vol_lookback
        Number of daily returns used to estimate volatility.

    Returns
    -------
    dict
        ``"daily"`` and ``"monthly"`` return frames with columns ``long``,
        ``short``, ``long_short``, ``etf``, and ``"baskets"`` (names and
        weights) indexed by rebalance date. Returns cover only the span from
        the first to the last held day, so the strategy and benchmark are
        measured over the same window.

    Raises
    ------
    ValueError
        If no period could be invested.
    """
    # Use the scored universe from the argument, not a notebook-level global.
    universe = list(monthly_scores.columns)
    prices = close[universe].copy().sort_index()
    etf_prices = etf_close.sort_index().reindex(prices.index).ffill()

    daily_ret = prices.pct_change()
    etf_daily_ret = etf_prices.pct_change()

    rebalance_dates = monthly_scores.dropna(how="all").index

    basket_records = []
    long_ret_series = pd.Series(0.0, index=prices.index)
    short_ret_series = pd.Series(0.0, index=prices.index)
    first_day = None
    last_day = None

    for date, next_date in zip(rebalance_dates[:-1], rebalance_dates[1:]):
        long, short = pick_long_short_baskets(monthly_scores.loc[date], n_long, n_short)

        period_mask = (daily_ret.index > date) & (daily_ret.index <= next_date)
        period_idx = daily_ret.index[period_mask]
        if len(period_idx) == 0:
            continue

        # Volatility is estimated only from returns known at the rebalance date.
        lookback_idx = daily_ret.index[daily_ret.index <= date][-vol_lookback:]
        ret_window = daily_ret.loc[lookback_idx]

        long_weights_raw = compute_inverse_vol_weights(ret_window[long])
        short_weights_raw = compute_inverse_vol_weights(ret_window[short])

        if long_weights_raw.empty or short_weights_raw.empty:
            continue

        # Renormalize so each leg has unit gross exposure, then sign the short leg.
        long_weights = long_weights_raw / long_weights_raw.sum()
        short_weights = -(short_weights_raw / short_weights_raw.sum())

        long_rets_period = daily_ret.loc[period_idx, long_weights.index]
        short_rets_period = daily_ret.loc[period_idx, short_weights.index]

        long_ret_series.loc[period_idx] = (long_rets_period * long_weights).sum(axis=1)
        short_ret_series.loc[period_idx] = (short_rets_period * short_weights).sum(axis=1)

        if first_day is None:
            first_day = period_idx[0]
        last_day = period_idx[-1]

        basket_records.append({
            "rebalance_date": date,
            "next_date": next_date,
            "long": list(long_weights.index),
            "short": list(short_weights.index),
            "long_weights": long_weights.to_dict(),
            "short_weights": short_weights.to_dict(),
        })

    if first_day is None:
        raise ValueError("No rebalance period could be invested; nothing to backtest.")

    long_short_ret = long_ret_series + short_ret_series

    daily = pd.DataFrame({
        "long": long_ret_series,
        "short": short_ret_series,
        "long_short": long_short_ret,
        "etf": etf_daily_ret
    }).dropna(how="all")

    # Days outside the held span have no positions. Keeping them as 0% returns
    # would dilute the strategy's mean, volatility and compounding relative to
    # the benchmark, so restrict both to the held window.
    daily = daily.loc[first_day:last_day]

    monthly = daily.resample("M").apply(lambda x: (1 + x).prod() - 1)
    baskets = pd.DataFrame(basket_records).set_index("rebalance_date")

    return {"daily": daily, "monthly": monthly, "baskets": baskets}


# Same baskets and holding periods as the equal-weight run, but names inside
# each basket are weighted by inverse volatility over VOL_LOOKBACK_DAYS returns.
results_opt = backtest_long_short_optimized(
    close,
    etf_close,
    monthly_scores,
    N_LONG,
    N_SHORT,
    VOL_LOOKBACK_DAYS
)

# Unpack the result dict for the plotting and summary cells.
daily_opt = results_opt["daily"]
monthly_opt = results_opt["monthly"]
baskets_opt = results_opt["baskets"]

monthly_opt.tail()

In [None]:
# Grouped bar chart: equal-weight long-short vs ETF monthly returns.
fig, ax = plt.subplots(figsize=(12, 5))
bar_positions = np.arange(len(monthly_equal.index))
# Offset the two series by half a bar width either side of each month's tick.
ax.bar(bar_positions - 0.15, monthly_equal["long_short"], width=0.3, label="Equal-Weight Long-Short")
ax.bar(bar_positions + 0.15, monthly_equal["etf"], width=0.3, label=f"{ETF_TICKER} ETF")
ax.axhline(0, linewidth=1)
ax.set_xticks(bar_positions)
ax.set_xticklabels([d.strftime("%Y-%m") for d in monthly_equal.index], rotation=90)
ax.set_ylabel("Monthly Return")
ax.set_title("Monthly Portfolio Return vs ETF (Equal-Weight)")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Line chart of the monthly returns of each leg and the ETF (equal-weight run).
# The short-leg series is the short position's return, i.e. already negated.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(monthly_equal.index, monthly_equal["long"], marker="o", label="Long Leg (Equal)")
ax.plot(monthly_equal.index, monthly_equal["short"], marker="o", label="Short Leg (Equal)")
ax.plot(monthly_equal.index, monthly_equal["etf"], marker="o", label=f"{ETF_TICKER} ETF")
ax.axhline(0, linewidth=1)
ax.set_ylabel("Monthly Return")
ax.set_title("Monthly Returns: Long vs Short vs ETF (Equal-Weight)")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Growth of $1: compound daily returns, treating missing returns as 0%.
cum_equal = (1 + daily_equal["long_short"].fillna(0)).cumprod()
cum_etf = (1 + daily_equal["etf"].fillna(0)).cumprod()

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(cum_equal.index, cum_equal, label="Long-Short Portfolio (Equal)")
ax.plot(cum_etf.index, cum_etf, label=f"{ETF_TICKER} ETF")
ax.set_ylabel("Cumulative Growth of $1")
ax.set_title("Cumulative Return: Portfolio vs ETF (Equal-Weight)")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
# Growth of $1 for the inverse-vol-weighted run; missing returns count as 0%.
# Note: this rebinds cum_etf using the optimized run's daily frame.
cum_opt = (1 + daily_opt["long_short"].fillna(0)).cumprod()
cum_etf = (1 + daily_opt["etf"].fillna(0)).cumprod()

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(cum_opt.index, cum_opt, label="Long-Short Portfolio (Optimized)")
ax.plot(cum_etf.index, cum_etf, label=f"{ETF_TICKER} ETF")
ax.set_ylabel("Cumulative Growth of $1")
ax.set_title("Cumulative Return: Portfolio vs ETF (Optimized Weights)")
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
def performance_summary(monthly: pd.DataFrame) -> pd.DataFrame:
    """Summarize monthly returns per column.

    Returns a frame indexed by the columns of ``monthly`` with the mean
    monthly return, the sample standard deviation, and their ratio (a monthly
    Sharpe ratio with a zero risk-free rate).
    """
    average = monthly.mean()
    dispersion = monthly.std()
    return pd.DataFrame({
        "Mean Monthly Return": average,
        "Std Monthly Return": dispersion,
        "Sharpe (Monthly, rf=0)": average / dispersion,
    })


# Monthly mean / std / Sharpe for every return column of each backtest.
summary_equal = performance_summary(monthly_equal)
summary_opt = performance_summary(monthly_opt)

print("Equal-weight portfolio (monthly stats):")
print(summary_equal)
print("\nOptimized portfolio (monthly stats):")
print(summary_opt)

In [None]:
def annualize(monthly: pd.DataFrame) -> pd.DataFrame:
    """Annualize monthly returns per column.

    Parameters
    ----------
    monthly
        Monthly simple returns, one column per series. NaN months are ignored.

    Returns
    -------
    pd.DataFrame
        Indexed by the columns of ``monthly`` with the geometric annualized
        return, the annualized volatility (monthly std times sqrt(12)), and
        their ratio (Sharpe with a zero risk-free rate). A column with no
        observations gets NaN.
    """
    # Count observed months per column. prod() below skips NaN, so using the
    # row count in the exponent would understate the compounding rate when a
    # column has gaps, and would divide by zero on empty input.
    n_months = monthly.count()
    observed = n_months > 0

    growth = (1 + monthly).prod()
    exponent = 12 / n_months.where(observed)
    # Mask explicitly: an empty column has growth 1.0, and 1.0 ** NaN is 1.0.
    ann_return = (growth ** exponent - 1).where(observed)
    ann_vol = monthly.std() * np.sqrt(12)
    sharpe = ann_return / ann_vol
    return pd.DataFrame({
        "Annualized Return": ann_return,
        "Annualized Vol": ann_vol,
        "Sharpe (Annualized, rf=0)": sharpe
    })


# Annualized return / volatility / Sharpe for each backtest's monthly returns.
annual_equal = annualize(monthly_equal)
annual_opt = annualize(monthly_opt)

print("Equal-weight portfolio (annualized stats):")
print(annual_equal)
print("\nOptimized portfolio (annualized stats):")
print(annual_opt)