In [1]:
# imports and paths 

import os
import pickle
import numpy as np
import pandas as pd

pd.set_option("display.width", 120)

PROJECT_ROOT = os.path.abspath("..")
DATA_RAW = os.path.join(PROJECT_ROOT, "data", "raw")
DATA_PROCESSED = os.path.join(PROJECT_ROOT, "data", "processed")
RESULTS_DIR = os.path.join(PROJECT_ROOT, "experiments", "classical")

os.makedirs(RESULTS_DIR, exist_ok=True)


In [2]:
# Load data and covariances 

prices = pd.read_csv(os.path.join(DATA_RAW, "prices.csv"), index_col=0, parse_dates=True)
returns = pd.read_csv(os.path.join(DATA_PROCESSED, "returns.csv"), index_col=0, parse_dates=True)

with open(os.path.join(DATA_PROCESSED, "covariance_matrices.pkl"), "rb") as f:
    scenario_covs = pickle.load(f)

prices.shape, returns.shape


((1270, 25), (1269, 25))

In [3]:
# Reblance dates

rebalance_dates = returns.resample("M").last().index
rebalance_dates[:5], len(rebalance_dates)


  rebalance_dates = returns.resample("M").last().index


(DatetimeIndex(['2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30', '2021-05-31'], dtype='datetime64[ns]', name='Date', freq='ME'),
 61)

In [4]:
# define constraints and cost model 

CONSTRAINTS = {
    "cardinality_max": 10,
    "turnover_max": 0.20
}

TRANSACTION_COST_RATE = 0.001  # 10 bps
CONSTRAINTS, TRANSACTION_COST_RATE


({'cardinality_max': 10, 'turnover_max': 0.2}, 0.001)

In [5]:
# Helper functions 

def portfolio_variance(weights, cov):
    return weights.T @ cov.values @ weights

def portfolio_turnover(w_old, w_new):
    return np.sum(np.abs(w_new - w_old))

def transaction_cost(w_old, w_new, cost_rate):
    return cost_rate * portfolio_turnover(w_old, w_new)


In [6]:
# classical greddy and rebalancer 

def classical_rebalance(date, prev_weights):
    cov = scenario_covs[date]["base"]
    vols = np.sqrt(np.diag(cov.values))
    
    ranked_assets = np.argsort(vols)
    selected = ranked_assets[:CONSTRAINTS["cardinality_max"]]
    
    new_weights = np.zeros_like(prev_weights)
    new_weights[selected] = 1.0 / len(selected)
    
    turnover = portfolio_turnover(prev_weights, new_weights)
    cost = transaction_cost(prev_weights, new_weights, TRANSACTION_COST_RATE)
    var = portfolio_variance(new_weights, cov)
    
    return {
        "weights": new_weights,
        "variance": var,
        "turnover": turnover,
        "cost": cost,
        "selected_assets": selected
    }


In [7]:
# Run baseline across all rebalance dates

n_assets = returns.shape[1]
prev_weights = np.ones(n_assets) / n_assets

results = []

for date in rebalance_dates:
    if date not in scenario_covs:
        continue
        
    res = classical_rebalance(date, prev_weights)
    res["date"] = date
    results.append(res)
    
    prev_weights = res["weights"]


In [8]:
# collect results table

results_df = pd.DataFrame([{
    "date": r["date"],
    "variance": r["variance"],
    "volatility": np.sqrt(r["variance"] * 252),
    "turnover": r["turnover"],
    "transaction_cost": r["cost"]
} for r in results])

results_df.head()


Unnamed: 0,date,variance,volatility,turnover,transaction_cost
0,2021-04-30,4.6e-05,0.10806,1.2,0.0012
1,2021-06-30,4.1e-05,0.101328,0.4,0.0004
2,2021-08-31,3.3e-05,0.090717,0.6,0.0006
3,2021-09-30,3.7e-05,0.096302,0.4,0.0004
4,2021-11-30,4.2e-05,0.102956,0.4,0.0004


In [9]:
# sanity check 

results_df.describe()


Unnamed: 0,date,variance,volatility,turnover,transaction_cost
count,41,41.0,41.0,41.0,41.0
mean,2023-08-26 11:42:26.341463296,6.2e-05,0.119143,0.37561,0.000376
min,2021-04-30 00:00:00,2.4e-05,0.077391,0.0,0.0
25%,2022-06-30 00:00:00,3.3e-05,0.090717,0.2,0.0002
50%,2023-07-31 00:00:00,4.3e-05,0.103572,0.4,0.0004
75%,2024-10-31 00:00:00,6.9e-05,0.131791,0.4,0.0004
max,2025-12-31 00:00:00,0.000184,0.215304,1.2,0.0012
std,,4.5e-05,0.039611,0.205889,0.000206


In [10]:
# save classical results 

results_df.to_csv(os.path.join(RESULTS_DIR, "results.csv"), index=False)

"Classical baseline results saved"


'Classical baseline results saved'