In [1]:
import qmcpy as qp
import pandas as pd
import numpy as np

try:
    import yfinance as yf
except ImportError:
    !pip install -q yfinance
    import yfinance as yf

import seaborn as sn
import matplotlib.pyplot as plt
import time

from scipy.optimize import minimize
import timeit

from pathlib import Path

In [13]:
# Make the output folders `data` and `images` (relative to the working directory).
import os
data_dir = 'data' + os.sep      # prefix used when building data file names
images_dir = 'images' + os.sep  # prefix used when building image file names
# exist_ok=True makes the cell safely re-runnable and avoids the separate
# "does it exist?" check, which tested a different string than it created.
os.makedirs(data_dir, exist_ok=True)
os.makedirs(images_dir, exist_ok=True)

In [2]:
# Date range passed to the price download; both values are also embedded in
# the name of the cached CSV file.
start_date = '2014-01-01'
# The end date is pinned to a literal instead of date.today()
# (presumably so the cached file name and results stay stable -- confirm).
end_date = '2025-12-13'

In [3]:
def download_data(tickers, description, start=None, end=None):
    """
    Download price/volume history for several tickers into one long table.

    Parameters:
        tickers: sequence of str - ticker symbols passed to ``yf.download``
        description: sequence of str - company names, aligned with ``tickers``
        start: str or None - start of the date range; when None the
            notebook-level ``start_date`` is used
        end: str or None - end of the date range; when None the
            notebook-level ``end_date`` is used

    Returns:
        DataFrame with columns
        ['Ticker', 'Company', 'Date', 'Adj Close Price', 'Volume'] holding the
        rows of every ticker stacked on top of each other. Tickers for which
        the download came back empty are skipped with a printed warning.
        The 'Adj Close Price' column is filled from the first column whose
        name contains 'Adj', or, failing that, 'Close'.

    Raises:
        ValueError: if ``description`` has fewer entries than ``tickers``, or
            if no ticker returned any data.
        KeyError: if a price or volume column cannot be located.
    """
    col_names = ['Ticker', 'Company', 'Date', 'Adj Close Price', 'Volume']

    # Fail before any download instead of with an IndexError halfway through.
    if len(description) < len(tickers):
        raise ValueError(
            f"description has {len(description)} entries but {len(tickers)} tickers were given"
        )

    # Fall back to the notebook-level date range when none is supplied.
    start = start_date if start is None else start
    end = end_date if end is None else end

    frames = []
    for ticker, company in zip(tickers, description):
        data = yf.download(ticker, start=start, end=end)
        if data.empty:
            print(f"Warning: no data for {ticker}")
            continue
        data = data.reset_index()

        # Column labels may be plain strings or MultiIndex tuples, so match on
        # their string representation.
        cols = list(data.columns)

        # Date column: first label mentioning a date, else the first column
        # (the former index after reset_index).
        date_col = next((c for c in cols if 'Date' in str(c) or 'date' in str(c)), cols[0])

        # Price column: prefer an adjusted close, otherwise any close column.
        adj_col_candidates = [c for c in cols if 'Adj' in str(c)]
        if not adj_col_candidates:
            adj_col_candidates = [c for c in cols if 'Close' in str(c)]

        # Volume column ('Vol' also matches 'Volume').
        vol_col_candidates = [c for c in cols if 'Vol' in str(c)]

        if not adj_col_candidates or not vol_col_candidates:
            raise KeyError(f"Adj Close or Volume column not found for {ticker}. Columns: {cols}")

        # Keep the three columns of interest and prepend the identifiers.
        data = data[[date_col, adj_col_candidates[0], vol_col_candidates[0]]].copy()
        data.insert(0, 'Company', company)
        data.insert(0, 'Ticker', ticker)
        data.columns = col_names
        frames.append(data)

    # pd.concat([]) raises an opaque "No objects to concatenate"; say what happened.
    if not frames:
        raise ValueError(f"No price data downloaded for any of the tickers: {list(tickers)}")

    return pd.concat(frames, ignore_index=True)

In [4]:
def get_log_ret(df):
    """
    Turn the long price table into a wide table of log returns.

    Parameters:
        df: DataFrame with 'Date', 'Ticker' and 'Adj Close Price' columns

    Returns:
        DataFrame indexed by date with one column per ticker, holding
        log(price_t / price_{t-1}); rows containing any NaN (including the
        first date) are dropped.
    """
    prices = df.pivot(index='Date', columns='Ticker', values='Adj Close Price')
    previous = prices.shift(1)
    returns = np.log(prices / previous)
    return returns.dropna()

In [5]:
def gen_weights(sampler_type, n_tickers, n_ports, seed=42):
    """
    Draw portfolio weight vectors from a qmcpy point generator.

    Parameters:
        sampler_type: str - 'lattice', 'sobol', 'halton', or 'iid'
        n_tickers: int - number of assets (dimension of each point)
        n_ports: int - number of portfolios (points) to generate
        seed: int - seed forwarded to the sampler

    Returns:
        weights: array whose rows (one per portfolio) are each divided by
        their own sum, so every row adds up to 1

    Raises:
        ValueError: if ``sampler_type`` is not one of the supported names.
    """
    # Map the short name to the qmcpy class name; the class is looked up only
    # after the name has been validated.
    sampler_classes = {
        'lattice': 'Lattice',
        'sobol': 'Sobol',
        'halton': 'Halton',
        'iid': 'IIDStdUniform',
    }
    if sampler_type not in sampler_classes:
        raise ValueError(f"Unknown sampler type: {sampler_type}. Use 'lattice', 'sobol', 'halton', or 'iid'.")

    make_sampler = getattr(qp, sampler_classes[sampler_type])
    points = make_sampler(dimension=n_tickers, seed=seed).gen_samples(n_ports)

    # Rescale each row in place so its entries sum to 1.
    row_totals = points.sum(axis=1, keepdims=True)
    points /= row_totals
    return points

# Legacy functions for backward compatibility
def gen_weights_lattice(n_tickers, n_ports, seed=42):
    """Shortcut for ``gen_weights('lattice', ...)``; ``seed`` is forwarded (default 42)."""
    return gen_weights('lattice', n_tickers, n_ports, seed=seed)

def gen_weights_sobol(n_tickers, n_ports, seed=42):
    """Shortcut for ``gen_weights('sobol', ...)``; ``seed`` is forwarded (default 42)."""
    return gen_weights('sobol', n_tickers, n_ports, seed=seed)

def gen_weights_halton(n_tickers, n_ports, seed=42):
    """Shortcut for ``gen_weights('halton', ...)``; ``seed`` is forwarded (default 42)."""
    return gen_weights('halton', n_tickers, n_ports, seed=seed)

def gen_weights_iid(n_tickers, n_ports, seed=42):
    """Shortcut for ``gen_weights('iid', ...)``; ``seed`` is forwarded (default 42)."""
    return gen_weights('iid', n_tickers, n_ports, seed=seed)

In [6]:
def gen_weights_reps(sampler_type, n_tickers, n_ports, replications, seed=42):
    """
    Generate replicated sets of portfolio weights with a qmcpy sampler.

    Parameters:
        sampler_type: str - 'lattice', 'sobol', 'halton', or 'iid'
        n_tickers: int - number of assets (dimension of each point)
        n_ports: int - number of portfolios (points) per replication
        replications: forwarded to the sampler's ``replications`` argument
        seed: int - seed forwarded to the sampler

    Returns:
        weights: array as returned by the sampler, with every vector along
        the last axis divided by its own sum so it adds up to 1. In this
        notebook the result has shape (replications, n_ports, n_tickers).

    Raises:
        ValueError: if ``sampler_type`` is not one of the supported names.
    """
    # Map the short name to the qmcpy class name; the class is looked up only
    # after the name has been validated.
    sampler_classes = {
        'lattice': 'Lattice',
        'sobol': 'Sobol',
        'halton': 'Halton',
        'iid': 'IIDStdUniform',
    }
    if sampler_type not in sampler_classes:
        raise ValueError(f"Unknown sampler type: {sampler_type}. Use 'lattice', 'sobol', 'halton', or 'iid'.")

    sampler = getattr(qp, sampler_classes[sampler_type])(
        dimension=n_tickers, replications=replications, seed=seed
    )
    weights = sampler.gen_samples(n_ports)

    # Normalize over the LAST axis (the asset axis) rather than a hard-coded
    # axis=2, so a 2-D result without a replication axis is handled as well.
    weights /= weights.sum(axis=-1, keepdims=True)
    return weights

In [7]:
def sharpe(weights, log_ret):
    """
    Pick the best-Sharpe portfolio overall and within two volatility bands.

    Parameters:
        weights: 2-D array, one portfolio per row and one asset per column
        log_ret: DataFrame of log returns with one column per asset, in the
            same order as the columns of ``weights``

    Returns:
        dict with the number of tickers and portfolios, the rounded weights
        of the selected portfolios under "low", "medium" and "high", and
        their rounded Sharpe ratios. "high" is the maximum-Sharpe portfolio
        overall; "medium" / "low" are the maximum-Sharpe portfolios among
        those whose volatility lies below the 2/3 / 1/3 quantile of all
        portfolio volatilities.

    Raises:
        ValueError: if ``weights`` is not a non-empty 2-D array.
    """
    weights = np.asarray(weights)
    if weights.ndim != 2 or weights.shape[0] == 0:
        raise ValueError("weights must be a non-empty 2-D array of shape (n_ports, n_tickers)")

    # Expected return (mean log return scaled by 252)
    ret_arr = np.sum((log_ret.mean().values * weights * 252), axis=1)

    # Expected volatility: sqrt(w' C w) per portfolio. Only the diagonal of
    # W C W' is needed, so compute it directly instead of forming the full
    # n_ports x n_ports matrix (whose off-diagonal entries can be negative
    # and would trigger invalid-sqrt warnings).
    cov_annual = log_ret.cov().values * 252
    vol_arr = np.sqrt(np.einsum('ij,ij->i', weights @ cov_annual, weights))

    # Sharpe Ratio (no risk-free rate is subtracted)
    sharpe_arr = ret_arr / vol_arr

    # Risk levels
    medium_risk_tolerance = np.quantile(vol_arr, 2/3, axis=0)
    low_risk_tolerance = np.quantile(vol_arr, 1/3, axis=0)

    def _best_below(threshold):
        # Index of the max-Sharpe portfolio with volatility strictly below
        # the threshold; if none qualifies (single portfolio or all
        # volatilities equal), include portfolios sitting on the threshold.
        candidates = np.flatnonzero(vol_arr < threshold)
        if candidates.size == 0:
            candidates = np.flatnonzero(vol_arr <= threshold)
        return candidates[sharpe_arr[candidates].argmax()]

    high_idx = sharpe_arr.argmax()
    medium_idx = _best_below(medium_risk_tolerance)
    low_idx = _best_below(low_risk_tolerance)

    return {
        "number of tickers": weights.shape[1],
        "number of portfolios": weights.shape[0],

        "low": np.round(weights[low_idx], 3).tolist(),
        "medium": np.round(weights[medium_idx], 3).tolist(),
        "high": np.round(weights[high_idx], 3).tolist(),

        "low risk Sharpe": np.round(sharpe_arr[low_idx], 3),
        "medium risk Sharpe": np.round(sharpe_arr[medium_idx], 3),
        "high risk Sharpe": np.round(sharpe_arr[high_idx], 3),
    }


In [8]:
# Small example: 2 replications of 8 lattice-generated weight vectors over 4 assets.
sampler_type = 'lattice'
n_tickers = 4
n_ports = 2**3
replications = 2
weights_r = gen_weights_reps(sampler_type, n_tickers, n_ports, replications)
#weights_r.shape #(replications, n_ports, n_tickers)
# Display the full array (small enough to read at this size).
weights_r

array([[[0.23296728, 0.16628356, 0.41561624, 0.18513292],
        [0.0131603 , 0.38609054, 0.19580926, 0.4049399 ],
        [0.43946917, 0.07226425, 0.39184246, 0.09642413],
        [0.15773506, 0.35399836, 0.11010835, 0.37815823],
        [0.32346945, 0.37202572, 0.15824971, 0.14625512],
        [0.07652209, 0.12507836, 0.40519707, 0.39320248],
        [0.59350848, 0.33005931, 0.04618008, 0.03025213],
        [0.26558007, 0.0021309 , 0.37410849, 0.35818054]],

       [[0.25702139, 0.12968201, 0.40081513, 0.21248148],
        [0.09447934, 0.55091168, 0.33446479, 0.02014419],
        [0.44676378, 0.03686426, 0.37595509, 0.14041687],
        [0.19612041, 0.28750763, 0.12531172, 0.39106024],
        [0.34132723, 0.31117189, 0.16703896, 0.18046192],
        [0.11859681, 0.08844147, 0.38976938, 0.40319234],
        [0.02179448, 0.60217425, 0.16778868, 0.20824259],
        [0.18807254, 0.34556848, 0.22769056, 0.23866841]]])

In [9]:
# First replication only: an (n_ports, n_tickers) slice of the replicated weights.
weights_r[0]

array([[0.23296728, 0.16628356, 0.41561624, 0.18513292],
       [0.0131603 , 0.38609054, 0.19580926, 0.4049399 ],
       [0.43946917, 0.07226425, 0.39184246, 0.09642413],
       [0.15773506, 0.35399836, 0.11010835, 0.37815823],
       [0.32346945, 0.37202572, 0.15824971, 0.14625512],
       [0.07652209, 0.12507836, 0.40519707, 0.39320248],
       [0.59350848, 0.33005931, 0.04618008, 0.03025213],
       [0.26558007, 0.0021309 , 0.37410849, 0.35818054]])

In [10]:
# Same settings through the non-replicated helper (default seed), for
# comparison with the first replication of weights_r.
sampler_type = 'lattice'
n_tickers = 4
n_ports = 2**3
weights = gen_weights(sampler_type, n_tickers, n_ports)
weights

array([[0.23296728, 0.16628356, 0.41561624, 0.18513292],
       [0.0131603 , 0.38609054, 0.19580926, 0.4049399 ],
       [0.43946917, 0.07226425, 0.39184246, 0.09642413],
       [0.15773506, 0.35399836, 0.11010835, 0.37815823],
       [0.32346945, 0.37202572, 0.15824971, 0.14625512],
       [0.07652209, 0.12507836, 0.40519707, 0.39320248],
       [0.59350848, 0.33005931, 0.04618008, 0.03025213],
       [0.26558007, 0.0021309 , 0.37410849, 0.35818054]])

In [11]:
# Cross-check: build the lattice weights directly with qmcpy (seed 42) and
# normalize each row to sum to 1, i.e. the steps gen_weights('lattice', ...) performs.
n_tickers = 4
n_ports = 2**3
weights = qp.Lattice(dimension=n_tickers,seed=42).gen_samples(n_ports)
weights /= weights.sum(axis=1, keepdims=True)
weights

array([[0.23296728, 0.16628356, 0.41561624, 0.18513292],
       [0.0131603 , 0.38609054, 0.19580926, 0.4049399 ],
       [0.43946917, 0.07226425, 0.39184246, 0.09642413],
       [0.15773506, 0.35399836, 0.11010835, 0.37815823],
       [0.32346945, 0.37202572, 0.15824971, 0.14625512],
       [0.07652209, 0.12507836, 0.40519707, 0.39320248],
       [0.59350848, 0.33005931, 0.04618008, 0.03025213],
       [0.26558007, 0.0021309 , 0.37410849, 0.35818054]])

In [14]:
tickers = ["AAPL", "AMZN", "CSCO", "IBM"]
description = ["Apple", "Amazon", "CISCO", "IBM"]

# Cached download; the file name encodes only the date range.
fname = f"{data_dir}df_{start_date}_to_{end_date}.csv"
p = Path(fname)

df = pd.read_csv(p, parse_dates=['Date']) if p.exists() else None

# The cache key does not include the tickers, so a file written for a
# different ticker list would otherwise be reused silently. Re-download
# (and overwrite the cache) whenever the cached tickers do not match.
if df is None or set(df['Ticker']) != set(tickers):
    df = download_data(tickers,description)
    df.to_csv(p, index=False)

# Wide table of daily log returns, one column per ticker.
lr = get_log_ret(df)

In [15]:
# Per-asset contribution to each portfolio's return (mean log return * weight * 252);
# the per-ticker means broadcast over the last axis of weights_r.
lr.mean().values * weights_r * 252

array([[[0.05443389, 0.03389552, 0.05665003, 0.01609407],
        [0.00307497, 0.07870134, 0.02668953, 0.03520243],
        [0.10268402, 0.01473047, 0.05340958, 0.00838239],
        [0.03685553, 0.07215962, 0.01500818, 0.03287424],
        [0.07558015, 0.07583434, 0.02157002, 0.01271432],
        [0.01787974, 0.02549618, 0.05522986, 0.03418207],
        [0.13867602, 0.06727984, 0.00629452, 0.00262989],
        [0.06205402, 0.00043437, 0.05099237, 0.03113753]],

       [[0.06005425, 0.0264346 , 0.05463259, 0.01847154],
        [0.02207554, 0.11229875, 0.04558879, 0.00175118],
        [0.10438844, 0.00751447, 0.05124407, 0.01220679],
        [0.04582445, 0.05860603, 0.01708045, 0.03399584],
        [0.0797527 , 0.0634298 , 0.02276803, 0.015688  ],
        [0.0277107 , 0.01802806, 0.05312701, 0.03505051],
        [0.00509238, 0.1227482 , 0.02287022, 0.01810305],
        [0.04394403, 0.07044126, 0.03103507, 0.02074804]]])

In [17]:
# Volatility of each portfolio in `weights`: diagonal of sqrt(W @ (cov * 252) @ W.T).
np.sqrt((weights @ (lr.cov().values * 252)) @ weights.T).diagonal()

array([0.20780704, 0.21314812, 0.21885933, 0.21298378, 0.22767495,
       0.20168237, 0.25325773, 0.20413606])

In [18]:
# Same volatility computation for the first replication of weights_r.
np.sqrt((weights_r[0] @ (lr.cov().values * 252)) @ weights_r[0].T).diagonal()

array([0.20780704, 0.21314812, 0.21885933, 0.21298378, 0.22767495,
       0.20168237, 0.25325773, 0.20413606])

In [19]:
# Same volatility computation for the second replication of weights_r.
np.sqrt((weights_r[1] @ (lr.cov().values * 252)) @ weights_r[1].T).diagonal()

array([0.20638125, 0.24649686, 0.21675198, 0.20832315, 0.22116523,
       0.20132159, 0.24380821, 0.21460271])

In [21]:
r = weights_r.shape[0]
for i in range(r):
    

2