# Portfolio Optimization Problem Set (Step 3)

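This notebook follows the course instructions for Step 3. All statistics are computed from simple daily returns (in percent) derived from adjusted close prices, and Sharpe ratios assume a risk-free rate of 0.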
- Step 3.1: 5% grid search for the minimum-variance WFC/MSFT portfolio.
- Step 3.2: Exact minimum-variance weights and the tangency (optimal risky) portfolio for WFC/MSFT.
- Step 3.3: Tangency portfolios using all 10 stocks (unconstrained and long-only).


In [1]:
import csv
from datetime import datetime
from math import sqrt
from pathlib import Path

# Folder containing the per-ticker price CSVs. The path is relative, so it is
# resolved against the kernel's current working directory.
DATA_DIR = Path('Historical Stock Data')
# Ticker symbol -> CSV file for that ticker. The file names carry one of two
# long hexadecimal prefixes, so they are spelled out in full rather than
# derived from the ticker.
PATHS = {
    'AAPL': DATA_DIR / '_c6159646c4566d0145fba7a91f251330_aapl.csv',
    'MSFT': DATA_DIR / '_c6159646c4566d0145fba7a91f251330_msft.csv',
    'WFC': DATA_DIR / '_d54e93ebdf2971e11509d23595ca6209_wfc.csv',
    'DIS': DATA_DIR / '_c6159646c4566d0145fba7a91f251330_dis.csv',
    'COP': DATA_DIR / '_c6159646c4566d0145fba7a91f251330_cop.csv',
    'XOM': DATA_DIR / '_d54e93ebdf2971e11509d23595ca6209_xom.csv',
    'GOOG': DATA_DIR / '_c6159646c4566d0145fba7a91f251330_goog.csv',
    'BIDU': DATA_DIR / '_c6159646c4566d0145fba7a91f251330_bidu.csv',
    'TSLA': DATA_DIR / '_d54e93ebdf2971e11509d23595ca6209_tsla.csv',
    'TTM': DATA_DIR / '_d54e93ebdf2971e11509d23595ca6209_ttm.csv',
}


In [2]:
def parse_date(value):
    """Parse a ``month/day/two-digit-year`` string into a ``datetime.date``.

    Raises ``ValueError`` (from ``strptime``) if the text does not match the
    ``%m/%d/%y`` layout.
    """
    stamp = datetime.strptime(value, '%m/%d/%y')
    return stamp.date()

def load_adj_close(path):
    """Load the adjusted-close series from one price CSV.

    Args:
        path: ``pathlib.Path`` to a CSV that has ``Date`` and ``Adj Close``
            columns.

    Returns:
        A list of ``{'date': date, 'adj_close': float}`` dicts ordered by
        ascending date.
    """
    with path.open(newline='') as handle:
        records = [
            {
                'date': parse_date(line['Date']),
                'adj_close': float(line['Adj Close']),
            }
            for line in csv.DictReader(handle)
        ]
    # Files are not assumed to be chronological; order them explicitly.
    return sorted(records, key=lambda record: record['date'])

def returns_by_date(rows):
    """Compute simple period-over-period returns, in percent, keyed by date.

    Args:
        rows: Iterable of dicts with ``'date'`` and ``'adj_close'`` entries,
            already in the order the returns should be chained.

    Returns:
        Dict mapping each row's date (except the first row, which has no
        predecessor) to ``(price - previous_price) / previous_price * 100``.
    """
    pct_by_date = {}
    stream = iter(rows)
    prior = next(stream, None)
    for current in stream:
        base = prior['adj_close']
        pct_by_date[current['date']] = (current['adj_close'] - base) / base * 100.0
        prior = current
    return pct_by_date

def align_returns(symbols, paths):
    """Load returns for several tickers and keep only dates they all share.

    Args:
        symbols: Sequence of ticker symbols to load.
        paths: Mapping from ticker symbol to the CSV path for that ticker.

    Returns:
        Dict mapping each symbol to its list of returns, all lists ordered by
        the same ascending sequence of common dates.
    """
    per_symbol = {}
    for ticker in symbols:
        per_symbol[ticker] = returns_by_date(load_adj_close(paths[ticker]))

    # Intersect the date sets one ticker at a time.
    shared = None
    for ticker in symbols:
        ticker_dates = set(per_symbol[ticker].keys())
        if shared is None:
            shared = ticker_dates
        else:
            shared = shared & ticker_dates

    timeline = sorted(shared)
    result = {}
    for ticker in symbols:
        series = per_symbol[ticker]
        result[ticker] = [series[day] for day in timeline]
    return result

def mean(values):
    """Return the arithmetic mean of a non-empty sized collection of numbers."""
    total = sum(values)
    count = len(values)
    return total / count

def covariance(x, y):
    """Return the sample covariance of two series (divisor ``len(x) - 1``).

    Values are paired positionally with ``zip``; the divisor is taken from
    the length of ``x``.
    """
    center_x = mean(x)
    center_y = mean(y)
    cross_products = sum(
        (a - center_x) * (b - center_y) for a, b in zip(x, y)
    )
    degrees_of_freedom = len(x) - 1
    return cross_products / degrees_of_freedom

def covariance_matrix(returns_by_asset, symbols):
    """Build the sample covariance matrix as a list of row lists.

    Rows and columns both follow the order of ``symbols``; entry ``[i][j]``
    is the covariance between the return series of ``symbols[i]`` and
    ``symbols[j]``.
    """
    grid = []
    for row_symbol in symbols:
        row = []
        for col_symbol in symbols:
            row.append(
                covariance(returns_by_asset[row_symbol], returns_by_asset[col_symbol])
            )
        grid.append(row)
    return grid

def dot(a, b):
    """Return the inner product of two sequences.

    Elements are paired with ``zip``, so any surplus elements of the longer
    sequence are ignored.
    """
    products = [left * right for left, right in zip(a, b)]
    return sum(products)

def matvec(mat, vec):
    """Multiply a matrix (list of rows) by a vector and return a list."""
    product = []
    for row in mat:
        product.append(dot(row, vec))
    return product

def portfolio_stats(weights, mean_returns, cov):
    """Return ``(mean, standard deviation, Sharpe ratio)`` for a portfolio.

    The Sharpe ratio is mean divided by standard deviation, i.e. no
    risk-free rate is subtracted.

    Args:
        weights: Portfolio weight per asset.
        mean_returns: Mean return per asset, same order as ``weights``.
        cov: Covariance matrix as a list of rows, same asset order.
    """
    expected = dot(mean_returns, weights)
    variance = dot(weights, matvec(cov, weights))
    risk = sqrt(variance)
    return expected, risk, expected / risk

def min_variance_weight_two_asset(cov):
    """Closed-form minimum-variance weights for a two-asset portfolio.

    Args:
        cov: 2x2 covariance matrix as nested sequences.

    Returns:
        ``(w1, w2)`` with ``w1 = (var2 - cov12) / (var1 + var2 - 2*cov12)``
        and ``w2 = 1 - w1``.
    """
    first_row, second_row = cov[0], cov[1]
    shared = first_row[1]
    numerator = second_row[1] - shared
    denominator = first_row[0] + second_row[1] - 2 * shared
    weight_first = numerator / denominator
    return weight_first, 1 - weight_first

def invert_matrix(matrix):
    """Invert a square matrix by Gauss-Jordan elimination with partial pivoting.

    For every column the row with the largest absolute entry (at or below the
    diagonal) is swapped into the pivot position. Taking the largest
    candidate, rather than the first row that merely beats a tiny pivot,
    keeps an invertible matrix from being rejected as singular and improves
    numerical stability.

    Args:
        matrix: Square matrix as a list of equally long row sequences. The
            input is not modified.

    Returns:
        The inverse as a list of row lists.

    Raises:
        ValueError: If the matrix is not square, or if the best available
            pivot in some column is smaller than 1e-12 in magnitude
            ('Singular matrix').
    """
    n = len(matrix)
    if any(len(row) != n for row in matrix):
        raise ValueError('Matrix must be square')

    # Work on the augmented matrix [A | I]; the right half becomes A^-1.
    aug = [
        list(row) + [1.0 if col == idx else 0.0 for col in range(n)]
        for idx, row in enumerate(matrix)
    ]

    for col in range(n):
        # Partial pivoting: pick the largest-magnitude entry in this column.
        pivot_row = max(range(col, n), key=lambda r: abs(aug[r][col]))
        if abs(aug[pivot_row][col]) < 1e-12:
            raise ValueError('Singular matrix')
        if pivot_row != col:
            aug[col], aug[pivot_row] = aug[pivot_row], aug[col]

        # Normalise the pivot row so the pivot entry becomes 1.
        scale = 1.0 / aug[col][col]
        aug[col] = [value * scale for value in aug[col]]
        pivot_values = aug[col]

        # Clear this column in every other row.
        for r in range(n):
            if r == col:
                continue
            factor = aug[r][col]
            if factor != 0.0:
                aug[r] = [rv - factor * pv for rv, pv in zip(aug[r], pivot_values)]

    return [row[n:] for row in aug]

def tangency_weights_unconstrained(mean_returns, cov):
    """Return tangency-portfolio weights with no sign constraint.

    Computes ``inverse(cov) @ mean_returns`` and rescales the result so the
    weights sum to 1. No risk-free rate is subtracted from the means.
    """
    unscaled = matvec(invert_matrix(cov), mean_returns)
    norm = sum(unscaled)
    return [component / norm for component in unscaled]

def project_to_simplex(vector):
    """Project a vector onto the probability simplex (entries >= 0, sum 1).

    Uses the sort-and-threshold scheme: scan the entries in descending order,
    remember the last position whose value still exceeds the running
    threshold, then shift every entry by the final threshold and clip at 0.
    If no position qualifies, equal weights are returned.
    """
    descending = sorted(vector, reverse=True)
    running = 0.0
    last_active = -1
    for rank, value in enumerate(descending, start=1):
        running += value
        shift = (running - 1.0) / rank
        if value - shift > 0:
            last_active = rank - 1

    if last_active == -1:
        return [1.0 / len(vector)] * len(vector)

    support = last_active + 1
    theta = (sum(descending[:support]) - 1.0) / support
    return [max(entry - theta, 0.0) for entry in vector]

def sharpe_ratio(weights, mean_returns, cov):
    """Return portfolio mean divided by portfolio standard deviation.

    When the portfolio variance is zero or negative the function returns the
    sentinel ``-1e18`` so such a candidate never wins a maximisation.
    """
    expected = dot(mean_returns, weights)
    variance = dot(weights, matvec(cov, weights))
    return -1e18 if variance <= 0 else expected / sqrt(variance)

def gradient(weights, mean_returns, cov):
    """Gradient of the Sharpe ratio ``mu / sqrt(v)`` with respect to weights.

    With ``mu = mean_returns . w`` and ``v = w . (cov w)``, component ``i`` is
    ``(mean_returns[i] * v - mu * (cov w)[i]) / v ** 1.5``.

    Args:
        weights: Current portfolio weights.
        mean_returns: Mean return per asset, same order as ``weights``.
        cov: Covariance matrix as a list of rows, same asset order.

    Returns:
        List of partial derivatives, one per asset. A zero vector is returned
        when the portfolio variance is zero or negative.
    """
    # cov @ w feeds both the variance and the gradient, so compute it once.
    sigma_w = matvec(cov, weights)
    mu = dot(mean_returns, weights)
    v = dot(weights, sigma_w)
    if v <= 0:
        return [0.0] * len(weights)
    denom = v ** 1.5
    return [((m * v) - (mu * sw)) / denom for m, sw in zip(mean_returns, sigma_w)]

def maximize_sharpe_long_only(mean_returns, cov, max_iter=5000,
                              steps=(1.0, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001),
                              tol=1e-12):
    """Maximise the Sharpe ratio over long-only, fully invested weights.

    Projected gradient ascent starting from equal weights. Each iteration
    tries every step size in ``steps`` along the gradient, projects each
    candidate back onto the simplex, and keeps the best-scoring candidate.
    The search stops when no candidate beats the current score by more than
    ``tol`` or after ``max_iter`` iterations. This is a local search and
    carries no guarantee of reaching the global maximum.

    Args:
        mean_returns: Mean return per asset.
        cov: Covariance matrix as a list of rows, same asset order.
        max_iter: Upper bound on the number of ascent iterations.
        steps: Step sizes tried in each iteration. The gradient magnitude
            depends on the units of the returns, so callers working in other
            units can pass a different ladder. Defaults to the original
            hard-coded values.
        tol: Minimum Sharpe-ratio improvement for a candidate to be accepted.

    Returns:
        List of non-negative weights that sum to 1.
    """
    weights = [1.0 / len(mean_returns)] * len(mean_returns)
    current = sharpe_ratio(weights, mean_returns, cov)
    for _ in range(max_iter):
        grad = gradient(weights, mean_returns, cov)
        best = current
        best_w = weights
        improved = False
        for step in steps:
            candidate = project_to_simplex([w + step * g for w, g in zip(weights, grad)])
            score = sharpe_ratio(candidate, mean_returns, cov)
            if score > best + tol:
                best = score
                best_w = candidate
                improved = True
        weights = best_w
        current = best
        if not improved:
            # No step size helped: treat the current point as converged.
            break
    return weights


In [3]:
# Step 3.1 and 3.2: WFC/MSFT minimum-variance portfolio
# Means and covariances are in percent units because returns_by_date scales
# returns by 100.
two_symbols = ['WFC', 'MSFT']
two_returns = align_returns(two_symbols, PATHS)
two_means = [mean(two_returns[s]) for s in two_symbols]
two_cov = covariance_matrix(two_returns, two_symbols)

# Step 3.1: grid search over the WFC weight from 0% to 100% in 5% steps,
# keeping the split with the smallest standard deviation (the first one found
# wins ties because the comparison is strict).
best_std = None
best_weights = None
for i in range(0, 101, 5):
    w_wfc = i / 100.0
    w_msft = 1 - w_wfc
    _, std, _ = portfolio_stats([w_wfc, w_msft], two_means, two_cov)
    if best_std is None or std < best_std:
        best_std = std
        best_weights = (w_wfc, w_msft)

# Step 3.2: exact minimum-variance weights from the two-asset closed form.
minvar_wfc, minvar_msft = min_variance_weight_two_asset(two_cov)

# Step 3.2: WFC/MSFT optimal risky portfolio (max Sharpe, rf = 0)
tan_two = tangency_weights_unconstrained(two_means, two_cov)
tan_two_stats = portfolio_stats(tan_two, two_means, two_cov)

# Stand-alone Sharpe ratios (mean / standard deviation) for each stock, used
# for comparison against the tangency portfolio.
wfc_sharpe = two_means[0] / sqrt(two_cov[0][0])
msft_sharpe = two_means[1] / sqrt(two_cov[1][1])

# Step 3.3: Tangency portfolios using all 10 securities (no DJI)
# Returns are re-aligned here, so only dates present for all ten tickers are
# used.
all_symbols = ['AAPL', 'MSFT', 'WFC', 'DIS', 'COP', 'XOM', 'GOOG', 'BIDU', 'TSLA', 'TTM']
all_returns = align_returns(all_symbols, PATHS)
all_means = [mean(all_returns[s]) for s in all_symbols]
all_cov = covariance_matrix(all_returns, all_symbols)

# Short selling allowed: closed-form tangency weights (may be negative).
tan_unconstrained = tangency_weights_unconstrained(all_means, all_cov)
uncon_stats = portfolio_stats(tan_unconstrained, all_means, all_cov)

# Long-only: iterative search restricted to non-negative weights summing to 1.
tan_long_only = maximize_sharpe_long_only(all_means, all_cov)
long_stats = portfolio_stats(tan_long_only, all_means, all_cov)


In [4]:
# Report the two-asset results. Weights are fractions, so they are multiplied
# by 100 for display.
print('Step 3.1 (Grid, 5%): WFC {:.0f}%, MSFT {:.0f}%'.format(best_weights[0] * 100, best_weights[1] * 100))
print('Step 3.2 (Min-Var Formula): WFC {:.1f}%, MSFT {:.1f}%'.format(minvar_wfc * 100, minvar_msft * 100))
print('Step 3.2 (Tangency WFC/MSFT): WFC {:.1f}%'.format(tan_two[0] * 100))
print('Sharpe Ratios - WFC {:.3f}, MSFT {:.3f}, Portfolio {:.3f}'.format(wfc_sharpe, msft_sharpe, tan_two_stats[2]))

# Ten-asset tangency portfolio with short selling allowed. The stats tuples
# are (mean, std, Sharpe); mean and std are already in percent, so they are
# printed without rescaling.
print('Unconstrained Tangency (10-asset) Weights:')
for sym, w in zip(all_symbols, tan_unconstrained):
    print('  {}: {:.1f}%'.format(sym, w * 100))
print('Unconstrained Mean {:.2f}%, Std {:.2f}%, Sharpe {:.3f}'.format(uncon_stats[0], uncon_stats[1], uncon_stats[2]))

# Ten-asset tangency portfolio restricted to non-negative weights.
print('Long-Only Tangency (10-asset) Weights:')
for sym, w in zip(all_symbols, tan_long_only):
    print('  {}: {:.1f}%'.format(sym, w * 100))
print('Long-Only Mean {:.2f}%, Std {:.2f}%, Sharpe {:.3f}'.format(long_stats[0], long_stats[1], long_stats[2]))


Step 3.1 (Grid, 5%): WFC 45%, MSFT 55%
Step 3.2 (Min-Var Formula): WFC 44.7%, MSFT 55.3%
Step 3.2 (Tangency WFC/MSFT): WFC 31.9%
Sharpe Ratios - WFC 0.040, MSFT 0.050, Portfolio 0.053
Unconstrained Tangency (10-asset) Weights:
  AAPL: 20.7%
  MSFT: 15.2%
  WFC: -17.4%
  DIS: 49.7%
  COP: -24.6%
  XOM: 22.0%
  GOOG: 20.1%
  BIDU: -1.5%
  TSLA: 17.9%
  TTM: -2.1%
Unconstrained Mean 0.11%, Std 1.30%, Sharpe 0.088
Long-Only Tangency (10-asset) Weights:
  AAPL: 17.1%
  MSFT: 9.9%
  WFC: 0.0%
  DIS: 38.8%
  COP: 0.0%
  XOM: 0.0%
  GOOG: 17.5%
  BIDU: 0.0%
  TSLA: 16.7%
  TTM: 0.0%
Long-Only Mean 0.11%, Std 1.26%, Sharpe 0.084


## Final Answers

1. 45%; 55%
2. 44.7%; 55.3%
3. 31.9
4. The optimal risky portfolio displays a higher Sharpe Ratio than either of the two stocks used in the portfolio.
5. 22.0
6. 17.5
7. 0.11
8. 1.26
9. 0.088
10. When short selling is allowed, the portfolio is able to attain a higher level of Sharpe Ratio.
