In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from scipy.optimize import minimize

In [22]:
def _mean_variance_optimization(mu, cov, tickers, max_weight=0.2, risk_free_rate=0.0):
    """
    Mean-Variance Optimization (Markowitz) with max weight and sum constraints.
    """
    n = len(mu)
    
    def portfolio_performance(weights):
        ret = np.dot(weights, mu)
        vol = np.sqrt(np.dot(weights.T, np.dot(cov, weights)))
        return ret, vol

    def neg_sharpe(weights):
        ret, vol = portfolio_performance(weights)
        return -(ret - risk_free_rate) / vol

    constraints = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1}
    bounds = [(0, max_weight) for _ in range(n)]
    x0 = np.ones(n) / n

    opt = minimize(neg_sharpe, x0=x0, bounds=bounds, constraints=constraints, method='SLSQP')
    
    weights = pd.Series(opt.x, index=tickers)
    weights /= weights.sum()  # safety normalization
    
    ret, vol = portfolio_performance(weights)
    sharpe = (ret - risk_free_rate) / vol

    return weights, ret, vol, sharpe

In [23]:
def _hierarchical_risk_parity(mu, cov):
    """
    Hierarchical Risk Parity Optimization (using PyPortfolioOpt)
    """
    try:
        from pypfopt.hierarchical_risk_parity import HRPOpt
    except ImportError:
        raise ImportError("Please install PyPortfolioOpt to use HRP optimization: pip install PyPortfolioOpt")

    hrp = HRPOpt(mu, cov)
    weights = hrp.optimize()
    weights = pd.Series(weights)
    return weights, None, None, None  # HRP doesnâ€™t compute MVO metrics

In [24]:
def optimize_portfolio(expected_returns_path: str,
                       cov_matrix_path: str,
                       method: str = 'mvo',
                       max_weight: float = 0.2,
                       risk_free_rate: float = 0.0):
    """
    Portfolio Optimizer
    Inputs:
        expected_returns_path : CSV path for expected returns from signals
        cov_matrix_path       : CSV path for covariance matrix from historical prices
        method                : 'mvo' (Mean-Variance Optimization) or 'hrp' (Hierarchical Risk Parity)
        max_weight            : Maximum allocation per asset (default 20%)
    Output:
        Dict containing:
            weights (pd.Series)
            expected_return
            volatility
            sharpe_ratio
    """
    expected_returns = pd.read_csv(expected_returns_path, index_col='ticker')
    cov_matrix = pd.read_csv(cov_matrix_path, index_col='ticker')

    mu = expected_returns['expected_return']
    cov = cov_matrix
    tickers = mu.index.tolist()

    if method.lower() == 'mvo':
        print("Running Mean-Variance Optimization...")
        weights, exp_ret, vol, sharpe = _mean_variance_optimization(mu.values, cov.values, tickers, max_weight, risk_free_rate)

    elif method.lower() == 'hrp':
        print("Running Hierarchical Risk Parity Optimization...")
        weights, exp_ret, vol, sharpe = _hierarchical_risk_parity(mu, cov)

    else:
        raise ValueError("Invalid method. Choose 'mvo' or 'hrp'.")

    print("\nOptimal Risk-Adjusted Portfolio")
    display(pd.DataFrame({'Weight': weights}).T)
    
    result = {
        "weights": weights,
        "expected_return": exp_ret,
        "volatility": vol,
        "sharpe_ratio": sharpe
    }
    return result

Generating synthetic inputs for testing

In [25]:
def generate_synthetic_inputs(tickers=None, periods=252, seed=42):
    """
    Generate mock expected returns (signals) and covariance matrix (from synthetic price data).
    Returns:
        expected_returns.csv and cov_matrix.csv written to disk
    """
    np.random.seed(seed)
    if tickers is None:
        tickers = ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'META']

    # Simulate random daily returns (slightly correlated)
    n = len(tickers)
    corr = np.full((n, n), 0.3)
    np.fill_diagonal(corr, 1.0)
    vol = np.linspace(0.015, 0.025, n)
    cov = np.outer(vol, vol) * corr

    returns = np.random.multivariate_normal(np.zeros(n), cov, size=periods)
    prices = 100 * np.exp(np.cumsum(returns, axis=0))
    price_df = pd.DataFrame(prices, columns=tickers)

    # Compute historical annualized covariance matrix
    ret_df = price_df.pct_change().dropna()
    cov_matrix = ret_df.cov() * 252  # annualized covariance

    # Compute synthetic expected returns (signals)
    mean_returns = ret_df.mean() * 252  # annualized mean
    # add small signal tilt to make optimization interesting
    expected_returns = mean_returns + np.linspace(0.00, 0.02, n)

    # Write to CSVs
    expected_returns_df = pd.DataFrame({'ticker': tickers, 'expected_return': expected_returns})
    expected_returns_df.to_csv("expected_returns.csv", index=False)

    cov_matrix.insert(0, "ticker", cov_matrix.index)
    cov_matrix.to_csv("cov_matrix.csv", index=False)

    print("Synthetic data generated and saved as:")
    print("  - expected_returns.csv")
    print("  - cov_matrix.csv")
    return expected_returns_df, cov_matrix

Testing on synthetic data

In [26]:
# Step 1: Generate mock inputs
expected_returns_df, cov_matrix_df = generate_synthetic_inputs(
    tickers=['AAPL', 'MSFT', 'GOOG'], 
    periods=500
)

# Step 2: Run optimizer
result = optimize_portfolio(
    expected_returns_path="expected_returns.csv",
    cov_matrix_path="cov_matrix.csv",
    method='mvo',
    max_weight=0.2
)

# Step 3: Inspect results
print("\nOptimized Weights:")
print(result["weights"])
summary = pd.DataFrame({
    "Expected Return": [result["expected_return"]],
    "Volatility": [result["volatility"]],
    "Sharpe Ratio": [result["sharpe_ratio"]]
})
display(summary)

Synthetic data generated and saved as:
  - expected_returns.csv
  - cov_matrix.csv
Running Mean-Variance Optimization...

Optimal Risk-Adjusted Portfolio


Unnamed: 0,AAPL,MSFT,GOOG
Weight,0.333333,0.333333,0.333333



Optimized Weights:
AAPL    0.333333
MSFT    0.333333
GOOG    0.333333
dtype: float64


Unnamed: 0,Expected Return,Volatility,Sharpe Ratio
0,0.523732,0.223341,2.344983
