# Task 4: Portfolio Optimization Based on Forecast

## Objective
Use insights from the TSLA forecast (Task 3) to construct an optimal portfolio using Modern Portfolio Theory (MPT).

## Assets
- **TSLA**: Expected return from forecast model
- **BND**: Historical average return (bond stability)
- **SPY**: Historical average return (market exposure)

## Deliverables
- Efficient Frontier plot with key portfolios marked
- Covariance matrix visualization (heatmap)
- Final portfolio recommendation with weights and metrics
- Written justification for portfolio selection

## 1. Imports and Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from pypfopt import EfficientFrontier, risk_models, expected_returns
from pypfopt import plotting

import warnings

# Suppress every warning category for the rest of the session
warnings.filterwarnings('ignore')

# Constants shared by the later cells
TRADING_DAYS = 252
RISK_FREE_RATE = 0.02  # 2% annual risk-free rate

# Display defaults: four-decimal floats in tables, whitegrid style for figures
pd.set_option('display.float_format', '{:.4f}'.format)
plt.style.use('seaborn-v0_8-whitegrid')

print('Setup complete!')

## 2. Load Data

In [None]:
# Read the adjusted-close table with a parsed, sorted Date index
prices = pd.read_csv(
    '../data/processed/adj_close_prices.csv',
    parse_dates=['Date'],
    index_col='Date',
).sort_index()

# Keep only the three portfolio assets and drop rows with any missing price
tickers = ['TSLA', 'BND', 'SPY']
prices = prices[tickers].dropna()

first_day, last_day = prices.index.min(), prices.index.max()
print(f'Price data: {first_day.date()} to {last_day.date()}')
print(f'Shape: {prices.shape}')
prices.tail()

In [None]:
# Day-over-day percentage change; rows containing NaN (such as the first,
# which has no previous price) are dropped
pct_moves = prices.pct_change()
daily_returns = pct_moves.dropna()

print('Daily Returns Statistics:')
daily_returns.describe()

In [None]:
# Load the Task 3 forecast metrics (a mapping persisted with joblib)
metrics_path = '../data/processed/models/tsla_forecast_metrics.joblib'
forecast_metrics = joblib.load(metrics_path)

print('TSLA Forecast Metrics from Task 3:')
for metric_name, metric_value in forecast_metrics.items():
    print(f'  {metric_name}: {metric_value}')

## 3. Prepare Expected Returns

- **TSLA**: Use forecasted return from Task 3
- **BND, SPY**: Use historical average returns (annualized)

In [None]:
# TSLA: 12-month forecast return, converted from a percentage to a fraction
tsla_expected_return = forecast_metrics['return_12m_pct'] / 100

# BND and SPY: mean daily return compounded over one trading year
hist_mean_daily = daily_returns[['BND', 'SPY']].mean()
hist_annual_returns = (1 + hist_mean_daily) ** TRADING_DAYS - 1

# Expected returns vector, in TSLA / BND / SPY order
mu = pd.Series(
    [tsla_expected_return, hist_annual_returns['BND'], hist_annual_returns['SPY']],
    index=['TSLA', 'BND', 'SPY'],
)

rule = '=' * 50
print(rule)
print('EXPECTED ANNUAL RETURNS')
print(rule)
for asset, annual_ret in mu.items():
    # Only TSLA's figure comes from the forecast model
    origin = 'Historical' if asset != 'TSLA' else 'Forecast'
    print(f'{asset}: {annual_ret*100:+.2f}% ({origin})')

## 4. Compute Covariance Matrix

In [None]:
# Sample covariance of daily returns, scaled by the trading-day count
asset_daily_returns = daily_returns[tickers]
cov_daily = asset_daily_returns.cov()
cov_annual = TRADING_DAYS * cov_daily

print('Annualized Covariance Matrix:')
cov_annual

In [None]:
# Pairwise correlation of daily returns, shown alongside the covariance
returns_for_corr = daily_returns[tickers]
corr_matrix = returns_for_corr.corr()

print('Correlation Matrix:')
corr_matrix

In [None]:
# Covariance heatmap (deliverable): covariance and correlation side by side
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
cov_ax, corr_ax = axes

# Shared text styling for cell annotations and panel titles
cell_font = {'size': 12, 'weight': 'bold'}
title_font = {'fontsize': 12, 'fontweight': 'bold'}

# Left panel: annualized covariance
sns.heatmap(
    cov_annual,
    annot=True,
    fmt='.4f',
    cmap='YlOrRd',
    ax=cov_ax,
    annot_kws=dict(cell_font),
)
cov_ax.set_title('Annualized Covariance Matrix', **title_font)

# Right panel: correlation, colour scale fixed to [-1, 1] and centred on 0
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt='.3f',
    cmap='RdYlGn',
    center=0,
    ax=corr_ax,
    vmin=-1,
    vmax=1,
    annot_kws=dict(cell_font),
)
corr_ax.set_title('Correlation Matrix', **title_font)

plt.tight_layout()
plt.savefig('../data/processed/covariance_heatmap.png', dpi=150, bbox_inches='tight')
plt.show()

print('Covariance heatmap saved!')

## 5. Portfolio Optimization (Efficient Frontier)

In [None]:
# Maximum Sharpe Ratio Portfolio: optimise, tidy the weights, then read
# back (return, volatility, Sharpe) using the same risk-free rate
ef_sharpe = EfficientFrontier(mu, cov_annual)
weights_sharpe = ef_sharpe.max_sharpe(risk_free_rate=RISK_FREE_RATE)
cleaned_weights_sharpe = ef_sharpe.clean_weights()
perf_sharpe = ef_sharpe.portfolio_performance(risk_free_rate=RISK_FREE_RATE, verbose=False)

sharpe_rule = '=' * 50
print(sharpe_rule)
print('MAXIMUM SHARPE RATIO PORTFOLIO')
print(sharpe_rule)
print('\nWeights:')
for asset, share in cleaned_weights_sharpe.items():
    print(f'  {asset}: {share*100:.1f}%')

ms_return, ms_vol, ms_ratio = perf_sharpe
print(f'\nExpected Annual Return: {ms_return*100:.2f}%')
print(f'Annual Volatility: {ms_vol*100:.2f}%')
print(f'Sharpe Ratio: {ms_ratio:.3f}')

In [None]:
# Minimum Volatility Portfolio: a fresh optimiser instance is used so this
# solve is independent of the max-Sharpe one
ef_minvol = EfficientFrontier(mu, cov_annual)
weights_minvol = ef_minvol.min_volatility()
cleaned_weights_minvol = ef_minvol.clean_weights()
perf_minvol = ef_minvol.portfolio_performance(risk_free_rate=RISK_FREE_RATE, verbose=False)

minvol_rule = '=' * 50
print(minvol_rule)
print('MINIMUM VOLATILITY PORTFOLIO')
print(minvol_rule)
print('\nWeights:')
for asset, share in cleaned_weights_minvol.items():
    print(f'  {asset}: {share*100:.1f}%')

mv_return, mv_vol, mv_ratio = perf_minvol
print(f'\nExpected Annual Return: {mv_return*100:.2f}%')
print(f'Annual Volatility: {mv_vol*100:.2f}%')
print(f'Sharpe Ratio: {mv_ratio:.3f}')

## 6. Generate and Plot Efficient Frontier

In [None]:
# Generate efficient frontier points
def generate_efficient_frontier(mu, cov, n_points=100):
    """Generate points along the efficient frontier.

    Target returns are spaced evenly from the return of the
    minimum-volatility portfolio up to 95% of the largest single-asset
    expected return. One ``efficient_return`` optimization is run per target.

    Args:
        mu: Expected returns per asset. Must support ``.max()`` and be
            accepted by ``EfficientFrontier`` (the notebook passes a
            pandas Series).
        cov: Covariance matrix passed to ``EfficientFrontier``.
        n_points: Number of target returns to attempt.

    Returns:
        Tuple ``(volatilities, returns)`` of NumPy arrays with one entry per
        target that solved successfully; this may be fewer than ``n_points``.
    """
    # Lower end of the sweep: return of the minimum-volatility portfolio
    ef_temp = EfficientFrontier(mu, cov)
    ef_temp.min_volatility()
    min_ret = ef_temp.portfolio_performance()[0]
    max_ret = mu.max()

    target_returns = np.linspace(min_ret, max_ret * 0.95, n_points)

    returns = []
    volatilities = []
    for target in target_returns:
        try:
            # A fresh optimizer is built for every target
            ef = EfficientFrontier(mu, cov)
            ef.efficient_return(target)
            ret, vol, _ = ef.portfolio_performance()
        except Exception:
            # Best effort: skip targets the optimizer rejects or cannot
            # solve. Unlike a bare ``except``, this lets KeyboardInterrupt
            # and SystemExit propagate so the sweep can be interrupted.
            continue
        returns.append(ret)
        volatilities.append(vol)

    return np.array(volatilities), np.array(returns)

# Trace the frontier once; the result is a (volatilities, returns) pair
frontier_points = generate_efficient_frontier(mu, cov_annual)
frontier_vol, frontier_ret = frontier_points
print(f'Generated {len(frontier_vol)} efficient frontier points')

In [None]:
# Plot Efficient Frontier (main deliverable)
fig, ax = plt.subplots(figsize=(12, 8))

# Plot efficient frontier (fractions converted to percentages for the axes)
ax.plot(frontier_vol * 100, frontier_ret * 100, 'b-', linewidth=2, label='Efficient Frontier')

# Plot individual assets. Volatilities carry the covariance matrix's own
# labels so every asset is looked up by ticker rather than by position; the
# points stay correct even if mu, cov_annual and tickers are ordered differently.
asset_vols = pd.Series(np.sqrt(np.diag(cov_annual)), index=cov_annual.index) * 100
asset_rets = mu * 100
colors = {'TSLA': 'red', 'BND': 'green', 'SPY': 'orange'}
for ticker in tickers:
    ax.scatter(asset_vols[ticker], asset_rets[ticker], s=150, c=colors[ticker],
               marker='o', label=ticker, zorder=5, edgecolors='black')

# Mark Max Sharpe portfolio
ax.scatter(perf_sharpe[1] * 100, perf_sharpe[0] * 100, s=300, c='gold',
           marker='*', label=f'Max Sharpe (SR={perf_sharpe[2]:.2f})', zorder=6, edgecolors='black')

# Mark Min Volatility portfolio
ax.scatter(perf_minvol[1] * 100, perf_minvol[0] * 100, s=300, c='cyan',
           marker='D', label='Min Volatility', zorder=6, edgecolors='black')

# Capital Market Line: starts at the risk-free rate and rises with slope equal
# to the max-Sharpe ratio; drawn out to 1.5x the max-Sharpe volatility
cml_x = np.linspace(0, perf_sharpe[1] * 100 * 1.5, 50)
cml_y = RISK_FREE_RATE * 100 + perf_sharpe[2] * cml_x
ax.plot(cml_x, cml_y, 'k--', linewidth=1, alpha=0.5, label='Capital Market Line')

ax.set_xlabel('Annual Volatility (%)', fontsize=12)
ax.set_ylabel('Expected Annual Return (%)', fontsize=12)
ax.set_title('Efficient Frontier with Optimal Portfolios', fontsize=14, fontweight='bold')
ax.legend(loc='upper left', fontsize=10)
ax.grid(True, alpha=0.3)
# Leave 10% headroom to the right of the most volatile asset
ax.set_xlim(0, asset_vols.max() * 1.1)

plt.tight_layout()
plt.savefig('../data/processed/efficient_frontier.png', dpi=150, bbox_inches='tight')
plt.show()

print('Efficient Frontier plot saved!')

## 7. Portfolio Comparison Table

In [None]:
# Create comparison table
def _summary_column(weights, perf):
    """Format one portfolio's weights and (return, volatility, Sharpe) as strings."""
    exp_return, volatility, sharpe = perf
    return {
        'TSLA Weight': f"{weights['TSLA']*100:.1f}%",
        'BND Weight': f"{weights['BND']*100:.1f}%",
        'SPY Weight': f"{weights['SPY']*100:.1f}%",
        'Expected Return': f"{exp_return*100:.2f}%",
        'Volatility': f"{volatility*100:.2f}%",
        'Sharpe Ratio': f"{sharpe:.3f}",
    }


# One column per portfolio, one row per metric
comparison = pd.DataFrame({
    'Max Sharpe': _summary_column(cleaned_weights_sharpe, perf_sharpe),
    'Min Volatility': _summary_column(cleaned_weights_minvol, perf_minvol),
})

table_rule = '=' * 60
print(table_rule)
print('PORTFOLIO COMPARISON')
print(table_rule)
print(comparison.to_string())

## 8. Final Portfolio Recommendation

In [None]:
# Recommend Max Sharpe portfolio (best risk-adjusted returns)
recommended_weights, recommended_perf = cleaned_weights_sharpe, perf_sharpe
recommendation_type = 'Maximum Sharpe Ratio'

# Performance tuple is (return, volatility, Sharpe ratio)
rec_return, rec_vol, rec_sharpe = recommended_perf

rec_rule = '=' * 60
print(rec_rule)
print('FINAL PORTFOLIO RECOMMENDATION')
print(rec_rule)
print(f'\nRecommended Portfolio: {recommendation_type}')

print('\n--- Optimal Weights ---')
for asset, share in recommended_weights.items():
    print(f'  {asset}: {share*100:.1f}%')

print('\n--- Expected Performance ---')
print(f'  Expected Annual Return: {rec_return*100:.2f}%')
print(f'  Expected Volatility:    {rec_vol*100:.2f}%')
print(f'  Sharpe Ratio:           {rec_sharpe:.3f}')

In [None]:
# Portfolio weights visualization
fig, ax = plt.subplots(figsize=(8, 8))

# Order weights and colours by `tickers` so slices, labels and colours line up
weights_list = [recommended_weights[t] for t in tickers]
colors_list = [colors[t] for t in tickers]

wedges, texts, autotexts = ax.pie(
    weights_list,
    labels=tickers,
    autopct='%1.1f%%',
    colors=colors_list,
    # One offset per slice: matplotlib requires `explode` to have the same
    # length as the data, so derive it instead of hard-coding 3
    explode=[0.02] * len(weights_list),
    startangle=90,
    textprops={'fontsize': 12, 'fontweight': 'bold'}
)

ax.set_title(f'Recommended Portfolio Allocation\n({recommendation_type})',
             fontsize=14, fontweight='bold')

plt.tight_layout()
plt.savefig('../data/processed/portfolio_weights.png', dpi=150, bbox_inches='tight')
plt.show()

## 9. Written Justification

In [None]:
# Build the written justification (deliverable). The f-string interpolates the
# recommended weights, the recommended performance tuple (return, volatility,
# Sharpe ratio), the Task 3 12-month forecast figure, and the minimum-volatility
# portfolio's volatility and return.
# NOTE(review): the narrative sentences (e.g. "low correlation between BND and
# the equity positions") and the portfolio name are fixed text, not derived
# from corr_matrix or recommendation_type; confirm they match the computed
# results before publishing.
justification = f"""
PORTFOLIO SELECTION JUSTIFICATION
{'=' * 60}

We recommend the Maximum Sharpe Ratio portfolio with the following allocation:
TSLA ({recommended_weights['TSLA']*100:.1f}%), BND ({recommended_weights['BND']*100:.1f}%), 
and SPY ({recommended_weights['SPY']*100:.1f}%).

This portfolio is selected because it offers the highest risk-adjusted return 
(Sharpe Ratio of {recommended_perf[2]:.3f}) among all feasible portfolios on the 
efficient frontier. The expected annual return of {recommended_perf[0]*100:.2f}% 
with a volatility of {recommended_perf[1]*100:.2f}% represents an optimal trade-off 
between risk and reward.

The allocation leverages the forecast-driven expected return for TSLA 
({forecast_metrics['return_12m_pct']:.1f}% over 12 months) while using BND for 
stability and SPY for broad market exposure. The low correlation between BND 
and the equity positions (TSLA, SPY) provides diversification benefits that 
reduce overall portfolio risk without sacrificing expected returns.

For investors with lower risk tolerance, the Minimum Volatility portfolio 
(volatility: {perf_minvol[1]*100:.2f}%) offers a more conservative alternative, 
though with reduced expected returns ({perf_minvol[0]*100:.2f}%).
"""

# Show the rendered text in the cell output
print(justification)

In [None]:
# Save portfolio recommendation for Task 5
saved_return, saved_vol, saved_sharpe = recommended_perf
portfolio_recommendation = {
    'weights': recommended_weights,
    'expected_return': saved_return,
    'volatility': saved_vol,
    'sharpe_ratio': saved_sharpe,
    'type': recommendation_type,
}
recommendation_path = '../data/processed/models/portfolio_recommendation.joblib'
joblib.dump(portfolio_recommendation, recommendation_path)

# Also save comparison table
comparison.to_csv('../data/processed/portfolio_comparison.csv')

print('Portfolio recommendation saved for Task 5!')

In [None]:
# Closing banner followed by the deliverables checklist and output paths
done_rule = '=' * 60
print('\n' + done_rule)
print('TASK 4 COMPLETE')
print(done_rule)

print('\nDeliverables:')
for deliverable_line in (
    '  ✓ Efficient Frontier plot with key portfolios marked',
    '  ✓ Covariance matrix visualization (heatmap)',
    '  ✓ Final portfolio recommendation with weights and metrics',
    '  ✓ Written justification for portfolio selection',
):
    print(deliverable_line)

print('\nSaved outputs:')
for output_line in (
    '  - data/processed/covariance_heatmap.png',
    '  - data/processed/efficient_frontier.png',
    '  - data/processed/portfolio_weights.png',
    '  - data/processed/portfolio_comparison.csv',
    '  - data/processed/models/portfolio_recommendation.joblib',
):
    print(output_line)