# Tax-Optimized Portfolio Construction - Getting Started

This notebook demonstrates the core features of the Tax-Optimized Portfolio Construction system.

In [None]:
# Setup
import sys
# Put the parent directory and ../src at the front of the module search path.
# Relative entries are resolved against the kernel's working directory.
# Presumably this is what lets later cells import the project modules
# (portfolio_optimizer, tax_loss_harvesting, backtester) — verify against the repo layout.
sys.path.insert(0, '..')
sys.path.insert(0, '../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import warnings
# NOTE(review): this silences every warning category for the rest of the session,
# including numerical and deprecation warnings raised by numpy/pandas in later cells.
# Consider narrowing the filter to specific categories or modules.
warnings.filterwarnings('ignore')

print("Setup complete!")

## 1. Generate Sample Data

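For demonstration, we'll generate synthetic stock data: correlated daily returns for 30 hypothetical stocks over 1,000 business days, compounded into price series scaled from a base of 100.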

In [None]:
# Generate sample data
np.random.seed(42)

n_stocks = 30
n_days = 1000
tickers = [f'STOCK_{i:02d}' for i in range(n_stocks)]


def _nearest_correlation(matrix, min_eigenvalue=1e-6):
    """Return a valid (positive definite, unit-diagonal) correlation matrix near ``matrix``.

    A symmetric matrix with ones on the diagonal is not automatically a
    correlation matrix: it must also be positive semidefinite. This helper
    repairs a candidate by clipping its eigenvalues and rescaling.

    Parameters
    ----------
    matrix : np.ndarray
        Square candidate correlation matrix.
    min_eigenvalue : float
        Floor applied to the eigenvalues before the matrix is rebuilt.

    Returns
    -------
    np.ndarray
        Symmetric matrix with an exact unit diagonal and strictly positive eigenvalues.
    """
    symmetric = (matrix + matrix.T) / 2.0
    eigenvalues, eigenvectors = np.linalg.eigh(symmetric)
    clipped = np.clip(eigenvalues, min_eigenvalue, None)
    repaired = (eigenvectors * clipped) @ eigenvectors.T
    # Clipping inflates the diagonal; rescale so every variance is 1 again.
    scale = np.sqrt(np.diag(repaired))
    repaired = repaired / np.outer(scale, scale)
    # Remove floating-point asymmetry and pin the diagonal exactly.
    repaired = (repaired + repaired.T) / 2.0
    np.fill_diagonal(repaired, 1.0)
    return repaired


# Per-stock daily mean return and daily volatility
mean_returns = np.random.uniform(0.0003, 0.0008, n_stocks)
vols = np.random.uniform(0.015, 0.03, n_stocks)

# Correlation matrix: draw one pairwise correlation per stock pair, mirror it,
# then repair the result. Independent pairwise draws are generally NOT positive
# semidefinite, so without the repair the "covariance" below would be invalid.
upper_i, upper_j = np.triu_indices(n_stocks, k=1)
pair_corr = np.random.uniform(0.2, 0.5, size=upper_i.size)
corr = np.eye(n_stocks)
corr[upper_i, upper_j] = pair_corr
corr[upper_j, upper_i] = pair_corr
corr = _nearest_correlation(corr)

# Covariance = diag(vols) @ corr @ diag(vols)
cov = np.outer(vols, vols) * corr
assert np.linalg.eigvalsh(cov).min() > 0, "covariance matrix must be positive definite"

# Generate returns (one row per business day, one column per ticker)
dates = pd.date_range('2020-01-01', periods=n_days, freq='B')
returns = np.random.multivariate_normal(mean_returns, cov, n_days)
returns_df = pd.DataFrame(returns, index=dates, columns=tickers)

# Generate prices by compounding the daily returns from a base of 100
prices = (1 + returns_df).cumprod() * 100

print(f"Generated {n_stocks} stocks with {n_days} trading days")
print(f"Date range: {dates[0].date()} to {dates[-1].date()}")

## 2. Portfolio Optimization

In [None]:
from portfolio_optimizer import TaxOptimizedPortfolio

# Scaling factor used to annualize daily statistics.
TRADING_DAYS_PER_YEAR = 252

# Prepare inputs: annualized sample mean and covariance of the daily returns.
# returns_df's columns are the tickers, so both results are already labelled by ticker.
expected_returns = returns_df.mean() * TRADING_DAYS_PER_YEAR
cov_matrix = returns_df.cov() * TRADING_DAYS_PER_YEAR

# Initialize optimizer
optimizer = TaxOptimizedPortfolio(
    returns=expected_returns,
    cov_matrix=cov_matrix,
    risk_aversion=2.5
)

# Run optimization with each position capped at 10%
result = optimizer.optimize_mean_variance(max_position=0.10)

if result['status'] == 'optimal':
    print("\n=== Optimal Portfolio ===")
    print(f"Expected Return: {result['expected_return']:.2%}")
    print(f"Volatility: {result['volatility']:.2%}")
    print(f"Sharpe Ratio: {result['sharpe_ratio']:.2f}")
    print(f"Number of Holdings: {result['n_holdings']}")
else:
    # Report the failure here: the cells below only check the status and would
    # otherwise skip their plots / fall back to equal weights without any message.
    print(f"\nOptimization did not reach an optimal solution (status: {result['status']!r}).")
    print("Optimal-portfolio plots will be skipped and the backtest will use equal weights.")

In [None]:
# Visualize portfolio weights (15 largest holdings of the optimal portfolio)
if result['status'] == 'optimal':
    # Deliberately not named `weights`: the backtesting section binds `weights`
    # to the normalized allocation that the Monte Carlo cell also consumes, and
    # re-running this cell must not overwrite it with the full optimizer output.
    optimal_weights = result['weights']
    top_holdings = sorted(optimal_weights.items(), key=lambda item: item[1], reverse=True)[:15]
    holding_names = [name for name, weight in top_holdings]
    holding_weights = [weight for name, weight in top_holdings]

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.bar(holding_names, holding_weights)
    ax.set_ylabel('Weight')
    ax.set_title('Top 15 Portfolio Holdings')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    plt.show()

## 3. Efficient Frontier

In [None]:
# Trace the efficient frontier and plot risk against return, coloured by Sharpe ratio
frontier = optimizer.generate_efficient_frontier(n_points=30)

fig, ax = plt.subplots(figsize=(10, 6))
frontier_points = ax.scatter(
    frontier['volatility'],
    frontier['return'],
    c=frontier['sharpe'],
    cmap='viridis',
    s=50,
)
fig.colorbar(frontier_points, ax=ax, label='Sharpe Ratio')
ax.set_xlabel('Volatility (Annual)')
ax.set_ylabel('Expected Return (Annual)')
ax.set_title('Efficient Frontier')

# Overlay the optimizer's solution, when there is one, as a red star
if result['status'] == 'optimal':
    ax.scatter(
        result['volatility'],
        result['expected_return'],
        color='red',
        s=200,
        marker='*',
        label='Optimal',
    )
    ax.legend()

fig.tight_layout()
plt.show()

## 4. Tax-Loss Harvesting

In [None]:
from tax_loss_harvesting import TaxLossHarvester

# Harvester configured with the short-term and long-term tax rates
harvester = TaxLossHarvester(tax_rate_short_term=0.37, tax_rate_long_term=0.238)

# Open one position in each of the first 20 tickers, bought on the first date of
# the simulated history at that day's price. This is the start of the whole price
# series, i.e. n_days business days of history, not one year before the end.
purchase_date = dates[0]
purchase_prices = prices.iloc[0]

# The third argument (100) is presumably the share quantity — confirm against add_purchase.
for ticker, price in purchase_prices.iloc[:20].items():
    harvester.add_purchase(ticker, purchase_date, 100, price)

print(f"Added {len(tickers[:20])} positions purchased on {purchase_date.date()}")

In [None]:
# Value the positions at the last simulated prices and look for harvestable losses
current_date = dates[-1]
current_prices = prices.iloc[-1].to_dict()

opportunities = harvester.identify_harvest_opportunities(
    current_prices=current_prices,
    current_date=current_date,
    min_loss_pct=0.03,
)

print(f"\nFound {len(opportunities)} harvest opportunities")

if opportunities:
    print("\nTop 5 Opportunities:")
    # Show the first five entries in the order the harvester returned them
    for opp in opportunities[:5]:
        loss_text = f"  {opp['ticker']}: Loss ${opp['unrealized_loss']:.2f} ({opp['unrealized_loss_pct']:.1%}), "
        benefit_text = f"Tax Benefit ${opp['tax_benefit']:.2f}"
        print(loss_text + benefit_text)

## 5. Backtesting

In [None]:
from backtester import Backtester

# Backtest engine over the simulated price history
backtester = Backtester(prices=prices, initial_capital=1_000_000, transaction_cost=0.001)

# Choose the starting allocation
if result['status'] != 'optimal':
    # No optimal solution available: equal weight across the first 15 tickers
    weights = {t: 1/15 for t in tickers[:15]}
else:
    # Keep the 15 largest optimal holdings and rescale them to sum to 1
    ranked_holdings = sorted(result['weights'].items(), key=lambda item: item[1], reverse=True)
    top_weights = dict(ranked_holdings[:15])
    total = sum(top_weights.values())
    weights = {k: v/total for k, v in top_weights.items()}

# Run the backtest with monthly rebalancing and tax-aware mode switched on
bt_result = backtester.run_backtest(initial_weights=weights, rebalance_freq='M', tax_aware=True)

print(backtester.generate_report(bt_result))

In [None]:
# Plot performance: portfolio value on top, drawdown underneath, on a shared date axis
fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
value_ax, drawdown_ax = axes

# Portfolio value over time
value_ax.plot(bt_result.portfolio_values.index, bt_result.portfolio_values.values, linewidth=1.5)
value_ax.set(ylabel='Portfolio Value ($)', title='Portfolio Performance')
value_ax.grid(True, alpha=0.3)

# Drawdown: fractional distance below the running peak (0 at a new high, negative otherwise)
rolling_max = bt_result.portfolio_values.cummax()
drawdown = (bt_result.portfolio_values - rolling_max) / rolling_max

drawdown_ax.fill_between(drawdown.index, drawdown.values, 0, alpha=0.5, color='red')
drawdown_ax.set(ylabel='Drawdown', xlabel='Date', title='Underwater Chart')
drawdown_ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## 6. Monte Carlo Simulation

In [None]:
# Simulate 500 forward paths of 252 days each from the backtest's starting weights
mc_results = backtester.run_monte_carlo(initial_weights=weights, n_simulations=500, n_days=252)

# Summarize the distribution of simulated final values, one line per statistic
summary_lines = (
    "\n=== Monte Carlo Results (1 Year Forward) ===",
    f"Expected Value: ${mc_results['mean_final_value']:,.0f}",
    f"Median Value: ${mc_results['median_final_value']:,.0f}",
    f"5th Percentile (VaR): ${mc_results['percentile_5']:,.0f}",
    f"95th Percentile: ${mc_results['percentile_95']:,.0f}",
    f"Probability of Loss: {mc_results['prob_loss']:.1%}",
)
for summary_line in summary_lines:
    print(summary_line)

## Next Steps

1. **Download Real Data**: Run `python main.py download` to get actual market data
2. **Full Pipeline**: Run `python main.py full` to execute the complete workflow
3. **Explore Modules**: Check out the individual module files in `src/` for more features

Happy investing!