## Summary

**Key Findings:**
1. Portfolio composition validated (long/short deciles)
2. Turnover characteristics measured
3. Sector neutrality compliance checked
4. Position limits validated

**Next Steps:**
- Proceed to `04_backtest_evaluation.ipynb` for full performance analysis
- Consider implementing sector-neutrality optimization if any violations are detected
- Monitor the impact of turnover on transaction costs

In [None]:
# Sector neutrality check: sum the net weight per (date, sector), plot it as a
# heatmap, and flag every sector whose absolute net weight exceeds the limit
# on at least one date.
SECTOR_EXPOSURE_LIMIT = 0.05  # max tolerated |net weight| per sector; used by the check and the messages

if 'sector' in portfolio_df.columns:
    sector_exposure = portfolio_df.groupby(['date', 'sector'])['weight'].sum().reset_index()
    # Wide layout: one row per date, one column per sector; a sector with no
    # positions on a date has zero net exposure.
    sector_pivot = sector_exposure.pivot(index='date', columns='sector', values='weight').fillna(0)

    # Heatmap (transposed so sectors are rows and dates are columns)
    fig, ax = plt.subplots(figsize=(14, 8))
    sns.heatmap(sector_pivot.T, cmap='RdYlGn', center=0, cbar_kws={'label': 'Net Weight'}, ax=ax)
    ax.set_title('Sector Exposures Over Time (GICS L1)', fontsize=14, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel('Sector')
    plt.tight_layout()
    plt.show()

    # Worst-case absolute exposure per sector across all dates
    max_exposure = sector_pivot.abs().max()
    violations = max_exposure[max_exposure > SECTOR_EXPOSURE_LIMIT]

    print("\nSector Neutrality Check:")
    print("="*50)
    print(f"Target: <{SECTOR_EXPOSURE_LIMIT:.0%} exposure per sector")
    print(f"Violations: {len(violations)} sectors")
    if len(violations) > 0:
        print(f"\nSectors exceeding {SECTOR_EXPOSURE_LIMIT:.0%}:")
        print(violations)
else:
    # Say so explicitly: a silent skip would look like a passed check.
    print("\nSector Neutrality Check skipped: 'sector' column not found in portfolio_df")

## 3. Sector Neutrality Validation

In [None]:
# Calculate daily turnover = sum over tickers of |weight_t - weight_{t-1}|.
#
# The change is computed on a dense date x ticker matrix in which a ticker that
# has no row on a date carries weight 0. Shifting within each ticker's existing
# rows would miss positions that leave the portfolio (no row on the exit date,
# so the exit adds no turnover) and would compare a re-entering ticker with its
# stale pre-gap weight instead of 0.
weights_wide = portfolio_df.pivot_table(
    index='date', columns='ticker', values='weight', aggfunc='sum', fill_value=0
).sort_index()

# The first date has no predecessor; fill_value=0 treats it as a build from
# cash, so the initial portfolio construction counts as turnover.
weight_change = (weights_wide - weights_wide.shift(1, fill_value=0)).abs()

daily_turnover = weight_change.sum(axis=1).rename('turnover').reset_index()
daily_turnover.columns = ['date', 'turnover']

mean_turnover = daily_turnover['turnover'].mean()

fig, axes = plt.subplots(2, 1, figsize=(14, 10))

# Daily turnover
axes[0].plot(daily_turnover['date'], daily_turnover['turnover'], linewidth=1, alpha=0.7)
axes[0].set_title('Daily Portfolio Turnover', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Date')
axes[0].set_ylabel('Turnover (Sum of |Weight Changes|)')
axes[0].grid(True, alpha=0.3)

# Histogram
axes[1].hist(daily_turnover['turnover'], bins=50, edgecolor='black', alpha=0.7, color='orange')
axes[1].set_title('Turnover Distribution', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Daily Turnover')
axes[1].set_ylabel('Frequency')
axes[1].axvline(mean_turnover, color='red', linestyle='--', label=f"Mean: {mean_turnover:.2f}")
axes[1].legend()

plt.tight_layout()
plt.show()

print(f"Average daily turnover: {mean_turnover:.2%}")
# 252 = trading days per year used for annualisation
print(f"Annualized turnover: {mean_turnover * 252:.2%}")
print("High turnover strategy - transaction costs critical!")

## 2. Turnover Analysis

In [None]:
# Tally long (weight > 0), short (weight < 0) and total rows for every date.
# Boolean flags are summed per date; 'size' counts every row in the group.
flagged_positions = portfolio_df.assign(
    long=portfolio_df['weight'] > 0,
    short=portfolio_df['weight'] < 0,
)
position_counts = (
    flagged_positions
    .groupby('date')
    .agg(long=('long', 'sum'), short=('short', 'sum'), total=('weight', 'size'))
    .reset_index()
)

# One line per series: (column, legend label, colour, extra line kwargs)
series_specs = [
    ('long', 'Long Positions', 'green', {}),
    ('short', 'Short Positions', 'red', {}),
    ('total', 'Total Positions', 'blue', {'linestyle': '--'}),
]

fig, ax = plt.subplots(figsize=(14, 6))
for column, legend_label, line_color, extra_kwargs in series_specs:
    ax.plot(
        position_counts['date'],
        position_counts[column],
        label=legend_label,
        linewidth=2,
        color=line_color,
        **extra_kwargs,
    )
ax.set_title('Portfolio Composition Over Time', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Positions')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

avg_long = position_counts['long'].mean()
avg_short = position_counts['short'].mean()
print(f"Average long positions: {avg_long:.0f}")
print(f"Average short positions: {avg_short:.0f}")
print(f"Target: Top/Bottom 10% deciles")

## 1. Portfolio Composition Over Time

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
import warnings
# Suppress all warnings for the rest of the session
warnings.filterwarnings('ignore')

# Plot styling: seaborn whitegrid theme and a default figure size
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

# Parse the YAML configuration file into `config`
with open('config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# Portfolio weights, with the `date` column converted to datetime
portfolio_df = pd.read_csv('data/portfolios/portfolio_weights.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)
print(f"Portfolio data loaded: {len(portfolio_df):,} positions")

# Market prices, with the `date` column converted to datetime
market_df = pd.read_csv('data/market/prices.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Portfolio Construction Analysis

This notebook analyzes the portfolio construction process for the sentiment-based long-short strategy.

**Objectives:**
- Validate portfolio composition (long/short positions)
- Analyze turnover characteristics
- Verify sector neutrality constraints
- Confirm position limit compliance