# A Computational Implementation of the Black-Litterman Model: Mathematical Foundations and Practical Applications

Black-Litterman Model Examples: demonstrations of the model's main use cases and features

In [None]:
!pip install nbimporter




In [None]:
import nbimporter
import numpy as np
import matplotlib.pyplot as plt
"""
Black-Litterman Model Implementation
Based on Meucci (2008): "The Black-Litterman Approach: Original Model and Extensions"
https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1117574

This implementation follows the market formulation (Section 3) which is more intuitive
and handles null-confidence and full-confidence limits correctly.
"""

import numpy as np
from typing import Optional, Tuple, Union
import warnings


class BlackLittermanModel:
    """
    Black-Litterman portfolio optimization model.

    Implements both the original BL formulation (Section 2) and the
    market-based formulation (Section 3) from Meucci's paper.

    The market formulation is recommended as it:
    1. Eliminates the need for the tau parameter in posterior calculations
    2. Correctly handles limiting cases (null and full confidence)
    3. Integrates naturally with scenario analysis

    Attributes:
        pi (np.ndarray): Prior expected returns, stored as a float N-vector
        sigma (np.ndarray): Covariance matrix of asset returns (N x N, float)
        tau (float): Uncertainty scalar for prior (only used in original formulation)
        use_market_formulation (bool): If True, uses market-based BL (recommended)
    """

    def __init__(
        self,
        pi: np.ndarray,
        sigma: np.ndarray,
        tau: float = 0.025,
        use_market_formulation: bool = True
    ):
        """
        Initialize the Black-Litterman model.

        Args:
            pi: Prior expected returns (N x 1 array or N-length vector)
                Typically derived from equilibrium (CAPM) as pi = 2*lambda*Sigma*w_eq
            sigma: Covariance matrix of returns (N x N)
            tau: Scalar representing uncertainty in prior (typically 0.01 to 0.05)
                 Meucci suggests tau ≈ 1/T where T is the time series length
                 Only used in original formulation
            use_market_formulation: If True, uses market-based formulation (Section 3)
                                   If False, uses original formulation (Section 2)

        Raises:
            ValueError: If sigma is not (N x N) for N = len(pi).

        Warns:
            UserWarning: If sigma is not symmetric (it is then symmetrized) or
                has non-positive eigenvalues.
        """
        # Coerce to float so later arithmetic never runs on integer arrays
        # (integer sigma combined with integer tau used to fail in the
        # original formulation).
        self.pi = np.asarray(pi, dtype=float).flatten()
        self.sigma = np.asarray(sigma, dtype=float)
        self.tau = tau
        self.use_market_formulation = use_market_formulation

        # Validate dimensions
        n_assets = len(self.pi)
        if self.sigma.shape != (n_assets, n_assets):
            raise ValueError(
                f"Sigma shape {self.sigma.shape} incompatible with pi length {n_assets}"
            )

        # Symmetrize rather than reject: small asymmetries are usually noise
        if not np.allclose(self.sigma, self.sigma.T):
            warnings.warn("Covariance matrix is not symmetric. Symmetrizing.")
            self.sigma = (self.sigma + self.sigma.T) / 2

        # Check positive definiteness (warn only; the model still runs)
        eigenvalues = np.linalg.eigvalsh(self.sigma)
        if np.any(eigenvalues <= 0):
            warnings.warn(
                f"Covariance matrix has non-positive eigenvalues: {eigenvalues[eigenvalues <= 0]}"
            )

    @staticmethod
    def compute_equilibrium_returns(
        w_eq: np.ndarray,
        sigma: np.ndarray,
        risk_aversion: float = 2.5
    ) -> np.ndarray:
        """
        Compute equilibrium expected returns from market weights.

        Based on equation (5) in Meucci:
        π = 2λΣw_eq

        where λ is the market risk aversion parameter.

        Args:
            w_eq: Market equilibrium weights (N x 1 or N-length)
            sigma: Covariance matrix (N x N)
            risk_aversion: Market risk aversion parameter (λ)
                          Typical values: 2-3 for equity markets
                          Black-Litterman suggest λ ≈ 2.5

        Returns:
            Equilibrium expected returns (N-length vector)
        """
        w_eq = np.asarray(w_eq).flatten()
        sigma = np.asarray(sigma)

        # Equation (5): π = 2λΣw_eq
        return 2 * risk_aversion * sigma @ w_eq

    def _prepare_views(
        self,
        P: np.ndarray,
        Q: np.ndarray,
        Omega: Optional[np.ndarray],
        confidence: float,
        relative_confidence: Optional[np.ndarray]
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Normalize and validate view inputs shared by both formulations.

        A single view given as a 1-D pick vector is promoted to a (1 x N)
        matrix. When Omega is None it is derived via ``_compute_omega``.

        Returns:
            (P, Q, Omega) as float arrays of shape (K x N), (K,), (K x K).

        Raises:
            ValueError: On any dimension mismatch between P, Q, Omega and pi.
        """
        P = np.atleast_2d(np.asarray(P, dtype=float))
        if P.ndim != 2:
            raise ValueError(f"P must be a (K x N) matrix, got {P.ndim} dimensions")
        Q = np.asarray(Q, dtype=float).flatten()

        K, N = P.shape
        if len(Q) != K:
            raise ValueError(f"Q length {len(Q)} must match P rows {K}")
        if N != len(self.pi):
            raise ValueError(f"P columns {N} must match asset count {len(self.pi)}")

        if Omega is None:
            Omega = self._compute_omega(P, confidence, relative_confidence)
        else:
            Omega = np.asarray(Omega, dtype=float)
            # Handle scalar Omega for single view
            if Omega.ndim == 0 and K == 1:
                Omega = Omega.reshape((1, 1))
            elif Omega.shape != (K, K):
                raise ValueError(f"Omega shape {Omega.shape} must be ({K}, {K})")

        return P, Q, Omega

    @staticmethod
    def _invert_view_covariance(M: np.ndarray) -> np.ndarray:
        """Invert the (K x K) view matrix, falling back to the pseudo-inverse."""
        try:
            return np.linalg.inv(M)
        except np.linalg.LinAlgError:
            warnings.warn("Matrix M is singular, using pseudo-inverse")
            return np.linalg.pinv(M)

    def compute_posterior_original(
        self,
        P: np.ndarray,
        Q: np.ndarray,
        Omega: Optional[np.ndarray] = None,
        confidence: float = 1.0,
        relative_confidence: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute posterior distribution using ORIGINAL Black-Litterman formulation.

        This implements equations (20) and (21) from Section 2:

        μ_BL = π + τΣP'(τPΣP' + Ω)^(-1)(Q - Pπ)
        Σ_BL = (1+τ)Σ - τ²ΣP'(τPΣP' + Ω)^(-1)PΣ

        Args:
            P: View pick matrix (K x N) where K = number of views
               Each row defines a portfolio for one view
               (a 1-D vector is treated as a single view)
            Q: View expected returns (K x 1 or K-length)
               Expected returns on the view portfolios
            Omega: Uncertainty matrix for views (K x K)
                   If None, computed from equation (12) or (13)
            confidence: Overall confidence level c in equations (12-13)
                       Higher values = more confident in views
                       Only used when Omega is None
            relative_confidence: Relative confidence for each view (K-length)
                                Used in equation (13) as vector u

        Returns:
            mu_bl: Posterior expected returns (N-length vector)
            sigma_bl: Posterior covariance matrix (N x N)

        Raises:
            ValueError: On dimension mismatch or non-positive confidence.
        """
        P, Q, Omega = self._prepare_views(
            P, Q, Omega, confidence, relative_confidence
        )

        sigma_Pt = self.sigma @ P.T
        P_sigma = P @ self.sigma

        # Intermediate term: (τPΣP' + Ω)^(-1)
        M_inv = self._invert_view_covariance(self.tau * (P @ sigma_Pt) + Omega)

        # Equation (20): μ_BL = π + τΣP'(τPΣP' + Ω)^(-1)(Q - Pπ)
        view_adjustment = Q - P @ self.pi
        mu_bl = self.pi + self.tau * (sigma_Pt @ M_inv @ view_adjustment)

        # Equation (21): Σ_BL = (1+τ)Σ - τ²ΣP'(τPΣP' + Ω)^(-1)PΣ
        sigma_bl = (
            (1 + self.tau) * self.sigma
            - self.tau ** 2 * (sigma_Pt @ M_inv @ P_sigma)
        )

        return mu_bl, sigma_bl

    def compute_posterior_market(
        self,
        P: np.ndarray,
        Q: np.ndarray,
        Omega: Optional[np.ndarray] = None,
        confidence: float = 1.0,
        relative_confidence: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute posterior distribution using MARKET-BASED formulation.

        This implements equations (32) and (33) from Section 3:

        μ_BL^m = π + ΣP'(PΣP' + Ω)^(-1)(Q - Pπ)
        Σ_BL^m = Σ - ΣP'(PΣP' + Ω)^(-1)PΣ

        This formulation:
        - Does not require tau in posterior computation
        - Correctly reduces to prior when Ω → ∞ (no confidence)
        - Correctly reduces to conditional when Ω → 0 (full confidence)
        - Integrates naturally with scenario analysis

        Args:
            P: View pick matrix (K x N); a 1-D vector is treated as one view
            Q: View expected returns (K x 1 or K-length)
            Omega: Uncertainty matrix for views (K x K)
            confidence: Overall confidence level (only used when Omega is None)
            relative_confidence: Relative confidence for each view

        Returns:
            mu_bl: Posterior expected returns (N-length vector)
            sigma_bl: Posterior covariance matrix (N x N)

        Raises:
            ValueError: On dimension mismatch or non-positive confidence.
        """
        P, Q, Omega = self._prepare_views(
            P, Q, Omega, confidence, relative_confidence
        )

        sigma_Pt = self.sigma @ P.T
        P_sigma = P @ self.sigma

        # Intermediate term: (PΣP' + Ω)^(-1)
        M_inv = self._invert_view_covariance(P @ sigma_Pt + Omega)

        # Equation (32): μ_BL^m = π + ΣP'(PΣP' + Ω)^(-1)(Q - Pπ)
        view_adjustment = Q - P @ self.pi
        mu_bl = self.pi + sigma_Pt @ M_inv @ view_adjustment

        # Equation (33): Σ_BL^m = Σ - ΣP'(PΣP' + Ω)^(-1)PΣ
        sigma_bl = self.sigma - sigma_Pt @ M_inv @ P_sigma

        return mu_bl, sigma_bl

    def compute_posterior(
        self,
        P: np.ndarray,
        Q: np.ndarray,
        Omega: Optional[np.ndarray] = None,
        confidence: float = 1.0,
        relative_confidence: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute posterior distribution using the selected formulation.

        This is the main interface method that delegates to either the original
        or market-based formulation based on initialization.

        Args:
            P: View pick matrix (K x N)
               Example: [[0, 1, 0, 0]] for a view on asset 2
                       [[1, -1, 0, 0]] for a view that asset 1 outperforms asset 2
            Q: View expected returns (K x 1 or K-length)
               Example: [0.05] for 5% expected return on the view
            Omega: Uncertainty matrix for views (K x K)
                   If None, computed automatically
            confidence: Overall confidence level (higher = more confident)
                       Typical range: 0.1 to 10
            relative_confidence: Relative confidence for each view (K-length)
                                Can be used to express different confidence levels

        Returns:
            mu_bl: Posterior expected returns (N-length vector)
            sigma_bl: Posterior covariance matrix (N x N)

        Example:
            >>> # Single view: Asset 2 will return 5%
            >>> P = np.array([[0, 1, 0, 0]])
            >>> Q = np.array([0.05])
            >>> mu_bl, sigma_bl = bl.compute_posterior(P, Q, confidence=2.0)
        """
        compute = (
            self.compute_posterior_market
            if self.use_market_formulation
            else self.compute_posterior_original
        )
        return compute(P, Q, Omega, confidence, relative_confidence)

    def _compute_omega(
        self,
        P: np.ndarray,
        confidence: float = 1.0,
        relative_confidence: Optional[np.ndarray] = None
    ) -> np.ndarray:
        """
        Compute the view uncertainty matrix Omega.

        Implements equations (12) and (13) from the paper:

        Simple version (12): Ω = (1/c) * PΣP'

        Advanced version (13): Ω = (1/c) * diag(u) * PΣP' * diag(u)

        where c is the overall confidence and u is the relative confidence vector.

        Args:
            P: View pick matrix (K x N)
            confidence: Overall confidence parameter c; must be positive
            relative_confidence: Relative confidence vector u (K-length)

        Returns:
            Omega: View uncertainty matrix (K x K)

        Raises:
            ValueError: If confidence is not positive (zero would divide by
                zero, a negative value would make Ω negative definite), or if
                relative_confidence does not have one entry per view.
        """
        # `not c > 0` also rejects NaN, which compares False to everything
        if not confidence > 0:
            raise ValueError(f"confidence must be positive, got {confidence}")

        P = np.atleast_2d(np.asarray(P, dtype=float))

        # Compute base uncertainty from market volatilities
        P_sigma_Pt = P @ self.sigma @ P.T

        if relative_confidence is None:
            # Equation (12): Ω = (1/c) * PΣP'
            return (1 / confidence) * P_sigma_Pt

        # Equation (13): Ω = (1/c) * diag(u) * PΣP' * diag(u)
        u = np.asarray(relative_confidence).flatten()
        if len(u) != P.shape[0]:
            raise ValueError(
                f"relative_confidence length {len(u)} must match views {P.shape[0]}"
            )
        u_diag = np.diag(u)
        return (1 / confidence) * u_diag @ P_sigma_Pt @ u_diag

    def set_qualitative_views(
        self,
        P: np.ndarray,
        view_types: list,
        alpha: float = 1.0,
        beta: float = 2.0
    ) -> np.ndarray:
        """
        Convert qualitative views to quantitative Q vector.

        Implements equation (11) from the paper:
        Q_k = (Pπ)_k + η_k * sqrt((PΣP')_{k,k})

        where η ∈ {-β, -α, +α, +β} for:
        - "very bearish" (-β)
        - "bearish" (-α)
        - "bullish" (+α)
        - "very bullish" (+β)

        Args:
            P: View pick matrix (K x N)
            view_types: List of strings from:
                       ['very_bearish', 'bearish', 'bullish', 'very_bullish']
            alpha: Parameter for bullish/bearish (typically 1.0)
            beta: Parameter for very bullish/bearish (typically 2.0)

        Returns:
            Q: Quantitative view vector (K-length)

        Raises:
            ValueError: If view_types has the wrong length or contains an
                unrecognized label.

        Example:
            >>> P = np.array([[1, -1, 0], [0, 1, 0]])
            >>> types = ['bullish', 'very_bullish']
            >>> Q = bl.set_qualitative_views(P, types)
        """
        P = np.atleast_2d(np.asarray(P, dtype=float))
        K = P.shape[0]

        if len(view_types) != K:
            raise ValueError(
                f"view_types length {len(view_types)} must match views {K}"
            )

        # Map view types to eta values
        eta_map = {
            'very_bearish': -beta,
            'bearish': -alpha,
            'bullish': alpha,
            'very_bullish': beta
        }
        for view_type in view_types:
            if view_type not in eta_map:
                raise ValueError(
                    f"view_type '{view_type}' not recognized. "
                    f"Use one of: {list(eta_map.keys())}"
                )
        eta = np.array([eta_map[view_type] for view_type in view_types], dtype=float)

        # diag(PΣP') computed row-wise, without forming the full K x K matrix
        view_volatilities = np.sqrt(np.sum((P @ self.sigma) * P, axis=1))

        # Equation (11): Q_k = (Pπ)_k + η_k * sqrt((PΣP')_{k,k})
        return P @ self.pi + eta * view_volatilities

    def validate_posterior(
        self,
        sigma_bl: np.ndarray,
        check_psd: bool = True,
        tolerance: float = 1e-8
    ) -> dict:
        """
        Validate the posterior covariance matrix.

        Checks:
        1. Symmetry
        2. Positive semi-definiteness
        3. Reasonable conditioning

        Args:
            sigma_bl: Posterior covariance matrix
            check_psd: If True, check positive semi-definiteness
            tolerance: Numerical tolerance for checks

        Returns:
            Dictionary with keys 'is_symmetric', 'is_psd', 'min_eigenvalue',
            'condition_number' and 'warnings' (list of messages). The PSD
            fields stay None when check_psd is False; 'condition_number' stays
            None unless the smallest eigenvalue exceeds the tolerance.
        """
        results = {
            'is_symmetric': False,
            'is_psd': None,
            'min_eigenvalue': None,
            'condition_number': None,
            'warnings': []
        }

        # Check symmetry
        if np.allclose(sigma_bl, sigma_bl.T, atol=tolerance):
            results['is_symmetric'] = True
        else:
            results['warnings'].append("Covariance matrix is not symmetric")

        if not check_psd:
            return results

        # Check positive semi-definiteness
        eigenvalues = np.linalg.eigvalsh(sigma_bl)
        min_eig = np.min(eigenvalues)
        results['min_eigenvalue'] = min_eig

        if np.all(eigenvalues >= -tolerance):
            results['is_psd'] = True
        else:
            results['is_psd'] = False
            results['warnings'].append(
                f"Covariance has negative eigenvalues: min = {results['min_eigenvalue']}"
            )

        # Condition number is only meaningful for a strictly positive spectrum
        if min_eig > tolerance:
            results['condition_number'] = np.max(eigenvalues) / min_eig
            if results['condition_number'] > 1e10:
                results['warnings'].append(
                    f"Poor conditioning: {results['condition_number']:.2e}"
                )

        return results


def create_view_matrix(
    n_assets: int,
    absolute_views: Optional[dict] = None,
    relative_views: Optional[list] = None
) -> np.ndarray:
    """
    Build the pick matrix P from absolute and relative view specifications.

    Absolute views come first (in dict iteration order), followed by the
    relative views (in list order); each view becomes one row of P.

    Args:
        n_assets: Total number of assets (number of columns of P)
        absolute_views: Dict mapping an asset index to its weight
                       Example: {1: 1.0} for a view on asset 1
        relative_views: List of (asset_i, asset_j, weight_i, weight_j) tuples
                       Example: [(0, 1, 1.0, -1.0)] for asset 0 outperforms asset 1

    Returns:
        P matrix (K x N) where K is the total number of views

    Raises:
        ValueError: If an asset index is outside [0, n_assets) or if no view
            at all was supplied.

    Example:
        >>> # View 1: Asset 2 will return X%
        >>> # View 2: Asset 0 will outperform Asset 1
        >>> P = create_view_matrix(
        ...     n_assets=4,
        ...     absolute_views={2: 1.0},
        ...     relative_views=[(0, 1, 1.0, -1.0)]
        ... )
    """
    def build_row(*entries):
        # Validate every index before touching the row, then fill in order
        # (a later entry overwrites an earlier one on the same index).
        for index, _ in entries:
            if index < 0 or index >= n_assets:
                raise ValueError(f"Asset index {index} out of range [0, {n_assets})")
        row = np.zeros(n_assets)
        for index, weight in entries:
            row[index] = weight
        return row

    rows = [
        build_row((asset_idx, weight))
        for asset_idx, weight in (absolute_views or {}).items()
    ]
    rows.extend(
        build_row((asset_i, weight_i), (asset_j, weight_j))
        for asset_i, asset_j, weight_i, weight_j in (relative_views or [])
    )

    if len(rows) == 0:
        raise ValueError("No views specified")

    return np.vstack(rows)




def example_1_basic_usage():
    """
    Example 1: one absolute and one relative view on a 4-asset portfolio.

    Prints the prior, the two views, and the market-formulation posterior
    means for several confidence levels.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("EXAMPLE 1: Basic Black-Litterman Usage")
    print(rule)

    asset_names = ['Stocks', 'Bonds', 'Gold', 'Real Estate']

    # Prior expected returns (could be from equilibrium or other model)
    prior = np.array([0.08, 0.04, 0.05, 0.06])

    # Annualized covariance matrix
    covariance = np.array([
        [0.040, 0.008, 0.002, 0.010],
        [0.008, 0.010, 0.001, 0.004],
        [0.002, 0.001, 0.015, 0.002],
        [0.010, 0.004, 0.002, 0.025]
    ])

    model = BlackLittermanModel(prior, covariance, use_market_formulation=True)

    # View 1: Stocks will return 10%
    # View 2: Bonds will outperform Gold by 2%
    pick = create_view_matrix(
        n_assets=4,
        absolute_views={0: 1.0},            # Stocks
        relative_views=[(1, 2, 1.0, -1.0)]  # Bonds - Gold
    )
    view_returns = np.array([0.10, 0.02])

    print("\nPrior Expected Returns:")
    for name, value in zip(asset_names, prior):
        print(f"  {name:15s}: {value*100:6.2f}%")

    print("\nViews:")
    print("  1. Stocks will return 10.0%")
    print("  2. Bonds will outperform Gold by 2.0%")

    # Posterior means for increasing confidence in the views
    for level in (0.5, 1.0, 2.0, 5.0):
        posterior_mean, _ = model.compute_posterior(
            pick, view_returns, confidence=level
        )
        print(f"\nPosterior Returns (confidence = {level}):")
        for name, value in zip(asset_names, posterior_mean):
            print(f"  {name:15s}: {value*100:6.2f}%")


def example_2_qualitative_views():
    """
    Example 2: turning bullish/bearish labels into a quantitative Q vector.

    Prints the prior, the qualitative views, the Q vector derived from them
    and the resulting posterior means.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("EXAMPLE 2: Qualitative Views")
    print(rule)

    asset_names = ['Tech', 'Energy', 'Finance']

    prior = np.array([0.12, 0.08, 0.10])
    covariance = np.array([
        [0.09, 0.02, 0.03],
        [0.02, 0.06, 0.02],
        [0.03, 0.02, 0.07]
    ])

    model = BlackLittermanModel(prior, covariance, use_market_formulation=True)

    # View 1: Very bullish on Tech
    # View 2: Bearish on Energy
    # View 3: Tech will outperform Finance (bullish on spread)
    pick = create_view_matrix(
        n_assets=3,
        absolute_views={0: 1.0, 1: 1.0},    # Tech, Energy
        relative_views=[(0, 2, 1.0, -1.0)]  # Tech - Finance
    )
    stances = ['very_bullish', 'bearish', 'bullish']
    Q = model.set_qualitative_views(pick, stances, alpha=1.0, beta=2.0)

    print("\nPrior Expected Returns:")
    for name, value in zip(asset_names, prior):
        print(f"  {name:15s}: {value*100:6.2f}%")

    print("\nQualitative Views:")
    print("  1. Very bullish on Tech")
    print("  2. Bearish on Energy")
    print("  3. Bullish: Tech will outperform Finance")

    print(f"\nQuantified Views (Q vector): {Q*100}%")

    posterior_mean, _ = model.compute_posterior(pick, Q, confidence=1.5)

    print("\nPosterior Expected Returns:")
    for name, value in zip(asset_names, posterior_mean):
        print(f"  {name:15s}: {value*100:6.2f}%")


def example_3_scenario_analysis():
    """
    Example 3: scenario analysis, approximated with a very high confidence.

    Two assets are pinned to scenario values; the printed posterior shows how
    the third asset's expected return is adjusted alongside them.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("EXAMPLE 3: Scenario Analysis (Full Confidence)")
    print(rule)

    asset_names = ['Asset A', 'Asset B', 'Asset C']

    prior = np.array([0.07, 0.05, 0.06])
    covariance = np.array([
        [0.04, 0.01, 0.01],
        [0.01, 0.03, 0.01],
        [0.01, 0.01, 0.05]
    ])

    model = BlackLittermanModel(prior, covariance, use_market_formulation=True)

    # Scenario: Asset A returns 10% and Asset B returns 4%
    pick = create_view_matrix(n_assets=3, absolute_views={0: 1.0, 1: 1.0})
    scenario = np.array([0.10, 0.04])

    print("\nPrior Expected Returns:")
    for name, value in zip(asset_names, prior):
        print(f"  {name:15s}: {value*100:6.2f}%")

    print("\nScenario (Full Confidence):")
    print("  Asset A: 10.0%")
    print("  Asset B:  4.0%")

    # Very high confidence approximates scenario analysis
    posterior_mean, _ = model.compute_posterior(pick, scenario, confidence=1000.0)

    print("\nPosterior Expected Returns:")
    for name, value in zip(asset_names, posterior_mean):
        print(f"  {name:15s}: {value*100:6.2f}%")

    print("\nNote: With very high confidence, Asset A ≈ 10% and Asset B ≈ 4%")
    print("      Asset C is adjusted through correlation structure")


def example_4_confidence_sensitivity():
    """
    Example 4: Sensitivity to confidence levels

    Sweeps the overall confidence for a single absolute view on Asset 1 and
    prints the posterior expected return of Asset 1 at each level.
    """
    print("\n" + "=" * 80)
    print("EXAMPLE 4: Confidence Level Sensitivity")
    print("=" * 80)

    # Simple 2-asset case for clarity
    pi = np.array([0.08, 0.06])
    sigma = np.array([
        [0.04, 0.01],
        [0.01, 0.03]
    ])

    bl = BlackLittermanModel(pi, sigma, use_market_formulation=True)

    # View: Asset 1 will return 12%
    P = np.array([[1, 0]])
    Q = np.array([0.12])

    print("\nPrior: Asset 1 = 8%, Asset 2 = 6%")
    print("View:  Asset 1 = 12%")
    print("\nPosterior Asset 1 returns at different confidence levels:")

    # Values are printed as they are computed; nothing is accumulated because
    # no later step consumes them.
    for conf in (0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 100.0):
        mu_bl, _ = bl.compute_posterior(P, Q, confidence=conf)
        print(f"  Confidence = {conf:6.1f}: {mu_bl[0]*100:6.3f}%")

    print("\nObservation:")
    print("  - Low confidence  → stays close to prior (8%)")
    print("  - High confidence → approaches view (12%)")


def example_5_formulation_comparison():
    """
    Example 5: Compare original vs market formulation

    Runs the same single view through both formulations and prints the
    posterior means, the posterior variance of Asset 1, and their differences.
    Variances are converted from decimal units to %² (factor 100² = 10⁴).
    """
    print("\n" + "=" * 80)
    print("EXAMPLE 5: Original vs Market Formulation")
    print("=" * 80)

    # 3-asset portfolio
    pi = np.array([0.08, 0.06, 0.07])
    sigma = np.array([
        [0.04, 0.01, 0.01],
        [0.01, 0.03, 0.01],
        [0.01, 0.01, 0.05]
    ])

    # View: Asset 1 will return 10%
    P = np.array([[1, 0, 0]])
    Q = np.array([0.10])

    print("\nView: Asset 1 will return 10%")
    print(f"Prior: {pi*100}%\n")

    # Original formulation
    bl_orig = BlackLittermanModel(pi, sigma, tau=0.025, use_market_formulation=False)
    mu_orig, sigma_orig = bl_orig.compute_posterior(P, Q, confidence=1.0)

    # Market formulation
    bl_mkt = BlackLittermanModel(pi, sigma, use_market_formulation=True)
    mu_mkt, sigma_mkt = bl_mkt.compute_posterior(P, Q, confidence=1.0)

    # Returns scale by 100 to become percent; variances are squared returns,
    # so they scale by 100**2 to become percent-squared.
    to_pct_sq = 100 ** 2

    print("Original Formulation (with tau=0.025):")
    print(f"  Expected Returns: {mu_orig*100}%")
    print(f"  Posterior Variance (Asset 1): {sigma_orig[0,0]*to_pct_sq:.4f}%²")

    print("\nMarket Formulation (no tau needed):")
    print(f"  Expected Returns: {mu_mkt*100}%")
    print(f"  Posterior Variance (Asset 1): {sigma_mkt[0,0]*to_pct_sq:.4f}%²")

    print("\nDifference:")
    print(f"  Expected Returns: {(mu_orig - mu_mkt)*100}%")
    print(f"  Variance: {(sigma_orig[0,0] - sigma_mkt[0,0])*to_pct_sq:.4f}%²")

    print("\nNote: Market formulation is cleaner (no tau parameter in posterior)")
    print("      and has better limiting behavior for scenario analysis")


def example_6_equilibrium_returns():
    """
    Example 6: implied equilibrium returns from market-cap weights.

    Prints the market weights, then the returns implied by
    ``BlackLittermanModel.compute_equilibrium_returns`` for three risk
    aversion levels.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("EXAMPLE 6: Equilibrium Returns from Market Capitalization")
    print(rule)

    # Example: Global equity markets
    asset_names = ['US', 'Europe', 'Japan', 'Emerging']

    # Market cap weights (example)
    market_weights = np.array([0.55, 0.25, 0.10, 0.10])

    # Historical covariance (example, annualized)
    covariance = np.array([
        [0.040, 0.025, 0.020, 0.030],
        [0.025, 0.035, 0.018, 0.028],
        [0.020, 0.018, 0.045, 0.025],
        [0.030, 0.028, 0.025, 0.060]
    ])

    print("\nMarket Capitalization Weights:")
    for name, weight in zip(asset_names, market_weights):
        print(f"  {name:15s}: {weight*100:5.1f}%")

    print("\nImplied Equilibrium Returns:")

    for risk_aversion in (1.5, 2.5, 3.5):
        implied = BlackLittermanModel.compute_equilibrium_returns(
            market_weights, covariance, risk_aversion
        )
        print(f"\n  Risk Aversion λ = {risk_aversion}:")
        for name, value in zip(asset_names, implied):
            print(f"    {name:15s}: {value*100:6.2f}%")

    print("\nNote: Higher risk aversion → higher implied returns")
    print("      (investors require more return to hold risky assets)")


def example_7_relative_confidence():
    """
    Example 7: giving individual views different relative confidence.

    Prints the posterior means twice for the same two absolute views: once
    with a single overall confidence, once with a per-view relative
    confidence vector passed to ``compute_posterior``.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("EXAMPLE 7: Relative Confidence Levels")
    print(rule)

    asset_names = ['Stock A', 'Stock B', 'Stock C']

    prior = np.array([0.08, 0.07, 0.06])
    covariance = np.array([
        [0.04, 0.01, 0.01],
        [0.01, 0.03, 0.01],
        [0.01, 0.01, 0.05]
    ])

    model = BlackLittermanModel(prior, covariance, use_market_formulation=True)

    # View 1: Stock A will return 12% (HIGH confidence)
    # View 2: Stock B will return 9%  (LOW confidence)
    pick = create_view_matrix(n_assets=3, absolute_views={0: 1.0, 1: 1.0})
    view_returns = np.array([0.12, 0.09])

    def show(means):
        # One aligned line per asset, value shown in percent
        for name, value in zip(asset_names, means):
            print(f"  {name:15s}: {value*100:6.2f}%")

    print("\nPrior Expected Returns:")
    show(prior)

    print("\nViews:")
    print("  1. Stock A = 12% (HIGH confidence)")
    print("  2. Stock B =  9% (LOW confidence)")

    # Same overall confidence for both views
    print("\n--- Equal Confidence ---")
    mu_equal, _ = model.compute_posterior(pick, view_returns, confidence=2.0)
    show(mu_equal)

    # Per-view scaling: a higher entry means LESS confidence in that view
    print("\n--- Relative Confidence (3:1) ---")
    scaling = np.array([1.0, 3.0])
    mu_relative, _ = model.compute_posterior(
        pick, view_returns, confidence=2.0, relative_confidence=scaling
    )
    show(mu_relative)

    print("\nObservation:")
    print("  Stock A moves more toward 12% (higher confidence)")
    print("  Stock B moves less toward 9% (lower confidence)")


if __name__ == "__main__":
    # Run every example in its numbered order
    for run_example in (
        example_1_basic_usage,
        example_2_qualitative_views,
        example_3_scenario_analysis,
        example_4_confidence_sensitivity,
        example_5_formulation_comparison,
        example_6_equilibrium_returns,
        example_7_relative_confidence,
    ):
        run_example()

    closing_rule = "=" * 80
    print("\n" + closing_rule)
    print("All examples completed successfully!")
    print(closing_rule + "\n")



EXAMPLE 1: Basic Black-Litterman Usage

Prior Expected Returns:
  Stocks         :   8.00%
  Bonds          :   4.00%
  Gold           :   5.00%
  Real Estate    :   6.00%

Views:
  1. Stocks will return 10.0%
  2. Bonds will outperform Gold by 2.0%

Posterior Returns (confidence = 0.5):
  Stocks         :   8.67%
  Bonds          :   4.45%
  Gold           :   4.45%
  Real Estate    :   6.19%

Posterior Returns (confidence = 1.0):
  Stocks         :   9.00%
  Bonds          :   4.68%
  Gold           :   4.18%
  Real Estate    :   6.28%

Posterior Returns (confidence = 2.0):
  Stocks         :   9.33%
  Bonds          :   4.90%
  Gold           :   3.90%
  Real Estate    :   6.37%

Posterior Returns (confidence = 5.0):
  Stocks         :   9.67%
  Bonds          :   5.13%
  Gold           :   3.63%
  Real Estate    :   6.47%

EXAMPLE 2: Qualitative Views

Prior Expected Returns:
  Tech           :  12.00%
  Energy         :   8.00%
  Finance        :  10.00%

Qualitative Views:
  1. 

In [None]:
!ls -l

total 4
drwxr-xr-x 1 root root 4096 Jan 16 14:24 sample_data




---





---



In [None]:
"""
========================================================================================
COMPLETE BLACK-LITTERMAN + FINBERT INTEGRATION - FINAL VERSION
Single-File Google Colab Ready Implementation
========================================================================================



Features:
- Complete Black-Litterman model (Meucci 2008)
- FinBERT sentiment analysis integration
- Live NewsAPI data fetching
- ViewGenerator (sentiment → Q, Ω calibration)
- Portfolio optimization
- Results export to Excel
- DEMO mode for testing without API

========================================================================================
"""

# ============================================================================
# PART 1: IMPORTS & SETUP
# ============================================================================

import numpy as np
import pandas as pd
import warnings
import logging
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional
from datetime import datetime, timedelta

# Check and install dependencies for Google Colab
try:
    import yfinance as yf
except ImportError:
    print("Installing yfinance...")
    import sys
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "yfinance", "-q"])
    import yfinance as yf

try:
    from newsapi import NewsApiClient
except ImportError:
    print("Installing newsapi-python...")
    import sys
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "newsapi-python", "-q"])
    from newsapi import NewsApiClient

try:
    from transformers import BertTokenizer, BertForSequenceClassification
    import torch
except ImportError:
    print("Installing transformers and torch...")
    import sys
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers", "torch", "-q"])
    from transformers import BertTokenizer, BertForSequenceClassification
    import torch

# Suppress warnings: installs a blanket 'ignore' filter, so every warning
# category raised after this point is hidden for the rest of the session.
warnings.filterwarnings('ignore')

# Logging setup: root logger at INFO with a "timestamp - message" layout;
# `logger` is the module-level logger used by the classes defined below.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger(__name__)


# ============================================================================
# PART 2: DATA STRUCTURES
# ============================================================================

@dataclass
class SentimentData:
    """Container for sentiment analysis results.

    One instance summarises the sentiment scores collected for a single
    ticker; it is the per-ticker input consumed when views are generated.
    """
    # Ticker symbol the scores belong to.
    ticker: str
    # Mean of the per-text sentiment scores.
    sentiment_mean: float
    # Standard deviation of the per-text sentiment scores.
    sentiment_std: float
    # Number of texts that were scored for this ticker.
    news_count: int
    # The individual per-text scores behind the mean and std.
    raw_scores: List[float]


@dataclass
class BlackLittermanView:
    """Black-Litterman view specification (P, Q, Ω).

    Bundles the three arrays handed to the posterior computation together
    with free-form bookkeeping about how the views were produced.
    """
    # Pick matrix; built as (K views x N assets) by the view generator.
    P: np.ndarray
    # Expected value of each view; built with length K.
    Q: np.ndarray
    # View uncertainty; built as a (K x K) diagonal matrix.
    Omega: np.ndarray
    # Bookkeeping dict (e.g. 'tickers', 'sentiments', 'news_counts', 'null_view').
    metadata: Dict


# ============================================================================
# PART 3: BLACK-LITTERMAN MODEL
# ============================================================================

class BlackLittermanModel:
    """Black-Litterman portfolio optimization (Meucci 2008).

    Stores a prior on expected returns (``pi``, ``sigma``) and blends it with
    linear views ``P @ mu = Q`` of uncertainty ``Omega`` to obtain posterior
    moments.

    Attributes:
        pi: Prior expected returns, flattened to shape (N,).
        sigma: Prior covariance matrix, shape (N, N); replaced by its
            symmetric part when the input is not symmetric.
    """

    def __init__(self, pi: np.ndarray, sigma: np.ndarray):
        """Initialize Black-Litterman model.

        Args:
            pi: Prior expected returns; any array-like holding N values.
            sigma: Prior covariance matrix, array-like of shape (N, N).

        Raises:
            ValueError: If ``sigma`` is not N x N.
        """
        self.pi = np.asarray(pi, dtype=float).flatten()
        self.sigma = np.asarray(sigma, dtype=float)

        n_assets = len(self.pi)
        if self.sigma.shape != (n_assets, n_assets):
            raise ValueError(
                f"Sigma shape mismatch: expected {(n_assets, n_assets)}, "
                f"got {self.sigma.shape}"
            )

        # Estimated covariances can be slightly asymmetric from round-off;
        # keep only the symmetric part in that case.
        if not np.allclose(self.sigma, self.sigma.T):
            self.sigma = (self.sigma + self.sigma.T) / 2

    @staticmethod
    def compute_equilibrium_returns(w_eq: np.ndarray, sigma: np.ndarray,
                                    risk_aversion: float = 2.5) -> np.ndarray:
        """Compute equilibrium returns: π = 2λΣw_eq

        Args:
            w_eq: Reference (equilibrium) weights, array-like with N values.
            sigma: Covariance matrix, array-like of shape (N, N).
            risk_aversion: The λ coefficient.

        Returns:
            Implied expected returns, shape (N,).
        """
        # np.asarray lets callers pass nested lists; scaling a raw list by a
        # float would otherwise raise TypeError.
        cov = np.asarray(sigma, dtype=float)
        weights = np.asarray(w_eq, dtype=float).flatten()
        return 2 * risk_aversion * cov @ weights

    def compute_posterior(self, P: np.ndarray, Q: np.ndarray,
                         Omega: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Compute Black-Litterman posterior (Meucci 2008, Eq 32-33).

        μ_BL = π + ΣP'(PΣP' + Ω)^(-1)(Q - Pπ)
        Σ_BL = Σ - ΣP'(PΣP' + Ω)^(-1)PΣ

        Args:
            P: Pick matrix, shape (K, N). A 1-D vector of length N is
                accepted as a single view.
            Q: View targets, K values (scalar allowed for one view).
            Omega: View uncertainty. Either a (K, K) matrix, a 1-D vector of
                K variances (used as the diagonal), or a scalar variance
                shared by all views (uncorrelated).

        Returns:
            Tuple ``(mu_bl, sigma_bl)`` with shapes (N,) and (N, N).

        Raises:
            ValueError: Propagated from NumPy when the shapes of ``P``,
                ``Q`` or ``Omega`` are incompatible with the prior.
        """
        P = np.atleast_2d(np.asarray(P, dtype=float))
        Q = np.asarray(Q, dtype=float).flatten()
        Omega = np.asarray(Omega, dtype=float)

        K = P.shape[0]
        if Omega.ndim == 0:
            # One variance for every view, no cross-view correlation.
            Omega = np.eye(K) * Omega
        elif Omega.ndim == 1:
            # A vector of variances is the diagonal of Ω; plain broadcasting
            # would smear it across rows instead.
            Omega = np.diag(Omega)

        # M = PΣP' + Ω
        M = P @ self.sigma @ P.T + Omega

        # Fall back to the pseudo-inverse only when M is singular; any other
        # exception is a real error and must propagate.
        try:
            M_inv = np.linalg.inv(M)
        except np.linalg.LinAlgError:
            M_inv = np.linalg.pinv(M)

        # ΣP'M^(-1) is shared by both posterior moments.
        gain = self.sigma @ P.T @ M_inv

        # Posterior mean
        mu_bl = self.pi + gain @ (Q - P @ self.pi)

        # Posterior covariance
        sigma_bl = self.sigma - gain @ P @ self.sigma

        return mu_bl, sigma_bl


# ============================================================================
# PART 4: VIEW GENERATOR
# ============================================================================

class ViewGenerator:
    """Generate Black-Litterman views from FinBERT sentiment.

    Every ticker that passes the filters contributes one absolute view on
    its own return:

        Q_k     = π_k + sentiment_mean × σ_k × sentiment_scaling
        Ω_kk    = base_uncertainty
                  + news_volume_weight × 1/sqrt(news_count)
                  + consistency_weight × sentiment_std²
    """

    def __init__(self, tickers: List[str], volatilities: np.ndarray,
                 sentiment_scaling: float = 0.02,
                 base_uncertainty: float = 0.0001,
                 news_volume_weight: float = 0.5,
                 consistency_weight: float = 0.5,
                 min_news_count: int = 3):
        """Initialize ViewGenerator.

        Args:
            tickers: Asset universe; position in this list is the asset index.
            volatilities: Per-asset volatility, indexed like ``tickers``.
            sentiment_scaling: Multiplier turning sentiment × volatility into
                a return tilt.
            base_uncertainty: Constant floor added to every view variance.
            news_volume_weight: Weight of the 1/sqrt(news_count) term.
            consistency_weight: Weight of the sentiment_std² term.
            min_news_count: Tickers with fewer scored texts get no view.
        """
        self.tickers = tickers
        self.volatilities = np.asarray(volatilities)
        self.N = len(tickers)
        self.sentiment_scaling = sentiment_scaling
        self.base_uncertainty = base_uncertainty
        self.news_volume_weight = news_volume_weight
        self.consistency_weight = consistency_weight
        self.min_news_count = min_news_count
        self.ticker_to_idx = {t: i for i, t in enumerate(tickers)}

    def generate_views(self, sentiment_data: Dict[str, SentimentData],
                      prior_returns: np.ndarray,
                      filter_weak_signals: bool = True,
                      min_abs_sentiment: float = 0.1) -> BlackLittermanView:
        """Generate Black-Litterman views from sentiment data.

        Args:
            sentiment_data: Mapping ticker -> sentiment summary. The mapping
                key decides which asset a view refers to.
            prior_returns: Prior expected returns, N values indexed like
                ``self.tickers``.
            filter_weak_signals: When True, drop tickers whose absolute mean
                sentiment is below ``min_abs_sentiment``.
            min_abs_sentiment: Threshold used by ``filter_weak_signals``.

        Returns:
            A ``BlackLittermanView``. When no ticker qualifies, a single
            placeholder view on asset 0 with variance 1e10 is returned and
            ``metadata['null_view']`` is True.
        """
        # Flatten so (N,), (N, 1) and plain-list priors all index the same way.
        prior = np.asarray(prior_returns, dtype=float).flatten()

        # Keep the asset index resolved from the mapping key next to the data,
        # so the universe check and the later index lookup cannot disagree.
        selected = []
        for ticker, data in sentiment_data.items():
            asset_idx = self.ticker_to_idx.get(ticker)
            if asset_idx is None:
                continue
            if data.news_count < self.min_news_count:
                continue
            if filter_weak_signals and abs(data.sentiment_mean) < min_abs_sentiment:
                continue
            selected.append((asset_idx, ticker, data))

        if not selected:
            # Null view: the huge variance makes its pull on the posterior
            # negligible while keeping the (P, Q, Ω) shapes valid.
            P = np.zeros((1, self.N))
            P[0, 0] = 1.0
            Q = np.zeros(1)
            Omega = np.eye(1) * 1e10
            metadata = {'null_view': True, 'tickers': []}
            return BlackLittermanView(P, Q, Omega, metadata)

        K = len(selected)
        idx = np.array([entry[0] for entry in selected], dtype=int)
        means = np.array([entry[2].sentiment_mean for entry in selected], dtype=float)
        stds = np.array([entry[2].sentiment_std for entry in selected], dtype=float)
        # max(..., 1) guards the square root against a zero article count.
        counts = np.array([max(entry[2].news_count, 1) for entry in selected],
                          dtype=float)

        # Build P: one row per view with a single 1 on the viewed asset.
        P = np.zeros((K, self.N))
        P[np.arange(K), idx] = 1.0

        # Compute Q: Q = π + sentiment × σ × scaling
        Q = prior[idx] + means * self.volatilities[idx] * self.sentiment_scaling

        # Compute Ω: diagonal, larger for few articles or dispersed scores.
        omega_diag = (self.base_uncertainty +
                      self.news_volume_weight * (1.0 / np.sqrt(counts)) +
                      self.consistency_weight * stds ** 2)
        Omega = np.diag(omega_diag)

        metadata = {
            'tickers': [entry[1] for entry in selected],
            'sentiments': [entry[2].sentiment_mean for entry in selected],
            'news_counts': [entry[2].news_count for entry in selected]
        }

        return BlackLittermanView(P, Q, Omega, metadata)


# ============================================================================
# PART 5: FINBERT ANALYZER
# ============================================================================

class FinBERTAnalyzer:
    """Sentiment scorer backed by the ProsusAI/finbert checkpoint."""

    def __init__(self):
        """Load the tokenizer and the classifier, then switch to eval mode."""
        checkpoint = "ProsusAI/finbert"
        logger.info("Loading FinBERT model...")
        self.tokenizer = BertTokenizer.from_pretrained(checkpoint)
        self.model = BertForSequenceClassification.from_pretrained(checkpoint)
        self.model.eval()
        logger.info("✓ FinBERT loaded")

    def analyze_sentiment(self, texts: List[str]) -> Tuple[np.ndarray, float, float]:
        """Score a batch of texts; per-text score = column 0 minus column 1.

        Args:
            texts: Texts to classify; all are tokenized as a single batch.

        Returns:
            ``(probs, mean_score, score_std)`` where ``probs`` holds the
            softmax class probabilities per text. For empty input the result
            is ``(empty array, 0.0, 0.5)``; with a single text the std is
            reported as 0.5 instead of 0.
        """
        if not texts:
            return np.array([]), 0.0, 0.5

        encoded = self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors='pt',
        )

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            model_out = self.model(**encoded)

        logits = model_out.logits
        probs = torch.nn.functional.softmax(logits, dim=-1).numpy()

        # The original author labels this P(Pos) - P(Neg).
        # NOTE(review): confirm columns 0/1 against model.config.id2label.
        positive_col = probs[:, 0]
        negative_col = probs[:, 1]
        scores = positive_col - negative_col

        mean_score = float(np.mean(scores))
        if len(scores) > 1:
            score_std = float(np.std(scores))
        else:
            score_std = 0.5

        return probs, mean_score, score_std


# ============================================================================
# PART 6: MAIN OPTIMIZER CLASS
# ============================================================================

class SentimentBlackLittermanOptimizer:
    """Complete integration: NewsAPI → FinBERT → Black-Litterman

    Pipeline executed by :meth:`optimize`:
      1. download prices and estimate an annualised covariance,
      2. fetch recent news per ticker and score it with FinBERT,
      3. convert the scores into Black-Litterman views,
      4. compute the posterior and long-only, fully-invested weights.
    """

    def __init__(self, api_key: str, tickers: List[str],
                 lookback_days: int = 252,
                 news_lookback_days: int = 7,
                 articles_per_ticker: int = 10,
                 risk_aversion: float = 2.5,
                 sentiment_scaling: float = 0.02):
        """Initialize optimizer.

        Creates the NewsAPI client and loads FinBERT immediately; market
        data is loaded lazily on the first :meth:`optimize` call.

        Args:
            api_key: NewsAPI key.
            tickers: Asset universe; order defines the asset index.
            lookback_days: Calendar days of price history (30 extra days are
                requested as a buffer).
            news_lookback_days: Calendar days of news to query.
            articles_per_ticker: Page size of each news query.
            risk_aversion: λ used for equilibrium returns and final weights.
            sentiment_scaling: Passed through to the view generator.
        """
        self.api_key = api_key
        self.tickers = tickers
        self.N = len(tickers)
        self.lookback_days = lookback_days
        self.news_lookback_days = news_lookback_days
        self.articles_per_ticker = articles_per_ticker
        self.risk_aversion = risk_aversion
        self.sentiment_scaling = sentiment_scaling

        self.newsapi = NewsApiClient(api_key=api_key)
        self.finbert = FinBERTAnalyzer()

        # Populated by load_market_data().
        self.sigma = None
        self.volatilities = None
        self.pi = None
        self.w_eq = None
        self.bl_model = None
        self.view_generator = None

    @staticmethod
    def _article_text(article: Dict) -> str:
        """Join an article's title and description into one text to score."""
        # A field may be present with a null value, in which case
        # .get(key, '') returns None; `or ''` covers both cases.
        title = article.get('title') or ''
        desc = article.get('description') or ''
        if title and desc:
            return f"{title}. {desc}"
        return title or desc

    @staticmethod
    def _normalize_long_only(raw_weights: np.ndarray,
                             fallback: np.ndarray) -> np.ndarray:
        """Clip negatives and rescale to sum to 1; use ``fallback`` if impossible."""
        clipped = np.maximum(raw_weights, 0)
        total = np.sum(clipped)
        if not np.isfinite(total) or total <= 0:
            # Every unconstrained weight was non-positive (or NaN): dividing
            # would yield NaN weights, so revert to the reference portfolio.
            logger.warning("  ⚠ No positive unconstrained weight; "
                           "falling back to prior weights")
            return np.array(fallback, dtype=float)
        return clipped / total

    def fetch_news(self, ticker: str) -> List[Dict]:
        """Fetch news for ticker.

        Best-effort: any failure is logged and an empty list is returned so
        that one bad ticker does not abort the whole run.
        """
        try:
            to_date = datetime.now()
            from_date = to_date - timedelta(days=self.news_lookback_days)

            response = self.newsapi.get_everything(
                q=ticker,
                language='en',
                sort_by='relevancy',
                page_size=self.articles_per_ticker,
                from_param=from_date.strftime('%Y-%m-%d'),
                to=to_date.strftime('%Y-%m-%d')
            )

            articles = response.get('articles') or []
            logger.info(f"  ✓ {ticker}: {len(articles)} articles")
            return articles
        except Exception as e:
            logger.error(f"  ✗ {ticker}: {e}")
            return []

    def process_ticker(self, ticker: str) -> Optional[SentimentData]:
        """Fetch news and analyze sentiment.

        Returns:
            The sentiment summary, or None when no article text is available.
        """
        logger.info(f"\nProcessing {ticker}:")

        articles = self.fetch_news(ticker)
        if not articles:
            return None

        texts = [text for text in map(self._article_text, articles) if text]
        if not texts:
            return None

        probs, sentiment_mean, sentiment_std = self.finbert.analyze_sentiment(texts)

        logger.info(f"  Sentiment: {sentiment_mean:+.3f} (std: {sentiment_std:.3f})")
        logger.info(f"  Sample: {texts[0][:60]}...")

        return SentimentData(
            ticker=ticker,
            sentiment_mean=sentiment_mean,
            sentiment_std=sentiment_std,
            news_count=len(texts),
            raw_scores=(probs[:, 0] - probs[:, 1]).tolist()
        )

    def load_market_data(self):
        """Load historical prices and derive the prior.

        Sets ``sigma``, ``volatilities``, ``w_eq`` (equal weights), ``pi``,
        ``bl_model`` and ``view_generator``.

        Raises:
            ValueError: If any ticker has no price series or fewer than 50
                common trading days remain after alignment.
            Exception: Anything raised by the download is logged and re-raised.
        """
        logger.info(f"\n{'='*80}")
        logger.info("LOADING MARKET DATA")
        logger.info(f"{'='*80}")

        end_date = datetime.now()
        start_date = end_date - timedelta(days=self.lookback_days + 30)

        # Download all tickers at once (more efficient)
        logger.info(f"Downloading data for {len(self.tickers)} tickers...")

        try:
            data = yf.download(self.tickers, start=start_date, end=end_date,
                             group_by='ticker', progress=False, threads=True)

            # Extract Close prices
            prices_dict = {}
            for ticker in self.tickers:
                try:
                    if len(self.tickers) == 1:
                        # Single ticker case
                        # NOTE(review): assumes flat columns here — confirm
                        # against the installed yfinance version.
                        prices_dict[ticker] = data['Close']
                    else:
                        # Multiple tickers
                        prices_dict[ticker] = data[ticker]['Close']

                    logger.info(f"  ✓ {ticker}: {len(prices_dict[ticker])} days")
                except Exception as e:
                    logger.warning(f"  ⚠ {ticker}: {e}")

            if not prices_dict:
                raise ValueError("No data loaded for any ticker!")

            # Everything below is sized by self.N, so a partially loaded
            # universe must be rejected here with a readable message rather
            # than surfacing later as a shape-broadcast error.
            missing = [t for t in self.tickers if t not in prices_dict]
            if missing:
                raise ValueError(f"No price data for: {', '.join(missing)}")

            # Create DataFrame and align on common dates
            prices_df = pd.DataFrame(prices_dict)
            prices_df = prices_df.dropna()

            if len(prices_df) < 50:
                raise ValueError(f"Insufficient data: only {len(prices_df)} days")

            logger.info(f"\nCommon trading days: {len(prices_df)}")

            # Daily log returns
            returns_df = np.log(prices_df / prices_df.shift(1)).dropna()

            # Annualise (252 periods) and add a small ridge to the diagonal.
            self.sigma = returns_df.cov().values * 252
            self.sigma += np.eye(self.N) * 1e-6

            # Volatilities
            self.volatilities = np.sqrt(np.diag(self.sigma))

            logger.info("\nVolatilities (annualized):")
            for i, ticker in enumerate(self.tickers):
                logger.info(f"  {ticker}: {self.volatilities[i]*100:5.1f}%")

            # Equilibrium: equal weights stand in for the reference portfolio.
            self.w_eq = np.ones(self.N) / self.N
            self.pi = BlackLittermanModel.compute_equilibrium_returns(
                self.w_eq, self.sigma, self.risk_aversion
            )

            logger.info("\nEquilibrium Returns:")
            for i, ticker in enumerate(self.tickers):
                logger.info(f"  {ticker}: {self.pi[i]*100:5.1f}%")

            # Initialize models
            self.bl_model = BlackLittermanModel(self.pi, self.sigma)
            self.view_generator = ViewGenerator(
                tickers=self.tickers,
                volatilities=self.volatilities,
                sentiment_scaling=self.sentiment_scaling
            )

        except Exception as e:
            logger.error(f"Error loading market data: {e}")
            raise

    def optimize(self) -> Optional[Dict]:
        """Run complete optimization.

        Returns:
            A results dict (timestamp, tickers, sentiment data, view, prior
            and posterior returns, weights, portfolio statistics and both
            covariance matrices), or None when no ticker produced sentiment.
        """
        logger.info(f"\n{'='*80}")
        logger.info("SENTIMENT BLACK-LITTERMAN OPTIMIZATION")
        logger.info(f"{'='*80}")

        # Load market data
        if self.sigma is None:
            self.load_market_data()

        # Fetch news & analyze
        logger.info(f"\n{'='*80}")
        logger.info("FETCHING NEWS & ANALYZING SENTIMENT")
        logger.info(f"{'='*80}")

        sentiment_data = {}
        for ticker in self.tickers:
            data = self.process_ticker(ticker)
            if data is not None:
                sentiment_data[ticker] = data

        if not sentiment_data:
            logger.error("\n✗ No valid sentiment data")
            return None

        # Generate views
        logger.info(f"\n{'='*80}")
        logger.info("GENERATING VIEWS")
        logger.info(f"{'='*80}")

        view = self.view_generator.generate_views(
            sentiment_data, self.pi,
            filter_weak_signals=True, min_abs_sentiment=0.10
        )

        logger.info(f"\n✓ Generated {view.P.shape[0]} views")
        logger.info(f"  Tickers: {view.metadata.get('tickers', [])}")

        # Black-Litterman
        logger.info(f"\n{'='*80}")
        logger.info("BLACK-LITTERMAN POSTERIOR")
        logger.info(f"{'='*80}")

        mu_bl, sigma_bl = self.bl_model.compute_posterior(view.P, view.Q, view.Omega)

        logger.info("\nPosterior Returns:")
        for i, ticker in enumerate(self.tickers):
            delta = mu_bl[i] - self.pi[i]
            direction = "↑" if delta > 0 else "↓" if delta < 0 else "→"
            logger.info(f"  {ticker}: {self.pi[i]*100:5.1f}% → {mu_bl[i]*100:5.1f}% "
                       f"{direction} ({delta*100:+.1f}%)")

        # Optimize
        logger.info(f"\n{'='*80}")
        logger.info("PORTFOLIO OPTIMIZATION")
        logger.info(f"{'='*80}")

        # Pseudo-inverse only for a singular posterior covariance; other
        # exceptions are genuine errors and propagate.
        try:
            sigma_inv = np.linalg.inv(sigma_bl)
        except np.linalg.LinAlgError:
            sigma_inv = np.linalg.pinv(sigma_bl)

        # Unconstrained mean-variance solution, then long-only projection.
        w_unconstrained = sigma_inv @ mu_bl / (2 * self.risk_aversion)
        w_optimal = self._normalize_long_only(w_unconstrained, self.w_eq)

        logger.info("\nOptimal Weights:")
        for i, ticker in enumerate(self.tickers):
            logger.info(f"  {ticker}: {w_optimal[i]*100:5.1f}%")

        portfolio_return = w_optimal @ mu_bl
        portfolio_vol = np.sqrt(w_optimal @ sigma_bl @ w_optimal)
        # Return per unit of volatility; no risk-free rate is subtracted.
        sharpe = portfolio_return / portfolio_vol if portfolio_vol > 0 else 0

        logger.info("\nPortfolio:")
        logger.info(f"  Return: {portfolio_return*100:5.1f}%")
        logger.info(f"  Vol:    {portfolio_vol*100:5.1f}%")
        logger.info(f"  Sharpe: {sharpe:.2f}")

        return {
            'timestamp': datetime.now(),
            'tickers': self.tickers,
            'sentiment_data': sentiment_data,
            'view': view,
            'prior_returns': self.pi,
            'posterior_returns': mu_bl,
            'prior_weights': self.w_eq,
            'optimal_weights': w_optimal,
            'portfolio_return': portfolio_return,
            'portfolio_vol': portfolio_vol,
            'sharpe_ratio': sharpe,
            'sigma': self.sigma,  # Original covariance
            'sigma_bl': sigma_bl  # Posterior covariance
        }


# ============================================================================
# PART 7: DEMO MODE
# ============================================================================

def run_demo_mode(tickers: Optional[List[str]] = None):
    """Demo mode with simulated sentiment (no API needed).

    Downloads real prices for the covariance estimate but replaces the
    news/FinBERT stage with seeded random sentiment scores, then runs the
    same view generation, posterior and long-only weighting steps.

    Args:
        tickers: Asset universe; defaults to ['AAPL', 'MSFT', 'GOOGL'].

    Returns:
        A results dict with the same keys the live optimizer produces, or
        None when any step fails (the error and traceback are printed).
    """
    if tickers is None:
        tickers = ['AAPL', 'MSFT', 'GOOGL']

    # λ for both the equilibrium returns and the final weights (2λ = 5.0).
    risk_aversion = 2.5

    print("\n" + "="*80)
    print("DEMO MODE - SIMULATED SENTIMENT")
    print("="*80)

    # Load market data
    print("\nLoading market data...")
    end_date = datetime.now()
    start_date = end_date - timedelta(days=282)

    try:
        data = yf.download(tickers, start=start_date, end=end_date,
                          group_by='ticker', progress=False)

        prices_dict = {}
        for ticker in tickers:
            try:
                if len(tickers) == 1:
                    prices_dict[ticker] = data['Close']
                else:
                    prices_dict[ticker] = data[ticker]['Close']
                print(f"  ✓ {ticker}: {len(prices_dict[ticker])} days")
            except Exception as exc:
                # Report instead of silently skipping; the check below turns
                # a missing series into one readable error.
                print(f"  ⚠ {ticker}: {exc}")

        # All arrays below are sized by len(tickers); a partial universe
        # would otherwise fail later with an opaque broadcast error.
        missing = [t for t in tickers if t not in prices_dict]
        if missing:
            raise ValueError(f"No price data for: {', '.join(missing)}")

        prices_df = pd.DataFrame(prices_dict).dropna()
        returns_df = np.log(prices_df / prices_df.shift(1)).dropna()

        N = len(tickers)
        # Annualise (252 periods) and add a small ridge to the diagonal.
        sigma = returns_df.cov().values * 252
        sigma += np.eye(N) * 1e-6
        volatilities = np.sqrt(np.diag(sigma))

        w_eq = np.ones(N) / N
        pi = BlackLittermanModel.compute_equilibrium_returns(w_eq, sigma, risk_aversion)

        print("\nEquilibrium Returns:")
        for i, ticker in enumerate(tickers):
            print(f"  {ticker}: {pi[i]*100:5.1f}%")

        # Simulated sentiment
        print("\n" + "="*80)
        print("SIMULATED SENTIMENT")
        print("="*80)

        # Reseeds NumPy's global RNG so the demo is reproducible.
        np.random.seed(42)
        sentiment_data = {}

        base_sentiments = {'AAPL': 0.45, 'MSFT': 0.35, 'GOOGL': 0.25,
                          'TSLA': -0.20, 'NVDA': 0.60}

        for ticker in tickers:
            # The default is drawn eagerly on every iteration, even for known
            # tickers; kept that way so the random stream is unchanged.
            base = base_sentiments.get(ticker, np.random.uniform(-0.3, 0.3))
            news_count = np.random.randint(8, 25)
            raw_scores = np.clip(np.random.normal(base, 0.2, news_count), -1, 1)

            sentiment_data[ticker] = SentimentData(
                ticker=ticker,
                sentiment_mean=float(np.mean(raw_scores)),
                sentiment_std=float(np.std(raw_scores)),
                news_count=news_count,
                raw_scores=raw_scores.tolist()
            )

            print(f"  {ticker}: {sentiment_data[ticker].sentiment_mean:+.3f} "
                  f"({news_count} articles)")

        # Generate views
        generator = ViewGenerator(tickers, volatilities, 0.02)
        view = generator.generate_views(sentiment_data, pi, True, 0.10)

        print(f"\n✓ Generated {view.P.shape[0]} views")

        # Black-Litterman
        bl = BlackLittermanModel(pi, sigma)
        mu_bl, sigma_bl = bl.compute_posterior(view.P, view.Q, view.Omega)

        # Optimize: unconstrained solution, clipped to long-only.
        sigma_inv = np.linalg.pinv(sigma_bl)
        w_optimal = np.maximum(sigma_inv @ mu_bl / (2 * risk_aversion), 0)
        weight_sum = np.sum(w_optimal)
        if np.isfinite(weight_sum) and weight_sum > 0:
            w_optimal = w_optimal / weight_sum
        else:
            # All weights were clipped to zero (or NaN): normalising would
            # divide by zero, so fall back to the equal-weight portfolio.
            print("  ⚠ No positive unconstrained weight; using equal weights")
            w_optimal = w_eq.copy()

        # Results
        print("\n" + "="*80)
        print("FINAL PORTFOLIO")
        print("="*80)
        print(f"\n{'Ticker':<8} {'Sentiment':<12} {'Weight':<10} {'Return'}")
        print("-" * 60)

        for i, ticker in enumerate(tickers):
            sent = sentiment_data[ticker].sentiment_mean
            print(f"{ticker:<8} {sent:>+.3f}         {w_optimal[i]*100:>5.1f}%     "
                  f"{mu_bl[i]*100:>5.1f}%")

        portfolio_return = w_optimal @ mu_bl
        portfolio_vol = np.sqrt(w_optimal @ sigma_bl @ w_optimal)
        # Return per unit of volatility; no risk-free rate is subtracted.
        sharpe = portfolio_return / portfolio_vol if portfolio_vol > 0 else 0

        print("-" * 60)
        print(f"Portfolio Return: {portfolio_return*100:>5.1f}%")
        print(f"Portfolio Vol:    {portfolio_vol*100:>5.1f}%")
        print(f"Sharpe Ratio:     {sharpe:>5.2f}")
        print("="*80)

        results = {
            'timestamp': datetime.now(),
            'tickers': tickers,
            'sentiment_data': sentiment_data,
            'view': view,
            'prior_returns': pi,
            'posterior_returns': mu_bl,
            'prior_weights': w_eq,
            'optimal_weights': w_optimal,
            'portfolio_return': portfolio_return,
            'portfolio_vol': portfolio_vol,
            'sharpe_ratio': sharpe,
            'sigma': sigma,
            'sigma_bl': sigma_bl
        }

        return results

    except Exception as e:
        # Top-level boundary of the demo: report and signal failure with None.
        print(f"\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        return None


# ============================================================================
# PART 8: MAIN EXECUTION
# ============================================================================

if __name__ == "__main__":
    # Google Colab Execution
    #
    # Two modes:
    # 1. DEMO MODE (simulated sentiment) - No API key needed
    # 2. LIVE MODE (real news) - Uses NewsAPI
    import os
    import traceback

    # ========================================================================
    # CONFIGURATION
    # ========================================================================

    USE_DEMO_MODE = False  # Set to True for demo, False for live

    # SECURITY: the key is read from the environment instead of being
    # committed in source. Any key that was previously hard-coded here should
    # be considered leaked and rotated.
    NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")

    # Portfolio
    TICKERS = ['AAPL', 'MSFT', 'GOOGL']  # Start with 3 for testing

    # Parameters
    LOOKBACK_DAYS = 252
    NEWS_LOOKBACK_DAYS = 7
    RISK_AVERSION = 2.5

    # ========================================================================
    # RUN
    # ========================================================================

    print("\n" + "="*80)
    print("BLACK-LITTERMAN + FINBERT PORTFOLIO OPTIMIZATION")
    print("="*80)

    # Live mode is impossible without a key, so degrade to the demo.
    demo_mode = USE_DEMO_MODE
    if not demo_mode and not NEWS_API_KEY:
        print("\n✗ NEWS_API_KEY environment variable is not set; "
              "falling back to DEMO mode")
        demo_mode = True

    results = None
    viz_results = None

    if demo_mode:
        print("\nMode: DEMO (simulated sentiment)")
        print("Set USE_DEMO_MODE = False for live news")
        print("="*80)

        results = run_demo_mode(TICKERS)

    else:
        print("\nMode: LIVE (real NewsAPI + FinBERT)")
        # Never echo any part of the credential.
        print("API Key: loaded from NEWS_API_KEY environment variable")
        print(f"Tickers: {', '.join(TICKERS)}")
        print("="*80)

        try:
            optimizer = SentimentBlackLittermanOptimizer(
                api_key=NEWS_API_KEY,
                tickers=TICKERS,
                lookback_days=LOOKBACK_DAYS,
                news_lookback_days=NEWS_LOOKBACK_DAYS,
                risk_aversion=RISK_AVERSION
            )

            results = optimizer.optimize()

            if results:
                print("\n" + "="*80)
                print("✓ OPTIMIZATION COMPLETE")
                print("="*80)

                print(f"\n{'Ticker':<8} {'Sentiment':<12} {'Weight':<10} {'Return'}")
                print("-" * 60)

                for i, ticker in enumerate(TICKERS):
                    sent_data = results['sentiment_data'].get(ticker)
                    sent = sent_data.sentiment_mean if sent_data else 0.0
                    weight = results['optimal_weights'][i]
                    ret = results['posterior_returns'][i]

                    print(f"{ticker:<8} {sent:>+.3f}         {weight*100:>5.1f}%     "
                          f"{ret*100:>5.1f}%")

                print("-" * 60)
                print(f"Portfolio Return: {results['portfolio_return']*100:>5.1f}%")
                print(f"Portfolio Vol:    {results['portfolio_vol']*100:>5.1f}%")
                print(f"Sharpe Ratio:     {results['sharpe_ratio']:>5.2f}")
                print("="*80)

        except Exception as e:
            results = None
            print("\n" + "="*80)
            print("✗ ERROR")
            print("="*80)
            print(f"\nError: {e}")
            traceback.print_exc()
            print("\nTry setting USE_DEMO_MODE = True to test without API")

    # Create comparison visualization (shared by both modes)
    if results:
        # The plotting function is defined further down the file, so on a
        # top-to-bottom run it does not exist yet when this block executes.
        # Look it up instead of referencing the name directly, which would
        # raise NameError.
        plot_fn = globals().get('create_portfolio_comparison_plots')
        if plot_fn is None:
            print("\n⚠ Skipping visualizations: "
                  "create_portfolio_comparison_plots is not defined yet "
                  "(move this __main__ block below its definition)")
        else:
            print("\n" + "="*80)
            print("CREATING COMPARISON VISUALIZATIONS...")
            print("="*80)

            try:
                viz_results = plot_fn(
                    tickers=TICKERS,
                    results=results,
                    show_plots=True
                )
            except Exception as e:
                print(f"\n✗ Visualization error: {e}")
                traceback.print_exc()

    print("\n" + "="*80)
    print("DONE")
    print("="*80)


# ============================================================================
# PART 9: PORTFOLIO COMPARISON VISUALIZATION
# ============================================================================

def _import_plotly():
    """Return ``(plotly.graph_objects, make_subplots)``, installing plotly if absent.

    On ImportError this shells out to ``pip install plotly`` using the
    current interpreter and retries the import; a failed install propagates
    as ``subprocess.CalledProcessError``.
    """
    try:
        import plotly.graph_objects as go
        from plotly.subplots import make_subplots
    except ImportError:
        print("Installing plotly...")
        import subprocess
        import sys
        subprocess.check_call([sys.executable, "-m", "pip", "install", "plotly", "-q"])
        import plotly.graph_objects as go
        from plotly.subplots import make_subplots
    return go, make_subplots


def _estimate_annualized_covariance(tickers, lookback_days=282,
                                    periods_per_year=252, ridge=1e-6):
    """Estimate an annualized covariance matrix from downloaded closing prices.

    Downloads prices through ``yf.download`` for the trailing
    ``lookback_days`` calendar days, computes log returns, scales their
    covariance by ``periods_per_year`` and adds ``ridge`` to the diagonal.

    Raises:
        ValueError: if closing prices are unavailable for any ticker or
            fewer than two return observations remain after dropping NaNs.
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=lookback_days)

    data = yf.download(tickers, start=start_date, end=end_date,
                       group_by='ticker', progress=False)

    prices_dict = {}
    missing = []
    for ticker in tickers:
        try:
            if len(tickers) == 1:
                prices_dict[ticker] = data['Close']
            else:
                prices_dict[ticker] = data[ticker]['Close']
        except (KeyError, IndexError, TypeError):
            missing.append(ticker)

    # A partial matrix cannot be aligned with the weight vectors, so a single
    # missing ticker makes the whole estimate unusable.
    if missing:
        raise ValueError(f"no closing prices for {', '.join(missing)}")

    prices_df = pd.DataFrame(prices_dict).dropna()
    returns_df = np.log(prices_df / prices_df.shift(1)).dropna()
    if len(returns_df) < 2:
        raise ValueError("not enough overlapping price history")

    sigma = returns_df.cov().values * periods_per_year
    sigma += np.eye(len(tickers)) * ridge
    return sigma


def _long_only_mean_variance_weights(sigma, mu, risk_aversion=5.0):
    """Return clipped, renormalized mean-variance weights.

    Computes ``inv(sigma) @ mu / risk_aversion``, clips negatives to zero and
    rescales to sum to one.  Falls back to equal weights when the clipped
    weights sum to 0.01 or less.  Because of the renormalization, a positive
    ``risk_aversion`` only influences the result through that threshold.
    """
    try:
        sigma_inv = np.linalg.inv(sigma)
    except np.linalg.LinAlgError:
        # Singular covariance: fall back to the pseudo-inverse.
        sigma_inv = np.linalg.pinv(sigma)

    clipped = np.maximum(sigma_inv @ mu / risk_aversion, 0)
    total = np.sum(clipped)
    if total > 0.01:
        return clipped / total
    return np.ones(len(mu)) / len(mu)


def _portfolio_stats(w, mu, sigma):
    """Return ``(return, volatility, sharpe)``; Sharpe is 0 when volatility is not positive."""
    ret = w @ mu
    vol = np.sqrt(w @ sigma @ w)
    sharpe = ret / vol if vol > 0 else 0
    return ret, vol, sharpe


def create_portfolio_comparison_plots(
    tickers: List[str],
    results: Dict,
    show_plots: bool = True
) -> Optional[Dict]:
    """
    Create comparison plots: Market-Cap vs Mean-Variance vs Black-Litterman.

    Builds a 2x2 Plotly dashboard (risk/return scatter, weights, expected
    returns, Sharpe ratios), prints a comparison table and summary, optionally
    shows the figure, and writes it to a timestamped HTML file in the current
    directory.

    Parameters:
        tickers: List of tickers
        results: Results from optimize(). Reads 'posterior_returns',
            'prior_returns', 'prior_weights', 'optimal_weights' (all
            required, one entry per ticker) and 'sigma_bl' (optional; when
            absent the covariance is estimated from downloaded prices).
        show_plots: Display plots interactively

    Returns:
        None when the inputs cannot be visualized (a message is printed),
        otherwise a dict with keys:
            'figure'  - the Plotly figure
            'stats'   - (return, volatility, sharpe) tuples under
                        'market_cap', 'mean_variance', 'black_litterman'
            'weights' - weight vectors under the same three keys

    Notes:
        Market-Cap and Mean-Variance statistics are evaluated with the prior
        returns; Black-Litterman statistics use the posterior returns.
    """
    print("\n" + "="*80)
    print("PORTFOLIO COMPARISON VISUALIZATION")
    print("="*80)

    # ------------------------------------------------------------------
    # Validate inputs before importing (or installing) plotly or
    # downloading prices, so bad input fails fast and cheaply.
    # ------------------------------------------------------------------
    if results is None:
        print("✗ No results to visualize")
        return None

    n_assets = len(tickers)
    if n_assets == 0:
        print("✗ No tickers to visualize")
        return None

    required = ('posterior_returns', 'prior_returns',
                'prior_weights', 'optimal_weights')
    missing = [key for key in required if results.get(key) is None]
    if missing:
        print(f"✗ Results are missing: {', '.join(missing)}")
        return None

    try:
        mu_bl, mu_hist, w_eq, w_bl = (
            np.asarray(results[key], dtype=float).ravel() for key in required
        )
    except (TypeError, ValueError) as e:
        print(f"✗ Results are not numeric: {e}")
        return None

    if any(vec.shape != (n_assets,) for vec in (mu_bl, mu_hist, w_eq, w_bl)):
        print("✗ Results do not match the number of tickers")
        return None

    sigma = results.get('sigma_bl')  # Use posterior covariance

    # If sigma_bl not available, recompute from prices
    if sigma is None:
        print("  Computing covariance from historical data...")
        try:
            sigma = _estimate_annualized_covariance(tickers)
        except Exception as e:
            print(f"✗ Could not compute covariance: {e}")
            return None

    # Validate dimensions
    try:
        sigma = np.asarray(sigma, dtype=float)
    except (TypeError, ValueError):
        sigma = None
    if sigma is None or sigma.ndim != 2 or sigma.shape != (n_assets, n_assets):
        print("✗ Invalid covariance matrix")
        return None

    go, make_subplots = _import_plotly()

    # Compute Mean-Variance (unstable); prior returns stand in for
    # historical estimates.
    w_mv = _long_only_mean_variance_weights(sigma, mu_hist)

    stats_eq = _portfolio_stats(w_eq, mu_hist, sigma)
    stats_mv = _portfolio_stats(w_mv, mu_hist, sigma)
    stats_bl = _portfolio_stats(w_bl, mu_bl, sigma)

    # ========================================================================
    # PLOT 1: Comprehensive Dashboard
    # ========================================================================

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            'Risk-Return Comparison',
            'Portfolio Weights',
            'Expected Returns',
            'Sharpe Ratios'
        ),
        specs=[
            [{'type': 'scatter'}, {'type': 'bar'}],
            [{'type': 'bar'}, {'type': 'bar'}]
        ],
        vertical_spacing=0.15,
        horizontal_spacing=0.12
    )

    # 1. Risk-Return Scatter
    strategies = [
        ('Market-Cap', stats_eq, 'green', 'circle'),
        ('Mean-Variance', stats_mv, 'orange', 'square'),
        ('Black-Litterman', stats_bl, 'red', 'star')
    ]

    for name, (ret, vol, sharpe), color, symbol in strategies:
        fig.add_trace(
            go.Scatter(
                x=[vol * 100],
                y=[ret * 100],
                mode='markers+text',
                name=name,
                marker=dict(size=20, color=color, symbol=symbol,
                            line=dict(width=2, color='white')),
                text=[name],
                textposition='top center',
                hovertemplate=f'{name}<br>Vol: %{{x:.2f}}%<br>Return: %{{y:.2f}}%<br>Sharpe: {sharpe:.3f}',
                showlegend=True
            ),
            row=1, col=1
        )

    # 2. Weights Comparison
    weights_data = [
        ('Market-Cap', w_eq, 'green'),
        ('Mean-Variance', w_mv, 'orange'),
        ('Black-Litterman', w_bl, 'red')
    ]

    for name, weights, color in weights_data:
        fig.add_trace(
            go.Bar(
                x=tickers,
                y=weights * 100,
                name=name,
                marker_color=color,
                showlegend=False
            ),
            row=1, col=2
        )

    # 3. Expected Returns
    returns_data = [stats_eq[0] * 100, stats_mv[0] * 100, stats_bl[0] * 100]
    names = ['Market-Cap', 'Mean-Variance', 'Black-Litterman']
    colors = ['green', 'orange', 'red']

    fig.add_trace(
        go.Bar(
            x=names,
            y=returns_data,
            marker_color=colors,
            showlegend=False,
            text=[f'{r:.1f}%' for r in returns_data],
            textposition='outside'
        ),
        row=2, col=1
    )

    # 4. Sharpe Ratios
    sharpe_data = [stats_eq[2], stats_mv[2], stats_bl[2]]

    fig.add_trace(
        go.Bar(
            x=names,
            y=sharpe_data,
            marker_color=colors,
            showlegend=False,
            text=[f'{s:.3f}' for s in sharpe_data],
            textposition='outside'
        ),
        row=2, col=2
    )

    # Update axes
    fig.update_xaxes(title_text='Volatility (%)', row=1, col=1)
    fig.update_yaxes(title_text='Return (%)', row=1, col=1)

    fig.update_xaxes(title_text='Assets', row=1, col=2)
    fig.update_yaxes(title_text='Weight (%)', row=1, col=2)

    fig.update_yaxes(title_text='Expected Return (%)', row=2, col=1)
    fig.update_yaxes(title_text='Sharpe Ratio', row=2, col=2)

    fig.update_layout(
        title_text='Portfolio Strategies Comparison Dashboard',
        template='plotly_white',
        height=800,
        showlegend=True,
        legend=dict(x=0.02, y=0.98, bgcolor='rgba(255,255,255,0.9)')
    )

    # Print comparison table
    print("\n" + "="*80)
    print("PERFORMANCE COMPARISON TABLE")
    print("="*80)
    print(f"\n{'Strategy':<20} {'Return':<12} {'Volatility':<12} {'Sharpe':<10}")
    print("-" * 60)

    for name, (ret, vol, sharpe), _, _ in strategies:
        print(f"{name:<20} {ret*100:>10.2f}%  {vol*100:>10.2f}%  {sharpe:>8.3f}")

    print("-" * 60)

    # Key insights
    print("\n" + "="*80)
    print("KEY INSIGHTS")
    print("="*80)

    print("\n1. MARKET-CAP WEIGHTED (Green - Benchmark):")
    if np.allclose(w_eq, w_eq[0]):
        print(f"   - Equal weights: {w_eq[0]*100:.1f}% each")
    else:
        # Only claim "equal weights" when the prior weights really are equal.
        listing = ", ".join(
            f"{ticker} {weight*100:.1f}%" for ticker, weight in zip(tickers, w_eq)
        )
        print(f"   - Weights: {listing}")
    print(f"   - Return: {stats_eq[0]*100:.2f}%")
    print(f"   - Sharpe: {stats_eq[2]:.3f}")
    print("   → Simple, transparent baseline")

    print("\n2. MEAN-VARIANCE (Orange - Often Unstable):")
    concentration_mv = np.max(w_mv) * 100
    print(f"   - Max concentration: {concentration_mv:.1f}%")
    print(f"   - Return: {stats_mv[0]*100:.2f}%")
    print(f"   - Sharpe: {stats_mv[2]:.3f}")
    if concentration_mv > 60:
        print("   ⚠ HIGH CONCENTRATION - Sign of instability!")
    print("   → Sensitive to estimation errors")

    print("\n3. BLACK-LITTERMAN (Red - Sentiment-Enhanced):")
    concentration_bl = np.max(w_bl) * 100
    print(f"   - Max concentration: {concentration_bl:.1f}%")
    print(f"   - Return: {stats_bl[0]*100:.2f}%")
    print(f"   - Sharpe: {stats_bl[2]:.3f}")
    print("   ✓ Stable, incorporates sentiment views")

    # Improvement metrics. The benchmark Sharpe is 0 when its volatility is
    # not positive, so guard the ratio; dividing by the magnitude keeps the
    # sign meaningful when the benchmark Sharpe is negative.
    benchmark_sharpe = stats_eq[2]
    if benchmark_sharpe != 0:
        sharpe_improvement = (
            (stats_bl[2] - benchmark_sharpe) / abs(benchmark_sharpe)
        ) * 100
        print(f"\n✨ Sharpe Improvement vs Benchmark: {sharpe_improvement:+.1f}%")
    else:
        print("\n✨ Sharpe Improvement vs Benchmark: n/a (benchmark Sharpe is zero)")

    print("="*80)

    # Show plot
    if show_plots:
        fig.show()

    # Save HTML (best effort: a failed save must not discard the results)
    try:
        filename = f"portfolio_comparison_{datetime.now().strftime('%Y%m%d_%H%M%S')}.html"
        fig.write_html(filename)
        print(f"\n✓ Interactive plot saved: {filename}")
    except Exception as e:
        print(f"\n⚠ Could not save HTML: {e}")
    else:
        # Download in Colab; silently skipped outside Colab.
        try:
            from google.colab import files
        except ImportError:
            files = None
        if files is not None:
            try:
                files.download(filename)
                print(f"✓ Downloading {filename}...")
            except Exception as e:
                print(f"⚠ Could not start Colab download: {e}")

    return {
        'figure': fig,
        'stats': {
            'market_cap': stats_eq,
            'mean_variance': stats_mv,
            'black_litterman': stats_bl
        },
        'weights': {
            'market_cap': w_eq,
            'mean_variance': w_mv,
            'black_litterman': w_bl
        }
    }


# Add to main execution
if __name__ == "__main__":
    # Visualize the optimization output when running against live data.
    #
    # `results` is only bound if the optimization step earlier in this file
    # got as far as assigning it; when that step raised first, the name does
    # not exist, so look it up defensively instead of risking a NameError.
    # NOTE(review): the earlier main block appears to call
    # create_portfolio_comparison_plots as well, so this produces a second
    # dashboard/HTML file for the same results - confirm that is intended.
    _live_results = globals().get("results")

    if not USE_DEMO_MODE and _live_results is not None:
        print("\n" + "="*80)
        print("CREATING COMPARISON VISUALIZATIONS...")
        print("="*80)

        viz_results = create_portfolio_comparison_plots(
            tickers=TICKERS,
            results=_live_results,
            show_plots=True
        )



BLACK-LITTERMAN + FINBERT PORTFOLIO OPTIMIZATION

Mode: LIVE (real NewsAPI + FinBERT)
API Key: 1246c60fda...
Tickers: AAPL, MSFT, GOOGL


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

BertForSequenceClassification LOAD REPORT from: ProsusAI/finbert
Key                          | Status     |  | 
-----------------------------+------------+--+-
bert.embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.



✓ OPTIMIZATION COMPLETE

Ticker   Sentiment    Weight     Return
------------------------------------------------------------
AAPL     +0.117          36.6%      12.9%
MSFT     -0.280          34.5%      10.1%
GOOGL    -0.044          28.9%      17.5%
------------------------------------------------------------
Portfolio Return:  13.3%
Portfolio Vol:     15.2%
Sharpe Ratio:      0.87

CREATING COMPARISON VISUALIZATIONS...

PORTFOLIO COMPARISON VISUALIZATION

PERFORMANCE COMPARISON TABLE

Strategy             Return       Volatility   Sharpe    
------------------------------------------------------------
Market-Cap                13.50%       16.43%     0.822
Mean-Variance             13.50%       16.43%     0.822
Black-Litterman           13.26%       16.19%     0.819
------------------------------------------------------------

KEY INSIGHTS

1. MARKET-CAP WEIGHTED (Green - Benchmark):
   - Equal weights: 33.3% each
   - Return: 13.50%
   - Sharpe: 0.822
   → Simple, transparent base


✓ Interactive plot saved: portfolio_comparison_20260207_150753.html


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✓ Downloading portfolio_comparison_20260207_150753.html...

DONE

CREATING COMPARISON VISUALIZATIONS...

PORTFOLIO COMPARISON VISUALIZATION

PERFORMANCE COMPARISON TABLE

Strategy             Return       Volatility   Sharpe    
------------------------------------------------------------
Market-Cap                13.50%       15.49%     0.872
Mean-Variance             13.26%       15.17%     0.874
Black-Litterman           13.26%       15.17%     0.874
------------------------------------------------------------

KEY INSIGHTS

1. MARKET-CAP WEIGHTED (Green - Benchmark):
   - Equal weights: 33.3% each
   - Return: 13.50%
   - Sharpe: 0.872
   → Simple, transparent baseline

2. MEAN-VARIANCE (Orange - Often Unstable):
   - Max concentration: 36.5%
   - Return: 13.26%
   - Sharpe: 0.874
   → Sensitive to estimation errors

3. BLACK-LITTERMAN (Red - Sentiment-Enhanced):
   - Max concentration: 36.6%
   - Return: 13.26%
   - Sharpe: 0.874
   ✓ Stable, incorporates sentiment views

✨ Sharpe


✓ Interactive plot saved: portfolio_comparison_20260207_150754.html


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✓ Downloading portfolio_comparison_20260207_150754.html...
