In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import requests
from pypfopt import expected_returns, risk_models, EfficientFrontier, HRPOpt
from datetime import datetime, timedelta
import re
import logging
from collections import defaultdict
import time
from requests.adapters import HTTPAdapter, Retry

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class AssetLoader:
    def __init__(self):
        self.index_map = {
            'US Large Cap': ('S&P 500', 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies', 0, 'Symbol'),
            'US Tech': ('NASDAQ-100', 'https://en.wikipedia.org/wiki/Nasdaq-100', 4, 'Ticker'),
            'Bonds': ['BND', 'AGG', 'TLT', 'IEF', 'LQD', 'HYG'],
            'Commodities': ['GLD', 'SLV', 'USO', 'UNG']
        }
        self.rate_limit_delay = 2  # seconds
        self.setup_requests_session()

    def setup_requests_session(self):
        """Set up a requests session with retry mechanism"""
        retry_strategy = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET", "POST"]
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session = requests.Session()
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

    def _clean_symbol(self, symbol):
        return re.sub(r'[^a-zA-Z0-9-]', '', str(symbol)).replace('.', '-')

    def fetch_assets(self):
        assets = []
        failed_symbols = []
        sector_map = {}
        
        # Fetch equity indices with rate limiting
        for index in ['US Large Cap', 'US Tech']:
            try:
                config = self.index_map[index]
                tables = pd.read_html(config[1])
                df = tables[config[2]]
                symbols = df[config[3]].tolist()
                assets.extend([self._clean_symbol(s) for s in symbols])
            except Exception as e:
                logger.error(f"Error fetching {index}: {str(e)}")
                continue
        
        # Add fixed income and commodities
        assets.extend(self.index_map['Bonds'])
        assets.extend(self.index_map['Commodities'])
        
        # Validate symbols with retry mechanism
        valid_assets = []
        for symbol in list(set(assets)):
            max_retries = 3
            retries = 0
            while retries < max_retries:
                try:
                    ticker = yf.Ticker(symbol)
                    info = ticker.info
                    if info.get('quoteType') == 'CRYPTOCURRENCY':
                        failed_symbols.append((symbol, "Crypto asset"))
                        break
                    hist = ticker.history(period="1mo")
                    if hist.empty or len(hist) < 5:
                        failed_symbols.append((symbol, "Insufficient data"))
                        break
                    sector = info.get('sector', 'Unknown')
                    sector_map[symbol] = sector
                    valid_assets.append(symbol)
                    break
                except Exception as e:
                    if 'Too Many Requests' in str(e):
                        retries += 1
                        if retries < max_retries:
                            logger.warning(f"Rate limited for {symbol}. Retrying in {self.rate_limit_delay}s...")
                            time.sleep(self.rate_limit_delay)
                        else:
                            failed_symbols.append((symbol, "Rate limit exceeded"))
                    else:
                        failed_symbols.append((symbol, str(e)))
                    logger.warning(f"Failed to validate {symbol}: {str(e)}")
                    break
        
        logger.info(f"Loaded {len(valid_assets)} valid symbols")
        logger.info(f"Failed to load {len(failed_symbols)} symbols")
        return valid_assets, sector_map

class MarketAnalyzer:
    def __init__(self, fred_api_key):
        self.fred_api_key = fred_api_key
        self.rate_limit_delay = 2  # seconds
        self.setup_requests_session()

    def setup_requests_session(self):
        """Set up a requests session with retry mechanism"""
        retry_strategy = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET", "POST"]
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session = requests.Session()
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

    def _fetch_vix_data(self):
        """Fetch VIX data from FRED API"""
        url = "https://api.stlouisfed.org/fred/series/observations"
        params = {
            'series_id': 'VIXCLS',
            'file_type': 'json',
            'api_key': self.fred_api_key,
            'limit': 1,
            'sort_order': 'desc'
        }
        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = self.session.get(url, params=params)
                response.raise_for_status()
                data = response.json()
                observations = data['observations']
                if not observations:
                    logger.error("No VIX data returned")
                    return None
                vix_value = float(observations[0]['value'])
                return vix_value
            except Exception as e:
                if attempt < max_retries - 1:
                    logger.warning(f"Failed to fetch VIX data (attempt {attempt + 1}/{max_retries}). Retrying...")
                    time.sleep(self.rate_limit_delay)
                else:
                    logger.error(f"Failed to fetch VIX data after {max_retries} attempts: {str(e)}")
                    return None

    def _fetch_fred_yield(self, series_id):
        """Fetch yield curve data from FRED API"""
        url = f"https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&api_key={self.fred_api_key}&file_type=json"
        try:
            max_retries = 3
            for attempt in range(max_retries):
                try:
                    response = self.session.get(url)
                    response.raise_for_status()
                    data = response.json()
                    values = [float(obs['value']) for obs in data['observations'] if obs['value'] != '.']
                    return values[-1] if values else 4.0
                except Exception as e:
                    if attempt < max_retries - 1:
                        logger.warning(f"Failed to fetch FRED data (attempt {attempt + 1}/{max_retries}). Retrying...")
                        time.sleep(self.rate_limit_delay)
                    else:
                        logger.error(f"Failed to fetch FRED data after {max_retries} attempts: {str(e)}")
                        return 4.0  # Default value
        except:
            logger.warning("Using default yield value")
            return 4.0

    def get_market_regime(self, price_data):
        """Determine current market regime based on volatility and yield curve"""
        try:
            if price_data.empty:
                logger.warning("No price data available for regime detection")
                return 'Neutral'
            returns = price_data.pct_change().dropna()
            # Calculate 6-month rolling volatility
            rolling_vol = returns.rolling(126).std().iloc[-1].mean() * np.sqrt(252)
            # Fetch VIX data using FRED API
            vix_value = self._fetch_vix_data()
            if vix_value is None:
                logger.warning("Using default VIX value of 20")
                vix_value = 20
            # Fetch yield curve data
            ten_year = self._fetch_fred_yield('DGS10')
            two_year = self._fetch_fred_yield('DGS2')
            spread = ten_year - two_year
            # Determine market regime
            if vix_value > 30 or spread < 0:
                return 'Bearish'
            elif vix_value < 20 and spread > 0.5:
                return 'Bullish'
            elif rolling_vol > 0.25:
                return 'High Volatility'
            else:
                return 'Neutral'
        except Exception as e:
            logger.error(f"Market regime detection failed: {str(e)}")
            return 'Neutral'

class PortfolioOptimizer:
    def __init__(self, price_data, sectors):
        if price_data.empty:
            raise ValueError("Cannot initialize optimizer with empty price data")
        self.price_data = price_data
        self.sectors = sectors
        self.expected_returns = expected_returns.mean_historical_return(price_data)
        self.returns_df = expected_returns.returns_from_prices(price_data)
        self.cov_matrix = risk_models.CovarianceShrinkage(price_data).ledoit_wolf()
        # Calculate sector allocations
        self.sector_weights = self._calculate_sector_weights()

    def _calculate_sector_weights(self):
        """Calculate sector weights excluding Unknown sector"""
        sector_weights = defaultdict(float)
        # Count assets per valid sector
        valid_sectors = {symbol: sector 
                        for symbol, sector in self.sectors.items() 
                        if sector != 'Unknown'}
        
        if not valid_sectors:
            raise ValueError("All symbols are marked as Unknown")
            
        # Calculate weights for known sectors only
        total_assets = len(valid_sectors)
        for symbol in valid_sectors:
            sector = valid_sectors[symbol]
            sector_weights[sector] += 1/total_assets
        
        return dict(sector_weights)

    def optimize(self, regime):
        """Optimize portfolio based on market regime"""
        try:
            if regime == 'Bullish':
                return self._growth_strategy()
            elif regime == 'Bearish':
                return self._defensive_strategy()
            else:
                return self._balanced_strategy()
        except Exception as e:
            logger.error(f"Optimization failed: {str(e)}")
            raise

    def _growth_strategy(self):
        """Growth-oriented portfolio optimization"""
        hrp = HRPOpt(self.returns_df)
        hrp.optimize()
        return hrp.clean_weights()

    def _defensive_strategy(self):
        """Defensive portfolio optimization with sector constraints"""
        ef = EfficientFrontier(self.expected_returns, self.cov_matrix)
        # Sector constraints (max 30% per sector)
        sector_limits = {sector: 0.3 for sector in self.sector_weights}
        ef.add_sector_constraints(self.sectors, sector_limits)
        # Minimum 40% in bonds
        bond_symbols = [s for s in self.price_data.columns if self.sectors.get(s) == 'Financial Services']
        if bond_symbols:
            ef.add_constraint(lambda w: sum(w[b] for b in bond_symbols) >= 0.4)
        ef.efficient_risk(0.15)
        return ef.clean_weights()

    def _balanced_strategy(self):
        """Balanced portfolio optimization"""
        hrp = HRPOpt(self.returns_df)
        hrp.optimize()
        return hrp.clean_weights()

class Backtester:
    def __init__(self, price_data, sectors):
        if price_data.empty:
            raise ValueError("Cannot initialize backtester with empty price data")
        self.price_data = price_data
        self.sectors = sectors
        self.returns = price_data.pct_change().dropna()

    def run_backtest(self, weights, risk_free_rate=0.03, transaction_cost=0.0001):
        """Run backtest with transaction costs"""
        try:
            weights_series = pd.Series(weights)
            portfolio_returns = self.returns.dot(weights_series)
            # Calculate transaction costs (0.01% per trade)
            turnover = weights_series.abs().sum()  # Initial allocation
            portfolio_returns.iloc[0] -= transaction_cost * turnover
            # Ongoing turnover (simplified)
            for i in range(1, len(portfolio_returns)):
                turnover = (weights_series - portfolio_returns.iloc[i-1]).abs().sum()
                portfolio_returns.iloc[i] -= transaction_cost * turnover
            # Calculate risk-adjusted metrics
            excess_returns = portfolio_returns - risk_free_rate/252
            sharpe = excess_returns.mean() / excess_returns.std() * np.sqrt(252)
            return {
                'Total Return': portfolio_returns.add(1).prod() - 1,
                'Annualized Return': portfolio_returns.mean() * 252,
                'Annualized Volatility': portfolio_returns.std() * np.sqrt(252),
                'Sharpe Ratio': sharpe,
                'Max Drawdown': (portfolio_returns.add(1).cumprod().cummax() - 
                               portfolio_returns.add(1).cumprod()).max(),
                'Sector Allocation': self._analyze_sectors(weights)
            }
        except Exception as e:
            logger.error(f"Backtest failed: {str(e)}")
            raise

    def _analyze_sectors(self, weights):
        """Analyze sector allocation of the portfolio"""
        sectors = defaultdict(float)
        for symbol, weight in weights.items():
            sectors[self.sectors.get(symbol, 'Unknown')] += weight
        return dict(sectors)

def main():
    try:
        logger.info("Initializing portfolio optimization system")
        # Load assets and sectors
        loader = AssetLoader()
        symbols, sectors = loader.fetch_assets()
        if not symbols:
            logger.error("No valid symbols found. Exiting.")
            return
        
        # Download price data
        logger.info("Downloading market data...")
        data = yf.download(
            symbols,
            period='5y',
            group_by='ticker',
            threads=True,
            progress=False,
            timeout=30  # Increased timeout
        )
        
        # Build price dataframe
        if isinstance(data.columns, pd.MultiIndex):
            price_data = data.xs('Close', level=1, axis=1).dropna(axis=1, how='all')
        else:
            price_data = data['Close'].to_frame().dropna(axis=1, how='all')
        
        if price_data.empty:
            logger.error("No valid price data available after download. Exiting.")
            return
        
        logger.info(f"Final dataset: {price_data.shape[1]} assets, {price_data.shape[0]} days")
        
        # Analyze market regime
        analyzer = MarketAnalyzer(fred_api_key="2644806562857c722f9560731334060e")
        regime = analyzer.get_market_regime(price_data)
        logger.info(f"Current market regime: {regime}")
        
        # Optimize portfolio
        optimizer = PortfolioOptimizer(price_data, sectors)
        weights = optimizer.optimize(regime)
        
        # Run backtest
        backtester = Backtester(price_data, sectors)
        results = backtester.run_backtest(weights)
        
        # Display results
        logger.info("\nPortfolio Performance:")
        for metric, value in results.items():
            if metric == 'Sector Allocation':
                logger.info("\nSector Allocation:")
                for sector, alloc in value.items():
                    logger.info(f"{sector}: {alloc:.2%}")
            else:
                logger.info(f"{metric}: {value:.2%}")
        return results
    except Exception as e:
        logger.error(f"System failure: {str(e)}")
        raise

if __name__ == "__main__":
    main() 

2025-03-23 10:20:45,609 - INFO - Initializing portfolio optimization system
2025-03-23 10:26:09,843 - ERROR - $WDC: possibly delisted; no price data found  (period=1mo)
2025-03-23 10:27:05,517 - ERROR - $BRKB: possibly delisted; no price data found  (period=1mo) (Yahoo error = "No data found, symbol may be delisted")
2025-03-23 10:28:47,616 - ERROR - $BFB: possibly delisted; no price data found  (period=1mo) (Yahoo error = "No data found, symbol may be delisted")
2025-03-23 10:30:17,993 - INFO - Loaded 526 valid symbols
2025-03-23 10:30:17,994 - INFO - Failed to load 3 symbols
2025-03-23 10:30:17,996 - INFO - Downloading market data...


YF.download() has changed argument auto_adjust default to True


2025-03-23 10:32:06,274 - ERROR - 
102 Failed downloads:
2025-03-23 10:32:06,275 - ERROR - ['WRB', 'KR', 'JNPR', 'BLK', 'PPL', 'LUV', 'GRMN', 'NOW', 'LII', 'PAYC', 'F', 'ALL', 'TSCO', 'GFS', 'ED', 'NFLX', 'EPAM', 'VLTO', 'GEHC', 'STE', 'HWM', 'DIS', 'ENPH', 'DLTR', 'IRM', 'TRGP', 'FE', 'RMD', 'GEN', 'UHS', 'VST', 'LRCX', 'CAH', 'ISRG', 'REG', 'MDT', 'BKR', 'GM', 'ECL', 'TEAM', 'CMS', 'LKQ', 'DPZ', 'TSN', 'EMN', 'RSG', 'COP', 'MSTR', 'SPGI', 'PPG', 'KEYS', 'PDD', 'NUE', 'FRT', 'AVY', 'CRL', 'BND', 'LYV', 'MDB', 'MNST', 'MU', 'HUBB', 'WDAY', 'YUM', 'SWKS', 'GL', 'IVZ', 'UNP', 'ELV', 'HOLX', 'ZBH', 'NRG', 'JBHT', 'JCI', 'MET', 'GWW', 'STX', 'AMCR', 'MO', 'DDOG', 'BIIB', 'UAL', 'HII', 'OKE', 'GLD', 'HON', 'BX', 'HAS', 'SBUX', 'LMT', 'SLV', 'PNR', 'CB', 'EBAY', 'CNC', 'DTE', 'ABNB', 'AEP', 'BEN', 'DVA', 'DOV', 'WST']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')
2025-03-23 10:32:06,869 - INFO - Final dataset: 424 assets, 1257 days
2025-03-23 10:32:27,591 - INFO - 