In [13]:
# Market Structure Analysis
## A Comprehensive Implementation

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from umap import UMAP
import yfinance as yf
from scipy.cluster.hierarchy import dendrogram, linkage
import warnings
warnings.filterwarnings('ignore')


In [14]:

class MarketStructureAnalyzer:
    """
    Explore the structure of a multi-asset universe from daily returns.

    Typical flow: ``fetch_data`` -> ``compute_features`` / ``perform_pca`` /
    ``create_market_maps`` -> ``visualize_market_structure`` /
    ``analyze_portfolio``. ``returns`` may also be assigned directly when the
    data does not come from yfinance.
    """

    # Dimensionality-reduction methods understood by create_market_maps.
    _MAP_METHODS = ('tsne', 'umap')

    def __init__(self):
        # Adjusted close prices, one column per ticker (set by fetch_data).
        self.data = None
        # Daily simple returns derived from `data`, rows with any NaN dropped.
        self.returns = None
        # Rolling volatility / correlation / momentum features.
        self.features = None
        # Fitted PCA estimator (set by perform_pca).
        self.pca = None
        # PCA loadings as a frame: rows are PCs, columns are tickers.
        self.eigenportfolios = None
        # Method name ('tsne' / 'umap') -> 2-D embedding, one row per ticker.
        self.market_maps = {}

    def _require_returns(self):
        """
        Return ``self.returns`` or raise ValueError when none are available.
        """
        if self.returns is None or self.returns.empty:
            raise ValueError(
                "No return data available; call fetch_data() first."
            )
        return self.returns

    @staticmethod
    def _pc_labels(n_components):
        """
        Build the labels 'PC1', 'PC2', ... for ``n_components`` components.
        """
        return [f'PC{i + 1}' for i in range(n_components)]

    def fetch_data(self, universe, start='2018-01-01', end='2023-12-31'):
        """
        Fetch adjusted close prices for a universe and derive daily returns.

        Parameters
        ----------
        universe : iterable of str
            Ticker symbols to download.
        start, end : str
            Date range passed to ``yf.download``.

        Returns
        -------
        pandas.DataFrame
            Daily simple returns, one column per ticker that was fetched
            successfully. Tickers that fail or come back empty are reported
            on stdout and skipped (best effort), so the result may have fewer
            columns than ``universe``.
        """
        prices = {}
        for ticker in universe:
            try:
                # auto_adjust=False keeps the 'Adj Close' column, which newer
                # yfinance releases drop by default.
                df = yf.download(
                    ticker, start=start, end=end, auto_adjust=False
                )
                if df is None or df.empty:
                    # An empty download would become an all-NaN column and
                    # make dropna() below discard every row.
                    print(f"Error fetching {ticker}: no data returned")
                    continue
                adj_close = df['Adj Close']
                # Newer yfinance releases return MultiIndex columns even for a
                # single ticker, so the selection can be a one-column frame.
                if isinstance(adj_close, pd.DataFrame):
                    adj_close = adj_close.iloc[:, 0]
                prices[ticker] = adj_close
            except Exception as e:
                print(f"Error fetching {ticker}: {e}")

        self.data = pd.DataFrame(prices)
        self.returns = self.data.pct_change().dropna()
        return self.returns

    def compute_features(self):
        """
        Compute rolling statistical features for every asset.

        For each ticker this produces rolling volatility (21/63/252 days),
        rolling correlation with the equal-weight cross-sectional mean return
        (63/252 days) and rolling summed returns as momentum (21/63/252 days).

        Returns
        -------
        pandas.DataFrame
            One column per (feature, window, ticker), named
            ``vol_{w}d_{ticker}``, ``market_corr_{w}d_{ticker}`` and
            ``mom_{w}d_{ticker}``; indexed like ``self.returns``.

        Raises
        ------
        ValueError
            If no returns are available.
        """
        returns = self._require_returns()
        features = {}

        # Volatility features
        for window in [21, 63, 252]:
            vol = returns.rolling(window).std()
            for col in vol.columns:
                features[f'vol_{window}d_{col}'] = vol[col]

        # Correlation features: the "market" is the equal-weight average
        # return across all assets in the universe.
        market_proxy = returns.mean(axis=1)
        for window in [63, 252]:
            for col in returns.columns:
                features[f'market_corr_{window}d_{col}'] = (
                    returns[col].rolling(window).corr(market_proxy)
                )

        # Momentum features (sum of simple returns over the window)
        for window in [21, 63, 252]:
            mom = returns.rolling(window).sum()
            for col in mom.columns:
                features[f'mom_{window}d_{col}'] = mom[col]

        self.features = pd.DataFrame(features)
        return self.features

    def perform_pca(self):
        """
        Fit PCA on standardized returns and build the eigenportfolios.

        Returns
        -------
        pandas.DataFrame
            PCA loadings; rows are components ('PC1', ...), columns are
            tickers.

        Raises
        ------
        ValueError
            If no returns are available.
        """
        returns = self._require_returns()
        returns_scaled = StandardScaler().fit_transform(returns)

        self.pca = PCA().fit(returns_scaled)

        # Label by the number of components actually fitted: PCA keeps at most
        # min(n_observations, n_assets) components, which can be fewer than
        # the number of tickers.
        self.eigenportfolios = pd.DataFrame(
            self.pca.components_,
            columns=returns.columns,
            index=self._pc_labels(self.pca.components_.shape[0])
        )

        return self.eigenportfolios

    def create_market_maps(self, methods=('tsne', 'umap')):
        """
        Embed the assets in 2-D with one or more reduction techniques.

        Each asset is represented by its standardized return history, so
        assets (not dates) are the samples being embedded.

        Parameters
        ----------
        methods : str or iterable of str
            Any of 'tsne' and 'umap'.

        Returns
        -------
        dict
            ``self.market_maps``: method name -> array of shape (n_assets, 2).

        Raises
        ------
        ValueError
            If an unknown method is requested or no returns are available.
        """
        if isinstance(methods, str):
            methods = [methods]
        methods = list(methods)

        # Validate up front so a typo cannot silently reuse another reducer.
        unknown = [m for m in methods if m not in self._MAP_METHODS]
        if unknown:
            raise ValueError(
                f"Unknown market map method(s) {unknown}; "
                f"expected any of {list(self._MAP_METHODS)}"
            )

        returns = self._require_returns()
        asset_vectors = StandardScaler().fit_transform(returns).T
        n_assets = asset_vectors.shape[0]

        for method in methods:
            if method == 'tsne':
                # Perplexity must stay below the number of embedded samples.
                reducer = TSNE(
                    n_components=2,
                    random_state=42,
                    perplexity=min(30, n_assets - 1)
                )
            else:  # 'umap'
                reducer = UMAP(
                    n_neighbors=min(15, n_assets - 1),
                    min_dist=0.1,
                    random_state=42
                )

            self.market_maps[method] = reducer.fit_transform(asset_vectors)

        return self.market_maps

    def _plot_market_map(self, ax, coords, title):
        """
        Scatter a 2-D embedding on ``ax`` and label each point by ticker.
        """
        ax.scatter(coords[:, 0], coords[:, 1])
        for i, ticker in enumerate(self.returns.columns):
            ax.annotate(ticker, (coords[i, 0], coords[i, 1]))
        ax.set_title(title)

    def visualize_market_structure(self, figsize=(20, 15)):
        """
        Create a 2x2 figure summarizing the market structure.

        Panels: cumulative PCA variance, loadings of the first two
        eigenportfolios, the t-SNE map and the UMAP map. PCA and any missing
        market map are computed on demand.

        Parameters
        ----------
        figsize : tuple
            Figure size passed to matplotlib.

        Returns
        -------
        matplotlib.figure.Figure

        Raises
        ------
        ValueError
            If no returns are available.
        """
        self._require_returns()
        if self.pca is None or self.eigenportfolios is None:
            self.perform_pca()
        missing_maps = [
            m for m in self._MAP_METHODS if m not in self.market_maps
        ]
        if missing_maps:
            self.create_market_maps(missing_maps)

        fig, axes = plt.subplots(2, 2, figsize=figsize)
        ax_var, ax_eig = axes[0]
        ax_tsne, ax_umap = axes[1]

        # 1. PCA Analysis
        cumulative_var = np.cumsum(self.pca.explained_variance_ratio_)
        ax_var.plot(range(1, len(cumulative_var) + 1), cumulative_var, 'bo-')
        ax_var.set_title('Cumulative Variance Explained')
        ax_var.set_xlabel('Number of Components')
        ax_var.set_ylabel('Cumulative Variance Ratio')
        ax_var.grid(True)

        # 2. First Two Eigenportfolios
        self.eigenportfolios.iloc[:2].T.plot(kind='bar', ax=ax_eig)
        ax_eig.set_title('First Two Eigenportfolios')
        ax_eig.tick_params(axis='x', rotation=45)

        # 3. Market Maps
        self._plot_market_map(
            ax_tsne, self.market_maps['tsne'], 't-SNE Market Map'
        )
        self._plot_market_map(
            ax_umap, self.market_maps['umap'], 'UMAP Market Map'
        )

        plt.tight_layout()
        return fig

    def analyze_portfolio(self, weights=None):
        """
        Compute summary statistics and PCA exposures for a portfolio.

        Parameters
        ----------
        weights : array-like or pandas.Series, optional
            One weight per asset, in the column order of ``self.returns``
            (a Series is aligned on ticker labels). Defaults to equal weights.

        Returns
        -------
        analysis : dict
            'Annual Return', 'Annual Volatility', 'Sharpe Ratio' (zero
            risk-free rate), 'Max Drawdown' (worst peak-to-trough loss of
            compounded wealth, measured from initial capital), 'Skewness'
            and 'Kurtosis'.
        risk_contributions : pandas.DataFrame
            PCA loadings scaled by each asset's weight; rows are tickers,
            columns are components. Summing a column gives the portfolio's
            exposure to that component.

        Raises
        ------
        ValueError
            If no returns are available or ``weights`` does not hold exactly
            one value per asset.
        """
        returns = self._require_returns()
        n_assets = len(returns.columns)

        if weights is None:
            weights = np.ones(n_assets) / n_assets
        elif isinstance(weights, pd.Series):
            # Align on ticker labels before dropping to a plain array.
            weights = weights.reindex(returns.columns)
        weights = np.asarray(weights, dtype=float)
        if weights.shape != (n_assets,):
            raise ValueError(
                f"weights must contain {n_assets} values, "
                f"got shape {weights.shape}"
            )

        if self.pca is None:
            self.perform_pca()

        portfolio_returns = (returns * weights).sum(axis=1)

        annual_return = portfolio_returns.mean() * 252
        annual_volatility = portfolio_returns.std() * np.sqrt(252)

        # Drawdown is measured on compounded wealth; summing simple returns
        # can report losses beyond 100%. The running peak is floored at the
        # initial capital of 1 so a loss in the first period counts too.
        wealth = (1 + portfolio_returns).cumprod()
        running_peak = wealth.cummax().clip(lower=1.0)

        # Basic statistics
        analysis = {
            'Annual Return': annual_return,
            'Annual Volatility': annual_volatility,
            'Sharpe Ratio': annual_return / annual_volatility,
            'Max Drawdown': (wealth / running_peak - 1).min(),
            'Skewness': portfolio_returns.skew(),
            'Kurtosis': portfolio_returns.kurtosis()
        }

        # Risk decomposition using PCA. components_.T is (assets x PCs), so
        # the weights must scale rows (assets), not columns (components).
        loadings = self.pca.components_.T
        risk_contributions = pd.DataFrame(
            loadings * weights[:, np.newaxis],
            columns=self._pc_labels(loadings.shape[1]),
            index=returns.columns
        )

        return analysis, risk_contributions



In [16]:
# ETF universe: three tickers per asset class.
universe = [
    # US Equity
    'SPY', 'QQQ', 'IWM',
    # International Equity
    'EFA', 'EEM', 'VGK',
    # US Treasury
    'TLT', 'IEF', 'SHY',
    # Credit
    'LQD', 'HYG', 'EMB',
    # Commodities
    'GLD', 'SLV', 'DBC',
    # Real Estate
    'VNQ', 'REET', 'REM',
]

# Build the analyzer, download prices to get daily returns, then derive the
# rolling features from those returns.
analyzer = MarketStructureAnalyzer()
returns = analyzer.fetch_data(universe)
features = analyzer.compute_features()


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [19]:

# Perform analysis
# Fit PCA on the standardized returns; the result holds the component
# loadings with one row per principal component and one column per ticker.
eigenportfolios = analyzer.perform_pca()
# Build the 2-D market maps with the default methods ('tsne' and 'umap');
# the result is a dict keyed by method name.
# NOTE(review): the output recorded for this cell is
# "AttributeError: 'NoneType' object has no attribute 'split'" with no
# traceback, so it is unclear which call raised. Presumably an environment
# issue (e.g. an outdated threadpoolctl) rather than this code -- TODO confirm.
market_maps = analyzer.create_market_maps()


AttributeError: 'NoneType' object has no attribute 'split'

In [None]:

# Create visualizations
# Four panels: cumulative PCA variance, the first two eigenportfolios, and the
# t-SNE and UMAP market maps. Requires the PCA / market-map cell above to have
# completed successfully.
fig = analyzer.visualize_market_structure()
plt.show()

# Analyze equal-weight portfolio
# Calling analyze_portfolio() without weights assigns 1/N to every asset.
analysis, risk_contributions = analyzer.analyze_portfolio()
print("\nPortfolio Analysis:")
# Every entry in `analysis` is formatted to four decimal places.
for key, value in analysis.items():
    print(f"{key}: {value:.4f}")

print("\nRisk Contributions from Principal Components:")
# Column-wise sum: totals the weighted loadings over all assets for each PC.
print(risk_contributions.sum().round(4))