# AI-Enhanced 60/40 Portfolio - Data Exploration

This notebook explores the market data and economic indicators used in the AI-driven portfolio strategy.

## Objectives:
1. Load and visualize asset prices
2. Analyze economic indicators (VIX, Yield Spread, Interest Rates)
3. Explore correlations and relationships
4. Understand market regimes

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
import warnings
warnings.filterwarnings('ignore')

# Import custom modules
from data_acquisition import DataAcquisition
from feature_engineering import FeatureEngineer

# Figure appearance: apply matplotlib's seaborn-style dark grid first, then set
# seaborn's "husl" palette (the order is kept because both touch global plot settings).
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# DataFrame rendering: no cap on the number of displayed columns, 4-digit precision
pd.options.display.max_columns = None
pd.options.display.precision = 4

print("Libraries imported successfully!")

## 1. Load Configuration and Data

In [None]:
# Load the notebook configuration from config.yaml.
# The encoding is pinned to UTF-8 so that reading the file does not depend on
# the platform's default locale encoding (which differs between systems).
with open('config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Echo the date range and rebalance frequency read from the 'data' section.
# A missing key raises KeyError here, which surfaces a malformed config early.
print("Configuration loaded:")
print(f"  Start Date: {config['data']['start_date']}")
print(f"  End Date: {config['data']['end_date']}")
print(f"  Rebalance Frequency: {config['data']['rebalance_frequency']}")

In [None]:
# Build the data-acquisition helper from the loaded configuration
data_acq = DataAcquisition(config)

# Pull prices, returns and indicators in a single call
prices, returns, indicators = data_acq.get_full_dataset()

# Report the dimensions of each returned table
print("\nData loaded successfully!")
for table_label, table in (
    ("Prices", prices),
    ("Returns", returns),
    ("Indicators", indicators),
):
    print(f"{table_label} shape: {table.shape}")

## 2. Asset Price Analysis

In [None]:
# Show the head, the tail and the summary statistics of the price table.
# Each view is produced lazily, right before it is displayed, so the heading
# and its table are emitted in the same order as three separate print/display pairs.
price_views = (
    ("First 5 rows of prices:", prices.head),
    ("\nLast 5 rows of prices:", prices.tail),
    ("\nPrice summary statistics:", prices.describe),
)

for heading, build_view in price_views:
    print(heading)
    display(build_view())

In [None]:
# Plot every asset rebased to 100 so relative performance can be compared.
fig, ax = plt.subplots(figsize=(14, 7))

# Rebase each asset on its own first available price. Dividing by the first row
# directly would turn an entire column into NaN whenever that asset has no price
# on the first date. When the first row is complete this equals prices.iloc[0].
# Note: an asset whose history starts later is therefore rebased at its own start date.
base_prices = prices.bfill().iloc[0]
normalized_prices = prices / base_prices * 100

for col in normalized_prices.columns:
    ax.plot(normalized_prices.index, normalized_prices[col], label=col, linewidth=2)

ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Normalized Price (Base 100)', fontsize=12)
ax.set_title('Asset Price Performance (Normalized)', fontsize=14, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Total return from the first to the last available price of each asset.
# The last row is forward-filled for the same reason the first row is back-filled:
# a single missing quote at either end should not produce a NaN result.
print("\nCumulative returns:")
final_prices = prices.ffill().iloc[-1]
cumulative_returns = (final_prices / base_prices - 1) * 100
for asset, ret in cumulative_returns.items():
    print(f"  {asset}: {ret:+.2f}%")

## 3. Returns Analysis

In [None]:
# Descriptive statistics of the per-period returns
print("Returns summary statistics:")
display(returns.describe())

# Scale mean and volatility to a yearly horizon. Twelve periods per year matches
# the "monthly data" assumption stated in the printed heading.
print("\nAnnualized metrics (assuming monthly data):")
periods_per_year = 12
annual_return = returns.mean().mul(periods_per_year)
annual_vol = returns.std().mul(np.sqrt(periods_per_year))
# Ratio of annualized return to annualized volatility (no risk-free rate is subtracted)
sharpe_ratio = annual_return.div(annual_vol)

# One row per asset, one column per metric
metrics_df = annual_return.to_frame('Annual Return').assign(**{
    'Annual Volatility': annual_vol,
    'Sharpe Ratio': sharpe_ratio,
})
display(metrics_df)

In [None]:
# Plot one histogram of returns (in percent) per asset.
# The grid is sized from the number of return columns: a fixed 2x2 grid would
# silently skip every asset after the fourth and leave blank panels when there
# are fewer than four. With exactly four assets the figure is unchanged.
asset_names = list(returns.columns)
n_cols = 2
n_rows = max(1, int(np.ceil(len(asset_names) / n_cols)))

# squeeze=False guarantees a 2-D array of axes even for a single row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(14, 5 * n_rows), squeeze=False)
axes = axes.ravel()

for panel, col in zip(axes, asset_names):
    mean_pct = returns[col].mean() * 100
    panel.hist(returns[col] * 100, bins=30, edgecolor='black', alpha=0.7)
    # Dashed vertical line marks the average return of this asset
    panel.axvline(mean_pct, color='red', linestyle='--',
                  linewidth=2, label=f'Mean: {mean_pct:.2f}%')
    panel.set_xlabel('Monthly Return (%)', fontsize=10)
    panel.set_ylabel('Frequency', fontsize=10)
    panel.set_title(f'{col} Returns Distribution', fontsize=11, fontweight='bold')
    panel.legend(fontsize=9)
    panel.grid(True, alpha=0.3)

# Hide grid cells that have no asset assigned (odd number of assets)
for panel in axes[len(asset_names):]:
    panel.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlation between the return series of all assets
corr_matrix = returns.corr()

# Annotated heatmap with a diverging colour scale centred on zero correlation
heatmap_style = dict(annot=True, fmt='.3f', cmap='coolwarm',
                     center=0, square=True, linewidths=1)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, ax=ax, **heatmap_style)
ax.set_title('Asset Returns Correlation Matrix', fontsize=14, fontweight='bold')
fig.tight_layout()
plt.show()

# Plain-text copy of the same matrix
print("\nKey correlations:")
print(corr_matrix)

## 4. Economic Indicators Analysis

In [None]:
# Show the first ten indicator rows followed by their summary statistics.
# Views are built lazily so each heading is printed right before its table.
indicator_views = (
    ("Economic indicators:", lambda: indicators.head(10)),
    ("\nIndicators summary statistics:", indicators.describe),
)

for heading, build_view in indicator_views:
    print(heading)
    display(build_view())

In [None]:
# Draw the three indicators as stacked time-series panels.
# Each entry: (indicator column, line colour, y-axis label, panel title)
panel_specs = [
    ('VIX', 'red', 'VIX Level', 'CBOE Volatility Index (VIX)'),
    ('Yield_Spread', 'blue', 'Spread (%)', '10Y-3M Treasury Yield Spread'),
    ('Interest_Rate', 'green', 'Rate (%)', 'Interest Rate'),
]

fig, axes = plt.subplots(3, 1, figsize=(14, 12))

for panel, (column, line_color, y_label, panel_title) in zip(axes, panel_specs):
    series = indicators[column]
    panel.plot(series.index, series, color=line_color, linewidth=2)
    panel.set_ylabel(y_label, fontsize=11)
    panel.set_title(panel_title, fontsize=12, fontweight='bold')

    # Only the yield spread gets a zero reference line (below zero = inverted curve);
    # it is added before the median line so the legend order stays the same.
    if column == 'Yield_Spread':
        panel.axhline(y=0, color='red', linestyle='--', linewidth=1, label='Zero (Inversion)')

    # Dashed horizontal line at the sample median of the indicator
    median_level = series.median()
    panel.axhline(y=median_level, color='black', linestyle='--',
                  linewidth=1, label=f'Median: {median_level:.2f}')
    panel.legend(fontsize=9)
    panel.grid(True, alpha=0.3)

# The date label is shown on the bottom panel only
axes[-1].set_xlabel('Date', fontsize=11)

plt.tight_layout()
plt.show()

In [None]:
# Correlate every indicator with every asset's returns.
# For each (indicator, asset) pair the two series are placed side by side, rows
# with a missing value in either one are dropped, and the off-diagonal entry of
# the resulting 2x2 correlation matrix is kept.
# Result layout: one row per asset, one column per indicator.
indicator_return_corr = pd.DataFrame({
    indicator_name: pd.Series({
        asset_name: (
            pd.concat([indicators[indicator_name], returns[asset_name]], axis=1)
            .dropna()
            .corr()
            .iloc[0, 1]
        )
        for asset_name in returns.columns
    })
    for indicator_name in indicators.columns
})

print("Correlation between indicators and asset returns:")
display(indicator_return_corr)

# Heatmap of the same table, colour scale centred on zero
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(indicator_return_corr, ax=ax, annot=True, fmt='.3f',
            cmap='coolwarm', center=0, linewidths=1)
ax.set_title('Indicators vs Asset Returns Correlation', fontsize=14, fontweight='bold')
ax.set_xlabel('Economic Indicators', fontsize=11)
ax.set_ylabel('Assets', fontsize=11)
fig.tight_layout()
plt.show()

## 5. Market Regime Analysis

In [None]:
# Build one boolean flag column per market regime, indexed like `indicators`.
vix_median = indicators['VIX'].median()
rate_median = indicators['Interest_Rate'].median()
yield_spread = indicators['Yield_Spread']

regimes = pd.DataFrame(index=indicators.index).assign(
    # Volatility: VIX above its own sample median
    High_Volatility=indicators['VIX'].gt(vix_median),
    # Yield curve: negative spread, spread within +/-0.5, spread above 1.5
    Inverted_Curve=yield_spread.lt(0),
    Flat_Curve=yield_spread.abs().lt(0.5),
    Steep_Curve=yield_spread.gt(1.5),
    # Interest rate: above its own sample median
    High_Rate=indicators['Interest_Rate'].gt(rate_median),
)

# Share of observations in which each flag is set.
# NOTE(review): a missing indicator value compares as False, so such rows count
# as "not in regime" here — confirm the indicators contain no gaps.
print("Market regime frequencies:")
for col, freq in regimes.mean().items():
    print(f"  {col}: {freq:.1%} of the time")

In [None]:
# Compare average annualized asset returns inside and outside each market regime.
#
# `regimes` carries the index of `indicators`, which is not guaranteed to hold
# exactly the same dates as `returns`. Masking `returns` with a boolean Series on
# a different index makes pandas reindex the mask implicitly or fail with an
# IndexingError, so both tables are restricted to their shared dates first.
# When the two indexes already match, this changes nothing.
shared_dates = returns.index.intersection(regimes.index)
returns_in_scope = returns.loc[shared_dates]
regimes_in_scope = regimes.loc[shared_dates]

# Annualization factor; returns are treated as monthly, as elsewhere in this notebook
MONTHS_PER_YEAR = 12

print("\nAverage returns by market regime:\n")

for regime_col in regimes_in_scope.columns:
    print(f"{regime_col}:")

    # Split returns by whether the regime flag is set on that date
    regime_mask = regimes_in_scope[regime_col]
    regime_true = returns_in_scope[regime_mask]
    regime_false = returns_in_scope[~regime_mask]

    for state, subset in ((True, regime_true), (False, regime_false)):
        print(f"  When {regime_col} = {state}:")
        if subset.empty:
            # The mean of an empty selection is NaN; say so explicitly instead
            print("    (no observations)")
            continue
        avg_returns = subset.mean() * MONTHS_PER_YEAR * 100
        for asset, ret in avg_returns.items():
            print(f"    {asset}: {ret:+.2f}% annualized")

    print()

## 6. Key Insights

Based on the exploration above, we can observe:

1. **Asset Performance**: Different assets show varying performance characteristics
2. **Correlations**: Traditional stock-bond correlations may break down during certain periods
3. **Economic Indicators**: VIX, Yield Spread, and Interest Rates provide valuable signals
4. **Market Regimes**: Different regimes favor different assets

These insights inform our AI model's ability to dynamically allocate across assets based on prevailing market conditions.