# Data Exploration for DRL Portfolio Allocation

This notebook explores the asset data (SPY, AGG, GLD, VNQ) and analyzes their statistical properties.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from data_acquisition import DataAcquisition

# Notebook-wide plot defaults: seaborn grid theme, then a wide default figure size
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 1. Load Data

In [None]:
# Build the acquisition helper from 'config.yaml' and pull the full dataset
data_acq = DataAcquisition('config.yaml')
dataset = data_acq.fetch_full_dataset()

# Unpack the two entries the rest of the notebook works with
prices, returns = dataset['prices'], dataset['returns']

# Quick sanity summary: covered date span, row count, and asset columns
print(f"Data range: {prices.index[0]} to {prices.index[-1]}")
print(f"Total days: {len(prices)}")
print(f"\nAssets: {list(prices.columns)}")

## 2. Price Evolution

In [None]:
# Rebase every asset to 100 at its first available observation.
# The base is the first non-missing price of each column rather than row 0
# directly: if an asset's history starts after the first date in the frame,
# dividing by a NaN in row 0 would turn its whole series into NaN and the line
# would silently vanish from the chart. When row 0 is fully populated this is
# identical to prices / prices.iloc[0] * 100.
base_prices = prices.bfill().iloc[0]
normalized_prices = prices / base_prices * 100

fig, ax = plt.subplots(figsize=(14, 7))
normalized_prices.plot(ax=ax, linewidth=2)
ax.set_title('Normalized Asset Prices (Base=100)', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Normalized Price', fontsize=12)
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 3. Return Statistics

In [None]:
# Annualized statistics per asset.
# The annualization assumptions are named so they can be tuned in one place
# instead of being repeated as bare literals.
TRADING_DAYS_PER_YEAR = 252  # number of return periods used to scale to one year
RISK_FREE_RATE = 0.0         # annual risk-free rate; 0.0 gives the plain return/volatility ratio

# Mean scales linearly with the number of periods, standard deviation with its square root
annual_returns = returns.mean() * TRADING_DAYS_PER_YEAR
annual_volatility = returns.std() * np.sqrt(TRADING_DAYS_PER_YEAR)

# Excess return over the risk-free rate per unit of volatility
sharpe_ratio = (annual_returns - RISK_FREE_RATE) / annual_volatility

stats_df = pd.DataFrame({
    'Annual Return': annual_returns,
    'Annual Volatility': annual_volatility,
    'Sharpe Ratio': sharpe_ratio
})

print("\nAsset Statistics:")
print(stats_df.round(4))

## 4. Correlation Analysis

In [None]:
# Pairwise correlation between the columns of `returns`
corr_matrix = returns.corr()

# Heatmap appearance collected in one place: annotated cells, diverging
# palette centred on zero, square cells, slightly shrunken colour bar
heatmap_options = dict(
    annot=True,
    fmt='.3f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
)

fig, ax = plt.subplots(figsize=(10, 8))
# Draw on the axes just created (the same axes the implicit current-axes lookup resolves to)
sns.heatmap(corr_matrix, ax=ax, **heatmap_options)
ax.set_title('Asset Return Correlations', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

## 5. Rolling Statistics

In [None]:
# Rolling standard deviation over 60 rows, scaled by sqrt(252) to annualize
rolling_std = returns.rolling(window=60).std()
rolling_vol = rolling_std * np.sqrt(252)

fig, ax = plt.subplots(figsize=(14, 7))
rolling_vol.plot(ax=ax, linewidth=1.5, alpha=0.8)

# Titles and axis labels
ax.set_title('Rolling 60-Day Volatility (Annualized)', fontsize=16, fontweight='bold')
ax.set_ylabel('Volatility', fontsize=12)
ax.set_xlabel('Date', fontsize=12)

# Cosmetics: light grid and a compact legend
ax.grid(True, alpha=0.3)
ax.legend(fontsize=10)

plt.tight_layout()
plt.show()

## 6. Distribution Analysis

In [None]:
# Return distributions: one histogram panel per column of `returns`.
# The grid is sized from the number of columns instead of a fixed 2x2 layout,
# so more than four assets no longer overflow `axes` (IndexError) and fewer
# than four do not leave empty frames behind. With four columns the layout and
# figure size are the same as a fixed 2x2 grid at (14, 10).
n_assets = len(returns.columns)
n_cols = 2
n_rows = max(1, (n_assets + n_cols - 1) // n_cols)  # ceiling division, at least one row

# squeeze=False keeps `axes` two-dimensional even for a single row, so flatten() always works
fig, axes = plt.subplots(n_rows, n_cols, figsize=(14, 5 * n_rows), squeeze=False)
axes = axes.flatten()

for ax, col in zip(axes, returns.columns):
    returns[col].hist(bins=50, ax=ax, alpha=0.7, edgecolor='black')
    # Dashed vertical marker at the sample mean of this column
    ax.axvline(returns[col].mean(), color='red', linestyle='--', linewidth=2, label='Mean')
    ax.set_title(f'{col} Return Distribution', fontsize=12, fontweight='bold')
    ax.set_xlabel('Daily Return', fontsize=10)
    ax.set_ylabel('Frequency', fontsize=10)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Hide any leftover panels when the column count does not fill the grid
for unused_ax in axes[n_assets:]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

## 7. Crisis Periods

In [None]:
# Highlight crisis periods on top of the normalized price chart.
# Each entry is (start date, end date, legend label).
crisis_periods = [
    ('2008-09-01', '2009-03-31', '2008 Financial Crisis'),
    ('2020-02-01', '2020-04-30', 'COVID-19 Crash'),
    ('2022-01-01', '2022-10-31', '2022 Bear Market')
]
# One colour per period so the legend entries can be told apart
crisis_colors = ['red', 'darkorange', 'purple']

fig, ax = plt.subplots(figsize=(16, 8))
normalized_prices.plot(ax=ax, linewidth=2, alpha=0.7)

# Date span actually covered by the data; assumes the index holds dates
# (pd.DatetimeIndex is a no-op when it already is a DatetimeIndex)
date_index = pd.DatetimeIndex(normalized_prices.index)
data_start, data_end = date_index.min(), date_index.max()

for i, (start, end, label) in enumerate(crisis_periods):
    # Parse the bounds as timestamps (matching the index timezone, if any) and
    # clip them to the data range so shading never stretches the x-axis
    span_start = max(pd.Timestamp(start, tz=date_index.tz), data_start)
    span_end = min(pd.Timestamp(end, tz=date_index.tz), data_end)
    if span_start > span_end:
        # Period lies entirely outside the loaded data: nothing to shade
        continue
    ax.axvspan(
        span_start,
        span_end,
        alpha=0.2,
        color=crisis_colors[i % len(crisis_colors)],
        label=label,
    )

ax.set_title('Asset Prices with Crisis Periods Highlighted', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Normalized Price', fontsize=12)
ax.legend(loc='best', fontsize=10)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()