# Risk Management Dashboard - Data Retrieval

This notebook demonstrates how to retrieve market data from Yahoo Finance and macroeconomic data from FRED for the risk management dashboard project.

In [None]:
# Install required packages listed in the requirements file one directory above
# the notebook's working directory. `%pip` (rather than `!pip`) installs into the
# environment of the running kernel.
%pip install -r ../requirements.txt

In [None]:
# Import standard libraries
import os
import json
%pip install pandas
import pandas as pd
import numpy as np
%pip install matplotlib
import matplotlib.pyplot as plt
%pip install seaborn
import seaborn as sns
from pathlib import Path
import datetime as dt

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 120)

# Configure plot styles.
# Try the current matplotlib name for the bundled seaborn style first, then the
# legacy name used by older releases. `plt.style.use` raises OSError for a style
# name it does not know, so only that error is treated as "style unavailable";
# anything else (e.g. KeyboardInterrupt) is allowed to propagate.
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    try:
        plt.style.use(style_name)
    except OSError:
        continue
    break
else:
    print("Could not set seaborn style, using default style instead")

# Apply seaborn's own theme on top; rcParams are set afterwards so that the
# figure size and font size chosen here are the final values.
sns.set_theme(style="darkgrid")
sns.set_palette('muted')
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 12

In [None]:
# Derive the project layout from the current working directory: the project
# root is taken to be the parent of the directory the notebook runs in.
working_dir = Path().resolve()
PROJECT_ROOT = working_dir.parents[0]
CONFIG_DIR = PROJECT_ROOT.joinpath("configs")
DATA_DIR = PROJECT_ROOT.joinpath("data")

# Raw downloads are written here; create the folder (and parents) on first use.
raw_data_dir = DATA_DIR.joinpath("raw")
raw_data_dir.mkdir(parents=True, exist_ok=True)

for label, location in (
    ("Project root", PROJECT_ROOT),
    ("Config directory", CONFIG_DIR),
    ("Data directory", DATA_DIR),
):
    print(f"{label}: {location}")

In [None]:
# Load data configuration.
# The encoding is passed explicitly so the file is decoded as UTF-8 regardless
# of the platform's default locale encoding.
with open(CONFIG_DIR / "data_config.json", 'r', encoding='utf-8') as f:
    data_config = json.load(f)

# Extract configuration parameters (used by the retrieval cells below)
config = data_config['data_retrieval']
start_date = config['start_date']
end_date = config['end_date']

# Display configuration
print("Data Retrieval Configuration:")
print(f"Start Date: {start_date}")
print(f"End Date: {end_date}")
print(f"Lookback Years: {config['lookback_years']}")

# Equity and bond entries share the same fields (description, ticker, weight),
# so both sections are printed by one loop.
for heading, section in (("Equity Indices", 'equity_indices'), ("Bond Indices", 'bond_indices')):
    print(f"\n{heading}:")
    for idx in config[section]:
        print(f"  - {idx['description']} ({idx['ticker']}): {idx['weight']*100:.1f}%")

# Macro indicators are identified by series id and carry a frequency instead of a weight
print("\nMacroeconomic Indicators:")
for idx in config['macro_indicators']:
    print(f"  - {idx['description']} ({idx['series_id']}): {idx['frequency']}")

## Load Configuration

The cell above loads the data retrieval configuration (date range, equity and bond indices, and macroeconomic indicators) from `configs/data_config.json`.

## Import Data Retrieval Module

Now, let's import the data retrieval functions from our module.

In [None]:
# Add the project root to the path
import sys
sys.path.append(str(PROJECT_ROOT))
%pip install requests 

# Import data retrieval functions
from src.data.retrieve_data import (
    load_api_keys,
    get_market_data,
    get_fred_data,
    load_crisis_periods,
    get_data_for_date_range
)

## Retrieve Market Data

Let's retrieve market data for our equity and bond indices.

In [None]:
# Get equity and bond tickers
equity_tickers = [item['ticker'] for item in config['equity_indices']]
bond_tickers = [item['ticker'] for item in config['bond_indices']]
all_tickers = equity_tickers + bond_tickers

# Retrieve market data.
# Only the retrieval call is inside the `try`: a problem while *displaying*
# data that was retrieved successfully must not trigger the Yahoo fallback.
try:
    print(f"Retrieving market data for {all_tickers}...")
    market_data = get_market_data(
        tickers=all_tickers,
        start_date=start_date,
        end_date=end_date
    )
except Exception as e:
    print(f"Error retrieving market data: {e}")
    print("Attempting to use Yahoo Finance directly...")
    try:
        # Imported lazily: yfinance is only needed on this fallback path
        import yfinance as yf

        # Map tickers to Yahoo format; tickers without a mapping are passed through unchanged
        ticker_map = {
            "SPX": "^GSPC",  # S&P 500
            "US10YT=RR": "^TNX"  # 10-Year Treasury Yield
        }
        yahoo_tickers = [ticker_map.get(ticker, ticker) for ticker in all_tickers]

        # Download data
        data = yf.download(yahoo_tickers, start=start_date, end=end_date, progress=False)

        # An empty frame is treated as a failure so that no empty CSV is written
        # and `market_data` is not bound to a frame the plotting cells cannot use.
        if data.empty:
            raise ValueError(f"no rows returned for {yahoo_tickers}")

        # Save data to CSV
        data.to_csv(raw_data_dir / f"yahoo_data_{dt.datetime.now().strftime('%Y%m%d')}.csv")

        print("Market data retrieved from Yahoo Finance successfully.")
        market_data = data

        # Display the first few rows of the data
        print("\nFirst few rows of Yahoo Finance data:")
        display(market_data.head())
    except Exception as e2:
        print(f"Error retrieving market data from Yahoo Finance: {e2}")
else:
    print("Market data retrieved successfully.")

    # Display the first few rows of the data
    print("\nFirst few rows of market data:")
    display(market_data.head())

    # Display data info: top-level field names for MultiIndex columns,
    # plain column names otherwise (flat frames have no `.levels`).
    print("\nMarket data info:")
    if isinstance(market_data.columns, pd.MultiIndex):
        top_level_columns = market_data.columns.levels[0]
    else:
        top_level_columns = market_data.columns
    for col in top_level_columns:
        print(f"  - {col}")

## Retrieve Macroeconomic Data

Now, let's retrieve macroeconomic data from FRED.

In [None]:
# Get macro indicators
macro_indicators = [item['series_id'] for item in config['macro_indicators']]

# Retrieve macroeconomic data.
# Only the retrieval call is inside the `try`, so a failure while printing the
# summary of successfully retrieved data does not trigger the fallback.
try:
    print(f"Retrieving macroeconomic data for {macro_indicators}...")
    macro_data = get_fred_data(
        series_ids=macro_indicators,
        start_date=start_date,
        end_date=end_date
    )
except Exception as e:
    print(f"Error retrieving macroeconomic data: {e}")
    print("Attempting to use pandas_datareader as fallback...")

    try:
        # Imported lazily: only needed on this fallback path
        import pandas_datareader as pdr

        # Collect each series separately; one failing series does not stop the others
        retrieved_series = {}
        for series_id in macro_indicators:
            try:
                series = pdr.get_data_fred(series_id, start=start_date, end=end_date)
                retrieved_series[series_id] = series[series_id]
                print(f"Retrieved {series_id} successfully.")
            except Exception as e_series:
                print(f"Error retrieving {series_id}: {e_series}")

        # Combine with an outer join on the date index. Assigning series one by one
        # into a DataFrame would align every later series to the first series'
        # dates and silently drop observations on any other date.
        if retrieved_series:
            macro_data = pd.concat(retrieved_series, axis=1).sort_index()
        else:
            macro_data = pd.DataFrame()

        if not macro_data.empty:
            print("Successfully retrieved some macro data with pandas_datareader.")
            display(macro_data.head())
        else:
            print("Failed to retrieve any macro data.")
    except Exception as e2:
        print(f"Error using pandas_datareader: {e2}")
else:
    print("Macroeconomic data retrieved successfully.")

    # Display the first few rows of the data
    print("\nFirst few rows of macroeconomic data:")
    display(macro_data.head())

    # Display data info
    print("\nMacroeconomic data info:")
    print(f"Shape: {macro_data.shape}")
    print(f"Columns: {macro_data.columns.tolist()}")
    print(f"Date range: {macro_data.index.min()} to {macro_data.index.max()}")

## Create Historical Crisis Periods File

Let's load the historical crisis periods that will be used for stress testing, make sure each one has a duration in days, and optionally add new periods to the file.

In [None]:
# Load existing crisis periods or create new ones
crisis_periods = load_crisis_periods()

# Single definition of the output location, shared by both save paths below
crisis_file = DATA_DIR / "external" / "crisis_periods.csv"

# Display crisis periods
print("Loaded Historical Crisis Periods:")
display(crisis_periods)

# Calculate duration in days if not already present.
# NOTE(review): relies on start_date/end_date already being datetime columns
# as returned by load_crisis_periods() - confirm against that function.
if 'duration_days' not in crisis_periods.columns:
    crisis_periods['duration_days'] = (crisis_periods['end_date'] - crisis_periods['start_date']).dt.days
    # Save updated data
    crisis_file.parent.mkdir(parents=True, exist_ok=True)
    crisis_periods.to_csv(crisis_file, index=False)
    print(f"Added duration_days and saved to {crisis_file}")

# Ask if user wants to add more crisis periods.
# When no stdin is available (e.g. a headless "Run All"), default to "no"
# instead of aborting the notebook.
# NOTE(review): assumes the kernel's "stdin not supported" error derives from
# NotImplementedError - confirm for the execution environment in use.
try:
    add_more = input("Do you want to add more crisis periods? (yes/no): ")
except (EOFError, NotImplementedError):
    print("Interactive input is not available; skipping manual crisis entry.")
    add_more = 'no'

if add_more.strip().lower() == 'yes':
    new_periods = []

    while True:
        name = input("Enter crisis name (or 'done' to finish): ")
        if name.lower() == 'done':
            break

        # Use dedicated names here: `start_date` / `end_date` hold the notebook-wide
        # retrieval range and are read again by later cells, so they must not be
        # overwritten with the dates of a single crisis.
        crisis_start = input("Enter start date (YYYY-MM-DD): ")
        crisis_end = input("Enter end date (YYYY-MM-DD): ")
        description = input("Enter description: ")

        new_periods.append({
            'name': name,
            'start_date': crisis_start,
            'end_date': crisis_end,
            'description': description
        })

    if new_periods:
        # Create DataFrame
        new_df = pd.DataFrame(new_periods)

        # Convert dates to datetime
        new_df['start_date'] = pd.to_datetime(new_df['start_date'])
        new_df['end_date'] = pd.to_datetime(new_df['end_date'])

        # Calculate duration in days
        new_df['duration_days'] = (new_df['end_date'] - new_df['start_date']).dt.days

        # Append to existing data
        crisis_periods = pd.concat([crisis_periods, new_df], ignore_index=True)

        # Save updated data
        crisis_file.parent.mkdir(parents=True, exist_ok=True)
        crisis_periods.to_csv(crisis_file, index=False)

        print(f"Added {len(new_periods)} new crisis periods.")
        display(crisis_periods)

## Data Visualization

Let's create some basic visualizations of the retrieved data.

In [None]:
# Plot market data: normalized price paths, then the distribution of daily returns.
if 'market_data' in locals():
    # Extract closing prices. For MultiIndex columns the first available field is
    # used, in order of preference: 'TRDPRC_1' (structured data), then Yahoo's
    # 'Adj Close', then 'Close' for Yahoo frames that carry no 'Adj Close' field.
    # Anything else (including flat columns) is used as-is.
    if isinstance(market_data.columns, pd.MultiIndex):
        available_fields = market_data.columns.get_level_values(0)
        price_field = next(
            (field for field in ('TRDPRC_1', 'Adj Close', 'Close') if field in available_fields),
            None,
        )
        prices = market_data[price_field] if price_field is not None else market_data
    else:
        prices = market_data

    # Rows with no observation for any asset carry no information
    prices = prices.dropna(how='all')

    if prices.empty:
        print("No price data available to plot.")
    else:
        # Normalize prices to 100 at each asset's first *available* observation.
        # Dividing by the raw first row would turn a whole column into NaN
        # whenever that asset has no price on the first date.
        base_prices = prices.bfill().iloc[0]
        normalized_prices = prices / base_prices * 100

        # Plot normalized prices
        fig, ax = plt.subplots(figsize=(14, 8))
        normalized_prices.plot(ax=ax)
        ax.set_title('Normalized Asset Prices (Base = 100)')
        ax.set_xlabel('Date')
        ax.set_ylabel('Normalized Price')
        ax.legend()
        ax.grid(True)
        plt.show()

        # Calculate returns
        returns = prices.pct_change().dropna()

        # Plot returns distributions, one density curve per asset
        fig, ax = plt.subplots(figsize=(14, 8))
        for col in returns.columns:
            sns.kdeplot(returns[col], label=col, ax=ax)

        ax.set_title('Return Distributions')
        ax.set_xlabel('Daily Return')
        ax.set_ylabel('Density')
        ax.legend()
        ax.grid(True)
        plt.show()

In [None]:
# Plot macroeconomic data, one subplot per indicator on a shared date axis.
if 'macro_data' in locals() and not macro_data.empty:
    # Map series id -> human-readable description from the config; a series
    # without a description entry falls back to its id.
    descriptions = {
        item['series_id']: item.get('description', item['series_id'])
        for item in config['macro_indicators']
    }

    # A single plt.subplots call creates the figure; no separate plt.figure()
    # beforehand, which would leave an empty extra figure in the output.
    # squeeze=False always returns a 2-D axes array, so the single-indicator
    # case needs no special handling.
    n_series = len(macro_data.columns)
    fig, axes = plt.subplots(n_series, 1, figsize=(14, 4 * n_series), sharex=True, squeeze=False)

    for ax, col in zip(axes[:, 0], macro_data.columns):
        description = descriptions.get(col, col)

        # Plot the data
        macro_data[col].plot(ax=ax)
        ax.set_title(f"{description} ({col})")
        ax.grid(True)

    plt.tight_layout()
    plt.show()

## Highlight Crisis Periods

Let's visualize the market data with crisis periods highlighted.

In [None]:
# Plot the first equity index with crisis periods highlighted
if 'market_data' in locals() and 'crisis_periods' in locals():
    # Extract closing prices. For MultiIndex columns the first available field is
    # used, in order of preference: 'TRDPRC_1' (structured data), then Yahoo's
    # 'Adj Close', then 'Close' for Yahoo frames that carry no 'Adj Close' field.
    # Anything else (including flat columns) is used as-is.
    if isinstance(market_data.columns, pd.MultiIndex):
        available_fields = market_data.columns.get_level_values(0)
        price_field = next(
            (field for field in ('TRDPRC_1', 'Adj Close', 'Close') if field in available_fields),
            None,
        )
        prices = market_data[price_field] if price_field is not None else market_data
    else:
        prices = market_data

    # Select the first equity index
    equity_symbol = equity_tickers[0]

    # Map to Yahoo symbol if needed
    if equity_symbol == "SPX" and "^GSPC" in prices.columns:
        equity_symbol = "^GSPC"

    # Missing observations are dropped so the base value used for normalization is a real price
    equity_prices = prices[equity_symbol].dropna() if equity_symbol in prices.columns else None

    if equity_prices is None or equity_prices.empty:
        # Create no figure at all rather than leaving an empty one behind
        print(f"No price data for {equity_symbol}; skipping crisis-period chart.")
    else:
        # Normalize prices to 100 at the start
        normalized_prices = equity_prices / equity_prices.iloc[0] * 100

        # x_compat=True keeps the x axis in plain matplotlib date units so that the
        # timestamp-based spans and labels below land on the same scale as the line.
        fig, ax = plt.subplots(figsize=(14, 8))
        normalized_prices.plot(ax=ax, x_compat=True)

        # Highlight crisis periods
        min_price = normalized_prices.min()
        max_price = normalized_prices.max()
        padding = (max_price - min_price) * 0.05
        data_start = normalized_prices.index.min()
        data_end = normalized_prices.index.max()

        # Plain local flag, reset on every run of the cell. Tracking this through a
        # variable stored in the kernel namespace would survive re-runs and suppress
        # the legend entry from the second run onwards.
        span_labelled = False

        for _, crisis in crisis_periods.iterrows():
            # Coerce so that dates stored as strings still compare against the index
            crisis_start = pd.Timestamp(crisis['start_date'])
            crisis_end = pd.Timestamp(crisis['end_date'])

            # Only draw crises that overlap the plotted date range
            if crisis_start < data_end and crisis_end > data_start:
                ax.axvspan(
                    crisis_start,
                    crisis_end,
                    alpha=0.2,
                    color='red',
                    # One shared legend entry; the individual names are drawn as text below
                    label=None if span_labelled else "Crisis period"
                )
                span_labelled = True

                # Add text label centred on the span, just above the highest price
                ax.text(
                    crisis_start + (crisis_end - crisis_start) / 2,
                    max_price + padding,
                    crisis['name'],
                    ha='center',
                    va='bottom',
                    fontsize=10,
                    rotation=45
                )

        ax.set_title(f'Normalized {equity_symbol} Price with Crisis Periods')
        ax.set_xlabel('Date')
        ax.set_ylabel('Normalized Price')
        # Extra headroom at the top leaves space for the rotated crisis labels
        ax.set_ylim(min_price - padding, max_price + padding*3)
        ax.legend()
        ax.grid(True)
        plt.show()

## Using the All-in-One Data Retrieval Function

The module provides a convenience function to get all data in one call.

In [None]:
# Get all data in one call using the convenience function.
# The date range is read from `config` rather than from the `start_date` /
# `end_date` variables: those names are also assigned by the interactive
# crisis-entry cell, so after that cell runs they may no longer hold the
# configured retrieval range.
range_start = config['start_date']
range_end = config['end_date']

try:
    print(f"Retrieving all data for period: {range_start} to {range_end}")
    all_market_data, all_macro_data, all_crisis_periods = get_data_for_date_range(
        start_date=range_start,
        end_date=range_end
    )

    print("Data retrieval completed successfully.")
    print(f"Market data shape: {all_market_data.shape}")
    print(f"Macro data shape: {all_macro_data.shape}")
    print(f"Crisis periods: {len(all_crisis_periods)}")

except Exception as e:
    print(f"Error retrieving all data: {e}")

## Next Steps

Now that we have retrieved the necessary data, the next steps are to:

1. Clean and process the data (Notebook 02_data_cleaning.ipynb)
2. Implement VaR models and risk analysis (Notebook 03_var_modeling.ipynb)
3. Develop stress testing scenarios (Notebook 05_stress_testing.ipynb)
4. Validate the models through backtesting (Notebook 06_backtesting.ipynb)