# FirstRateData Explorer 📊

Welcome to your interactive financial data analysis environment! This notebook provides a comprehensive toolkit for exploring and analyzing high-frequency stock market data from FirstRateData.

## 🎯 What You Can Do Here:
- **Load and explore metadata** (14K+ stock symbols, company profiles, S&P 500 changes)
- **Access 1-minute resolution stock data** (2000-2024, OHLCV format)
- **Perform time series analysis** and visualizations
- **Compare stocks** and analyze market patterns
- **Build trading strategies** and backtest ideas

## 📊 Data Overview:
- **Time Range**: 2000-2024 (25+ years of data)
- **Frequency**: 1-minute bars
- **Coverage**: All US stocks with ~1.28M records per trading day
- **Timezone**: America/New_York
- **Format**: OHLCV (Open, High, Low, Close, Volume)

Let's start exploring! 🚀

## 1. Setup and Imports 🔧

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
from datetime import datetime, date, timedelta
import pytz

# Import our custom data access client
from src.aqf.data_access import FirstRateDataClient

# Silence library warnings so the notebook output stays readable
warnings.filterwarnings('ignore')

# pandas: do not truncate columns, and let the display width be auto-detected
for pandas_option in ('display.max_columns', 'display.width'):
    pd.set_option(pandas_option, None)

# matplotlib: seaborn-like theme and a larger default canvas
plt.style.use('seaborn-v0_8')
plt.rcParams.update({'figure.figsize': (12, 8)})

# plotly: select the 'notebook' renderer for fig.show()
import plotly.io as pio
pio.renderers.default = 'notebook'

# Confirm setup and record when this kernel session was initialised
startup_stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print("✅ All libraries imported successfully!")
print("📅 Notebook initialized on:", startup_stamp)

## 2. Initialize Data Client 🚀

In [None]:
# Create the data-access client using the "firstratedata" profile
client = FirstRateDataClient(profile_name="firstratedata")

# Ask the client which dates it reports as available and list each entry
print("\n🗂️  Data Availability Overview:")
dates_info = client.get_available_dates()
for entry in dates_info.items():
    key, value = entry
    print(f"   {key}: {value}")

## 3. Explore Metadata 📋

Let's start by understanding what stocks and companies are available in our dataset.

In [None]:
# Fetch every metadata table in a single call
metadata = client.get_all_metadata()

# Unpack the three tables into their own names for convenient access later
stock_meta, company_profiles, sp500_changes = (
    metadata['stock_metadata'],
    metadata['company_profiles'],
    metadata['sp500_changes'],
)

# Report how many rows each table holds
stock_meta_rows = len(stock_meta)
company_profile_rows = len(company_profiles)
sp500_change_rows = len(sp500_changes)

print("📊 Metadata Summary:")
print(f"   Stock metadata: {stock_meta_rows} entries")
print(f"   Company profiles: {company_profile_rows} entries")
print(f"   S&P 500 changes: {sp500_change_rows} entries")

In [None]:
# First rows of the per-symbol metadata table
print("🏢 Stock Metadata Sample:")
print(stock_meta.head())

# Derive calendar-year columns from each symbol's first/last date,
# then count how many symbols start in each year
print("\n📈 Data Coverage by Year:")
first_seen = stock_meta['first_date']
last_seen = stock_meta['last_date']
stock_meta['first_year'] = first_seen.dt.year
stock_meta['last_year'] = last_seen.dt.year

symbols_by_start_year = stock_meta.groupby('first_year')
year_coverage = symbols_by_start_year.size().sort_index()
print(year_coverage.head(10))

# First rows of the company profile table
print("\n🏭 Company Profiles Sample:")
print(company_profiles.head())

# Ten most common countries among the profiled companies
print("\n🌏 Companies by Country:")
country_tally = company_profiles['country'].value_counts()
country_counts = country_tally.head(10)
print(country_counts)

In [None]:
# Side-by-side horizontal bar charts: sectors (left) and industries (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Most common sectors (top 10) and industries (top 15) by company count
sector_counts = company_profiles['sector'].value_counts().head(10)
industry_counts = company_profiles['industry'].value_counts().head(15)

# Each panel: (axes, counts to draw, bar colour, panel title)
panel_specs = (
    (ax1, sector_counts, 'skyblue', 'Top 10 Sectors by Company Count'),
    (ax2, industry_counts, 'lightcoral', 'Top 15 Industries by Company Count'),
)
for axis, counts, bar_color, heading in panel_specs:
    counts.plot(kind='barh', ax=axis, color=bar_color)
    axis.set_title(heading)
    axis.set_xlabel('Number of Companies')

plt.tight_layout()
plt.show()

# Text version of the sector chart
print(f"\n📊 Sector breakdown:")
for sector, count in sector_counts.items():
    print(f"   {sector}: {count} companies")

## 4. Load and Explore Sample Data 📈

Now let's load some actual stock price data and see what we're working with!

In [None]:
# Trading day used by the single-day sections below (edit to explore another day)
SAMPLE_DATE = '2024-08-01'  # Thursday, Aug 1, 2024

print(f"📅 Loading data for {SAMPLE_DATE}...")
sample_data = client.load_day(SAMPLE_DATE)

# load_day signals "nothing available" by returning None
if sample_data is None:
    print("❌ No data found for this date. Try a different date!")
else:
    record_total = len(sample_data)
    print(f"\n✅ Successfully loaded {record_total:,} records")

    # Let the client print its own overview of the frame
    client.print_summary(sample_data)

    # Show the first ten rows with rich notebook rendering
    print(f"\n📋 Sample Records:")
    display(sample_data.head(10))

In [None]:
# Analyze trading patterns throughout the day.
#
# Changes from the earlier version of this cell:
#   * the hour of day is kept in a separate Series instead of adding 'hour' /
#     'minute' columns to the shared sample_data frame, so re-running this cell
#     does not alter what other cells see;
#   * labels say "ET" rather than "EST": the notebook states the data timezone is
#     America/New_York, which observes daylight time on the default sample date;
#   * the summary prints the actual first/last bar times instead of presenting
#     the truncated min/max hour as the market open/close;
#   * an empty frame is skipped (idxmax would raise on it).
if sample_data is not None and len(sample_data) > 0:
    # Hour of day for every bar, aligned with sample_data's index
    bar_hour = sample_data['timestamp'].dt.hour.rename('hour')

    # Trading activity by hour: summed volume and number of bars
    hourly_volume = sample_data['volume'].groupby(bar_hour).sum()
    hourly_trades = sample_data.groupby(bar_hour).size()

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

    # Volume by hour
    hourly_volume.plot(kind='bar', ax=ax1, color='steelblue', alpha=0.7)
    ax1.set_title('Total Trading Volume by Hour of Day')
    ax1.set_xlabel('Hour (ET)')
    ax1.set_ylabel('Volume')
    ax1.tick_params(axis='x', rotation=0)

    # Number of bars (price updates) by hour
    hourly_trades.plot(kind='bar', ax=ax2, color='darkgreen', alpha=0.7)
    ax2.set_title('Number of Price Updates by Hour of Day')
    ax2.set_xlabel('Hour (ET)')
    ax2.set_ylabel('Number of Updates')
    ax2.tick_params(axis='x', rotation=0)

    plt.tight_layout()
    plt.show()

    # Earliest and latest bar timestamps present in the loaded day
    first_bar = sample_data['timestamp'].min()
    last_bar = sample_data['timestamp'].max()

    print("🕘 Trading Hours Analysis:")
    print(f"   First bar: {first_bar:%H:%M} ET")
    print(f"   Last bar: {last_bar:%H:%M} ET")
    print(f"   Peak volume hour: {hourly_volume.idxmax()}:00 ET")
    print(f"   Peak activity hour: {hourly_trades.idxmax()}:00 ET")

## 5. Individual Stock Analysis 📊

Let's dive deep into specific stocks and create some beautiful visualizations!

In [None]:
# Choose a stock to analyze (change this to any ticker you're interested in)
TICKER = 'AAPL'  # Apple Inc.

print(f"📈 Analyzing {TICKER} for {SAMPLE_DATE}")

if sample_data is not None:
    # Filter data for the chosen ticker
    ticker_data = sample_data[sample_data['ticker'] == TICKER].copy()

    if len(ticker_data) == 0:
        print(f"❌ No data found for {TICKER} on {SAMPLE_DATE}")
        print("Available tickers:")
        available_tickers = sorted(sample_data['ticker'].unique())
        print(available_tickers[:20], "... (showing first 20)")

        # Fall back to the first available ticker. ticker_data must be rebuilt
        # for the new symbol; otherwise it stays empty and both the summary
        # below and the chart cell that follows would be skipped.
        # Guarded because indexing [0] on an empty list raises IndexError.
        if available_tickers:
            TICKER = available_tickers[0]
            print(f"\n🔄 Switching to {TICKER} for analysis...")
            ticker_data = sample_data[sample_data['ticker'] == TICKER].copy()

    if len(ticker_data) > 0:
        print(f"✅ Found {len(ticker_data)} price points for {TICKER}")

        # Sort by timestamp so the first/last rows are the day's open/close bars
        ticker_data = ticker_data.sort_values('timestamp').reset_index(drop=True)

        # Calculate some basic metrics
        day_open = ticker_data.iloc[0]['open']
        day_close = ticker_data.iloc[-1]['close']
        day_high = ticker_data['high'].max()
        day_low = ticker_data['low'].min()
        total_volume = ticker_data['volume'].sum()

        print(f"\n📊 {TICKER} Daily Summary:")
        print(f"   Open: ${day_open:.2f}")
        print(f"   Close: ${day_close:.2f}")
        print(f"   High: ${day_high:.2f}")
        print(f"   Low: ${day_low:.2f}")
        print(f"   Change: ${day_close - day_open:.2f} ({((day_close - day_open) / day_open * 100):+.2f}%)")
        print(f"   Volume: {total_volume:,.0f}")

        # Display sample data
        display(ticker_data.head())

In [None]:
# Create an interactive candlestick chart with a volume panel underneath
if sample_data is not None and len(ticker_data) > 0:

    # Resample to 5-minute OHLCV bars for better visualization.
    # Intervals with no trades have NaN prices and are dropped.
    ticker_data_5min = ticker_data.set_index('timestamp').resample('5min').agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }).dropna()

    # Two stacked panels sharing the time axis: price (top), volume (bottom).
    # row_heights is listed top-to-bottom, so the price panel gets 70% of the
    # height. The legacy `row_width` kwarg is applied bottom-to-top, which made
    # the volume panel the large one.
    fig = make_subplots(
        rows=2, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.1,
        subplot_titles=(f'{TICKER} Price Chart (5-min intervals)', 'Volume'),
        row_heights=[0.7, 0.3]
    )

    # Add candlestick chart
    fig.add_trace(
        go.Candlestick(
            x=ticker_data_5min.index,
            open=ticker_data_5min['open'],
            high=ticker_data_5min['high'],
            low=ticker_data_5min['low'],
            close=ticker_data_5min['close'],
            name=TICKER
        ),
        row=1, col=1
    )

    # Add volume bars; row=2 places the trace on the second subplot's axes,
    # so no explicit yaxis assignment is needed on the trace itself
    fig.add_trace(
        go.Bar(
            x=ticker_data_5min.index,
            y=ticker_data_5min['volume'],
            name='Volume',
            marker_color='rgba(158,202,225,0.8)'
        ),
        row=2, col=1
    )

    # Update layout
    fig.update_layout(
        title=f'{TICKER} Intraday Trading - {SAMPLE_DATE}',
        yaxis_title='Price ($)',
        yaxis2_title='Volume',
        xaxis_rangeslider_visible=False,
        height=600,
        showlegend=False
    )

    fig.show()

    print(f"📈 Interactive chart shows {TICKER}'s intraday movement with 5-minute intervals")

## 6. Multi-Day Analysis 📅

Let's load multiple days of data to see longer-term patterns and trends!

In [None]:
# Load several consecutive days of data (adjust dates as needed)
START_DATE = '2024-08-01'  # Thursday
END_DATE = '2024-08-05'    # Monday (next week)

print(f"📅 Loading data from {START_DATE} to {END_DATE}...")
multi_day_data = client.load_date_range(START_DATE, END_DATE)

# load_date_range signals "nothing available" by returning None
if multi_day_data is None:
    print("❌ No data found for this date range. Try different dates!")
else:
    print(f"\n✅ Successfully loaded multi-day dataset!")

    # Calendar date of each bar, used as the grouping key
    multi_day_data['date'] = multi_day_data['timestamp'].dt.date

    # Per-day totals: summed volume, distinct tickers, mean close price
    daily_summary = multi_day_data.groupby('date').agg(
        volume=('volume', 'sum'),
        unique_tickers=('ticker', 'nunique'),
        avg_price=('close', 'mean'),
    )

    print(f"\n📊 Daily Summary:")
    display(daily_summary)

    # Two stacked bar charts: (axes, column, colour, title, y-axis label)
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
    chart_specs = (
        (ax1, 'volume', 'steelblue', 'Total Daily Volume', 'Volume'),
        (ax2, 'unique_tickers', 'darkgreen', 'Number of Active Tickers per Day', 'Unique Tickers'),
    )
    for axis, column, bar_color, heading, y_label in chart_specs:
        daily_summary[column].plot(kind='bar', ax=axis, color=bar_color, alpha=0.7)
        axis.set_title(heading)
        axis.set_ylabel(y_label)
        axis.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

## 7. Stock Comparison 🔄

Compare multiple stocks side by side to spot patterns and correlations!

In [None]:
# Compare multiple popular stocks on the sample day
COMPARISON_TICKERS = ['AAPL', 'GOOGL', 'MSFT', 'TSLA', 'NVDA']

if sample_data is not None:
    print(f"📊 Comparing {len(COMPARISON_TICKERS)} stocks for {SAMPLE_DATE}")

    comparison_data = []

    for ticker in COMPARISON_TICKERS:
        # Sort chronologically so iloc[0] / iloc[-1] really are the first and
        # last bars of the day (same approach as the single-ticker section)
        ticker_subset = sample_data[sample_data['ticker'] == ticker].sort_values('timestamp')
        if len(ticker_subset) > 0:
            # Calculate daily metrics
            day_open = ticker_subset['open'].iloc[0]
            day_close = ticker_subset['close'].iloc[-1]
            day_high = ticker_subset['high'].max()
            day_low = ticker_subset['low'].min()
            total_volume = ticker_subset['volume'].sum()
            daily_return = (day_close - day_open) / day_open * 100

            comparison_data.append({
                'Ticker': ticker,
                'Open': day_open,
                'Close': day_close,
                'High': day_high,
                'Low': day_low,
                'Volume': total_volume,
                'Daily_Return_%': daily_return,
                'Price_Range_%': (day_high - day_low) / day_open * 100
            })

    if comparison_data:
        comparison_df = pd.DataFrame(comparison_data)

        print(f"\n📈 Stock Comparison Summary:")
        display(comparison_df.round(2))

        # Visualize the comparison: one bar chart per metric, indexed by ticker
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
        by_ticker = comparison_df.set_index('Ticker')
        panel_specs = (
            (ax1, 'Daily_Return_%', 'steelblue', 'Daily Returns (%)'),
            (ax2, 'Volume', 'darkgreen', 'Total Volume'),
            (ax3, 'Price_Range_%', 'darkorange', 'Intraday Price Range (%)'),
            (ax4, 'Close', 'purple', 'Closing Prices ($)'),
        )
        for axis, column, bar_color, heading in panel_specs:
            by_ticker[column].plot(kind='bar', ax=axis, color=bar_color, alpha=0.7)
            axis.set_title(heading)
            axis.tick_params(axis='x', rotation=45)

        # Zero line on the returns panel separates gainers from losers
        ax1.axhline(y=0, color='black', linestyle='-', alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Find best/worst performers
        best_performer = comparison_df.loc[comparison_df['Daily_Return_%'].idxmax()]
        worst_performer = comparison_df.loc[comparison_df['Daily_Return_%'].idxmin()]
        highest_volume = comparison_df.loc[comparison_df['Volume'].idxmax()]

        # The '+' format flag prints the real sign; a hard-coded "+" produced
        # "+-1.23%" whenever even the best performer closed down
        print(f"\n🏆 Daily Highlights:")
        print(f"   Best performer: {best_performer['Ticker']} ({best_performer['Daily_Return_%']:+.2f}%)")
        print(f"   Worst performer: {worst_performer['Ticker']} ({worst_performer['Daily_Return_%']:+.2f}%)")
        print(f"   Highest volume: {highest_volume['Ticker']} ({highest_volume['Volume']:,.0f})")
    else:
        print("❌ None of the selected tickers found in the data")
        print("Available tickers:", sorted(sample_data['ticker'].unique())[:10])

## 8. Custom Analysis Playground 🎮

This is your space to experiment! Use the cells below to try your own analysis ideas.

In [None]:
# 🎯 EXPERIMENT 1: Find stocks with unusual volume
# Rank tickers by total traded volume on the sample date and chart the leaders

if sample_data is not None:
    print("🔍 Finding stocks with unusual volume patterns...")

    # Per-ticker volume statistics: total, mean per bar, and number of bars
    volume_by_ticker = sample_data.groupby('ticker')['volume']
    volume_stats = volume_by_ticker.agg(
        total_volume='sum',
        avg_volume_per_minute='mean',
        data_points='count',
    ).reset_index()

    # Keep the 15 tickers with the largest total volume
    ranked_by_volume = volume_stats.sort_values('total_volume', ascending=False)
    high_volume_stocks = ranked_by_volume.head(15)

    print(f"📊 Top 15 stocks by volume on {SAMPLE_DATE}:")
    display(high_volume_stocks)

    # Bar chart of the leaders
    plt.figure(figsize=(12, 6))
    plt.bar(
        x=high_volume_stocks['ticker'],
        height=high_volume_stocks['total_volume'],
        color='steelblue',
        alpha=0.7,
    )
    plt.title(f'Top 15 Stocks by Total Volume - {SAMPLE_DATE}')
    plt.xlabel('Ticker')
    plt.ylabel('Total Volume')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Follow-up suggestions, printed whether or not data was loaded
follow_up_ideas = (
    "\n💡 Ideas to try next:",
    "   - Compare volume patterns across different days",
    "   - Find correlations between volume and price movements",
    "   - Identify stocks with high intraday volatility",
    "   - Calculate moving averages for technical analysis",
)
for idea_line in follow_up_ideas:
    print(idea_line)

In [None]:
# 🎯 EXPERIMENT 2: Your custom analysis here!
# Example ideas:
# - Calculate technical indicators (RSI, MACD, etc.)
# - Find stocks with specific patterns
# - Analyze sector performance
# - Build a simple trading strategy

# Your code here...

## 9. Helper Functions & Quick Reference 🛠️

Here are some useful functions and code snippets for your analysis!

In [None]:
# 🔧 HELPER FUNCTIONS

def quick_stock_info(ticker_symbol, date_str=SAMPLE_DATE):
    """Print a one-day open/close/high/low/change/volume summary for a ticker.

    Args:
        ticker_symbol: Ticker to look up, e.g. 'AAPL'.
        date_str: Date passed to ``client.get_ticker_data``; defaults to the
            value SAMPLE_DATE had when this cell was run.

    Returns:
        The frame returned by ``client.get_ticker_data`` (unchanged), or None
        when the client returns None or an empty frame.
    """
    data = client.get_ticker_data(ticker_symbol, date_str)

    # An empty frame would raise IndexError on iloc[0] below; treat it like
    # "no data", with the same message plot_intraday_chart prints
    if data is None or len(data) == 0:
        print(f"No data found for {ticker_symbol} on {date_str}")
        return None

    # Order chronologically so the first/last rows are the opening/closing bars
    ordered = data.sort_values('timestamp')
    day_open = ordered['open'].iloc[0]
    day_close = ordered['close'].iloc[-1]
    day_high = ordered['high'].max()
    day_low = ordered['low'].min()
    total_volume = ordered['volume'].sum()

    print(f"📊 {ticker_symbol} on {date_str}:")
    print(f"   Open: ${day_open:.2f}")
    print(f"   Close: ${day_close:.2f}")
    print(f"   High: ${day_high:.2f}")
    print(f"   Low: ${day_low:.2f}")
    print(f"   Change: {((day_close - day_open) / day_open * 100):+.2f}%")
    print(f"   Volume: {total_volume:,.0f}")
    return data

def plot_intraday_chart(ticker_symbol, date_str=SAMPLE_DATE, interval_minutes=5):
    """Plot close price (with high-low band) and volume for one ticker and day.

    Args:
        ticker_symbol: Ticker to plot, e.g. 'TSLA'.
        date_str: Date passed to ``client.get_ticker_data``; defaults to the
            value SAMPLE_DATE had when this cell was run.
        interval_minutes: Bar size, in minutes, used when resampling.

    Returns:
        The resampled OHLCV frame indexed by timestamp, or None when the
        client returns None or an empty frame.
    """
    data = client.get_ticker_data(ticker_symbol, date_str)
    if data is None or len(data) == 0:
        print(f"No data found for {ticker_symbol} on {date_str}")
        return None

    # Resample to the requested interval; intervals without trades have NaN
    # prices and are dropped
    data_resampled = data.set_index('timestamp').resample(f'{interval_minutes}min').agg({
        'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'
    }).dropna()

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8), sharex=True)

    # Price chart
    ax1.plot(data_resampled.index, data_resampled['close'], linewidth=2, label='Close Price')
    ax1.fill_between(data_resampled.index, data_resampled['low'], data_resampled['high'],
                     alpha=0.3, label='High-Low Range')
    ax1.set_title(f'{ticker_symbol} - {date_str} ({interval_minutes}-min intervals)')
    ax1.set_ylabel('Price ($)')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Volume chart. Bar width is given in x-axis data units, which are days on
    # a date axis, so the default of 0.8 would make every bar ~19 hours wide and
    # merge them into one block. Use 80% of one interval instead.
    bar_width_days = 0.8 * interval_minutes / (24 * 60)
    ax2.bar(data_resampled.index, data_resampled['volume'], width=bar_width_days,
            alpha=0.7, color='steelblue')
    ax2.set_ylabel('Volume')
    ax2.set_xlabel('Time')
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    return data_resampled

# Sentinel meaning "use whatever `sample_data` is bound to when the function is
# called". A plain `data=sample_data` default is evaluated once, when the def
# runs, and keeps pointing at the old frame after sample_data is reloaded.
_CURRENT_SAMPLE_DATA = object()


def find_active_stocks(data=_CURRENT_SAMPLE_DATA, top_n=20):
    """Find the most actively traded stocks in a dataset.

    Args:
        data: DataFrame with 'ticker', 'timestamp', 'open', 'close' and
            'volume' columns. When omitted, the notebook-level ``sample_data``
            is looked up at call time. Passing None returns None.
        top_n: Number of tickers to return.

    Returns:
        DataFrame indexed by ticker with 'Total_Volume' and 'Daily_Return_%'
        (first open to last close, in percent, rounded to 2 decimals), sorted
        by volume descending and limited to ``top_n`` rows; or None when there
        is no data.
    """
    if data is _CURRENT_SAMPLE_DATA:
        data = globals().get('sample_data')
    if data is None:
        return None

    # Order bars chronologically so 'first'/'last' within each ticker are the
    # opening and closing bars regardless of the input row order
    ordered = data.sort_values('timestamp', kind='stable')
    per_ticker = ordered.groupby('ticker').agg(
        Total_Volume=('volume', 'sum'),
        first_open=('open', 'first'),
        last_close=('close', 'last'),
    )

    # Open-to-close return, matching how the other sections define daily return
    per_ticker['Daily_Return_%'] = (
        (per_ticker['last_close'] - per_ticker['first_open']) / per_ticker['first_open'] * 100
    )

    activity = per_ticker[['Total_Volume', 'Daily_Return_%']].round(2)
    return activity.sort_values('Total_Volume', ascending=False).head(top_n)

# 📝 QUICK REFERENCE COMMANDS:
# Each entry pairs a caption line with the command it describes; entries are
# printed in order, each preceded by a blank line.
quick_reference = (
    ("# Load data for any date:", "data = client.load_day('2024-07-15')"),
    ("# Load a date range (max 30 days):", "data = client.load_date_range('2024-08-01', '2024-08-05')"),
    ("# Get info for a specific stock:", "apple_data = quick_stock_info('AAPL')"),
    ("# Plot intraday chart:", "plot_intraday_chart('TSLA')"),
    ("# Find most active stocks:", "active = find_active_stocks(sample_data)"),
    ("# Get all metadata:", "meta = client.get_all_metadata()"),
    ("# Check available dates:", "dates = client.get_available_dates()"),
)

print("🚀 QUICK COMMANDS TO TRY:")
print("=" * 40)
for caption, command in quick_reference:
    print()
    print(caption)
    print(command)