# Time of Day Returns Analysis

This notebook analyzes trade returns bucketed by time of day to identify patterns in intraday performance.

## Features

- Bucket trades by time of day (configurable intervals)
- Box plots showing return distributions for each time bucket
- Separate visualization for each run configuration
- Identify optimal and suboptimal trading hours
- Memory-efficient matplotlib visualizations

## Usage

1. Configure stock symbol and time bucket interval
2. Scan and filter runs
3. Load selected runs
4. Visualize returns by time of day

Use this notebook to understand if the strategy performs better during specific trading hours and to potentially adjust trading windows.

In [None]:
# Import required libraries
import sys
from pathlib import Path

# Make the project's `analysis` folder importable.
# Walk upward from the current working directory until a directory named
# 'quiescence' is reached; if there is none, the walk stops at the filesystem root.
project_root = Path.cwd()
while project_root.name != 'quiescence' and project_root.parent != project_root:
    project_root = project_root.parent
analysis_path = project_root / 'analysis'

if not analysis_path.is_dir():
    # Report the path problem up front: without this, a wrong working directory
    # only shows up as a failure of the `utilities` import below, with no hint
    # about which folder was expected.
    print(
        f"Warning: analysis folder not found at {analysis_path} "
        f"(searched upward from {Path.cwd()}); importing `utilities` may fail.",
        file=sys.stderr,
    )
elif str(analysis_path) not in sys.path:
    sys.path.insert(0, str(analysis_path))

import pandas as pd
import matplotlib.pyplot as plt

# Import custom utility functions from analysis folder
from utilities import scan_backtest_runs, load_run_data, create_runs_summary_dataframe, convert_utc_to_ny

## 1. Configuration

Set the stock symbol, paths, and time bucket interval for analysis.

In [None]:
# === CONFIGURATION ===
# Ticker whose backtest runs are analyzed.
stock_symbol = "MSFT"

# Storage layout: backtest runs live in the "backtest" folder under the storage root.
STORAGE_ROOT = Path("/data/quiescence/")
BACKTEST_ROOT = STORAGE_ROOT.joinpath("backtest")

# Width of each time-of-day bucket, written as a pandas frequency string
# (it is passed as `freq=` to pd.date_range when the buckets are generated).
TIME_BUCKET_INTERVAL = "15min"  # Options: "10min", "15min", "30min", "1h"

# Echo the active settings, one per line.
print(
    f"Analyzing stock: {stock_symbol}",
    f"Time bucket interval: {TIME_BUCKET_INTERVAL}",
    f"Backtest runs root: {BACKTEST_ROOT}",
    sep="\n",
)

## 2. Scan and Filter Runs

Scan all available runs and create a summary DataFrame for filtering.

In [None]:
# Scan the backtest root for runs of the configured stock
# (metadata only at this stage, so this step is fast).
runs_metadata = scan_backtest_runs(BACKTEST_ROOT, stock_symbol)

# Tabulate the scanned runs so they can be filtered and compared.
df_summary = create_runs_summary_dataframe(runs_metadata)

# Report how many runs were found, then print the full table without the index.
run_count = len(df_summary)
summary_table = df_summary.to_string(index=False)
print(f"\nFound {run_count} runs for {stock_symbol}\n")
print(summary_table)

## 3. Select Runs to Load

Choose which runs to analyze.

In [None]:
# Option 1: Load ALL runs
# runs_data = [load_run_data(run) for run in runs_metadata]

# Option 2: Load specific runs by Run number
# NOTE(review): Run numbers are treated as 1-based positions in runs_metadata
# (Option 3 below makes the same assumption) — confirm against the summary table.
selected_run_numbers = [11, 19, 23, 34, 42, 47]

# Split the request into numbers that exist and numbers that do not, so that
# out-of-range selections are reported instead of silently ignored.
valid_run_numbers = [num for num in selected_run_numbers if 0 < num <= len(runs_metadata)]
skipped_run_numbers = [num for num in selected_run_numbers if num not in valid_run_numbers]
if skipped_run_numbers:
    print(f"Warning: skipping run numbers outside 1..{len(runs_metadata)}: {skipped_run_numbers}")

runs_data = [load_run_data(runs_metadata[run_num - 1]) for run_num in valid_run_numbers]

# Option 3: Filter using DataFrame conditions
# filtered_df = df_summary[df_summary['Frequency'] == '1-MINUTE']
# selected_indices = filtered_df['Run'].values - 1
# runs_data = [load_run_data(runs_metadata[i]) for i in selected_indices]

print(f"Loaded {len(runs_data)} runs")

## 4. Time of Day Returns Analysis

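Generate the time-of-day bucket boundaries for a full day at the configured interval. The sections below assign each trade to one of these buckets and visualize the return distributions.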

In [None]:
# Build the time-of-day bucket boundaries for one full day:
# evenly spaced timestamps at the configured interval, reduced to their
# clock-time component.
bucket_timestamps = pd.date_range(start="00:00", end="23:59:59", freq=TIME_BUCKET_INTERVAL)
time_buckets = bucket_timestamps.time

# Show how many buckets exist and preview the first few.
bucket_count = len(time_buckets)
first_five = list(map(str, time_buckets[:5]))
print(f"Created {bucket_count} time buckets from 00:00 to 23:59")
print(f"Time buckets: {first_five}... (showing first 5)")

## 5. Visualize Returns by Time of Day

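Create box plots for each run showing return distributions across time buckets. Trades are grouped by their exit (close) time converted to New York local time, and each bucket is labeled with its end time: a trade is placed in the first bucket boundary at or after its exit time.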

In [None]:
# Create a separate box plot for each run: realized returns grouped by the
# time-of-day bucket (New York local time) in which each position was closed.
for run in runs_data:
    # Build a short frequency label from the 2nd and 3rd dash-separated fields
    # of bar_type (e.g. fields "1" and "MINUTE" give "1min").
    # NOTE(review): assumes the leading instrument field contains no '-' — confirm.
    bar_type = run.get('bar_type', '')
    if bar_type:
        parts = bar_type.split('-')
        frequency = f"{parts[1]}{parts[2].lower()[0:3]}" if len(parts) >= 3 else 'Unknown'
    else:
        frequency = 'Unknown'

    ticker = run["ticker"]
    max_pos_bars = run.get('max_position_bars', 'N/A')
    df = run["positions_report"]

    # Nothing to bucket or draw for a run without positions.
    if df is None or df.empty:
        print(f"{ticker} | {frequency} | Max Pos Bars: {max_pos_bars}: no positions to plot")
        continue

    # Dictionary: {time_bucket: [returns]}
    bucketed_returns = {bucket: [] for bucket in time_buckets}

    # Walk the two needed columns in lockstep and bucket each position by exit time.
    for ts_closed, ret in zip(df["ts_closed"], df["realized_return"]):
        # ts_closed is either a datetime string or a numeric nanosecond timestamp;
        # convert_utc_to_ny is given seconds in both cases.
        if isinstance(ts_closed, str):
            local_datetime = convert_utc_to_ny(pd.to_datetime(ts_closed).timestamp())
        else:
            local_datetime = convert_utc_to_ny(ts_closed / 10**9)

        timestamp = local_datetime.time()

        # Each bucket is identified by its upper edge: pick the first boundary at
        # or after the exit time. Exit times later than the last boundary belong
        # to the interval that ends at midnight, i.e. the first (00:00) bucket;
        # without this fallback those trades would be silently dropped.
        bucket = next((b for b in time_buckets if timestamp <= b), time_buckets[0])
        bucketed_returns[bucket].append(ret)

    # Keep only buckets that received at least one trade.
    plot_data = []
    plot_labels = []
    for bucket, returns in bucketed_returns.items():
        if returns:
            plot_data.append(returns)
            plot_labels.append(str(bucket))

    # --- Plot section ---
    fig, ax = plt.subplots(figsize=(14, 7))

    # Tick labels are applied on the axes afterwards rather than through
    # boxplot's `labels=` argument, which newer Matplotlib releases deprecate.
    bp = ax.boxplot(plot_data, patch_artist=True, showmeans=False, widths=0.6)

    # Style the boxes
    for patch in bp['boxes']:
        patch.set_facecolor('steelblue')
        patch.set_alpha(0.7)

    # Format the plot
    ax.set_title(f"{ticker} | {frequency} | Max Pos Bars: {max_pos_bars} | Time of Day Returns",
                fontsize=12, fontweight='bold')
    ax.set_xlabel("Time Bucket (Local NY Time)", fontsize=11)
    ax.set_ylabel("Realized Returns", fontsize=11)
    ax.set_ylim(-0.002, 0.002)  # Adjust this range as needed
    ax.grid(True, alpha=0.3, axis='y')
    ax.axhline(y=0, color='red', linestyle='--', linewidth=0.5, alpha=0.5)

    # Boxes sit at x = 1..N; label them with the bucket times, rotated for readability.
    ax.set_xticks(list(range(1, len(plot_labels) + 1)))
    ax.set_xticklabels(plot_labels, rotation=45, ha='right')

    fig.tight_layout()
    plt.show()
    # Release the figure so memory does not grow with the number of runs.
    plt.close(fig)


## 6. Summary Statistics by Time Bucket

Calculate the trade count and the mean, median, minimum, and maximum return for each time bucket, then identify the best and worst trading hours by mean return.

In [None]:
# Calculate per-bucket summary statistics for each run and report the buckets
# with the highest and lowest mean return.
# NOTE: the "Run N" printed below is the position within runs_data (1-based),
# which is not necessarily the Run number shown in the summary table.
for run_idx, run in enumerate(runs_data):
    # Build the frequency label from the 2nd and 3rd dash-separated fields of bar_type.
    bar_type = run.get('bar_type', '')
    if bar_type:
        parts = bar_type.split('-')
        frequency = f"{parts[1]}-{parts[2]}" if len(parts) >= 3 else 'Unknown'
    else:
        frequency = 'Unknown'

    ticker = run["ticker"]
    max_pos_bars = run.get('max_position_bars', 'N/A')
    df = run["positions_report"]

    # Dictionary: {time_bucket: [returns]}
    bucketed_returns = {bucket: [] for bucket in time_buckets}

    # Bucket returns by exit time (skipped entirely when there are no positions).
    if df is not None and not df.empty:
        for ts_closed, ret in zip(df["ts_closed"], df["realized_return"]):
            # ts_closed is either a datetime string or a numeric nanosecond
            # timestamp; convert_utc_to_ny is given seconds in both cases.
            if isinstance(ts_closed, str):
                local_datetime = convert_utc_to_ny(pd.to_datetime(ts_closed).timestamp())
            else:
                local_datetime = convert_utc_to_ny(ts_closed / 10**9)

            timestamp = local_datetime.time()

            # Each bucket is identified by its upper edge: pick the first boundary
            # at or after the exit time. Exit times later than the last boundary
            # belong to the interval ending at midnight, i.e. the first (00:00)
            # bucket; without this fallback those trades would be silently dropped.
            bucket = next((b for b in time_buckets if timestamp <= b), time_buckets[0])
            bucketed_returns[bucket].append(ret)

    # Calculate statistics, keeping them numeric; formatting happens only at print time.
    stats_data = []
    for bucket, returns in bucketed_returns.items():
        if not returns:
            continue
        bucket_returns = pd.Series(returns, dtype=float)
        stats_data.append({
            'Time Bucket': str(bucket),
            'Trades': len(returns),
            'Mean Return': bucket_returns.mean(),
            # True median: averages the two middle values for even-sized samples.
            'Median Return': bucket_returns.median(),
            'Min Return': bucket_returns.min(),
            'Max Return': bucket_returns.max(),
        })

    if stats_data:
        df_stats = pd.DataFrame(stats_data)
        print(f"\n{'='*90}")
        print(f"Run {run_idx + 1}: {ticker} | {frequency} | Max Pos Bars: {max_pos_bars}")
        print(f"{'='*90}\n")
        print(df_stats.to_string(index=False, float_format="{:.6f}".format))

        # Identify best and worst time buckets from the unrounded means.
        best_time = df_stats.loc[df_stats['Mean Return'].idxmax(), 'Time Bucket']
        worst_time = df_stats.loc[df_stats['Mean Return'].idxmin(), 'Time Bucket']

        print(f"\n📈 Best time bucket: {best_time}")
        print(f"📉 Worst time bucket: {worst_time}")
    else:
        print(f"\nRun {run_idx + 1}: No data available")
