In [21]:
%load_ext autoreload
%autoreload 2

import time
from datetime import datetime, timezone
from utils.api import fetch_market_data
from utils.helpers import bin_orderbook, parse_future_name
import matplotlib.pyplot as plt
import pandas as pd
# Plotting: plotly graph objects and matplotlib animation
import plotly.graph_objects as go
from matplotlib.animation import FuncAnimation
import numpy as np


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Benchmark: fetch one day of futures order-book data and time the whole
# fetch + bin pipeline.
#
# The day is described by UTC-aware bounds [date, day_end). Every downloaded
# frame is passed through bin_orderbook via process_fn, so futures_df holds
# the binned result rather than the raw rows.
inst_family = 'BTC-USD'
date = datetime(2025, 1, 10, tzinfo=timezone.utc)
day_end = datetime(2025, 1, 11, tzinfo=timezone.utc)
time_step_minutes = 5

print(f"Benchmarking futures fetch for {date.date()}")
print(f"Instrument: {inst_family}")
print(f"Time binning: {time_step_minutes} minutes\n")

# perf_counter() is monotonic and high resolution; time.time() is a wall
# clock that can jump (NTP sync, DST), which would distort the measurement.
start_time = time.perf_counter()

# '6' is the module id (the verbose log echoes it as "module=6").
# NOTE(review): 'daily' presumably selects the request granularity — confirm
# against fetch_market_data.
futures_df = fetch_market_data(
    '6', 'FUTURES', inst_family, date, day_end, 'daily',
    verbose=True,
    process_fn=lambda df: bin_orderbook(df, f'{time_step_minutes}min')
)

end_time = time.perf_counter()
elapsed = end_time - start_time

# deep=True also counts the Python string objects held in object columns.
final_memory_mb = futures_df.memory_usage(deep=True).sum() / (1024 * 1024)

print(f"\n{'='*60}")
print("Benchmark Results:")
print(f"  Total time: {elapsed:.2f} seconds ({elapsed/60:.2f} minutes)")
print(f"  Final rows: {len(futures_df)}")
print(f"  Final memory: {final_memory_mb:.2f} MB")
print(f"{'='*60}")


Benchmarking futures fetch for 2025-01-10
Instrument: BTC-USD
Time binning: 5 minutes

Fetching FUTURES data (module=6) for BTC-USD
Period: 2025-01-10 00:00:00+00:00 to 2025-01-11 00:00:00+00:00
Split into 1 requests


Fetching data:   0%|          | 0/1 [00:00<?, ?it/s]

Fetch #1/1: 6 files found | Total: 6 files, 1020.08 MB


Downloading CSVs:   0%|          | 0/6 [00:00<?, ?it/s]


  Memory Statistics Summary (6 files):
    Compressed:           0.01 MB
    Peak chunk:        1420.21 MB  (per-file chunks processed sequentially)
    After trim:        6674.99 MB  (reduction from peak: 4.700x)
    After process_fn:     0.51 MB  (reduction from trim: 0.000x)

✓ Successfully fetched 1537 records

Benchmark Results:
  Total time: 87.33 seconds (1.46 minutes)
  Final rows: 1537
  Final memory: 0.50 MB


In [2]:
# Sanity check on the binned frame: list its columns, then confirm that each
# (symbol, timestamp) pair occurs at most once.
print("Columns in futures_df:")
print(list(futures_df.columns))
print()

print("Checking for duplicate time bins per symbol:")
# Number of rows per (symbol, timestamp) pair; any count above 1 is a duplicate.
duplicates = futures_df.groupby(['symbol', 'timestamp']).size()
symbols_with_duplicates = duplicates[duplicates > 1]
max_entries = duplicates.max()

if symbols_with_duplicates.empty:
    print("  ✓ All symbol-timestamp pairs are unique (no duplicates)")
    print(f"  Total unique combinations: {len(duplicates)}")
else:
    print(f"  ⚠️  Found {len(symbols_with_duplicates)} symbol-timestamp pairs with multiple entries")
    print(f"  Max entries per bin: {max_entries}")
    print("\n  Sample duplicates:")
    print(symbols_with_duplicates.head(10))

# Overall cardinalities, followed by a peek at the data itself.
n_symbols = futures_df['symbol'].nunique()
n_timestamps = futures_df['timestamp'].nunique()
print(f"\n  Unique symbols: {n_symbols}")
print(f"  Unique timestamps: {n_timestamps}")
print("\nFirst few rows:")
print(futures_df.head())


Columns in futures_df:
['timeMs', 'exchTimeMs', 'bid_1_px', 'bid_1_qty', 'bid_1_ordCnt', 'ask_1_px', 'ask_1_qty', 'ask_1_ordCnt', 'bid_2_px', 'bid_2_qty', 'bid_2_ordCnt', 'ask_2_px', 'ask_2_qty', 'ask_2_ordCnt', 'bid_3_px', 'bid_3_qty', 'bid_3_ordCnt', 'ask_3_px', 'ask_3_qty', 'ask_3_ordCnt', 'bid_4_px', 'bid_4_qty', 'bid_4_ordCnt', 'ask_4_px', 'ask_4_qty', 'ask_4_ordCnt', 'bid_5_px', 'bid_5_qty', 'bid_5_ordCnt', 'ask_5_px', 'ask_5_qty', 'ask_5_ordCnt', 'symbol', 'time_bin', 'timestamp']

Checking for duplicate time bins per symbol:
  ✓ All symbol-timestamp pairs are unique (no duplicates)
  Total unique combinations: 1537

  Unique symbols: 6
  Unique timestamps: 288

First few rows:
          timeMs     exchTimeMs  bid_1_px  bid_1_qty  bid_1_ordCnt  ask_1_px  \
0  1736467499992  1736467499989   92573.0         20             1   92587.4   
1  1736467799876  1736467799873   92582.8          2             1   92601.4   
2  1736468099986  1736468099983   92615.3         38             1

In [3]:
# Test depth=0 functionality (weighted mid price calculation).
#
# Same instrument, day and 5-minute binning as the benchmark fetch, but with
# depth=0 so the result carries a single mid_price column instead of the
# per-level bid/ask columns.
print("Testing depth=0 functionality:")
print("Fetching futures with depth=0 (should calculate weighted mid price)...\n")

# Use UTC-aware bounds, matching the benchmark cell. Naive datetimes leave the
# time zone open to interpretation and can select a different 24h window on a
# machine whose local zone is not UTC.
futures_mid_df = fetch_market_data(
    '6', 'FUTURES', 'BTC-USD',
    datetime(2025, 1, 10, tzinfo=timezone.utc),
    datetime(2025, 1, 11, tzinfo=timezone.utc),
    'daily',
    verbose=True,
    depth=0,
    process_fn=lambda df: bin_orderbook(df, '5min')
)

print("\nColumns in depth=0 result:")
print(futures_mid_df.columns.tolist())
print("\nFirst few rows:")
print(futures_mid_df.head())
# deep=True also counts the Python string objects held in object columns.
print(f"\nMemory usage: {futures_mid_df.memory_usage(deep=True).sum() / (1024*1024):.2f} MB")


Testing depth=0 functionality:
Fetching futures with depth=0 (should calculate weighted mid price)...

Fetching FUTURES data (module=6) for BTC-USD
Period: 2025-01-10 00:00:00 to 2025-01-11 00:00:00
Split into 1 requests


Fetching data:   0%|          | 0/1 [00:00<?, ?it/s]

Fetch #1/1: 6 files found | Total: 6 files, 1020.08 MB


Downloading CSVs:   0%|          | 0/6 [00:00<?, ?it/s]

✓ Successfully fetched 1537 records

Columns in depth=0 result:
['timeMs', 'exchTimeMs', 'symbol', 'mid_price', 'time_bin', 'timestamp']

First few rows:
          timeMs     exchTimeMs             symbol  mid_price  \
0  1736467499992  1736467499989  BTC-USD-250110.OK   92580.20   
1  1736467799876  1736467799873  BTC-USD-250110.OK   92592.10   
2  1736468099986  1736468099983  BTC-USD-250110.OK   92627.35   
3  1736468399996  1736468399993  BTC-USD-250110.OK   92631.75   
4  1736468699986  1736468699983  BTC-USD-250110.OK   92520.65   

             time_bin      timestamp  
0 2025-01-10 00:00:00  1736467200000  
1 2025-01-10 00:05:00  1736467500000  
2 2025-01-10 00:10:00  1736467800000  
3 2025-01-10 00:15:00  1736468100000  
4 2025-01-10 00:20:00  1736468400000  

Memory usage: 0.16 MB


In [6]:
# DataFrame.info() writes its report to stdout and returns None, so wrapping
# it in print() only appends a stray "None" line to the output.
futures_mid_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1537 entries, 0 to 1536
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   timeMs      1537 non-null   int64         
 1   exchTimeMs  1537 non-null   int64         
 2   symbol      1537 non-null   object        
 3   mid_price   1536 non-null   float64       
 4   time_bin    1537 non-null   datetime64[ns]
 5   timestamp   1537 non-null   int64         
dtypes: datetime64[ns](1), float64(1), int64(3), object(1)
memory usage: 72.2+ KB
None


In [10]:
from utils.helpers import parse_future_name

In [23]:
# Animated futures term structure: one frame per time bin, showing the quoted
# mid prices against tenor together with a log-linear (exponential) fit.


def _fit_log_linear(tenor_days, prices):
    """Fit log(price) = intercept + slope * tenor and score it in price space.

    Args:
        tenor_days: Tenors in days (array-like of numbers).
        prices: Strictly positive, non-NaN prices, aligned with tenor_days.

    Returns:
        (slope, intercept, r_squared), or None when there are fewer than two
        distinct tenors and a line is therefore not determined. r_squared is
        NaN when all prices are identical (total sum of squares is zero).
    """
    tenor = np.asarray(tenor_days, dtype=float)
    price = np.asarray(prices, dtype=float)
    if np.unique(tenor).size < 2:
        return None

    # polyfit returns the highest-degree coefficient first.
    slope, intercept = np.polyfit(tenor, np.log(price), 1)

    # R² is measured on prices, not on log-prices.
    fitted = np.exp(intercept + slope * tenor)
    ss_res = np.sum((price - fitted) ** 2)
    ss_tot = np.sum((price - price.mean()) ** 2)
    r_squared = 1 - ss_res / ss_tot if ss_tot > 0 else float('nan')
    return slope, intercept, r_squared


# Add tenor_days column using parse_future_name.
# Tenor is measured from the earliest time bin, so each contract keeps a single
# x position for the whole animation. The reference time is hoisted out of the
# per-row work, and each symbol is parsed once rather than once per row.
reference_time = pd.to_datetime(futures_mid_df['time_bin'].min())
tenor_by_symbol = {
    symbol: (parse_future_name(symbol)[1] - reference_time).days
    for symbol in futures_mid_df['symbol'].unique()
}
futures_mid_df['tenor_days'] = futures_mid_df['symbol'].map(tenor_by_symbol)

# Keep only rows with a usable price. mid_price can be NaN, and a single NaN
# (or a non-positive value, through the log) would break the fit for its bin.
# The comparison is False for NaN, so this filter drops both cases.
priced = futures_mid_df[futures_mid_df['mid_price'] > 0]

# Get global min/max for price axis
y_min = priced['mid_price'].min() * 0.95  # Add 5% padding
y_max = priced['mid_price'].max() * 1.05

# Group by time_bin to create frames. groupby yields the bins in ascending
# (chronological) order, with pd.Timestamp keys used as frame names.
frames = []
for time_bin, frame_data in priced.groupby('time_bin'):
    # Sort by tenor days for line plotting
    frame_data = frame_data.sort_values('tenor_days')

    fit = _fit_log_linear(frame_data['tenor_days'], frame_data['mid_price'])
    if fit is None:
        # Too few points for a line: keep an empty trace so that every frame
        # carries the same two traces.
        fit_x, fit_y = [], []
        fit_label = 'Log-Linear Fit (R² = n/a)'
    else:
        slope, intercept, r_squared = fit
        # Create dense x points for smooth curve
        fit_x = np.linspace(
            frame_data['tenor_days'].min(), frame_data['tenor_days'].max(), 100
        )
        fit_y = np.exp(intercept + slope * fit_x)
        fit_label = f'Log-Linear Fit (R² = {r_squared:.3f})'

    # Create scatter plot of actual prices
    scatter = go.Scatter(
        x=frame_data['tenor_days'],
        y=frame_data['mid_price'],
        mode='markers',
        name='Market Prices'
    )

    # Create line plot of fitted curve
    line = go.Scatter(
        x=fit_x,
        y=fit_y,
        mode='lines',
        name=fit_label
    )

    frames.append(go.Frame(
        data=[scatter, line],
        name=str(time_bin)
    ))

if not frames:
    raise ValueError("No time bin has a usable mid_price; nothing to animate")

# Create figure with animation
fig = go.Figure(
    data=frames[0].data,
    frames=frames,
    layout=go.Layout(
        title="Futures Term Structure Animation",
        xaxis=dict(title="Tenor (days)"),
        yaxis=dict(
            title="Price",
            range=[y_min, y_max]  # Fix y-axis range
        ),
        showlegend=True,
        updatemenus=[{
            'type': 'buttons',
            'showactive': False,
            'buttons': [
                {'label': 'Play',
                 'method': 'animate',
                 'args': [None, {'frame': {'duration': 500, 'redraw': True},
                               'fromcurrent': True,
                               'transition': {'duration': 300}}]},
                # Animating to [None] in immediate mode stops the playback.
                {'label': 'Pause',
                 'method': 'animate',
                 'args': [[None], {'frame': {'duration': 0, 'redraw': False},
                                 'mode': 'immediate',
                                 'transition': {'duration': 0}}]}
            ]
        }],
        # One slider step per frame, addressed by frame name.
        sliders=[{
            'currentvalue': {'prefix': 'Time: '},
            'steps': [{'args': [[f.name],
                              {'frame': {'duration': 300, 'redraw': True},
                               'mode': 'immediate',
                               'transition': {'duration': 300}}],
                      'label': f.name,
                      'method': 'animate'} for f in frames]
        }]
    )
)

fig.show()