# Perpetual Futures Funding Arbitrage - Data Exploration

This notebook explores perpetual futures and spot price data to understand the funding rate mechanism and price differentials.

In [None]:
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yaml
from scipy import stats

# Must run before the project import below so that ../src is on the path.
sys.path.append('../src')

from data_acquisition import DataAcquisition

# Use seaborn's 'darkgrid' look for the figures drawn in this notebook.
sns.set_style('darkgrid')
# IPython magic: show matplotlib figures inline, directly below the cell.
%matplotlib inline

## 1. Load Configuration

In [None]:
# Read the strategy configuration stored one directory above this notebook.
with open('../config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# Echo the strategy name and the configured symbols as a quick sanity check.
strategy_name = config['strategy']['name']
print("Strategy:", strategy_name)
asset_symbols = [asset['symbol'] for asset in config['assets']]
print("Assets:", asset_symbols)

## 2. Fetch Data

In [None]:
def _as_datetime(value):
    """Coerce a backtest date from the config into a ``datetime``.

    ``yaml.safe_load`` leaves a quoted date as a string, but turns an unquoted
    ``2023-01-01`` into a ``datetime.date`` (and a full timestamp into a
    ``datetime``). ``datetime.strptime`` only accepts strings, so each of the
    three forms is handled explicitly.

    Args:
        value: A ``'%Y-%m-%d'`` string, a ``datetime``, or a ``date``.

    Returns:
        The corresponding ``datetime``; a plain date maps to midnight.

    Raises:
        ValueError: If a string does not match ``'%Y-%m-%d'``.
        TypeError: If ``value`` is none of the supported types.
    """
    if isinstance(value, str):
        return datetime.strptime(value, '%Y-%m-%d')
    # Check datetime before the generic date case: datetime subclasses date.
    if isinstance(value, datetime):
        return value
    return datetime.combine(value, datetime.min.time())


data_acq = DataAcquisition(config)

# Backtest window as configured under the `backtest` section.
start_date = _as_datetime(config['backtest']['start_date'])
end_date = _as_datetime(config['backtest']['end_date'])

# Fetch BTC data
symbol = 'BTCUSDT'
# NOTE(review): later cells read the perp_price, spot_price, funding_rate,
# perp_volume and spot_volume columns and use df.index as the x-axis —
# confirm prepare_dataset returns a frame with that layout.
df = data_acq.prepare_dataset(symbol, start_date, end_date)

print(f"Data shape: {df.shape}")
df.head()

## 3. Price Analysis

In [None]:
fig, axes = plt.subplots(3, 1, figsize=(14, 10))
price_ax, premium_ax, funding_ax = axes

# Top panel: perpetual and spot price series drawn on the same axis.
for column, label in (('perp_price', 'Perpetual'), ('spot_price', 'Spot')):
    price_ax.plot(df.index, df[column], label=label, alpha=0.8)
price_ax.set(title='Perpetual vs Spot Prices', ylabel='Price (USD)')
price_ax.legend()
price_ax.grid(True)

# Middle panel: relative gap of perp over spot, scaled by 10000 to basis points.
# `premium` is reused by the later distribution, funding and summary cells.
price_ratio = df['perp_price'] / df['spot_price']
premium = (price_ratio - 1) * 10000
premium_ax.plot(df.index, premium, color='purple', alpha=0.7)
# Dashed zero line separates premium (above) from discount (below).
premium_ax.axhline(y=0, color='black', linestyle='--', linewidth=0.8)
premium_ax.set(title='Perpetual Premium/Discount (bps)', ylabel='Premium (bps)')
premium_ax.grid(True)

# Bottom panel: funding rate multiplied by 100 to display it as a percentage.
funding_pct = df['funding_rate'] * 100
funding_ax.plot(df.index, funding_pct, color='green', alpha=0.7)
funding_ax.set(title='Funding Rate (%)', ylabel='Funding Rate (%)', xlabel='Date')
funding_ax.grid(True)

plt.tight_layout()
plt.show()

## 4. Premium Distribution

In [None]:
# Use one cleaned sample for the histogram, the Q-Q plot and the printed
# statistics so all three describe the same observations. Non-finite values
# (NaN from missing prices, +/-inf from a zero spot price) are dropped.
premium_valid = premium[np.isfinite(premium)]
if premium_valid.empty:
    raise ValueError("No finite premium observations available to analyse")
premium_mean = premium_valid.mean()

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
hist_ax, qq_ax = axes

# Histogram
hist_ax.hist(premium_valid, bins=100, alpha=0.7, color='blue', edgecolor='black')
hist_ax.axvline(x=premium_mean, color='red', linestyle='--', label=f'Mean: {premium_mean:.2f} bps')
hist_ax.set_title('Premium Distribution')
hist_ax.set_xlabel('Premium (bps)')
hist_ax.set_ylabel('Frequency')
hist_ax.legend()
hist_ax.grid(True)

# Q-Q plot against a normal distribution (`stats` is scipy.stats, imported in
# the notebook's import cell). The title is set afterwards so it is the one shown.
stats.probplot(premium_valid, dist="norm", plot=qq_ax)
qq_ax.set_title('Q-Q Plot')
qq_ax.grid(True)

plt.tight_layout()
plt.show()

print("Premium Statistics:")
print(f"Mean: {premium_mean:.4f} bps")
print(f"Std: {premium_valid.std():.4f} bps")
print(f"Min: {premium_valid.min():.4f} bps")
print(f"Max: {premium_valid.max():.4f} bps")

## 5. Funding Rate Analysis

In [None]:
# Descriptive statistics of the raw funding-rate column.
funding_rate = df['funding_rate']
print("Funding Rate Statistics:")
print(funding_rate.describe())

# Correlation between the premium (bps) and the funding rate.
correlation = premium.corr(funding_rate)
print(f"\nCorrelation (Premium vs Funding Rate): {correlation:.4f}")

# Scatter of premium against funding rate; the rate is scaled by 100 for a % axis.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
scatter_ax.scatter(funding_rate * 100, premium, alpha=0.3)
scatter_ax.set(
    xlabel='Funding Rate (%)',
    ylabel='Premium (bps)',
    title='Premium vs Funding Rate',
)
scatter_ax.grid(True)
plt.show()

## 6. Volume Analysis

In [None]:
fig, axes = plt.subplots(2, 1, figsize=(14, 8))
units_ax, usd_ax = axes

# Top panel: volume columns exactly as they appear in the dataset.
for column, label in (('perp_volume', 'Perpetual'), ('spot_volume', 'Spot')):
    units_ax.plot(df.index, df[column], label=label, alpha=0.7)
units_ax.set(title='Trading Volume', ylabel='Volume')
units_ax.legend()
units_ax.grid(True)

# Notional volume: each volume column multiplied by its matching price column.
# Both series are reused by the summary cell further down.
perp_volume_usd = df['perp_volume'] * df['perp_price']
spot_volume_usd = df['spot_volume'] * df['spot_price']

# Bottom panel: notional volumes plus a fixed 5e6 reference line.
# NOTE(review): 5e6 is presumably the strategy's minimum-volume filter —
# confirm it matches the value in the config.
for series, label in ((perp_volume_usd, 'Perpetual'), (spot_volume_usd, 'Spot')):
    usd_ax.plot(df.index, series, label=label, alpha=0.7)
usd_ax.axhline(y=5e6, color='red', linestyle='--', label='Min Volume Threshold')
usd_ax.set(title='Trading Volume (USD)', ylabel='Volume (USD)', xlabel='Date')
usd_ax.legend()
usd_ax.grid(True)

plt.tight_layout()
plt.show()

## 7. Summary Statistics

In [None]:
# One (metric, formatted value) pair per row; values are pre-formatted strings
# so the printed table shows units and thousands separators as intended.
summary_rows = [
    ('Avg Perp Price', f"${df['perp_price'].mean():.2f}"),
    ('Avg Spot Price', f"${df['spot_price'].mean():.2f}"),
    ('Avg Premium (bps)', f"{premium.mean():.4f}"),
    ('Avg Funding Rate (%)', f"{df['funding_rate'].mean() * 100:.6f}"),
    ('Avg Perp Volume (USD)', f"${perp_volume_usd.mean():,.0f}"),
    ('Avg Spot Volume (USD)', f"${spot_volume_usd.mean():,.0f}"),
    ('Data Points', f"{len(df):,}"),
]
summary = pd.DataFrame(summary_rows, columns=['Metric', 'Value'])

# Plain-text rendering without the integer index column.
print(summary.to_string(index=False))