In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Import your utility modules
from preprocessing.utils import get_snapshot_filepaths
from preprocessing.dataframe import load_dataframe, save_dataframe, load_snapshots_to_dataframe
from visualisations.plot import plot_total_liquidity, plot_average_fee, plot_unique_makers
from visualisations.fees import plot_fee_metrics, plot_fee_type_distribution, plot_fee_volume_metrics
from analysis.fees import calculate_fee_statistics, calculate_time_based_statistics, compute_fee_ratios, compute_volume_metrics, calculate_liquidity_metrics, calculate_market_health_metrics

In [None]:
# Input locations: the raw snapshot directory and the cached DataFrame file.
directory_path = '../data'  # Update this to your data directory path
df_pickle_path = '../dataframe.pkl'  # Path where your DataFrame is saved

# Reuse the cached DataFrame when the file exists; otherwise build it from
# the snapshots and write the cache so the next run is fast.
# NOTE(review): the cache path ends in .pkl - if load_dataframe unpickles it,
# only point this at cache files you created yourself.
if not os.path.exists(df_pickle_path):
    filepaths = get_snapshot_filepaths(directory_path)
    df_stats = load_snapshots_to_dataframe(filepaths)
    save_dataframe(df_stats, df_pickle_path)
else:
    df_stats = load_dataframe(df_pickle_path)

# Preview the first rows (last expression, so the notebook renders it)
df_stats.head()

Plot total liquidity

In [None]:
# Raw total liquidity as a line over the DataFrame index.
fig, ax = plt.subplots(figsize=(12, 6))
df_stats['total_liquidity'].plot(ax=ax)
ax.set_title('Total Liquidity Over Time')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Total Liquidity (satoshis)')
plt.show()

Plot unique makers

In [None]:
# Raw count of unique makers as a line over the DataFrame index.
fig, ax = plt.subplots(figsize=(12, 6))
df_stats['total_unique_makers'].plot(ax=ax)
ax.set_title('Number of Unique Makers Over Time')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Number of Unique Makers')
plt.show()

In [None]:
# Cell: Compute Smoothed Data
# Adds two centred rolling-mean columns for total liquidity to df_stats.

# Window lengths, in rows of df_stats (adjust as needed)
rolling_window_size = 100
rolling_window_size_1000 = 1000

# center=True places each mean at the middle of its window, so the first and
# last half-window of rows come out as NaN.
total_liquidity = df_stats['total_liquidity']
df_stats['total_liquidity_smooth_100'] = (
    total_liquidity.rolling(window=rolling_window_size, center=True).mean()
)
df_stats['total_liquidity_smooth_1000'] = (
    total_liquidity.rolling(window=rolling_window_size_1000, center=True).mean()
)

In [None]:
# Cell: Plot Total Liquidity with Rolling Mean Smoothing
# Overlays the raw series with both rolling means. Legend labels are built
# from the window-size variables so each smoothed line states its window
# (previously the 100-row line was labelled just 'Rolling Mean').

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_stats.index, df_stats['total_liquidity'], label='Original', alpha=0.5)
ax.plot(df_stats.index, df_stats['total_liquidity_smooth_100'],
        label=f'Rolling Mean {rolling_window_size}', color='red')
ax.plot(df_stats.index, df_stats['total_liquidity_smooth_1000'],
        label=f'Rolling Mean {rolling_window_size_1000}', color='yellow')
ax.set_title('Total Liquidity Over Time (Rolling Mean Smoothed)')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Total Liquidity (satoshis)')
ax.legend()
plt.show()

In [None]:
# Centred rolling means of the unique-maker count, using the same window
# variables as the liquidity smoothing. The long window previously hard-coded
# 1000 here, so changing rolling_window_size_1000 would have left the two
# smoothed series on different windows.
df_stats['total_unique_makers_smooth_100'] = df_stats['total_unique_makers'].rolling(window=rolling_window_size, center=True).mean()
df_stats['total_unique_makers_smooth_1000'] = df_stats['total_unique_makers'].rolling(window=rolling_window_size_1000, center=True).mean()

In [None]:
# Cell: Plot Total Makers with Rolling Mean Smoothing
# Overlays the raw maker count with both rolling means. Legend labels are
# built from the window-size variables so each smoothed line states its
# window (previously the 100-row line was labelled just 'Rolling Mean').

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_stats.index, df_stats['total_unique_makers'], label='Original', alpha=0.5)
ax.plot(df_stats.index, df_stats['total_unique_makers_smooth_100'],
        label=f'Rolling Mean {rolling_window_size}', color='red')
ax.plot(df_stats.index, df_stats['total_unique_makers_smooth_1000'],
        label=f'Rolling Mean {rolling_window_size_1000}', color='yellow')
ax.set_title('Total Makers Over Time (Rolling Mean Smoothed)')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Number of Makers')
ax.legend()
plt.show()

In [None]:
# Plot the long-window smoothed liquidity and maker count on one figure,
# each against its own y-axis (shared x-axis via twinx).
fig, ax1 = plt.subplots(figsize=(12, 6))

# Primary y-axis: total liquidity
liquidity_color = 'tab:blue'
ax1.set_xlabel('Timestamp')
ax1.set_ylabel('Total Liquidity (satoshis)', color=liquidity_color)
liquidity_line, = ax1.plot(df_stats.index, df_stats['total_liquidity_smooth_1000'],
                           color=liquidity_color, label='Total Liquidity')
ax1.tick_params(axis='y', labelcolor=liquidity_color)

# Secondary y-axis: unique makers
ax2 = ax1.twinx()
makers_color = 'tab:red'
ax2.set_ylabel('Total Unique Makers', color=makers_color)
makers_line, = ax2.plot(df_stats.index, df_stats['total_unique_makers_smooth_1000'],
                        color=makers_color, label='Total Unique Makers')
ax2.tick_params(axis='y', labelcolor=makers_color)

# Title plus a single legend covering both axes. The lines were labelled
# before but no legend was ever drawn. The legend is attached to ax2 (the
# axes created last) and given both handles explicitly, because each axes
# only knows about its own line.
ax1.set_title('Total Liquidity and Unique Makers Over Time')
ax2.legend(handles=[liquidity_line, makers_line], loc='upper left')
fig.tight_layout()
plt.show()

Fee Analysis

In [None]:
# Compute fee statistics and print a summary for each fee type.
fee_stats = calculate_fee_statistics(df_stats)

# (section heading, key into fee_stats, number format) per fee type
fee_sections = (
    ("\nRelative Fee Percentages:", 'relative_percentage', '.8f'),
    ("\nAbsolute Fee (satoshis):", 'absolute_satoshis', ',.2f'),
)

print("Fee Statistics Summary:")
for heading, stats_key, number_format in fee_sections:
    print(heading)
    print(f"Mean: {fee_stats[stats_key].mean:{number_format}}")
    print(f"Median: {fee_stats[stats_key].median:{number_format}}")
    print(f"Std: {fee_stats[stats_key].std:{number_format}}")
    print(f"95th percentile: {fee_stats[stats_key].percentiles['95']:{number_format}}")

In [None]:
# Daily Statistics Analysis
# Aggregate at daily frequency, then plot the 'mean' sub-column of each fee
# metric in its own panel.
daily_stats = calculate_time_based_statistics(df_stats, freq='D')

# (metric column, panel title, y-axis label), one entry per panel
daily_panels = (
    ('relative_fees_percentage_mean', 'Daily Average Relative Fee Percentage', 'Fee Percentage'),
    ('absolute_fees_satoshis_mean', 'Daily Average Absolute Fee (satoshis)', 'Satoshis'),
)

fig, axes = plt.subplots(2, 1, figsize=(12, 12))
for ax, (metric, title, ylabel) in zip(axes, daily_panels):
    daily_stats[metric]['mean'].plot(ax=ax)
    ax.set_title(title)
    ax.set_ylabel(ylabel)

plt.tight_layout()
plt.show()

In [None]:
# Market Health Analysis
# Compute liquidity and market-health metrics and print them as a report.
liquidity_metrics = calculate_liquidity_metrics(df_stats)
health_metrics = calculate_market_health_metrics(df_stats)

# Each section: (heading, metrics mapping, rows of (label, key, format, suffix))
report_sections = (
    ("\nLiquidity Metrics:", liquidity_metrics, (
        ("Average Liquidity", 'avg_liquidity', ',.0f', " satoshis"),
        ("Liquidity per Maker", 'liquidity_per_maker', ',.0f', " satoshis"),
        ("Liquidity Volatility", 'liquidity_volatility', '.4f', ""),
    )),
    ("\nMarket Health Metrics:", health_metrics, (
        ("Maker Stability", 'maker_stability', '.4f', ""),
        ("Fee Stability", 'fee_stability', '.4f', ""),
        ("Market Depth", 'market_depth', ',.0f', ""),
    )),
)

print("Market Health Indicators:")
for heading, metrics, rows in report_sections:
    print(heading)
    for label, key, number_format, suffix in rows:
        print(f"{label}: {metrics[key]:{number_format}}{suffix}")

In [None]:
# Fee Ratio Analysis
# Top panel: smoothed relative/absolute fee ratios. Bottom panel: smoothed
# total volume.
fee_ratios = compute_fee_ratios(df_stats)
volume_metrics = compute_volume_metrics(df_stats)

fig, (ratio_ax, volume_ax) = plt.subplots(2, 1, figsize=(12, 12))

# Fee ratios: one line per fee type
ratio_series = (
    ('relative_ratio_smooth', 'Relative Fee Ratio', 'blue'),
    ('absolute_ratio_smooth', 'Absolute Fee Ratio', 'red'),
)
for column, legend_label, line_color in ratio_series:
    ratio_ax.plot(fee_ratios.index, fee_ratios[column],
                  label=legend_label, color=line_color)
ratio_ax.set_title('Fee Type Ratios Over Time')
ratio_ax.set_ylabel('Ratio')
ratio_ax.legend()

# Volume
volume_ax.plot(volume_metrics.index, volume_metrics['total_volume_smooth'],
               label='Total Volume', color='green')
volume_ax.set_title('Trading Volume Over Time')
volume_ax.set_ylabel('Number of Offers')
volume_ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Market Share Analysis
# Stacked area plot of the smoothed market share of each fee type.
# Requires volume_metrics from the fee-ratio cell.
fig, ax = plt.subplots(figsize=(12, 6))

relative_share = volume_metrics['relative_share_smooth']
absolute_share = volume_metrics['absolute_share_smooth']

# Bottom band: relative-fee share, filled up from zero.
ax.fill_between(volume_metrics.index,
                relative_share,
                label='Relative Fee Market Share',
                alpha=0.5, color='blue')
# Top band: absolute-fee share stacked on the relative band. Both bands used
# to be filled from zero, so they overlapped instead of stacking.
ax.fill_between(volume_metrics.index,
                relative_share,
                relative_share + absolute_share,
                label='Absolute Fee Market Share',
                alpha=0.5, color='red')

ax.set_title('Market Share by Fee Type')
ax.set_ylabel('Share')
ax.legend()
plt.show()

In [None]:
# Correlation Analysis
# Pairwise correlation between the key liquidity, maker and fee columns,
# drawn as an annotated heatmap centred on zero.
correlation_metrics = [
    'total_liquidity',
    'total_unique_makers',
    'relative_fees_percentage_mean',
    'absolute_fees_satoshis_mean',
    'relative_fees_count',
    'absolute_fees_count',
]

correlation_matrix = df_stats.loc[:, correlation_metrics].corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Matrix of Key Metrics')
fig.tight_layout()
plt.show()

Time-based Analysis

In [None]:
# Analyze hourly patterns
# Tag each row with the hour taken from the index, average the key metrics
# per hour, and draw one line panel per metric.
df_stats['hour'] = df_stats.index.hour

# (metric column, panel title, y-axis label), in panel order
hourly_panels = (
    ('total_liquidity', 'Average Liquidity by Hour', 'Satoshis'),
    ('total_unique_makers', 'Average Makers by Hour', 'Count'),
    ('relative_fees_percentage_mean', 'Average Relative Fee by Hour', 'Percentage'),
    ('absolute_fees_satoshis_mean', 'Average Absolute Fee by Hour', 'Satoshis'),
)

hourly_stats = df_stats.groupby('hour').agg(
    {metric: 'mean' for metric, _, _ in hourly_panels}
)

# Plot hourly patterns on a 2x2 grid, filled row by row
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
for ax, (metric, title, ylabel) in zip(axes.flat, hourly_panels):
    hourly_stats[metric].plot(ax=ax)
    ax.set_title(title)
    ax.set_ylabel(ylabel)

plt.tight_layout()
plt.show()

In [None]:
# Weekly patterns
# Tag each row with its weekday name, average the key metrics per weekday,
# and draw one bar panel per metric in Monday-to-Sunday order.
df_stats['day_of_week'] = df_stats.index.day_name()

# (metric column, panel title, y-axis label), in panel order
weekly_panels = (
    ('total_liquidity', 'Average Liquidity by Day', 'Satoshis'),
    ('total_unique_makers', 'Average Makers by Day', 'Count'),
    ('relative_fees_percentage_mean', 'Average Relative Fee by Day', 'Percentage'),
    ('absolute_fees_satoshis_mean', 'Average Absolute Fee by Day', 'Satoshis'),
)

# groupby sorts the weekday names alphabetically, so reindex to calendar order
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekly_stats = (
    df_stats.groupby('day_of_week')
    .agg({metric: 'mean' for metric, _, _ in weekly_panels})
    .reindex(day_order)
)

# Plot weekly patterns on a 2x2 grid, filled row by row
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
for ax, (metric, title, ylabel) in zip(axes.flat, weekly_panels):
    weekly_stats[metric].plot(kind='bar', ax=ax)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

Liquidity Distribution Analysis

In [None]:
# Analyze liquidity distribution
# Left: histogram of total liquidity. Right: box plot per weekday.
# Requires the 'day_of_week' column and day_order from the weekly-patterns cell.
fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(15, 6))

# Histogram of liquidity
sns.histplot(data=df_stats, x='total_liquidity', bins=50, ax=hist_ax)
hist_ax.set_title('Distribution of Total Liquidity')
hist_ax.set_xlabel('Satoshis')

# Box plot of liquidity by day of week. order= fixes the boxes to
# Monday..Sunday; without it the weekdays appear in whatever order they first
# occur in the data, which does not match the weekly bar charts.
sns.boxplot(data=df_stats, x='day_of_week', y='total_liquidity',
            order=day_order, ax=box_ax)
box_ax.set_title('Liquidity Distribution by Day')
box_ax.set_xlabel('Day of Week')
box_ax.set_ylabel('Satoshis')
box_ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Fee Distribution Analysis
# Top row: histograms of the two fee metrics. Bottom row: box plots of the
# same metrics per weekday.
# Requires the 'day_of_week' column and day_order from the weekly-patterns cell.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

# Histograms: (axes, column, title, x-axis label)
histogram_panels = (
    (ax1, 'relative_fees_percentage_mean', 'Distribution of Relative Fee Percentages', 'Percentage'),
    (ax2, 'absolute_fees_satoshis_mean', 'Distribution of Absolute Fees', 'Satoshis'),
)
for ax, column, title, xlabel in histogram_panels:
    sns.histplot(data=df_stats, x=column, bins=50, ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)

# Box plots by day: (axes, column, title, y-axis label)
boxplot_panels = (
    (ax3, 'relative_fees_percentage_mean', 'Relative Fee Distribution by Day', 'Percentage'),
    (ax4, 'absolute_fees_satoshis_mean', 'Absolute Fee Distribution by Day', 'Satoshis'),
)
for ax, column, title, ylabel in boxplot_panels:
    # order= fixes the boxes to Monday..Sunday; without it the weekdays appear
    # in whatever order they first occur in the data, which does not match the
    # weekly bar charts.
    sns.boxplot(data=df_stats, x='day_of_week', y=column, order=day_order, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Day of Week')
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()