# Phase 1C: Volume Analysis (Daily Data)

Analyze DeFi vs TradFi **daily** trading volumes for commodity perpetuals (Gold, Silver, Oil, Natural Gas).

**Volume Standardization:**  
- DeFi: volume × price (base asset units → USD notional)
- TradFi: volume × price × contract_size (contracts → USD notional)

**Analyses:**
1. Raw notional volume bar charts
2. Rolling 3-day z-score pattern detection
3. Cross-correlation with lag shifts (-7 to +7 days)
4. Rolling average volumes (1d / 3d / 5d / 7d)

In [22]:
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Units of the underlying per TradFi futures contract, keyed by asset name.
# Used to turn a contract count into a quantity of the underlying.
TRADFI_MULTIPLIERS = {
    "Gold": 100,           # GC=F: 100 troy oz per contract
    "Silver": 5000,        # SI=F: 5,000 troy oz per contract
    "Oil": 1000,           # CL=F: 1,000 barrels per contract
    "Natural Gas": 10000,  # NG=F: 10,000 MMBtu per contract
}

# Assets analysed, in display order (same order as the multiplier table).
ASSETS = list(TRADFI_MULTIPLIERS)

# Folder holding one Phase 1B Excel workbook per asset.
PHASE_1B_DIR = os.path.join("output", "Phase 1B")

In [23]:
def load_asset_data(asset_name: str) -> pd.DataFrame:
    """Read the Phase 1B workbook for one asset.

    Args:
        asset_name: Asset label; the workbook is looked up as
            ``<PHASE_1B_DIR>/<asset_name>.xlsx``.

    Returns:
        The workbook's contents with the ``time`` column parsed to datetimes.
    """
    workbook = os.path.join(PHASE_1B_DIR, f"{asset_name}.xlsx")
    frame = pd.read_excel(workbook)
    # Normalise the timestamp column so later cells can plot against it.
    frame["time"] = pd.to_datetime(frame["time"])
    return frame

In [24]:
# Load every asset once and report how many rows carry both a DeFi and a
# TradFi volume (the sample usable for side-by-side comparison).
data = {}
for asset in ASSETS:
    df = load_asset_data(asset)
    has_both_volumes = df[["defi_volume", "tradfi_volume"]].notna().all(axis=1)
    overlap = int(has_both_volumes.sum())
    print(f"{asset:<14} {len(df):>5} rows, {overlap:>4} overlapping")
    data[asset] = df

Gold              60 rows,   39 overlapping
Silver            55 rows,   36 overlapping
Oil               35 rows,   24 overlapping
Natural Gas       20 rows,   14 overlapping


## Compute Notional Volumes (USD)

Standardize volumes to USD notional value for apples-to-apples comparison:
- **DeFi**: volume is in base asset units → multiply by price
- **TradFi**: volume is in contracts → multiply by price × contract_size

**Note**: Data is already daily (1d bars from Phase 1B)

In [25]:
# Add USD-notional volume columns to every asset frame (in place) and print the
# average daily notional of each venue over the rows where both are available.
for asset, df in data.items():
    contract_size = TRADFI_MULTIPLIERS[asset]

    # DeFi volume x close; TradFi contracts x close x units per contract.
    df["defi_notional_usd"] = df["defi_volume"] * df["defi_close"]
    df["tradfi_notional_usd"] = df["tradfi_volume"] * df["tradfi_close"] * contract_size

    # Only rows with both notionals present feed the summary line.
    both_present = df["defi_notional_usd"].notna() & df["tradfi_notional_usd"].notna()
    if not both_present.any():
        continue

    defi_mean = df.loc[both_present, "defi_notional_usd"].mean()
    tradfi_mean = df.loc[both_present, "tradfi_notional_usd"].mean()
    # A non-positive DeFi mean would make the ratio meaningless; report 0 instead.
    if defi_mean > 0:
        ratio = tradfi_mean / defi_mean
    else:
        ratio = 0
    print(f"{asset:<14} DeFi: ${defi_mean:>12,.0f}/day  |  TradFi: ${tradfi_mean:>15,.0f}/day  |  Ratio: {ratio:>8.1f}x")

Gold           DeFi: $ 101,778,286/day  |  TradFi: $  2,788,690,814/day  |  Ratio:     27.4x
Silver         DeFi: $ 534,702,067/day  |  TradFi: $    381,344,173/day  |  Ratio:      0.7x
Oil            DeFi: $   5,845,240/day  |  TradFi: $ 21,059,001,438/day  |  Ratio:   3602.8x
Natural Gas    DeFi: $   8,085,383/day  |  TradFi: $ 10,029,553,950/day  |  Ratio:   1240.5x


## 1. Raw Trading Volumes (USD Notional)

In [None]:
# One grouped bar chart per asset: DeFi and TradFi USD-notional volume side by side.
for asset, df in data.items():
    fig = go.Figure()
    # Both series share one linear y-axis, so the smaller venue's bars can be
    # dwarfed when the two notionals differ by orders of magnitude.
    for column, trace_name, color in [
        ("defi_notional_usd", "DeFi Volume", "steelblue"),
        ("tradfi_notional_usd", "TradFi Volume", "coral"),
    ]:
        fig.add_trace(go.Bar(x=df["time"], y=df[column], name=trace_name, marker_color=color))
    fig.update_layout(
        # Title previously contained a mis-decoded em dash ("â€”").
        title=f"Raw Trading Volumes (USD Notional) — {asset}",
        xaxis_title="Date",
        yaxis_title="Volume (USD)",
        hovermode="x unified",
        template="plotly_white",
        barmode="group",  # Side-by-side bars
    )
    fig.show()

## 2. Daily Volume Z-Score Analysis — Rolling 3-Day Window

The data is already daily (1d bars from Phase 1B), so no aggregation is needed; each day's notional volume is z-scored against a rolling 3-day window.  
Colored bands indicate severity thresholds:
- 🟢 Normal: |z| < 1.0
- 🟡 Moderate: 1.0 ≤ |z| < 1.5  
- 🟠 Elevated: 1.5 ≤ |z| < 2.0
- 🔴 Significant: |z| ≥ 2.0

In [None]:
WINDOW_3D = 3  # baseline length: the 3 rows (days) preceding each observation
MIN_PERIODS_3D = 2  # require at least 2 non-missing baseline days

# (inner edge, outer edge, fill colour, opacity) of each severity band; every
# band is drawn once above zero and mirrored below it.
_Z_BANDS = [
    (2.0, 10, "red", 0.1),       # significant: |z| >= 2.0
    (1.5, 2.0, "orange", 0.15),  # elevated: 1.5 <= |z| < 2.0
    (1.0, 1.5, "yellow", 0.1),   # moderate: 1.0 <= |z| < 1.5
]


def _trailing_zscore(series: pd.Series, window: int, min_periods: int) -> pd.Series:
    """Z-score each value against the rolling window that precedes it.

    The current value is deliberately excluded from its own baseline. When a
    value is part of the n-point sample it is scored against, |z| is bounded by
    (n - 1) / sqrt(n) -- about 1.15 for n = 3 -- so the 1.5 and 2.0 severity
    thresholds could never be reached.

    Args:
        series: Values to score, one row per day.
        window: Number of preceding rows forming the baseline.
        min_periods: Minimum non-missing baseline rows needed for a score.

    Returns:
        Series aligned with ``series``; NaN where the baseline is too short or
        has zero variance (the score is undefined there).
    """
    baseline = series.shift(1).rolling(window, min_periods=min_periods)
    z = (series - baseline.mean()) / baseline.std()
    # A zero-variance baseline divides by zero; leave those days unscored.
    return z.replace([np.inf, -np.inf], np.nan)


for asset, df in data.items():
    # Data is already daily - no aggregation needed
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.08,
                        subplot_titles=("DeFi Volume Z-Score (Daily)", "TradFi Volume Z-Score (Daily)"))

    for i, (col, label) in enumerate([("defi_notional_usd", "DeFi"), ("tradfi_notional_usd", "TradFi")], 1):
        z = _trailing_zscore(df[col], WINDOW_3D, MIN_PERIODS_3D)

        # Subtle background bands marking the severity zones
        for inner, outer, fill, alpha in _Z_BANDS:
            fig.add_hrect(y0=inner, y1=outer, fillcolor=fill, opacity=alpha, line_width=0, row=i, col=1)
            fig.add_hrect(y0=-outer, y1=-inner, fillcolor=fill, opacity=alpha, line_width=0, row=i, col=1)

        # Reference lines at the band edges, plus a zero line
        for threshold, color, dash in [(2.0, "red", "dash"), (1.5, "orange", "dot"), (1.0, "gold", "dot")]:
            fig.add_hline(y=threshold, line_dash=dash, line_color=color, opacity=0.6, line_width=1, row=i, col=1)
            fig.add_hline(y=-threshold, line_dash=dash, line_color=color, opacity=0.6, line_width=1, row=i, col=1)
        fig.add_hline(y=0, line_dash="solid", line_color="gray", opacity=0.3, line_width=1, row=i, col=1)

        # Plot as bar chart
        bar_color = "darkblue" if i == 1 else "darkgreen"
        fig.add_trace(go.Bar(x=df["time"], y=z, name=f"{label} Z-Score",
                             marker_color=bar_color, opacity=0.7),
                      row=i, col=1)

    fig.update_layout(
        # Title previously contained a mis-decoded em dash ("â€”").
        title=f"Daily Volume Z-Scores (3-Day Rolling Window) — {asset}",
        height=600,
        hovermode="x unified",
        template="plotly_white",
        showlegend=False,
        barmode="group",  # Side-by-side bars (though only one series per subplot)
    )
    # Zoom in on the threshold region; bars beyond +/-3 are clipped at the edge
    fig.update_yaxes(range=[-3, 3], row=1, col=1)
    fig.update_yaxes(range=[-3, 3], row=2, col=1)
    fig.show()

## 3. Cross-Correlation — DeFi vs TradFi Volume

### 3A. Summary Curve
Pearson correlation at each lag from **-7 to +7 days**.  
Negative lag = DeFi leads, positive lag = TradFi leads.

In [28]:
LAGS = range(-7, 8)  # -7 to +7 days

# Pearson correlation between DeFi and TradFi notional volume at each lag, drawn
# as one bar chart per asset with the strongest |r| annotated.
# Convention: negative lag = DeFi leads, positive lag = TradFi leads.
for asset, df in data.items():
    # Lags are counted in rows of this overlapping sample, so one step can span
    # more than one calendar day wherever a date was dropped.
    overlap = df.dropna(subset=["defi_notional_usd", "tradfi_notional_usd"]).copy()

    if len(overlap) < 10:
        print(f"{asset}: only {len(overlap)} overlapping days — skipping cross-correlation")
        continue

    lags, corrs = [], []
    for lag in LAGS:
        # shift(-lag) pairs TradFi[t] with DeFi[t + lag]: at a negative lag DeFi's
        # earlier value meets TradFi's current one, i.e. DeFi leads. A plain
        # shift(lag) produces the opposite sign convention.
        shifted = overlap["defi_notional_usd"].shift(-lag)
        valid = shifted.notna() & overlap["tradfi_notional_usd"].notna()
        if valid.sum() >= 5:
            lags.append(lag)
            corrs.append(shifted[valid].corr(overlap["tradfi_notional_usd"][valid]))

    # Correlation is NaN when either side is constant; such lags cannot be the
    # peak (np.argmax would otherwise select the first NaN).
    strength = np.abs(np.asarray(corrs, dtype=float))
    if len(corrs) == 0 or np.isnan(strength).all():
        print(f"{asset}: insufficient data for cross-correlation")
        continue

    peak_idx = int(np.nanargmax(strength))
    peak_lag, peak_corr = lags[peak_idx], corrs[peak_idx]

    # Color bars based on correlation value
    colors = ['crimson' if c < 0 else 'steelblue' for c in corrs]

    fig = go.Figure()
    fig.add_trace(go.Bar(x=lags, y=corrs, marker_color=colors, name="Correlation"))
    fig.add_hline(y=0, line_dash="solid", line_color="gray", opacity=0.5)
    fig.add_annotation(x=peak_lag, y=peak_corr, text=f"Peak: lag={peak_lag}d, r={peak_corr:.3f}",
                       showarrow=True, arrowhead=2, bgcolor="white")
    fig.update_layout(
        # Title previously contained a mis-decoded em dash ("â€”").
        title=f"Cross-Correlation: DeFi vs TradFi Volume — {asset}",
        xaxis_title="Lag (days, negative = DeFi leads)",
        yaxis_title="Pearson Correlation",
        hovermode="x unified",
        template="plotly_white",
        showlegend=False,
    )
    fig.show()

### 3B. Time-Varying Heatmap

Rolling 5-day correlation between DeFi and TradFi volumes at each lag,  
showing how the lead/lag relationship evolves over time.

In [29]:
WINDOW_5D = 5  # 5 days
MIN_PERIODS_5D = 3  # require at least 3 days

# Heatmap per asset: rolling correlation of DeFi vs TradFi notional volume, with
# time on the x-axis and lag on the y-axis.
# Convention: negative lag = DeFi leads, positive lag = TradFi leads.
for asset, df in data.items():
    # Lags and windows are counted in rows of this overlapping sample.
    overlap = df.dropna(subset=["defi_notional_usd", "tradfi_notional_usd"]).copy().reset_index(drop=True)

    if len(overlap) < 10:
        print(f"{asset}: only {len(overlap)} overlapping days — skipping heatmap")
        continue

    corr_matrix = pd.DataFrame(index=overlap["time"])
    for lag in LAGS:
        # shift(-lag) pairs TradFi[t] with DeFi[t + lag], so a negative lag means
        # DeFi leads; a plain shift(lag) produces the opposite sign convention.
        shifted = overlap["defi_notional_usd"].shift(-lag)
        rolling_corr = shifted.rolling(WINDOW_5D, min_periods=MIN_PERIODS_5D).corr(overlap["tradfi_notional_usd"])
        # .values: assign by position, since corr_matrix is indexed by time
        corr_matrix[lag] = rolling_corr.values

    # drop rows where all lags are NaN (early window warmup)
    corr_matrix = corr_matrix.dropna(how="all")

    if len(corr_matrix) == 0:
        print(f"{asset}: insufficient data for heatmap")
        continue

    fig = go.Figure(data=go.Heatmap(
        z=corr_matrix.values.T,  # transpose: one heatmap row per lag
        x=corr_matrix.index,
        y=list(LAGS),
        colorscale="RdBu",
        zmin=-1, zmax=1,
        colorbar_title="Correlation",
    ))
    fig.update_layout(
        # Title previously contained a mis-decoded em dash ("â€”").
        title=f"Time-Varying Cross-Correlation (5-Day Rolling) — {asset}",
        xaxis_title="Date",
        yaxis_title="Lag (days)",
        template="plotly_white",
        height=500,
    )
    fig.show()

## 4. Rolling Average Volumes

Moving averages over 1-day, 3-day, 5-day, and 7-day windows for both DeFi and TradFi volumes.

In [None]:
ROLLING_WINDOWS = {"1d": 1, "3d": 3, "5d": 5, "7d": 7}  # label -> window length in rows (days)
COLORS = {"1d": "#1f77b4", "3d": "#ff7f0e", "5d": "#2ca02c", "7d": "#d62728"}

# Two stacked panels per asset (DeFi on top, TradFi below), one line per window.
for asset, df in data.items():
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.08,
                        subplot_titles=("DeFi Rolling Avg Volume (USD)", "TradFi Rolling Avg Volume (USD)"))

    for label, days in ROLLING_WINDOWS.items():
        # int() truncates, so the minimum is 1, 1, 3 and 4 observations for the
        # 1d, 3d, 5d and 7d windows respectively.
        min_p = max(1, int(days * 0.66))  # At least 1 day

        for row, column in enumerate(("defi_notional_usd", "tradfi_notional_usd"), 1):
            moving_avg = df[column].rolling(days, min_periods=min_p).mean()
            # Both panels share a legend group per window; only the top panel
            # contributes the legend entry so each window is listed once.
            fig.add_trace(go.Scatter(x=df["time"], y=moving_avg, name=label, mode="lines",
                                     line=dict(color=COLORS[label], width=2), legendgroup=label,
                                     showlegend=(row == 1)), row=row, col=1)

    fig.update_layout(
        # Title previously contained a mis-decoded em dash ("â€”").
        title=f"Rolling Average Volumes (USD Notional) — {asset}",
        height=600,
        hovermode="x unified",
        template="plotly_white",
    )
    fig.show()