# Accumulation Signal Sandbox
Explore alternative short-signal formulas using data from the daily_metrics table. The database is only read; nothing is written back.


In [None]:
from pathlib import Path
import sys

import duckdb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# If the notebook is started from a directory that contains the
# `darkpool_analysis` package, put that directory at the front of sys.path.
project_root = Path.cwd()
if (project_root / 'darkpool_analysis').exists():
    sys.path.insert(0, str(project_root))

# Imported after the sys.path tweak above so the package can be found.
from darkpool_analysis.config import load_config

config = load_config()

# Symbol and inclusive date window used to filter daily_metrics.
TICKER = 'NKE'
START_DATE = '2025-11-01'
END_DATE = '2025-12-31'
# Database location comes from the loaded project configuration.
DB_PATH = config.db_path

# Selects which score series supplies the threshold dots on the score panels.
DOT_MODE = 'current'  # 'current', 'option_a', 'option_b', 'all'
# Weights of short_buy_sell_ratio_z and vw_flow_z in the Option B blend.
BLEND_SHORT_WEIGHT = 0.6
BLEND_FLOW_WEIGHT = 0.4


In [None]:
# Dark-theme palette shared by every chart in this notebook, keyed by name.
COLORS = dict(
    background='#0f0f10',
    panel_bg='#141416',
    text='#e6e6e6',
    grid='#2a2a2d',
    white='#ffffff',
    green='#00ff88',
    red='#ff6b6b',
    yellow='#ffd700',
    cyan='#00d4ff',
    orange='#ff9f43',
    purple='#b026ff',
    neutral='#6b6b6b',
    blue='#4aa3ff',
)
# Opacity used for grid lines.
GRID_ALPHA = 0.18

def _apply_axis_style(ax):
    """Apply the notebook's dark panel styling to a matplotlib axes.

    Sets the panel background and tick colours, hides the top and right
    spines, recolours the left and bottom spines, and turns on a dashed grid.
    """
    grid_color = COLORS['grid']
    ax.set_facecolor(COLORS['panel_bg'])
    ax.tick_params(colors=COLORS['text'], labelsize=9)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for visible_side in ('left', 'bottom'):
        ax.spines[visible_side].set_color(grid_color)
    ax.grid(True, alpha=GRID_ALPHA, color=grid_color, linestyle='--')

def _format_volume(value):
    """Format a number as a compact volume label.

    Returns 'NA' for missing values. Magnitudes of at least a billion or a
    million are shown with one decimal and a 'B' or 'M' suffix, magnitudes of
    at least a thousand as whole thousands with a 'K' suffix, and anything
    smaller as a comma-grouped integer.
    """
    if pd.isna(value):
        return 'NA'
    number = float(value)
    magnitude = abs(number)
    # Largest unit first so the first match is the right suffix.
    for threshold, decimals, suffix in (
        (1_000_000_000, 1, 'B'),
        (1_000_000, 1, 'M'),
        (1_000, 0, 'K'),
    ):
        if magnitude >= threshold:
            return f'{number / threshold:.{decimals}f}{suffix}'
    return f'{number:,.0f}'

def _rolling_zscore(series, window, min_periods):
    """Return the rolling z-score of ``series``.

    Each value is centred on the rolling mean and divided by the rolling
    population standard deviation (ddof=0) over the same window. Positions
    with fewer than ``min_periods`` observations in the window are NaN.
    """
    windowed = series.rolling(window=window, min_periods=min_periods)
    centered = series - windowed.mean()
    return centered / windowed.std(ddof=0)

def _sigmoid(x):
    """Logistic function 1 / (1 + e^-x), applied element-wise."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

def _compute_score_display(short_z, lit_z, price_z, otc_z, cfg):
    """Combine z-scores into an accumulation score on a 0-100 display scale.

    Args:
        short_z: Series of short-signal z-scores. Rows where it is missing
            produce NaN in the result; its index is used for the output.
        lit_z: Lit-flow z-scores; missing values count as 0.
        price_z: Price/return z-scores; missing values count as 0.
        otc_z: OTC participation z-scores; missing values count as 0.
        cfg: Object providing ``composite_w_short``, ``composite_w_lit``,
            ``composite_w_price``, ``intensity_scale_min`` and
            ``intensity_scale_max``.

    Returns:
        pandas Series of ``(score + 1) * 50`` where ``score`` is the weighted
        tanh composite scaled by the intensity factor and clipped to [-1, 1].
    """

    def _as_floats(values, fill=None):
        # Coerce to a float array, optionally replacing missing entries.
        numeric = pd.to_numeric(values, errors='coerce')
        if fill is not None:
            numeric = numeric.fillna(fill)
        return numeric.to_numpy(dtype=float)

    short = _as_floats(short_z)
    lit = _as_floats(lit_z, 0.0)
    price = _as_floats(price_z, 0.0)
    otc = _as_floats(otc_z, 0.0)

    has_short = ~np.isnan(short)
    score = np.full_like(short, np.nan, dtype=float)
    if has_short.any():
        # tanh squashes each z-score into (-1, 1) before weighting.
        short_part = cfg.composite_w_short * np.tanh(short[has_short] * 0.5)
        lit_part = cfg.composite_w_lit * np.tanh(lit[has_short] * 0.5)
        price_part = cfg.composite_w_price * np.tanh(price[has_short] * 0.3)
        composite = short_part + lit_part + price_part

        # OTC participation moves the multiplier between the configured bounds.
        span = cfg.intensity_scale_max - cfg.intensity_scale_min
        multiplier = cfg.intensity_scale_min + span * _sigmoid(otc[has_short])
        score[has_short] = np.clip(composite * multiplier, -1.0, 1.0)

    return pd.Series((score + 1.0) * 50.0, index=short_z.index)

def _set_flow_axis(ax, values):
    """Configure a y-axis that is symmetric around zero for signed flow values.

    The limit is the largest absolute value plus 10% padding (at least 1.0);
    an empty or all-zero input falls back to a magnitude of 1.0. Five evenly
    spaced ticks are formatted with ``_format_volume`` and a dashed zero line
    is drawn.
    """
    peak = pd.to_numeric(values, errors='coerce').abs().max(skipna=True)
    if pd.isna(peak) or peak == 0:
        peak = 1.0
    limit = peak + max(peak * 0.1, 1.0)
    ax.set_ylim(-limit, limit)
    ax.set_yticks(np.linspace(-limit, limit, 5))
    ax.yaxis.set_major_formatter(
        plt.FuncFormatter(lambda tick, _pos: _format_volume(tick))
    )
    ax.axhline(
        0.0,
        color=COLORS['neutral'],
        linestyle='--',
        linewidth=1.2,
        alpha=0.7,
    )

def _set_ratio_axis(ax, values, bot=1.25, sell=0.75):
    """Configure a ratio y-axis starting at zero with reference lines.

    The upper limit is 110% of the largest value but never below 2.0 (a
    missing maximum is treated as 2.0). Dashed guides are drawn at 1.0
    (neutral), ``bot`` (green) and ``sell`` (red).
    """
    peak = pd.to_numeric(values, errors='coerce').max(skipna=True)
    peak = 2.0 if pd.isna(peak) else peak
    ax.set_ylim(0, max(2.0, float(peak) * 1.1))
    for level, color_key, opacity in (
        (1.0, 'neutral', 0.6),
        (bot, 'green', 0.8),
        (sell, 'red', 0.8),
    ):
        ax.axhline(level, color=COLORS[color_key], linestyle='--', linewidth=1.0, alpha=opacity)

def _set_volume_axis(ax, values):
    """Configure a zero-based y-axis for non-negative volume values.

    The top of the axis is 110% of the largest value; a missing or
    non-positive maximum falls back to 1.0. Five evenly spaced ticks are
    formatted with ``_format_volume``.
    """
    peak = pd.to_numeric(values, errors='coerce').max(skipna=True)
    if pd.isna(peak) or peak <= 0:
        peak = 1.0
    top = peak * 1.1
    ax.set_ylim(0, top)
    ax.set_yticks(np.linspace(0, top, 5))
    ax.yaxis.set_major_formatter(
        plt.FuncFormatter(lambda tick, _pos: _format_volume(tick))
    )

# Per-source dot styling: colour for accumulation dots, colour for
# distribution dots, and the horizontal offset used when all sources are shown.
DOT_SPECS = {
    source: {'accum': COLORS[accum_key], 'dist': COLORS[dist_key], 'offset': shift}
    for source, accum_key, dist_key, shift in (
        ('current', 'green', 'red', -0.18),
        ('option_a', 'blue', 'purple', 0.0),
        ('option_b', 'yellow', 'orange', 0.18),
    )
}

def _plot_score_panel(ax, x, scores, confidence, title, dot_mode, dot_sources):
    """Draw one accumulation-score panel: score bars, confidence strip and dots.

    Args:
        ax: Matplotlib axes to draw on.
        x: Bar positions, one per row of ``scores``. It is indexed with
            boolean masks and offset by a scalar, so a NumPy array is expected.
        scores: Series of scores on a 0-100 scale; missing values are drawn
            as 50.
        confidence: Series of confidence values; missing values count as 0.5.
        title: Panel title, left aligned.
        dot_mode: 'current', 'option_a', 'option_b' or 'all'; a falsy value
            means 'current'.
        dot_sources: Mapping from dot key to the score Series whose rows at or
            above 70 / at or below 30 are marked with dots. Keys that are
            absent are skipped.
    """
    from matplotlib.colors import LinearSegmentedColormap

    # Purple (low) -> grey (neutral) -> green (high) gradient for the bars.
    gradient = LinearSegmentedColormap.from_list(
        'score_cmap',
        [(0.0, COLORS['purple']), (0.5, '#555555'), (1.0, COLORS['green'])],
    )

    bar_inputs = zip(x, scores.fillna(50), confidence.fillna(0.5))
    for position, score_value, conf_value in bar_inputs:
        # Raise the distance from neutral to a power below 1 so that mild
        # readings still pick up a visible tint.
        deviation = np.clip(score_value / 100.0, 0, 1) - 0.5
        shade = np.clip(0.5 + np.sign(deviation) * (abs(deviation) ** 0.85), 0, 1)
        ax.bar(
            position,
            score_value / 100.0,
            bottom=0,
            color=gradient(shade),
            alpha=0.8 if conf_value >= 0.6 else 0.4,
            width=0.8,
            zorder=2,
        )

        # Confidence strip below the zero line, colour-banded by level.
        if conf_value >= 0.7:
            strip_color = COLORS['green']
        elif conf_value >= 0.4:
            strip_color = COLORS['yellow']
        else:
            strip_color = COLORS['red']
        ax.bar(
            position,
            0.08 * conf_value,
            bottom=-0.12,
            color=strip_color,
            alpha=0.6,
            width=0.6,
            zorder=3,
        )

    # Reference lines at the 30 / 50 / 70 score levels.
    for level, color_key in ((0.30, 'red'), (0.50, 'neutral'), (0.70, 'green')):
        ax.axhline(y=level, color=COLORS[color_key], linestyle='--', linewidth=0.8, alpha=0.4, zorder=1)
    ax.set_ylim(-0.15, 1.05)
    ax.set_yticks([0, 0.3, 0.5, 0.7, 1.0])
    ax.set_yticklabels(['0', '30', '50', '70', '100'])
    ax.set_title(title, color=COLORS['text'], fontsize=10, fontweight='bold', loc='left')

    mode = dot_mode or 'current'
    show_all = mode == 'all'
    dot_keys = ['current', 'option_a', 'option_b'] if show_all else [mode]

    for key in dot_keys:
        source = dot_sources.get(key)
        if source is None:
            continue
        spec = DOT_SPECS.get(key, DOT_SPECS['current'])
        # Offsets only apply when several sources share the panel.
        shift = spec['offset'] if show_all else 0.0
        markers = (
            (source >= 70, 1.02, spec['accum']),
            (source <= 30, -0.06, spec['dist']),
        )
        for hits, y_level, dot_color in markers:
            if not hits.any():
                continue
            ax.scatter(
                x[hits] + shift,
                np.full(hits.sum(), y_level),
                s=60,
                c=dot_color,
                edgecolors=COLORS['white'],
                linewidths=0.6,
                zorder=6,
            )


In [None]:
# Stored per-day metrics for one symbol over an inclusive date range,
# oldest first. Values are bound as parameters rather than interpolated.
query = '''
    SELECT
        date,
        symbol,
        short_buy_volume,
        short_sell_volume,
        short_buy_sell_ratio,
        short_buy_sell_ratio_z,
        lit_buy_volume,
        lit_sell_volume,
        lit_flow_imbalance,
        lit_flow_imbalance_z,
        return_z,
        otc_participation_z,
        confidence
    FROM daily_metrics
    WHERE symbol = ? AND date BETWEEN ? AND ?
    ORDER BY date
'''

# This notebook must never write to the database, so open it read-only.
# A read-only handle cannot create a new empty file when DB_PATH is wrong
# and does not take the write lock that a default connection would.
with duckdb.connect(str(DB_PATH), read_only=True) as conn:
    df = conn.execute(query, [TICKER.upper(), START_DATE, END_DATE]).df()

# Fail early with a clear message instead of plotting empty panels.
if df.empty:
    raise ValueError(f'No data found for {TICKER} between {START_DATE} and {END_DATE}.')

# Coerce the date column to Timestamps and every metric column to a numeric
# dtype; values that cannot be parsed become NaN.
df['date'] = pd.to_datetime(df['date'])
numeric_cols = [
    'short_buy_volume', 'short_sell_volume',
    'short_buy_sell_ratio', 'short_buy_sell_ratio_z',
    'lit_buy_volume', 'lit_sell_volume',
    'lit_flow_imbalance', 'lit_flow_imbalance_z',
    'return_z', 'otc_participation_z',
    'confidence',
]
for col in numeric_cols:
    if col not in df.columns:
        continue
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Missing volumes count as zero when building the combined totals.
short_buy, short_sell, lit_buy, lit_sell = (
    df[name].fillna(0.0)
    for name in ('short_buy_volume', 'short_sell_volume', 'lit_buy_volume', 'lit_sell_volume')
)
total_buy = short_buy + lit_buy
total_sell = short_sell + lit_sell

# Combined buy/sell ratio, left as NaN where there is no sell volume.
valid_ratio = total_sell > 0
df['combined_ratio'] = (total_buy / total_sell).where(valid_ratio)

# Net buy-minus-sell volume, left as NaN on days with no volume at all.
has_flow = pd.concat([short_buy, short_sell, lit_buy, lit_sell], axis=1).gt(0).any(axis=1)
df['vw_flow'] = (total_buy - total_sell).where(has_flow)

df['finra_buy_volume'] = df['short_buy_volume']

# Rebuild the short ratio and its z-score only when the stored columns are
# entirely empty; partially populated columns are used as they are.
if df['short_buy_sell_ratio'].isna().all():
    valid = df['short_sell_volume'] > 0
    df.loc[valid, 'short_buy_sell_ratio'] = df.loc[valid, 'short_buy_volume'].div(
        df.loc[valid, 'short_sell_volume']
    )

z_window, z_min_periods = config.short_z_window, config.zscore_min_periods

if df['short_buy_sell_ratio_z'].isna().all():
    df['short_buy_sell_ratio_z'] = _rolling_zscore(df['short_buy_sell_ratio'], z_window, z_min_periods)

df['vw_flow_z'] = _rolling_zscore(df['vw_flow'], z_window, z_min_periods)

# Same all-empty fallback for the lit-flow z-score.
lit_z = df['lit_flow_imbalance_z']
if lit_z.isna().all():
    lit_z = _rolling_zscore(df['lit_flow_imbalance'], z_window, z_min_periods)

price_z = df['return_z'].fillna(0.0)
otc_z = df['otc_participation_z'].fillna(0.0)

# Three score variants that differ only in the short-signal input:
# the stored ratio z-score, the net-flow z-score, and a weighted blend.
blended_short_z = BLEND_SHORT_WEIGHT * df['short_buy_sell_ratio_z'] + BLEND_FLOW_WEIGHT * df['vw_flow_z']
for score_col, short_signal in (
    ('acc_score_current', df['short_buy_sell_ratio_z']),
    ('acc_score_a', df['vw_flow_z']),
    ('acc_score_b', blended_short_z),
):
    df[score_col] = _compute_score_display(short_signal, lit_z, price_z, otc_z, config)

df['confidence'] = df['confidence'].fillna(0.5)
df.head()


In [None]:
# Seven stacked panels sharing one x-axis: three line charts, one volume bar
# chart and three accumulation-score panels.
plt.style.use('dark_background')
fig, axes = plt.subplots(
    7,
    1,
    figsize=(16, 22),
    sharex=True,
    gridspec_kw={'height_ratios': [2.2, 2.2, 1.6, 2.2, 1.8, 1.8, 1.8]},
)
fig.patch.set_facecolor(COLORS['background'])

# Rows are plotted at integer positions and labelled with their dates.
x = np.arange(len(df))
labels = df['date'].dt.strftime('%y-%m-%d').tolist()

title_kwargs = dict(color=COLORS['text'], fontsize=10, fontweight='bold', loc='left')

# Panel 3: FINRA Buy Volume (B)
ax = axes[2]
_apply_axis_style(ax)
series = df['finra_buy_volume']
mask = series.notna()
ax.bar(x[mask], series[mask], color=COLORS['yellow'], alpha=0.5, width=0.7)
_set_volume_axis(ax, series)
ax.set_ylabel('FINRA B', color=COLORS['text'])
ax.set_title('FINRA Buy Volume (B)', **title_kwargs)

# Panels 1, 2 and 4: cyan line-plus-marker charts that differ only in the
# column shown, the axis helper and the labels.
line_panels = (
    (axes[0], 'combined_ratio', _set_ratio_axis, 'Combined Ratio', 'Combined Buy/Sell Ratio'),
    (axes[1], 'vw_flow', _set_flow_axis, 'VW Flow', 'Volume Weighted Directional Flow'),
    (axes[3], 'short_buy_sell_ratio', _set_ratio_axis, 'Short Ratio', 'Short Sale Buy/Sell Ratio'),
)
for ax, column, configure_axis, ylabel, panel_title in line_panels:
    _apply_axis_style(ax)
    series = df[column]
    mask = series.notna()
    ax.plot(x[mask], series[mask], color=COLORS['cyan'], linewidth=1.8)
    ax.scatter(x[mask], series[mask], color=COLORS['cyan'], s=25, edgecolors=COLORS['white'], linewidths=0.4)
    configure_axis(ax, series)
    ax.set_ylabel(ylabel, color=COLORS['text'])
    ax.set_title(panel_title, **title_kwargs)

dot_sources = {
    'current': df['acc_score_current'],
    'option_a': df['acc_score_a'],
    'option_b': df['acc_score_b'],
}

# Panels 5-7: one score panel per short-signal variant.
score_panels = (
    (axes[4], 'acc_score_current', 'Accumulation Score (Current)'),
    (axes[5], 'acc_score_a', 'Accumulation Score (Option A: vw_flow_z)'),
    (axes[6], 'acc_score_b', 'Accumulation Score (Option B: blended)'),
)
for ax, score_col, panel_title in score_panels:
    _apply_axis_style(ax)
    _plot_score_panel(ax, x, df[score_col], df['confidence'], panel_title, DOT_MODE, dot_sources)

# Date labels go on the bottom panel only; the x-axis is shared.
axes[-1].set_xticks(x)
axes[-1].set_xticklabels(labels, rotation=45, ha='right', fontsize=8)

fig.suptitle(f'{TICKER} - Accumulation Signal Sandbox', color=COLORS['text'], fontsize=14, fontweight='bold')
# Leave room at the top for the figure title.
plt.tight_layout(rect=[0, 0, 1, 0.97])
plt.show()


In [None]:
# Experimental signal: imbalance * log1p(B + S)
# Reuses `x` and `labels` defined by the panel-plot cell.
b, s = df['finra_buy_volume'], df['short_sell_volume']
total = b + s
# Normalised imbalance is only defined when some volume traded; other rows
# become NaN and are dropped from the plot below.
imbalance = np.where(total > 0, (b - s) / total, np.nan)
signal = imbalance * np.log1p(total)

plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(16, 4))
fig.patch.set_facecolor(COLORS['background'])
_apply_axis_style(ax)

mask = pd.Series(signal).notna().to_numpy()
xs, ys = x[mask], signal[mask]
ax.plot(xs, ys, color=COLORS['yellow'], linewidth=1.8)
ax.scatter(xs, ys, color=COLORS['yellow'], s=20, edgecolors=COLORS['white'], linewidths=0.4)
ax.axhline(0.0, color=COLORS['neutral'], linestyle='--', linewidth=1.0, alpha=0.7)

ax.set_title('Imbalance * log1p(B + S)', color=COLORS['text'], fontsize=10, fontweight='bold', loc='left')
ax.set_ylabel('Signal', color=COLORS['text'])
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=45, ha='right', fontsize=8)
plt.tight_layout()
plt.show()
