In [None]:
import os
import sys
from dataclasses import replace
from datetime import date
from pathlib import Path

from dotenv import load_dotenv

# Locate the repository root: start from the working directory and step up one
# level when `darkpool_analysis` is not found there but is found in the parent
# (e.g. the notebook was launched from a subdirectory of the repository).
project_root = Path.cwd()
if not (project_root / 'darkpool_analysis').exists() and (project_root.parent / 'darkpool_analysis').exists():
    project_root = project_root.parent

# Make the package importable. Skip the append when the entry is already on
# sys.path so that re-running this cell does not accumulate duplicates.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Read environment variables from the .env file at the project root.
load_dotenv(dotenv_path=project_root / '.env')

from darkpool_analysis.config import EXCLUDED_FINRA_TICKERS, load_config
from darkpool_analysis.fetch_finra import _load_finra_from_api, _normalize_finra_columns
from darkpool_analysis.fetch_polygon_equity import fetch_polygon_trades

# Build the runtime configuration via darkpool_analysis.config.load_config.
config = load_config()

print(f'Project root: {project_root}')

# Report only whether each credential is set; the values are never printed.
for credential_label, credential_attr in (
    ('FINRA key', 'finra_api_key'),
    ('FINRA secret', 'finra_api_secret'),
    ('Polygon key', 'polygon_api_key'),
):
    print(f'{credential_label} loaded: {bool(getattr(config, credential_attr))}')


In [None]:
# Dump the FINRA request settings held on the loaded config, one per line.
for field_name in (
    'finra_otc_url',
    'finra_otc_file',
    'finra_request_method',
    'finra_request_json',
    'finra_request_params',
):
    print(getattr(config, field_name))

# Credential presence only: the key and secret values are not printed.
print(f"FINRA key loaded: {bool(config.finra_api_key)}")
print(f"FINRA secret loaded: {bool(config.finra_api_secret)}")


In [None]:
# Probe the raw FINRA payload: list its columns and the date range of whichever
# date-like column it carries.
#
# This cell needs `finra_config` and `finra_tickers`, which are only assigned by
# the FINRA check cell. On a fresh kernel they do not exist yet, so report that
# instead of failing with a NameError.
missing_prereqs = [name for name in ('finra_config', 'finra_tickers') if name not in globals()]
if missing_prereqs:
    print('Run the FINRA check cell first; undefined:', missing_prereqs)
else:
    raw_df = _load_finra_from_api(finra_config, symbols=finra_tickers)
    print(raw_df.columns)
    # Candidate date column names; only the ones actually present are reported.
    for col in ["weekStartDate", "week_start_date", "tradeDate", "date"]:
        if col in raw_df.columns:
            print(col, raw_df[col].min(), raw_df[col].max())


In [8]:
import duckdb

# Print the earliest and latest week_start_date stored in finra_otc_volume_raw.
# The database path is resolved against project_root so the cell works from
# whichever directory the notebook was launched in.
db_path = project_root / 'darkpool_analysis' / 'data' / 'darkpool.duckdb'

# Open read-only: this cell only inspects data, and a mistyped path then raises
# instead of creating a new empty database file.
con = duckdb.connect(str(db_path), read_only=True)
try:
    print(con.execute("select min(week_start_date), max(week_start_date) from finra_otc_volume_raw").fetchall())
finally:
    # Always release the handle so the kernel does not keep the file open.
    con.close()


[(datetime.date(2023, 11, 6), datetime.date(2023, 11, 6))]


In [None]:
# --- Run parameters ---------------------------------------------------------
# ISO date string (YYYY-MM-DD); later cells parse it with date.fromisoformat.
DATE = '2025-12-22'

# Which ticker group to check. Options: 'SINGLE', 'SECTOR_CORE', 'GLOBAL_MACRO'
TICKERS_TYPE = 'SINGLE'
SINGLE_TICKERS = ['AMZN']

SECTOR_CORE_TICKERS = [
    'XLK',  # Technology (platforms, software, mega-cap growth)
    'SMH',  # Semiconductors (hardware + capex cycle)
    'XLF',  # Financials (money-center banks, insurers)
    'KRE',  # Regional Banks (rates, liquidity stress)
    'XLE',  # Energy (commodities, inflation hedge)
    'XLI',  # Industrials (cyclicals, defense, capex)
    'XLY',  # Consumer Discretionary (growth beta)
    'XLP',  # Consumer Staples (defensive)
    'XLV',  # Health Care (defensive + policy)
    'XLU',  # Utilities (rates, yield proxy)
]

GLOBAL_MACRO_TICKERS = [
    'SPY',   # US large-cap core (S&P 500)
    'QQQ',   # US growth / tech beta
    'IWM',   # US small caps (domestic liquidity)
    'EFA',   # Developed markets ex-US (EU + Japan)
    'EEM',   # Emerging markets (global risk / China beta)
    'EWJ',   # Japan (yield-curve / FX-sensitive)
    'FXI',   # China large-cap (policy + growth stress)
    'VGK',   # Europe (value / banks / energy tilt)
    'TLT',   # US long rates (risk-off / duration)
    'VIXY',  # Volatility (risk regime)
    'UUP',   # US dollar (global liquidity / stress)
    'GLD',   # Gold (inflation / real rates)
    'USO',   # Crude oil (global growth / inflation)
]

# Dispatch table from the TICKERS_TYPE option to its ticker list.
TICKER_GROUPS = {
    'SINGLE': SINGLE_TICKERS,
    'SECTOR_CORE': SECTOR_CORE_TICKERS,
    'GLOBAL_MACRO': GLOBAL_MACRO_TICKERS,
}

if TICKERS_TYPE not in TICKER_GROUPS:
    raise ValueError(f'Unknown TICKERS_TYPE: {TICKERS_TYPE}')

# Normalise symbols (trim whitespace, upper-case) into a fresh list so the
# constant lists above are left untouched.
selected_tickers = [symbol.strip().upper() for symbol in TICKER_GROUPS[TICKERS_TYPE]]
print('Selected tickers:', selected_tickers)


In [None]:
# FINRA availability check: fetch data for the selected tickers, report which
# symbols came back, the range of weeks covered, and the latest week on or
# before DATE. Each failure mode prints a message rather than raising.
if not config.finra_api_key or not config.finra_api_secret:
    print('FINRA credentials missing; set FINRA_API_KEY and FINRA_API_SECRET in .env.')
elif not selected_tickers:
    print('No tickers selected for FINRA check.')
else:
    # Drop symbols the project explicitly excludes from FINRA lookups.
    finra_tickers = [t for t in selected_tickers if t not in EXCLUDED_FINRA_TICKERS]
    if not finra_tickers:
        print('All selected tickers are excluded from FINRA checks.')
    else:
        # Copy of the config with the ticker list overridden for this check.
        finra_config = replace(config, finra_tickers=finra_tickers)
        raw_df = _load_finra_from_api(finra_config, symbols=finra_tickers)
        if raw_df.empty:
            print('FINRA API returned no rows for these tickers.')
        else:
            normalized = _normalize_finra_columns(raw_df, finra_config)
            # Keep only the requested symbols; .copy() detaches from the source frame.
            normalized = normalized[normalized['symbol'].isin(finra_tickers)].copy()
            if normalized.empty:
                print('FINRA API returned no rows after symbol filter.')
            else:
                found = sorted(normalized['symbol'].unique())
                missing = sorted(set(finra_tickers) - set(found))
                target_date = date.fromisoformat(DATE)
                available_weeks = sorted(normalized['week_start_date'].unique())
                min_week = min(available_weeks)
                max_week = max(available_weeks)
                # NOTE(review): assumes week_start_date values compare against
                # datetime.date — confirm what _normalize_finra_columns produces.
                eligible_weeks = [week for week in available_weeks if week <= target_date]
                matched_week = max(eligible_weeks) if eligible_weeks else None
                print('FINRA found:', found)
                print('FINRA missing:', missing)
                print(f'FINRA weeks: {min_week} -> {max_week}')
                print('Target date:', target_date)
                if matched_week is None:
                    print('No FINRA week available on or before target date.')
                else:
                    print('Matched FINRA week:', matched_week)
                    # A bare expression nested inside if/else is never rendered
                    # by the notebook, so print the preview explicitly.
                    print(normalized[normalized['week_start_date'] == matched_week].head())


In [None]:
# Polygon availability check: fetch trades for the selected tickers on DATE and
# report the row count per symbol plus any per-ticker failures.
if not config.polygon_api_key:
    print('Polygon key missing; set POLYGON_API_KEY in .env.')
elif not selected_tickers:
    print('No tickers selected for Polygon check.')
else:
    trade_date = date.fromisoformat(DATE)
    # NOTE(review): presumably fetch_polygon_trades reads these variables at
    # call time to cap the request size — confirm. They remain set in
    # os.environ for the rest of the kernel session.
    os.environ['POLYGON_LIMIT'] = '1000'
    os.environ['POLYGON_MAX_PAGES'] = '1'
    polygon_df, polygon_failures = fetch_polygon_trades(config, selected_tickers, trade_date)
    if polygon_df.empty:
        print('Polygon returned no rows for these tickers.')
    else:
        counts = polygon_df['symbol'].value_counts().sort_index()
        print('Polygon rows per ticker:')
        print(counts)
        # A bare expression nested inside if/else is never rendered by the
        # notebook, so print the preview explicitly.
        print(polygon_df.head())
    if polygon_failures:
        print('Polygon failures:', polygon_failures)
