In [6]:
# Cell 1 - imports and setup

import os
import sys
from dataclasses import replace
from datetime import date
from pathlib import Path

from dotenv import load_dotenv

project_root = Path.cwd()
if not (project_root / 'darkpool_analysis').exists() and (project_root.parent / 'darkpool_analysis').exists():
    project_root = project_root.parent
sys.path.append(str(project_root))

load_dotenv(project_root / '.env')

from darkpool_analysis.config import EXCLUDED_FINRA_TICKERS, load_config
# Module was refactored: fetch_finra -> fetch_finra_otc (OTC weekly) + fetch_finra_short (short sale daily)
from darkpool_analysis.fetch_finra_otc import _load_finra_from_api, _normalize_finra_columns
from darkpool_analysis.fetch_polygon_equity import fetch_polygon_trades

config = load_config()

print(f'Project root: {project_root}')
print(f'FINRA key loaded: {bool(config.finra_api_key)}')
print(f'FINRA secret loaded: {bool(config.finra_api_secret)}')
print(f'Polygon key loaded: {bool(config.polygon_api_key)}')
print(f'FINRA OTC URL: {config.finra_otc_url}')
print(f'FINRA Token URL: {config.finra_token_url}')

Project root: c:\Users\fvign\Dropbox\Vscode\darkpool
FINRA key loaded: True
FINRA secret loaded: True
Polygon key loaded: True
FINRA OTC URL: https://api.finra.org/data/group/otcMarket/name/weeklySummary
FINRA Token URL: https://ews.fip.finra.org/fip/rest/ews/oauth2/access_token?grant_type=client_credentials


In [7]:
# Cell 2 - ticker selection

DATE = '2025-12-22'

TICKERS_TYPE = 'SINGLE'  # Options: 'SINGLE', 'SECTOR_CORE', 'GLOBAL_MACRO'
SINGLE_TICKERS = ['AMZN']

SECTOR_CORE_TICKERS = [
    'XLK',  # Technology (platforms, software, mega-cap growth)
    'SMH',  # Semiconductors (hardware + capex cycle)
    'XLF',  # Financials (money-center banks, insurers)
    'KRE',  # Regional Banks (rates, liquidity stress)
    'XLE',  # Energy (commodities, inflation hedge)
    'XLI',  # Industrials (cyclicals, defense, capex)
    'XLY',  # Consumer Discretionary (growth beta)
    'XLP',  # Consumer Staples (defensive)
    'XLV',  # Health Care (defensive + policy)
    'XLU',  # Utilities (rates, yield proxy)
]

GLOBAL_MACRO_TICKERS = [
    'SPY',   # US large-cap core (S&P 500)
    'QQQ',   # US growth / tech beta
    'IWM',   # US small caps (domestic liquidity)
    'EFA',   # Developed markets ex-US (EU + Japan)
    'EEM',   # Emerging markets (global risk / China beta)
    'EWJ',   # Japan (yield-curve / FX-sensitive)
    'FXI',   # China large-cap (policy + growth stress)
    'VGK',   # Europe (value / banks / energy tilt)
    'TLT',   # US long rates (risk-off / duration)
    'VIXY',  # Volatility (risk regime)
    'UUP',   # US dollar (global liquidity / stress)
    'GLD',   # Gold (inflation / real rates)
    'USO',   # Crude oil (global growth / inflation)
]

if TICKERS_TYPE == 'SINGLE':
    selected_tickers = SINGLE_TICKERS
elif TICKERS_TYPE == 'SECTOR_CORE':
    selected_tickers = SECTOR_CORE_TICKERS
elif TICKERS_TYPE == 'GLOBAL_MACRO':
    selected_tickers = GLOBAL_MACRO_TICKERS
else:
    raise ValueError(f'Unknown TICKERS_TYPE: {TICKERS_TYPE}')

selected_tickers = [ticker.strip().upper() for ticker in selected_tickers]
print('Selected tickers:', selected_tickers)

Selected tickers: ['AMZN']


In [8]:
# Cell 3 - FINRA OTC Weekly ticker check

if not config.finra_api_key or not config.finra_api_secret:
    print('FINRA credentials missing; set FINRA_API_KEY and FINRA_API_SECRET in .env.')
elif not selected_tickers:
    print('No tickers selected for FINRA check.')
else:
    finra_tickers = [t for t in selected_tickers if t not in EXCLUDED_FINRA_TICKERS]
    if not finra_tickers:
        print('All selected tickers are excluded from FINRA checks.')
    else:
        target_date = date.fromisoformat(DATE)
        finra_config = replace(config, finra_tickers=finra_tickers)

        # Pass target_date to API to get current data
        raw_df = _load_finra_from_api(finra_config, symbols=finra_tickers, target_date=target_date)

        if raw_df.empty:
            print('FINRA API returned no rows for these tickers.')
        else:
            # Note: _normalize_finra_columns now requires source_file parameter
            normalized = _normalize_finra_columns(raw_df, finra_config, source_file=None)
            normalized = normalized[normalized['symbol'].isin(finra_tickers)].copy()
            if normalized.empty:
                print('FINRA API returned no rows after symbol filter.')
            else:
                found = sorted(normalized['symbol'].unique())
                missing = sorted(set(finra_tickers) - set(found))
                available_weeks = sorted(normalized['week_start_date'].unique())
                min_week = min(available_weeks)
                max_week = max(available_weeks)
                eligible_weeks = [week for week in available_weeks if week <= target_date]
                matched_week = max(eligible_weeks) if eligible_weeks else None

                print('FINRA found:', found)
                print('FINRA missing:', missing)
                print(f'FINRA coverage: {min_week} → {max_week}')
                print('Target date:', target_date)
                if matched_week is None:
                    print('No FINRA week available on or before target date.')
                else:
                    print('Matched FINRA week:', matched_week)
                    display(normalized[normalized['week_start_date'] == matched_week])

FINRA found: ['AMZN']
FINRA missing: []
FINRA coverage: 2025-11-10 → 2025-11-10
Target date: 2025-12-22
Matched FINRA week: 2025-11-10


Unnamed: 0,symbol,week_start_date,off_exchange_volume,trade_count,tier_identifier,tier_description,issue_name,market_participant_name,mpid,last_update_date,source_file
2241,AMZN,2025-11-10,1102480,2739,T1,NMS Tier 1,"Amazon.com, Inc. Common Stock",BIDS BIDS ATS,BIDS,2025-12-01,
4559,AMZN,2025-11-10,2215,112,T1,NMS Tier 1,"Amazon.com, Inc. Common Stock","BOSS BRUCE MARKETS, LLC.",BOSS,2025-12-01,


In [9]:
# =============================================================================
# Cell 4 - DEBUG: Test FINRA OAuth flow for Short Sale (regShoDaily) endpoint
# =============================================================================
# This cell tests the correct OAuth 2.0 flow for FINRA Query API:
# Step A: Get access token from FINRA Identity Platform (FIP)
# Step B: Call data endpoint with Bearer token

import base64
import requests

CLIENT_ID = os.getenv("FINRA_API_KEY")
CLIENT_SECRET = os.getenv("FINRA_API_SECRET")

TOKEN_URL = "https://ews.fip.finra.org/fip/rest/ews/oauth2/access_token?grant_type=client_credentials"
DATA_URL = "https://api.finra.org/data/group/otcMarket/name/regShoDaily"

print("=== Step A: Get OAuth Token ===")
basic = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
tok = requests.post(TOKEN_URL, headers={"Authorization": f"Basic {basic}", "Accept": "application/json"})
print(f"Token status: {tok.status_code}")
print(f"Token response: {tok.text[:1000]}")

if tok.status_code == 200:
    access_token = tok.json()["access_token"]
    print(f"Access token obtained: {access_token[:20]}...")
    
    print("\n=== Step B: Call regShoDaily endpoint ===")
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    
    # Start with minimal payload - no filters, just limit
    payload = {"limit": 5, "offset": 0}
    
    r = requests.post(DATA_URL, headers=headers, json=payload)
    print(f"Data status: {r.status_code}")
    print(f"Data response: {r.text[:2000]}")
    
    if r.status_code == 200:
        data = r.json()
        if isinstance(data, list):
            print(f"\nReturned {len(data)} rows")
            if data:
                print("Sample row keys:", list(data[0].keys()))
        elif isinstance(data, dict) and "data" in data:
            print(f"\nReturned {len(data['data'])} rows")
            if data["data"]:
                print("Sample row keys:", list(data["data"][0].keys()))
else:
    print("Failed to get token - check credentials")

=== Step A: Get OAuth Token ===
Token status: 200
Token response: {"access_token":"*AAJTSQACMDIABHR5cGUAA0pXVAACUzEAAjAx*eyJ0eXAiOiJKV1QiLCJjdHkiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.ZXlKMGVYQWlPaUpLVjFRaUxDSmxibU1pT2lKQk1USTRRMEpETFVoVE1qVTJJaXdpWVd4bklqb2laR2x5SW4wLi5hZVdGQXBtWlJiV21uNV8tSFZ4N3BBLjVsdEZYSGJVbTNTWEpVLWdLZ1EyOHZyQXpjaWZZdTR5X1IyT2lvcndUbnNQalRTZ0NtVzJwbjBKUnBRMmVSWUdDZGdfdGlDSzQySUluRzZUYU05UnJIMUxKaVhyVkVpbTZ4TkVYaFBCMm1uVUNWMjk3NVJuUU9HbUh5TkpUMFhnMWJUY196elRvemY3bkxhWGxDMGd1ajVJY0ZUWWpnQVZGOFJRRndPU09KNHlSUzNBc2tXeFdHejBQdDhrZEp4ZTZ6Ymc4b2ZUNTBGZ1lvNjFNcV9KYnBZcDVwaks1Y3VDYUVDQnduZF8tOFdHTXlLYzV2dzRJQzQ3SHk5MjE0dVFpa240WkhaZGVYcnJDcFkwNzhybDB2OVVPc3RVa3h6ZU9OVERCTndSdDJrRkFRNXRpTU5rcE04VXRkX0x1VnpjbWlkZlV5dFk4MGtmLVRXOTI1eXhsWkt2R2lpellmSmtxekVJYjh2QjkydUN1eUtsTmxSVXdoUm9OaWVfYzZqbjlyQ28zX2sxd2RyZmZMaUQyRmUyRmQ5QjJMTml4bHJSRHdyZzR1MloydHF5M3gtSzRXLUpjd1hsdTFiWU8tTUlrdU9rVUZ4cUNGNlRlS1BiMWFUeFlJNW5USDhsYmtfV2Rnd1lFcVR4SDUzTTktSUlCYXlBWEx3eTd6b2xiRDJGbVdPYnB1bnlmaUxkcDVhTG5oeG

In [10]:
# Cell 5 - Polygon ticker check

if not config.polygon_api_key:
    print('Polygon key missing; set POLYGON_API_KEY in .env.')
elif not selected_tickers:
    print('No tickers selected for Polygon check.')
else:
    trade_date = date.fromisoformat(DATE)
    os.environ['POLYGON_LIMIT'] = '1000'
    os.environ['POLYGON_MAX_PAGES'] = '1'
    polygon_df, polygon_failures = fetch_polygon_trades(config, selected_tickers, trade_date)
    if polygon_df.empty:
        print('Polygon returned no rows for these tickers.')
    else:
        counts = polygon_df['symbol'].value_counts().sort_index()
        print('Polygon rows per ticker:')
        print(counts)
        polygon_df.head()
    if polygon_failures:
        print('Polygon failures:', polygon_failures)


Polygon rows per ticker:
symbol
AMZN    1000
Name: count, dtype: int64
