# Senator Backtest EDA (Transaction vs Filing Date)

This notebook explores performance of US senators' disclosed trades,
comparing strategies that enter on the **transaction date** vs the
**filing date**. It is designed to be the experimental playground
before we formalize these calculations into persistent DB tables and
Streamlit views.

High-level steps in this notebook:
1. Load 5 years of PTR trades from the local database.
2. Ensure we have prices at **transaction** and **filing** dates.
3. Define parameterised strategies (TX vs FILING, HOLD_TODAY vs HOLD_N_DAYS, MIDPOINT vs EQUAL weight).
4. Compute per-trade PnL and aggregate per senator and per strategy.
5. Compute risk metrics (Sharpe, Calmar, drawdowns, etc.) and visualise results.

> NOTE: This is an initial skeleton. Fill in and iterate on the helper
> functions below as we refine the backtest design.


In [41]:
# --- Setup & Imports ---
from __future__ import annotations

import os
import datetime as dt
from typing import Literal
import sys

import numpy as np
import pandas as pd
import plotly.express as px
from dotenv import load_dotenv
from sqlalchemy import text

# Resolve the project root as the parent of the current working directory
# (os.getcwd() joined with os.pardir), so the kernel must be started from a
# direct sub-folder of the project for these paths to be right.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Make sure the project root (which contains db/, scraper/, app/) is on sys.path
# before the project-local imports that follow.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Load DATABASE_URL (and any other secrets) from the project's .env file.
load_dotenv(os.path.join(project_root, '.env'))
# Local project imports
from db.config import engine, SessionLocal
from db.models import Trade
from db.prices import get_price_on_or_before, get_latest_price

# Analysis helpers for senator-level backtests and EDA
from analysis_helpers import (
    load_trades_window as ah_load_trades_window,
    prepare_trades,
    add_basic_returns,
    add_multi_horizon_returns,
    add_quartile_flags,
    quartile_stats_for_senators,
    quartile_stats_for_tickers,
    top_senators_by_quarter,
)

print('Using DATABASE_URL:', os.getenv('DATABASE_URL'))


Using DATABASE_URL: mssql+pyodbc://***:***@insiderscraper.database.windows.net:1433/insiderscraper-dev-db?driver=ODBC+Driver+18+for+SQL+Server&Encrypt=yes&TrustServerCertificate=no&Connection+Timeout=30


In [42]:
# --- Data loading helpers ---
def load_trades_window(
    start_date: dt.date,
    end_date: dt.date,
) -> pd.DataFrame:
    """Load trades from the DB between filing_date [start_date, end_date].

    Optionally filter by chamber (defaults to 'Senate'). This mirrors the
    join used in app/data_access.py but exposes an arbitrary date window
    instead of a fixed 'last N days'.
    """

    params = {
        'start_date': start_date,
        'end_date': end_date,
    }


    query = text(
        f"""
        SELECT t.*, m.company_name, m.sector, m.industry
        FROM trades AS t
        LEFT JOIN ticker_metadata AS m
          ON t.ticker = m.ticker
        WHERE t.filing_date BETWEEN :start_date AND :end_date
        ORDER BY t.filing_date ASC
        """
    )

    with engine.connect() as conn:
        df = pd.read_sql(query, conn, params=params)
    return df

# Example usage: load the window ending today and starting 5 * 365 days
# earlier (leap days are not accounted for), then preview it. Tweak as needed.
today = dt.date.today()
five_years_ago = today - dt.timedelta(days=5 * 365)
trades_df = load_trades_window(five_years_ago, today)
trades_df.head()


Unnamed: 0,id,senator_name,senator_first_name,senator_last_name,senator_display_name,chamber,report_id,report_type,report_format,filing_date,...,amount_range_raw,amount_min,amount_max,mid_point,comment,price_at_transaction,current_price,company_name,sector,industry
0,1696,Thomas R Carper,Thomas R,Carper,"Carper, Thomas R. (Senator)",Senate,d6290ba2-3a9e-4774-bcd6-6bd2d90e4138,PTR,ptr,2024-03-04,...,"$1,001 - $15,000",1001.0,15000.0,8000.5,,21.8225,28.82,,,
1,1695,Thomas R Carper,Thomas R,Carper,"Carper, Thomas R. (Senator)",Senate,d6290ba2-3a9e-4774-bcd6-6bd2d90e4138,PTR,ptr,2024-03-04,...,"$1,001 - $15,000",1001.0,15000.0,8000.5,,137.759,116.69,,,
2,2185,Thomas H Tuberville,Thomas H,Tuberville,"Tuberville, Tommy (Senator)",Senate,2fc8e17b-0db3-404a-b617-b180804063c6,PTR,ptr,2024-03-15,...,"$50,001 - $100,000",50001.0,100000.0,75000.5,,,,,,
3,2184,Thomas H Tuberville,Thomas H,Tuberville,"Tuberville, Tommy (Senator)",Senate,2fc8e17b-0db3-404a-b617-b180804063c6,PTR,ptr,2024-03-15,...,"$50,001 - $100,000",50001.0,100000.0,75000.5,,20.5242,25.24,,,
4,1681,Shelley M Capito,Shelley M,Capito,"Capito, Shelley Moore (Senator)",Senate,94bc262a-9045-40d2-be53-18fd4ee6568c,PTR,ptr,2024-03-15,...,"$15,001 - $50,000",15001.0,50000.0,32500.5,,90.7402,97.49,,,


In [43]:
# --- Price at filing-date helper (in-memory) ---
def attach_price_at_filing(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of `df` with a float `price_at_filing` column added.

    Each distinct (ticker, filing_date) pair is looked up once through
    get_price_on_or_before inside a single SessionLocal session. Rows whose
    pair has no price (missing ticker/date, or the lookup returned None) get
    NaN.

    The input frame is never modified. When `df` is empty or lacks the
    `ticker` / `filing_date` columns, the copy is returned with an all-NaN
    `price_at_filing` column and no lookups are made.
    """

    # Copy first so that every return path leaves the caller's frame untouched.
    out = df.copy()

    if out.empty or 'ticker' not in out.columns or 'filing_date' not in out.columns:
        out['price_at_filing'] = np.nan
        return out

    key_pairs = (
        out[['ticker', 'filing_date']]
        .dropna()
        .drop_duplicates()
        .itertuples(index=False, name=None)
    )

    price_map: dict[tuple[str, dt.date], float] = {}

    with SessionLocal() as session:
        for ticker, filing_date in key_pairs:
            if not ticker:
                continue
            price = get_price_on_or_before(session, ticker, filing_date)
            if price is not None:
                price_map[(ticker, filing_date)] = price

    # Pairs absent from the map (including those with a missing ticker or
    # date) fall back to NaN; the explicit dtype keeps the column numeric even
    # when every lookup failed.
    out['price_at_filing'] = pd.Series(
        [price_map.get(pair, np.nan) for pair in zip(out['ticker'], out['filing_date'])],
        index=out.index,
        dtype='float64',
    )

    return out

# Example usage after loading trades_df: rebind trades_df to the frame that
# carries the extra price_at_filing column, then preview the relevant columns.
trades_df = attach_price_at_filing(trades_df)
trades_df[['ticker', 'filing_date', 'price_at_filing']].head()


$SSBK: possibly delisted; no timezone found

1 Failed download:
['SSBK']: possibly delisted; no timezone found
$GOGL: possibly delisted; no timezone found

1 Failed download:
['GOGL']: possibly delisted; no timezone found
$ALTM: possibly delisted; no timezone found

1 Failed download:
['ALTM']: possibly delisted; no timezone found
$ETRN: possibly delisted; no timezone found

1 Failed download:
['ETRN']: possibly delisted; no timezone found
$LSXMK: possibly delisted; no timezone found

1 Failed download:
['LSXMK']: possibly delisted; no timezone found
$WBA: possibly delisted; no timezone found

1 Failed download:
['WBA']: possibly delisted; no timezone found
$PARA: possibly delisted; no timezone found

1 Failed download:
['PARA']: possibly delisted; no timezone found
$HES: possibly delisted; no timezone found

1 Failed download:
['HES']: possibly delisted; no timezone found
$SSBK: possibly delisted; no timezone found

1 Failed download:
['SSBK']: possibly delisted; no timezone found
$WB

Unnamed: 0,ticker,filing_date,price_at_filing
0,EQNR,2024-03-04,21.822527
1,TGT,2024-03-04,137.759003
2,SSBK,2024-03-15,
3,SBLK,2024-03-15,20.524195
4,SBUX,2024-03-15,90.740211


In [44]:
# --- Strategy engine ---
EntryMode = Literal['TX', 'FILING']
HoldMode = Literal['TODAY', 'N_DAYS']
SizeMode = Literal['MIDPOINT', 'EQUAL']

def apply_strategy(
    df: pd.DataFrame,
    entry_mode: EntryMode,
    hold_mode: HoldMode,
    hold_days: int | None,
    size_mode: SizeMode,
) -> pd.DataFrame:
    """Compute per-trade PnL for a given strategy configuration.

    Dimensions:
    * entry_mode: 'TX' (transaction_date) vs 'FILING' (filing_date).
    * hold_mode: 'TODAY' (mark-to-market) vs 'N_DAYS' (fixed horizon).
    * size_mode: 'MIDPOINT' (size ∝ mid_point) vs 'EQUAL' (1 unit per trade).
+
    Exit prices are obtained via PriceCache/yfinance using
    get_price_on_or_before / get_latest_price.
    """

    if df.empty:
        return df.assign(
            strategy_key=[],
            entry_date=[],
            exit_date=[],
            entry_price=[],
            exit_price=[],
            direction=[],
            notional=[],
            trade_return=[],
            pnl_dollars=[],
        )

    df = df.copy()

    # 1) Entry date & price
    if entry_mode == 'TX':
        entry_date = df['transaction_date']
        entry_price = df['price_at_transaction']
    elif entry_mode == 'FILING':
        entry_date = df['filing_date']
        entry_price = df['price_at_filing']
    else:
        raise ValueError(f'Unknown entry_mode: {entry_mode}')

    # 2) Exit date & price
    if hold_mode == 'TODAY':
        # Mark-to-market using latest available close per ticker.
        # We approximate exit_date as 'today' for all trades.
        exit_date = pd.Series(dt.date.today(), index=df.index)
        tickers = sorted({t for t in df['ticker'].dropna().unique() if t})
        latest_price_map: dict[str, float] = {}
        with SessionLocal() as session:
            for ticker in tickers:
                price = get_latest_price(session, ticker)
                if price is not None:
                    latest_price_map[ticker] = price

        exit_price = [
            latest_price_map.get(t) if t is not None else np.nan
            for t in df.get('ticker')
        ]
    elif hold_mode == 'N_DAYS':
        if hold_days is None or hold_days <= 0:
            raise ValueError('hold_days must be a positive int')
        # Target exit date = entry_date + hold_days (calendar).
        exit_date = entry_date + pd.to_timedelta(hold_days, unit='D')

        key_pairs = (
            df[['ticker']].assign(exit_date=exit_date)
            .dropna()
            .drop_duplicates()
            .itertuples(index=False, name=None)
        )

        price_map: dict[tuple[str, dt.date], float] = {}
        with SessionLocal() as session:
            for ticker, dt_exit in key_pairs:
                if not ticker or pd.isna(dt_exit):
                    continue
                # Convert pandas Timestamp -> date if needed
                if hasattr(dt_exit, 'date'):
                    target_date = dt_exit.date()
                else:
                    target_date = dt_exit
                price = get_price_on_or_before(session, ticker, target_date)
                if price is not None:
                    price_map[(ticker, target_date)] = price

        exit_price = []
        for t, d in zip(df.get('ticker'), exit_date):
            if t is None or pd.isna(d):
                exit_price.append(np.nan)
                continue
            if hasattr(d, 'date'):
                key = (t, d.date())
            else:
                key = (t, d)
            exit_price.append(price_map.get(key, np.nan))
    else:
        raise ValueError(f'Unknown hold_mode: {hold_mode}')

    exit_price = pd.Series(exit_price, index=df.index)

    # 3) Direction: BUY = +1, SELL = -1, ignore EXCHANGE (0)
    tx_type = df['transaction_type'].fillna('')
    direction = np.where(
        tx_type == 'BUY',
        1.0,
        np.where(tx_type == 'SELL', -1.0, 0.0),
    )

    # 4) Notional sizing
    if size_mode == 'MIDPOINT':
        notional = df['mid_point'].fillna(0.0)
    elif size_mode == 'EQUAL':
        notional = np.where(direction != 0.0, 1.0, 0.0)
    else:
        raise ValueError(f'Unknown size_mode: {size_mode}')

    # 5) Returns & PnL
    entry_price = pd.to_numeric(entry_price, errors='coerce')
    exit_price = pd.to_numeric(exit_price, errors='coerce')
    valid_mask = (entry_price > 0) & (exit_price > 0) & (direction != 0.0)
    raw_ret = np.where(
        valid_mask,
        (exit_price - entry_price) / entry_price,
        np.nan,
    )
    trade_return = direction * raw_ret
    pnl_dollars = notional * trade_return

    strategy_key = f'{entry_mode}_HOLD_{hold_mode}_{hold_days or 0}_{size_mode}'

    out = df.assign(
        strategy_key=strategy_key,
        entry_date=entry_date,
        exit_date=exit_date,
        entry_price=entry_price,
        exit_price=exit_price,
        direction=direction,
        notional=notional,
        trade_return=trade_return,
        pnl_dollars=pnl_dollars,
    )

    return out


In [35]:
# --- Metrics & risk helpers ---
def compute_sharpe(returns: pd.Series, periods_per_year: int = 252) -> float:
    """Annualised mean-over-volatility ratio of a return series (zero risk-free rate).

    `returns` is treated as one observation per period and scaled by
    sqrt(periods_per_year). In this notebook the observations are trades
    rather than days, so the figure is meant for ranking strategies against
    each other, not as an absolute Sharpe ratio.

    NaN is returned when fewer than two non-null observations exist or when
    their sample standard deviation is exactly zero.
    """
    observed = returns.dropna()
    if len(observed) >= 2:
        volatility = observed.std(ddof=1)
        if volatility != 0:
            annualiser = np.sqrt(periods_per_year)
            return float(observed.mean() / volatility * annualiser)
    return float('nan')

def summarize_senator_strategy(trades_with_pnl: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-trade results into per-senator, per-strategy metrics.

    Expects columns: senator_name, strategy_key, trade_return, notional,
    pnl_dollars, entry_date, exit_date. Rows with a missing trade_return are
    ignored.

    Returns
    -------
    One row per (senator_name, strategy_key), sorted by those keys, with
    columns n_trades, total_notional, total_pnl, avg_return, median_return,
    win_rate, sharpe, cagr, max_drawdown and calmar. An empty DataFrame is
    returned when the input is empty or has no usable trade_return.

    Notes
    -----
    * The equity curve compounds trade returns one after another, ordered by
      exit date and then entry date. Overlapping trades are therefore treated
      as sequential, so cagr / max_drawdown / calmar are comparative figures
      rather than realised portfolio statistics.
    * cagr (and hence calmar) is NaN when compounding drives equity below
      zero, which happens once a single trade_return is below -1.
    """
    if trades_with_pnl.empty:
        return pd.DataFrame()

    scored = trades_with_pnl.dropna(subset=['trade_return'])
    if scored.empty:
        return pd.DataFrame()

    # Dates may arrive as datetime.date objects in one column and Timestamps
    # in the other; normalise both so sorting and subtraction are well defined.
    scored = scored.assign(
        _entry_ts=pd.to_datetime(scored['entry_date'], errors='coerce'),
        _exit_ts=pd.to_datetime(scored['exit_date'], errors='coerce'),
    )

    records = []
    for (senator, strategy), group in scored.groupby(['senator_name', 'strategy_key']):
        # Entry date breaks ties on exit date (every row ties in TODAY mode),
        # which keeps the drawdown path independent of the input row order.
        ordered = group.sort_values(['_exit_ts', '_entry_ts'])
        rets = ordered['trade_return']

        # Equity curve under sequential full re-investment of each trade.
        equity = (1.0 + rets).cumprod()

        first_entry = ordered['_entry_ts'].min()
        last_exit = ordered['_exit_ts'].max()
        if pd.notna(first_entry) and pd.notna(last_exit):
            days = max((last_exit - first_entry).days, 1)
        else:
            days = 1
        years = days / 365.25

        final_equity = float(equity.iloc[-1])
        if final_equity >= 0:
            cagr = float(final_equity ** (1.0 / years) - 1.0)
        else:
            # A fractional power of a negative number has no real value.
            cagr = float('nan')

        drawdown = equity / equity.cummax() - 1.0
        max_dd = float(drawdown.min())
        calmar = float('nan')
        if max_dd < 0 and not np.isnan(cagr):
            calmar = float(cagr / abs(max_dd))

        records.append({
            'senator_name': senator,
            'strategy_key': strategy,
            'n_trades': len(ordered),
            'total_notional': ordered['notional'].sum(),
            'total_pnl': ordered['pnl_dollars'].sum(),
            'avg_return': rets.mean(),
            'median_return': rets.median(),
            'win_rate': (rets > 0).mean(),
            'sharpe': compute_sharpe(rets),
            'cagr': cagr,
            'max_drawdown': max_dd,
            'calmar': calmar,
        })

    return pd.DataFrame(records)


In [45]:
# --- Example workflow: run a few strategies and visualise rankings ---
# NOTE: This cell can be expensive over the full 5-year window because it
# will trigger price lookups via yfinance. Consider starting with a shorter
# date range while iterating.

# Reuse `trades_with_filing_price` when it already exists in the kernel;
# otherwise load a 5 * 365-day window and attach filing-date prices.
# NOTE(review): the globals() guard makes this cell depend on kernel state, so
# a frame left over from an earlier run is reused without being refreshed.
if 'trades_with_filing_price' not in globals():
    today = dt.date.today()
    start = today - dt.timedelta(days=5 * 365)
    trades = load_trades_window(start, today)
    trades_with_filing_price = attach_price_at_filing(trades)
else:
    print("Reusing trades_with_filing_price from previous cell.")

# Everything downstream works from the frame that has price_at_filing.
trades = trades_with_filing_price


# Each config is (entry_mode, hold_mode, hold_days, size_mode) as accepted by
# apply_strategy; hold_days is None because both configs mark to market today.
configs = [
    ('TX', 'TODAY', None, 'MIDPOINT'),
    ('FILING', 'TODAY', None, 'MIDPOINT'),
]
results = []
for entry_mode, hold_mode, hold_days, size_mode in configs:
    res = apply_strategy(trades, entry_mode, hold_mode, hold_days, size_mode)
    results.append(res)
# Stack the per-strategy frames; strategy_key tells the rows apart.
all_trades = pd.concat(results, ignore_index=True)

summary = summarize_senator_strategy(all_trades)
# Must match a strategy_key produced above, otherwise the chart is empty.
strategy_to_plot = 'TX_HOLD_TODAY_0_MIDPOINT'
plot_df = (
    summary[summary['strategy_key'] == strategy_to_plot]
    .sort_values('sharpe', ascending=False)
    .head(20)
)

# Bar chart of the (up to) 20 senators with the highest Sharpe for that strategy.
fig = px.bar(
    plot_df,
    x='senator_name',
    y='sharpe',
    title=f'Top senators by Sharpe ({strategy_to_plot})',
)
fig.show()


Reusing trades_with_filing_price from previous cell.


$BRK.B: possibly delisted; no timezone found

1 Failed download:
['BRK.B']: possibly delisted; no timezone found
$BSCP: possibly delisted; no price data found  (1d 2026-01-23 -> 2026-02-23)

1 Failed download:
['BSCP']: possibly delisted; no price data found  (1d 2026-01-23 -> 2026-02-23)
$IBDQ: possibly delisted; no price data found  (1d 2026-01-23 -> 2026-02-23)

1 Failed download:
['IBDQ']: possibly delisted; no price data found  (1d 2026-01-23 -> 2026-02-23)
$K: possibly delisted; no price data found  (1d 2026-01-23 -> 2026-02-23) (Yahoo error = "No data found, symbol may be delisted")

1 Failed download:
['K']: possibly delisted; no price data found  (1d 2026-01-23 -> 2026-02-23) (Yahoo error = "No data found, symbol may be delisted")
$BRK.B: possibly delisted; no timezone found

1 Failed download:
['BRK.B']: possibly delisted; no timezone found
$BSCP: possibly delisted; no price data found  (1d 2026-01-23 -> 2026-02-23)

1 Failed download:
['BSCP']: possibly delisted; no price da

In [37]:
# Quick check of how many senators and which strategies reached the results.
# NOTE(review): a cell only displays its last expression, so the value of the
# first line (computed from all_trades) is evaluated and then discarded.
all_trades['senator_name'].nunique(), all_trades['senator_name'].value_counts().head()
summary['senator_name'].nunique(), summary['strategy_key'].unique()


(13,
 array(['FILING_HOLD_TODAY_0_MIDPOINT', 'TX_HOLD_TODAY_0_MIDPOINT'],
       dtype=object))

In [38]:
# --- Analysis using analysis_helpers: multi-horizon returns & quartiles ---
# NOTE(review): the output recorded for this cell shows top_share and
# bottom_share both equal to 1.0 for most senators and tickers, and medians
# of 0.0 at every horizon, so the quartile rankings below are degenerate
# until the horizon returns are fixed.

import datetime as dt
from IPython.display import display

# Parameter: lookback window for analysis (by filing_date)
lookback_days = 365  # adjust as needed (e.g. 90 for last quarter)
today = dt.date.today()
start = today - dt.timedelta(days=lookback_days)

# 1) Load raw trades from DB using the shared helpers
trades_raw_ah = ah_load_trades_window(start, today, chamber="Senate")
print(f"Loaded {len(trades_raw_ah)} raw trades between {start} and {today}.")

# 2) Prepare trades and add return metrics based on price_at_transaction/current_price
trades_prep = prepare_trades(trades_raw_ah)
trades_prep = add_basic_returns(trades_prep)
trades_prep = add_multi_horizon_returns(trades_prep, horizons=(1, 7, 30))
print(f"Prepared {len(trades_prep)} trades with non-null pricing.")

# 3) Global distribution summary across horizons (senator perspective)
# These column names are expected to exist after the helper calls above.
metrics = [
    'pct_return',
    'ret_close_close_1d',
    'ret_close_close_7d',
    'ret_close_close_30d',
]
# Reshape to one row per (trade, horizon) so all horizons share one groupby.
df_long = trades_prep.melt(
    id_vars=['id', 'senator_display_name', 'ticker'],
    value_vars=metrics,
    var_name='horizon',
    value_name='ret',
).dropna(subset=['ret'])

horizon_summary = (
    df_long.groupby('horizon')['ret']
    .agg(['count', 'mean', 'median', 'std', 'min', 'max'])
    .reset_index()
)
print("\nGlobal return distribution by horizon:")
display(horizon_summary)

# 4) Quartile-based "suspiciousness" metrics for senators and tickers
metric = 'ret_close_close_7d'  # use 7-day horizon as a baseline
trades_q = add_quartile_flags(trades_prep, metric)

sen_q_stats = quartile_stats_for_senators(trades_q, metric)
sen_q_stats = sen_q_stats.sort_values('top_share', ascending=False)
print("\nTop senators by share of trades in top quartile (7d close-close):")
display(sen_q_stats.head(20))

ticker_q_stats = quartile_stats_for_tickers(trades_q, metric)
ticker_q_stats = ticker_q_stats.sort_values('top_share', ascending=False)
print("\nTop tickers by share of trades in top quartile (7d close-close):")
display(ticker_q_stats.head(20))

# 5) Top senators by quarter using the same 7-day horizon
sen_by_quarter = top_senators_by_quarter(
    trades_prep,
    metric=metric,
    min_trades=3,
    top_k=10,
)
print("\nTop senators by quarter (7d close-close average returns):")
display(sen_by_quarter.head(30))


Loaded 566 raw trades between 2025-02-22 and 2026-02-22.
Prepared 551 trades with non-null pricing.

Global return distribution by horizon:


Unnamed: 0,horizon,count,mean,median,std,min,max
0,pct_return,551,-0.0119,-0.019781,0.56688,-3.089948,3.175944
1,ret_close_close_1d,551,-0.000384,0.0,0.00999,-0.180249,0.056638
2,ret_close_close_30d,551,-0.001114,-0.0,0.065838,-0.501043,0.48395
3,ret_close_close_7d,551,0.000299,0.0,0.021502,-0.219407,0.18532



Top senators by share of trades in top quartile (7d close-close):


Unnamed: 0,senator_display_name,n_trades,top_share,bottom_share,avg_ret,med_ret,win_rate
0,"Banks, James E. (Senator)",1,1.0,1.0,0.0,-0.0,0.0
9,"Kennedy, John (Senator)",1,1.0,1.0,0.0,0.0,0.0
18,"Smith, Tina (Senator)",3,1.0,1.0,0.0,-0.0,0.0
17,"Schiff, Adam B. (Senator)",1,1.0,1.0,0.0,-0.0,0.0
16,"Peters, Gary (Senator)",6,1.0,1.0,0.0,-0.0,0.0
14,"Moreno, Bernardo (Senator)",1,1.0,1.0,0.0,-0.0,0.0
13,"Moran, Jerry (Senator)",2,1.0,1.0,0.0,0.0,0.0
10,"King, Angus (Senator)",26,1.0,0.961538,0.004081,0.0,0.038462
11,"McConnell, A. Mitchell Jr. (Senator)",6,1.0,0.666667,0.012549,0.0,0.333333
8,"Justice II, James Conley (Senator)",1,1.0,1.0,0.0,-0.0,0.0



Top tickers by share of trades in top quartile (7d close-close):


Unnamed: 0,ticker,n_trades,top_share,bottom_share,avg_ret,med_ret,win_rate
107,JMBS,3,1.0,1.0,0.0,0.0,0.0
147,PANW,1,1.0,1.0,0.0,0.0,0.0
136,NLY,1,1.0,1.0,0.0,0.0,0.0
137,NOW,1,1.0,1.0,0.0,0.0,0.0
138,NRG,1,1.0,1.0,0.0,-0.0,0.0
140,OC,1,1.0,1.0,0.0,-0.0,0.0
141,OGN,1,1.0,1.0,0.0,-0.0,0.0
142,OLLI,1,1.0,1.0,0.0,0.0,0.0
143,OMC,2,1.0,0.5,0.009896,0.009896,0.5
144,ONON,1,1.0,1.0,0.0,0.0,0.0



Top senators by quarter (7d close-close average returns):


Unnamed: 0,senator_display_name,trade_quarter,n_trades,avg_ret,med_ret,win_rate
0,"Mullin, Markwayne (Senator)",2023Q1,9,0.0,0.0,0.0
1,"Mullin, Markwayne (Senator)",2023Q4,9,0.0,-0.0,0.0
2,"Mullin, Markwayne (Senator)",2024Q1,26,0.0,0.0,0.0
3,"Mullin, Markwayne (Senator)",2025Q1,23,-0.000912,0.0,0.043478
4,"Boozman, John (Senator)",2025Q1,26,-0.001271,-0.0,0.115385
5,"Capito, Shelley Moore (Senator)",2025Q1,8,-0.005252,-0.0,0.125
6,"McCormick, David H. (Senator)",2025Q1,15,-0.012813,-0.009069,0.2
7,"Whitehouse, Sheldon (Senator)",2025Q2,6,0.020775,-0.0,0.166667
8,"Tuberville, Tommy (Senator)",2025Q2,15,0.012355,-0.0,0.066667
9,"Capito, Shelley Moore (Senator)",2025Q2,10,0.008191,-0.0,0.2


In [40]:
# Stand-alone re-run of the 7-day analysis with a sanity check on zero returns.
import os
import datetime as dt
import pandas as pd
import numpy as np
from IPython.display import display
from dotenv import load_dotenv

# Setup paths and env
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
load_dotenv(os.path.join(project_root, '.env'))

from db.config import SessionLocal
# NOTE(review): this import rebinds the name `load_trades_window`, replacing
# the function of the same name defined earlier in this notebook for every
# cell executed afterwards.
from analysis_helpers import (
    load_trades_window,
    prepare_trades,
    add_basic_returns,
    add_multi_horizon_returns,
    add_quartile_flags,
    quartile_stats_for_senators
)

# --- 1. DATA LOADING ---
lookback_days = 365 
today = dt.date.today()
start = today - dt.timedelta(days=lookback_days)

trades_raw = load_trades_window(start, today)

# --- 2. THE REPAIR STEP ---
# Run the preparation pipeline. If the price helper has been updated, this
# should now pull actual 7-day and 30-day prices.
trades_prep = prepare_trades(trades_raw)
trades_prep = add_basic_returns(trades_prep)
trades_prep = add_multi_horizon_returns(trades_prep, horizons=(1, 7, 30))

# --- 3. SANITY CHECK (Crucial) ---
# Check whether the "zero return" bug is still present: the share of trades
# whose 7-day return is exactly 0.0 should be small.
# NOTE(review): the last recorded run reported 88.57% exact zeros.
zero_rate = (trades_prep['ret_close_close_7d'] == 0).mean()
print(f"Percentage of trades with exactly 0.0 return: {zero_rate:.2%}")
if zero_rate > 0.50:
    print("WARNING: Bug still present. Prices are not fetching forward.")

# --- 4. ANALYSIS ---
metric = 'ret_close_close_7d'
# Add quartile flags for the chosen metric.
trades_q = add_quartile_flags(trades_prep, metric)

# If q25 and q75 are the same (0), the stats will still be broken: a return
# of exactly 0 then presumably lands in both the top and the bottom bucket.
sen_stats = quartile_stats_for_senators(trades_q, metric)

print("\nTop Senators by 7-day Outperformance:")
display(sen_stats.sort_values('top_share', ascending=False).head(10))

# --- 5. CLEAN VISUALIZATION ---
import plotly.express as px
fig = px.histogram(trades_prep, x=metric, title="Distribution of 7-Day Returns", 
                   nbins=50, marginal="rug")
fig.show()

Percentage of trades with exactly 0.0 return: 88.57%

Top Senators by 7-day Outperformance:


Unnamed: 0,senator_display_name,n_trades,top_share,bottom_share,avg_ret,med_ret,win_rate
0,"Banks, James E. (Senator)",1,1.0,1.0,0.0,-0.0,0.0
9,"Kennedy, John (Senator)",1,1.0,1.0,0.0,0.0,0.0
18,"Smith, Tina (Senator)",3,1.0,1.0,0.0,-0.0,0.0
17,"Schiff, Adam B. (Senator)",1,1.0,1.0,0.0,-0.0,0.0
16,"Peters, Gary (Senator)",6,1.0,1.0,0.0,-0.0,0.0
14,"Moreno, Bernardo (Senator)",1,1.0,1.0,0.0,-0.0,0.0
13,"Moran, Jerry (Senator)",2,1.0,1.0,0.0,0.0,0.0
10,"King, Angus (Senator)",26,1.0,0.961538,0.004081,0.0,0.038462
11,"McConnell, A. Mitchell Jr. (Senator)",6,1.0,0.666667,0.012549,0.0,0.333333
8,"Justice II, James Conley (Senator)",1,1.0,1.0,0.0,-0.0,0.0


In [None]:

# Reload raw trades over a longer window and inspect the available columns.
today = dt.date.today()
start = today - dt.timedelta(days=365 * 3)  # 3 * 365 days back; use 365 for one year, etc.

# NOTE(review): which `load_trades_window` runs here depends on execution
# order — the notebook's own definition or the one imported from
# analysis_helpers in an earlier cell.
trades_raw = load_trades_window(start, today)
print(len(trades_raw), "rows")
print(trades_raw.columns.tolist())
trades_raw.head()

1004 rows
['id', 'senator_name', 'senator_first_name', 'senator_last_name', 'senator_display_name', 'chamber', 'report_id', 'report_type', 'report_format', 'filing_date', 'transaction_date', 'owner', 'ticker', 'asset_name', 'asset_type', 'transaction_type', 'transaction_type_raw', 'amount_range_raw', 'amount_min', 'amount_max', 'mid_point', 'comment', 'price_at_transaction', 'current_price', 'company_name', 'sector', 'industry']


Unnamed: 0,id,senator_name,senator_first_name,senator_last_name,senator_display_name,chamber,report_id,report_type,report_format,filing_date,...,amount_range_raw,amount_min,amount_max,mid_point,comment,price_at_transaction,current_price,company_name,sector,industry
0,1696,Thomas R Carper,Thomas R,Carper,"Carper, Thomas R. (Senator)",Senate,d6290ba2-3a9e-4774-bcd6-6bd2d90e4138,PTR,ptr,2024-03-04,...,"$1,001 - $15,000",1001.0,15000.0,8000.5,,21.8225,28.82,,,
1,1695,Thomas R Carper,Thomas R,Carper,"Carper, Thomas R. (Senator)",Senate,d6290ba2-3a9e-4774-bcd6-6bd2d90e4138,PTR,ptr,2024-03-04,...,"$1,001 - $15,000",1001.0,15000.0,8000.5,,137.759,116.69,,,
2,2185,Thomas H Tuberville,Thomas H,Tuberville,"Tuberville, Tommy (Senator)",Senate,2fc8e17b-0db3-404a-b617-b180804063c6,PTR,ptr,2024-03-15,...,"$50,001 - $100,000",50001.0,100000.0,75000.5,,,,,,
3,2184,Thomas H Tuberville,Thomas H,Tuberville,"Tuberville, Tommy (Senator)",Senate,2fc8e17b-0db3-404a-b617-b180804063c6,PTR,ptr,2024-03-15,...,"$50,001 - $100,000",50001.0,100000.0,75000.5,,20.5242,25.24,,,
4,1681,Shelley M Capito,Shelley M,Capito,"Capito, Shelley Moore (Senator)",Senate,94bc262a-9045-40d2-be53-18fd4ee6568c,PTR,ptr,2024-03-15,...,"$15,001 - $50,000",15001.0,50000.0,32500.5,,90.7402,97.49,,,


In [54]:
# Columns not needed for the per-trade PnL analysis that follows.
cols_to_drop = [
    "report_id",
    "senator_first_name",
    "senator_last_name",
    "senator_display_name",
    "report_type",
    "report_format",
    "comment",
    "company_name",
    "sector",
    "industry"
]

# errors="ignore" makes re-running this cell harmless once the columns are gone.
# NOTE(review): trades_raw is overwritten, so the full frame is only
# recoverable by re-running the load cell.
trades_raw = trades_raw.drop(columns=cols_to_drop, errors="ignore")
# Only the last expression is displayed; the `.columns` line shows nothing.
trades_raw.columns
trades_raw.head()

Unnamed: 0,id,senator_name,chamber,filing_date,transaction_date,owner,ticker,asset_name,asset_type,transaction_type,transaction_type_raw,amount_range_raw,amount_min,amount_max,mid_point,price_at_transaction,current_price
0,1696,Thomas R Carper,Senate,2024-03-04,2024-02-16,Spouse,EQNR,Equinor ASA,Stock,SELL,Sale (Full),"$1,001 - $15,000",1001.0,15000.0,8000.5,21.8225,28.82
1,1695,Thomas R Carper,Senate,2024-03-04,2024-02-16,Spouse,TGT,Target Corp,Stock,BUY,Purchase,"$1,001 - $15,000",1001.0,15000.0,8000.5,137.759,116.69
2,2185,Thomas H Tuberville,Senate,2024-03-15,2024-02-27,Joint,SSBK,"Southern States Bancshares, Inc. - Common Stock",Stock,BUY,Purchase,"$50,001 - $100,000",50001.0,100000.0,75000.5,,
3,2184,Thomas H Tuberville,Senate,2024-03-15,2024-02-27,Joint,SBLK,Star Bulk Carriers Corp. - Common Shares,Stock,BUY,Purchase,"$50,001 - $100,000",50001.0,100000.0,75000.5,20.5242,25.24
4,1681,Shelley M Capito,Senate,2024-03-15,2024-02-23,Spouse,SBUX,Starbucks Corp,Stock,BUY,Purchase,"$15,001 - $50,000",15001.0,50000.0,32500.5,90.7402,97.49


In [60]:
# Run the trimmed frame through the shared preparation helper.
trades_prep = prepare_trades(trades_raw)
print(f"After prepare_trades: {len(trades_prep)} trades")

# Add per-trade pnl and returns using price_at_transaction & current_price
trades_prep = add_basic_returns(trades_prep)
# Only the last expression is displayed; this head(10) shows nothing.
trades_prep.head(10)
# Spot-check a single senator; names must match `senator_name` exactly.
names = ["A. Mitchell McConnell, Jr."]
trades_prep_subset = trades_prep[trades_prep["senator_name"].isin(names)]
trades_prep_subset

After prepare_trades: 966 trades


Unnamed: 0,id,senator_name,chamber,filing_date,transaction_date,owner,ticker,asset_name,asset_type,transaction_type,...,price_at_transaction,current_price,transaction_type_norm,direction,trade_quarter,shares_est,pnl,pct_return,days_held,ann_return
14,1798,"A. Mitchell McConnell, Jr.",Senate,2024-03-19,2024-03-05,Spouse,WFC,Wells Fargo & Company,Stock,BUY,...,54.083,88.7,BUY,1.0,2024Q1,147.930033,5120.893969,0.640072,719,0.285509
163,1791,"A. Mitchell McConnell, Jr.",Senate,2024-06-13,2024-06-04,Spouse,WFC,Wells Fargo & Company,Stock,BUY,...,54.083,88.7,BUY,1.0,2024Q2,147.930033,5120.893969,0.640072,628,0.333154
298,1794,"A. Mitchell McConnell, Jr.",Senate,2024-09-16,2024-09-01,Spouse,WFC,Wells Fargo & Company,Stock,BUY,...,54.083,88.7,BUY,1.0,2024Q3,147.930033,5120.893969,0.640072,539,0.39798
386,1793,"A. Mitchell McConnell, Jr.",Senate,2025-01-04,2024-12-02,Spouse,WFC,Wells Fargo & Company,Stock,BUY,...,54.083,88.7,BUY,1.0,2024Q4,147.930033,5120.893969,0.640072,447,0.497777
494,1795,"A. Mitchell McConnell, Jr.",Senate,2025-03-12,2025-03-03,Spouse,WFC,Wells Fargo & Company,Stock,BUY,...,75.3934,88.7,BUY,1.0,2025Q1,106.116716,1412.05269,0.176496,356,0.18134
605,1797,"A. Mitchell McConnell, Jr.",Senate,2025-06-19,2025-06-01,Spouse,WFC,Wells Fargo & Company,Stock,BUY,...,73.5927,88.7,BUY,1.0,2025Q2,108.713228,1642.363355,0.205283,266,0.292018
614,1792,"A. Mitchell McConnell, Jr.",Senate,2025-07-07,2025-06-26,Spouse,LAZR,Luminar Technologies Inc,Stock,SELL,...,2.95,0.0566,SELL,-1.0,2025Q2,11017.118644,31876.931085,0.980814,241,1.815635
719,1796,"A. Mitchell McConnell, Jr.",Senate,2025-09-12,2025-09-01,Spouse,WFC,Wells Fargo & Company,Stock,BUY,...,73.5534,88.7,BUY,1.0,2025Q3,108.771314,1647.515591,0.205927,174,0.481109
842,1355,"A. Mitchell McConnell, Jr.",Senate,2025-12-19,2025-12-01,Spouse,WFC,Wells Fargo & Company,Stock,BUY,...,84.9823,88.7,BUY,1.0,2025Q4,94.143133,349.995927,0.043747,83,0.207185
861,1356,"A. Mitchell McConnell, Jr.",Senate,2026-01-13,2025-12-24,Spouse,LAZR,Luminar Technologies Inc,Stock,SELL,...,0.153,0.0566,SELL,-1.0,2025Q4,52290.849673,5040.837908,0.630065,60,18.539519


In [61]:
# Per-senator roll-up of the per-trade pnl / pct_return columns in trades_prep.
import numpy as np

senator_pnl = (
    trades_prep
    .groupby("senator_name")
    .agg(
        n_trades=("id", "count"),
        total_notional=("mid_point", "sum"),
        total_pnl=("pnl", "sum"),
        avg_return=("pct_return", "mean"),
        median_return=("pct_return", "median"),
        # Share of trades with a strictly positive return.
        win_rate=("pct_return", lambda x: (x > 0).mean()),
    )
    .reset_index()
)
# Only the last expression is displayed; this head() shows nothing.
senator_pnl.head()

# Optional: focus on senators with a reasonable number of trades
min_trades = 5
senator_pnl_filtered = senator_pnl[senator_pnl["n_trades"] >= min_trades]

# Rank by whatever metric you care about
senator_pnl_by_pnl = senator_pnl_filtered.sort_values("total_pnl", ascending=False)
# Computed for convenience; not displayed or used further in this cell.
senator_pnl_by_return = senator_pnl_filtered.sort_values("avg_return", ascending=False)

senator_pnl_by_pnl.head(20)



Unnamed: 0,senator_name,n_trades,total_notional,total_pnl,avg_return,median_return,win_rate
0,"A. Mitchell McConnell, Jr.",10,104505.0,62453.27243,0.480262,0.635069,1.0
17,Markwayne Mullin,192,9761596.0,53586.73768,0.077134,0.030002,0.526042
2,"Angus S King, Jr.",27,216013.5,46200.300154,0.213877,0.189768,0.703704
14,Katie Britt,24,192012.0,20887.88077,0.108784,0.162564,0.541667
19,Ron L Wyden,5,341002.5,17367.781616,-0.025054,-0.114839,0.4
6,Gary C Peters,14,210007.0,-3759.684887,-0.068585,0.056828,0.571429
15,Lindsey Graham,7,512503.5,-6402.017012,-0.028426,0.026994,0.714286
24,Thomas R Carper,32,323016.0,-16651.303714,-0.172678,-0.113115,0.46875
21,Shelley M Capito,93,915546.5,-28541.447457,-0.032372,-0.100142,0.419355
25,Tina Smith,6,850003.0,-57824.049786,0.012031,-0.131526,0.333333
