# Strategy Analysis

Full probability analysis for two strategies:
1. **Favorite-Longshot Bias** - Portfolio of extreme favorites
2. **BTC $1M** - Options-priced digital bet

In [None]:
import asyncio
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, binom
from scipy.special import comb
from datetime import datetime

from src.adapters import PolymarketAdapter

import nest_asyncio
# NOTE(review): presumably this patches asyncio so that asyncio.run() can be
# called from inside the notebook's already-running event loop — confirm
# against the nest_asyncio documentation.
nest_asyncio.apply()

# Fix the plot style and the global NumPy seed so figures and the Monte Carlo
# draws (np.random.normal) are repeatable from a fresh kernel.
plt.style.use('seaborn-v0_8-whitegrid')
np.random.seed(42)

# Stamp the run with the current naive datetime at minute resolution.
print(f"Analysis: {datetime.now().strftime('%Y-%m-%d %H:%M')}")

---
# 1. Favorite-Longshot Bias

## Data Collection

In [None]:
async def fetch_markets(limit=500):
    """Connect a fresh PolymarketAdapter and return its active markets.

    `limit` is passed straight through to `get_markets`.
    """
    client = PolymarketAdapter()
    await client.connect()
    active_markets = await client.get_markets(active_only=True, limit=limit)
    return active_markets


# Run the coroutine to completion and keep the result for the cells below.
markets = asyncio.run(fetch_markets(500))
print(f"Fetched {len(markets)} markets")

In [None]:
# Edge model from research
def estimate_edge(price):
    """Return the assumed edge (true probability minus price) for `price`.

    The model is a step function: the first tier whose floor is <= price
    supplies the edge, and any price below the lowest floor gets 0.02.
    """
    # (price floor, edge) pairs, highest floor first.
    tiers = (
        (0.99, 0.005),
        (0.97, 0.02),
        (0.95, 0.03),
        (0.93, 0.025),
    )
    for floor, edge in tiers:
        if price >= floor:
            return edge
    return 0.02

# Collect positions
# One row per (market, side) whose quote falls in the 93-99% band.
POSITION_COLUMNS = ['market', 'side', 'price', 'edge', 'volume']

positions = []
for m in markets:
    # Skip inactive markets and those with missing volume or volume below 50000.
    if not m.is_active or not m.volume or m.volume < 50000:
        continue
    for side, price in [('YES', m.yes_price), ('NO', m.no_price)]:
        # A missing quote cannot be range-checked: None vs float raises TypeError.
        if price is None:
            continue
        if 0.93 <= price < 0.99:  # Focus on tradeable range
            positions.append({
                'market': m.question[:40] if m.question else 'Unknown',
                'side': side,
                'price': price,
                'edge': estimate_edge(price),
                'volume': m.volume,
            })

# Passing explicit columns keeps sort_values (and the later price filters)
# working when no market qualifies; a bare DataFrame([]) has no 'price'
# column and would raise KeyError.
df = pd.DataFrame(positions, columns=POSITION_COLUMNS).sort_values('price', ascending=False)
print(f"Positions in 93-99% range: {len(df)}")
df.head(10)

## Portfolio Math

For a portfolio of n independent bets at price p with true probability p + edge:

- **Stake per bet**: $25
- **Profit if win**: $25 × (1-p)/p
- **Loss if lose**: $25
- **P(win)**: p + edge
- **P(loss)**: 1 - (p + edge)

In [None]:
# Single bet analysis
def analyze_single_bet(price, edge, stake=25):
    """Summarise the economics of one bet bought at `price`.

    The assumed true win probability is `price + edge`, capped at 0.9999.
    A win pays `stake * (1 - price) / price`; a loss forfeits `stake`.
    Returns a dict with the inputs, both payoffs, both probabilities and
    the expected value.
    """
    p_win = min(price + edge, 0.9999)
    p_lose = 1 - p_win
    payout = stake * (1 - price) / price
    expected_value = p_win * payout - p_lose * stake

    return dict(
        price=price,
        edge=edge,
        true_prob=p_win,
        profit_win=payout,
        loss_lose=stake,
        ev=expected_value,
        p_win=p_win,
        p_lose=p_lose,
    )

# Tabulate the single-bet economics at a few representative prices.
print("Single Bet Analysis")
print("=" * 70)
print(f"{'Price':<8} {'Edge':<8} {'TrueP':<8} {'Win$':<10} {'Lose$':<10} {'EV':<10} {'P(lose)':<10}")
print("-" * 70)
for price in (0.93, 0.95, 0.96, 0.97, 0.98):
    bet = analyze_single_bet(price, estimate_edge(price))
    # Each cell carries its own trailing padding; cells are joined by one space.
    cells = [
        f"{bet['price']:.0%}{'':>4}",
        f"{bet['edge']:.1%}{'':>4}",
        f"{bet['true_prob']:.1%}{'':>4}",
        f"+${bet['profit_win']:.2f}{'':>4}",
        f"-${bet['loss_lose']:.2f}{'':>5}",
        f"${bet['ev']:+.2f}{'':>5}",
        f"{bet['p_lose']:.2%}",
    ]
    print(" ".join(cells))

In [None]:
# Define portfolio
# Take the top positions by volume within each price tier
# (10 from the 95-97% tier, 5 from the 93-95% tier).
STAKE_PER_BET = 25  # dollars staked on every position

# df is sorted by price, so re-rank by volume before head(); otherwise head()
# would select the highest-priced rows rather than the highest-volume ones.
by_volume = df.sort_values('volume', ascending=False)
in_tier1 = (by_volume['price'] >= 0.95) & (by_volume['price'] < 0.97)
in_tier2 = (by_volume['price'] >= 0.93) & (by_volume['price'] < 0.95)
tier1 = by_volume[in_tier1].head(10)
tier2 = by_volume[in_tier2].head(5)

portfolio = pd.concat([tier1, tier2]).reset_index(drop=True)
portfolio['stake'] = STAKE_PER_BET
# Column arithmetic instead of row-wise apply: same values, and it still
# yields a Series (not a DataFrame) when the portfolio is empty.
portfolio['true_prob'] = (portfolio['price'] + portfolio['edge']).clip(upper=0.9999)
portfolio['profit_win'] = portfolio['stake'] * (1 - portfolio['price']) / portfolio['price']
portfolio['loss_lose'] = portfolio['stake']

print(f"Portfolio: {len(portfolio)} positions")
print(f"Total capital: ${portfolio['stake'].sum()}")
print()
portfolio[['side', 'price', 'edge', 'true_prob', 'profit_win', 'market']]

## Outcome Distribution

Calculate the exact probability distribution of portfolio outcomes.

In [None]:
def portfolio_distribution(positions_df):
    """
    Enumerate every win/lose combination of the portfolio.

    Reads the 'true_prob', 'profit_win' and 'loss_lose' columns and treats
    the positions as independent. Returns a DataFrame with one row per
    combination (2^n rows for n positions) holding the number of wins, the
    number of losses, the combination's probability and its total P&L.
    Bit j of the row index says whether position j won.
    """
    win_prob = positions_df['true_prob'].values
    gain = positions_df['profit_win'].values
    forfeit = positions_df['loss_lose'].values
    n = len(positions_df)

    rows = []
    for mask in range(1 << n):
        likelihood = 1.0
        total = 0
        n_won = 0
        # Walk the positions in index order so products and sums accumulate
        # in a fixed order.
        for j in range(n):
            if (mask >> j) & 1:
                likelihood *= win_prob[j]
                total += gain[j]
                n_won += 1
            else:
                likelihood *= (1 - win_prob[j])
                total += -forfeit[j]

        rows.append({
            'wins': n_won,
            'losses': n - n_won,
            'prob': likelihood,
            'pnl': total,
        })

    return pd.DataFrame(rows)

# Calculate distribution
dist = portfolio_distribution(portfolio)

# Aggregate by number of wins.
# Positions are bought at different prices, so their payoffs differ and
# outcomes with the same win count do NOT share a single P&L. Report the
# probability-weighted (conditional expected) P&L for each win count rather
# than an unweighted mean over combinations.
weighted = dist.assign(weighted_pnl=dist['prob'] * dist['pnl'])
by_wins = weighted.groupby('wins')[['prob', 'weighted_pnl']].sum().reset_index()
# A group with zero total probability would give NaN here; that needs a
# true_prob of exactly 0 or 1.
by_wins['pnl'] = by_wins['weighted_pnl'] / by_wins['prob']
by_wins = by_wins[['wins', 'prob', 'pnl']]

print("Outcome Distribution by Number of Wins")
print("=" * 60)
print(f"{'Wins':<8} {'Losses':<8} {'Probability':<15} {'P&L':<15}")
print("-" * 60)
for _, row in by_wins.iterrows():
    n_losses = len(portfolio) - row['wins']
    print(f"{int(row['wins']):<8} {int(n_losses):<8} {row['prob']:.2%}{'':>8} ${row['pnl']:+.2f}")

In [None]:
# Summary statistics
total_capital = portfolio['stake'].sum()

# Probability mass on each side of zero P&L
p_profit = dist.loc[dist['pnl'] > 0, 'prob'].sum()
p_breakeven = dist.loc[dist['pnl'] == 0, 'prob'].sum()
p_loss = dist.loc[dist['pnl'] < 0, 'prob'].sum()

# Expected value: probability-weighted P&L over all outcomes
ev = (dist['prob'] * dist['pnl']).sum()

# Percentiles come from the cumulative probability along ascending P&L
dist_sorted = dist.sort_values('pnl')
dist_sorted['cum_prob'] = dist_sorted['prob'].cumsum()


def percentile_pnl(p):
    """Return the lowest P&L whose cumulative probability reaches `p`."""
    reached = dist_sorted['cum_prob'] >= p
    return dist_sorted.loc[reached, 'pnl'].iloc[0]


p5, p25, p50, p75, p95 = (percentile_pnl(q) for q in (0.05, 0.25, 0.50, 0.75, 0.95))

# Extremes of the distribution
worst = dist['pnl'].min()
best = dist['pnl'].max()

summary_lines = [
    "=" * 60,
    "PORTFOLIO SUMMARY",
    "=" * 60,
    f"\nPositions: {len(portfolio)}",
    f"Total Capital: ${total_capital}",
    "\nPROBABILITIES",
    f"  P(profit):     {p_profit:.1%}",
    f"  P(breakeven):  {p_breakeven:.1%}",
    f"  P(loss):       {p_loss:.1%}",
    "\nEXPECTED VALUE",
    f"  E[P&L]: ${ev:+.2f}",
    f"  E[Return]: {ev/total_capital*100:+.2f}%",
    "\nDISTRIBUTION",
    f"  Worst case:  ${worst:.2f} ({worst/total_capital*100:.0f}%)",
    f"  5th pct:     ${p5:.2f}",
    f"  25th pct:    ${p25:.2f}",
    f"  Median:      ${p50:.2f}",
    f"  75th pct:    ${p75:.2f}",
    f"  95th pct:    ${p95:.2f}",
    f"  Best case:   ${best:.2f}",
]
print("\n".join(summary_lines))

In [None]:
# Visualize distribution
# Left panel: probability of each win count, annotated with its P&L.
# Right panel: cumulative distribution of portfolio P&L.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# P&L distribution
ax1 = axes[0]
# Red when the per-win-count P&L in by_wins is negative, green otherwise.
colors = ['#d62728' if pnl < 0 else '#2ca02c' for pnl in by_wins['pnl']]
bars = ax1.bar(by_wins['wins'], by_wins['prob'] * 100, color=colors, edgecolor='black', alpha=0.7)
ax1.set_xlabel('Number of Wins')
ax1.set_ylabel('Probability (%)')
ax1.set_title(f'Outcome Distribution (n={len(portfolio)})', fontweight='bold')
# One tick per possible win count, 0..n inclusive.
ax1.set_xticks(range(len(portfolio) + 1))

# Add P&L labels
# Bars and by_wins rows are in the same order, so zip pairs them up.
for bar, (_, row) in zip(bars, by_wins.iterrows()):
    height = bar.get_height()
    ax1.annotate(f'${row["pnl"]:+.0f}',
                xy=(bar.get_x() + bar.get_width()/2, height),
                xytext=(0, 3), textcoords='offset points',
                ha='center', va='bottom', fontsize=8)

# Cumulative distribution
ax2 = axes[1]
pnl_sorted = np.sort(dist['pnl'].unique())
# P(P&L <= x) at every distinct P&L value; each entry rescans all of dist.
cum_probs = [dist[dist['pnl'] <= pnl]['prob'].sum() for pnl in pnl_sorted]
ax2.step(pnl_sorted, np.array(cum_probs) * 100, where='post', linewidth=2)
ax2.axhline(50, color='gray', linestyle='--', alpha=0.5, label='Median')
ax2.axvline(0, color='black', linewidth=1)
# NOTE(review): the curve is drawn with where='post' but fill_between gets no
# step= argument, so the shaded region may not follow the stepped line
# exactly — confirm against the matplotlib fill_between documentation.
ax2.fill_between(pnl_sorted, 0, np.array(cum_probs) * 100, 
                  where=(pnl_sorted < 0), alpha=0.3, color='red', label='Loss region')
ax2.set_xlabel('P&L ($)')
ax2.set_ylabel('Cumulative Probability (%)')
ax2.set_title('Cumulative Distribution', fontweight='bold')
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print(f"\nP(loss) = {p_loss:.1%} (red bars / shaded area)")
print(f"P(profit) = {p_profit:.1%} (green bars)")

## Scenario Analysis: What If Edge Is Wrong?

In [None]:
def analyze_scenario(positions_df, edge_multiplier):
    """Re-price the portfolio with every edge scaled by `edge_multiplier`.

    Works on a copy of `positions_df`, recomputes 'true_prob' as
    price + scaled edge (capped at 0.9999), rebuilds the exact outcome
    distribution and returns P(profit), P(loss) and expected P&L in a dict.
    """
    def capped_prob(row):
        return min(row['price'] + row['adj_edge'], 0.9999)

    scaled = positions_df.copy()
    scaled['adj_edge'] = scaled['edge'] * edge_multiplier
    scaled['true_prob'] = scaled.apply(capped_prob, axis=1)

    outcomes = portfolio_distribution(scaled)
    pnl = outcomes['pnl']
    mass = outcomes['prob']

    return {
        'p_profit': mass[pnl > 0].sum(),
        'p_loss': mass[pnl < 0].sum(),
        'ev': (mass * pnl).sum(),
    }

# (edge multiplier, label) pairs, from "no edge" up to 1.5x the estimate.
scenarios = [
    (0.0, 'No edge (market is fair)'),
    (0.5, 'Half the estimated edge'),
    (1.0, 'Full estimated edge'),
    (1.5, 'Edge 50% higher than estimated'),
]

print("Scenario Analysis: What If Edge Estimate Is Wrong?")
print("=" * 70)
print(f"{'Scenario':<35} {'P(profit)':<12} {'P(loss)':<12} {'E[P&L]':<12}")
print("-" * 70)

for mult, desc in scenarios:
    outcome = analyze_scenario(portfolio, mult)
    # Each cell carries its own trailing padding; cells are joined by one space.
    cells = [
        f"{desc:<35}",
        f"{outcome['p_profit']:.1%}{'':>5}",
        f"{outcome['p_loss']:.1%}{'':>5}",
        f"${outcome['ev']:+.2f}",
    ]
    print(" ".join(cells))

## Correlation Risk

The above assumes independent outcomes. In reality, positions may be correlated (e.g., multiple sports bets, political events).

In [None]:
def monte_carlo_correlated(positions_df, n_sims=10000, correlation=0.0):
    """
    Simulate portfolio P&L with correlated outcomes.

    Uses a one-factor model: each position's latent normal is
    sqrt(rho) * common + sqrt(1 - rho) * individual, so any two positions
    have latent correlation rho. The latent value is mapped to a uniform
    through the normal CDF and the position wins when that uniform is below
    its 'true_prob'.

    Args:
        positions_df: DataFrame with 'true_prob', 'profit_win' and
            'loss_lose' columns.
        n_sims: number of simulated portfolios.
        correlation: rho, which must lie in [0, 1].

    Returns:
        1-D array of length n_sims holding the simulated P&L values.

    Raises:
        ValueError: if correlation is outside [0, 1]. The square roots would
            otherwise yield NaN and every bet would silently count as a loss.

    Draws from the global NumPy random state, so results depend on
    np.random.seed.
    """
    if not 0.0 <= correlation <= 1.0:
        raise ValueError(f"correlation must be in [0, 1], got {correlation!r}")

    n = len(positions_df)
    probs = positions_df['true_prob'].values
    profits = positions_df['profit_win'].values
    # Signed payoff of a losing position, computed once outside the loop.
    loss_payoffs = -positions_df['loss_lose'].values

    # Factor loadings are the same for every simulation.
    common_weight = np.sqrt(correlation)
    individual_weight = np.sqrt(1 - correlation)

    pnls = []

    for _ in range(n_sims):
        # One shared shock, then one idiosyncratic shock per position.
        common = np.random.normal(0, 1)
        individual = np.random.normal(0, 1, n)

        # Combined latent variable; unit variance because the squared
        # weights sum to 1.
        z = common_weight * common + individual_weight * individual

        # Convert to uniform [0,1] via CDF
        u = norm.cdf(z)

        # A position wins with probability probs[j].
        wins = u < probs

        pnls.append(np.where(wins, profits, loss_payoffs).sum())

    return np.array(pnls)

# Run with different correlations
correlations = [0.0, 0.2, 0.4, 0.6]

print("Impact of Correlation on Risk")
print("=" * 70)
print(f"{'Correlation':<15} {'P(profit)':<12} {'P(loss)':<12} {'E[P&L]':<12} {'5th pct':<12}")
print("-" * 70)

# The simulated statistics use mc_-prefixed names so they do not overwrite the
# exact p_profit / p_loss / ev / p5 computed from the full distribution, which
# the final strategy-comparison cell reads.
for corr in correlations:
    mc_pnls = monte_carlo_correlated(portfolio, n_sims=10000, correlation=corr)
    mc_p_profit = (mc_pnls > 0).mean()
    mc_p_loss = (mc_pnls < 0).mean()
    mc_ev = mc_pnls.mean()
    mc_p5 = np.percentile(mc_pnls, 5)
    print(f"{corr:.0%}{'':>10} {mc_p_profit:.1%}{'':>5} {mc_p_loss:.1%}{'':>5} ${mc_ev:+.2f}{'':>5} ${mc_p5:.2f}")

---
# 2. BTC $1M Market

Binary outcome: Either BTC hits $1M before GTA VI or it doesn't.

In [None]:
def black_scholes_digital(S, K, T, r, sigma):
    """Return N(d2), the Black-Scholes probability that S_T finishes above K.

    S is the spot, K the strike, T the time to expiry, r the rate and sigma
    the volatility. With no time left (T <= 0) the result is 1.0 when
    S >= K and 0.0 otherwise.
    """
    if T > 0:
        log_moneyness = np.log(S / K)
        drift = (r - 0.5 * sigma**2) * T
        d2 = (log_moneyness + drift) / (sigma * np.sqrt(T))
        return norm.cdf(d2)
    return 1.0 if S >= K else 0.0

# Model inputs
SPOT = 95_000
STRIKE = 1_000_000
VOL = 0.55
RATE = 0.04
EXPIRY = 1.5  # ~18 months (GTA VI)

MARKET_PRICE = 0.485  # YES price on Polymarket

# Model probability of finishing above the strike.
# NOTE(review): black_scholes_digital gives P(S_T > K) at expiry, while the
# market is described as BTC *hitting* $1M before the deadline; a touch
# probability can be higher than the terminal one — confirm the model choice.
fair_value = black_scholes_digital(S=SPOT, K=STRIKE, T=EXPIRY, r=RATE, sigma=VOL)

btc_report = [
    "BTC $1M Market Analysis",
    "=" * 50,
    f"Spot: ${SPOT:,}",
    f"Strike: ${STRIKE:,}",
    f"Required move: {STRIKE/SPOT:.1f}x",
    f"Volatility: {VOL:.0%}",
    f"Expiry: {EXPIRY} years",
    f"\nMarket price (YES): {MARKET_PRICE:.1%}",
    f"Fair value (BS): {fair_value:.6%}",
    f"Edge: {MARKET_PRICE - fair_value:.1%}",
]
print("\n".join(btc_report))

In [None]:
# Trade analysis: SHORT YES (buy NO)
# If we short YES at 48.5%, we receive $0.485 per share
# If YES wins (BTC hits $1M), we pay $1, losing $0.515
# If NO wins (BTC doesn't hit $1M), we keep $0.485

stake = 100  # $100 position
shares = stake / MARKET_PRICE  # shares we can short

# Outcomes
profit_if_no = stake  # We keep the $100 we received
loss_if_yes = shares * (1 - MARKET_PRICE)  # We pay (1 - 0.485) per share

# Using fair value as true probability
p_yes = fair_value
p_no = 1 - fair_value

# Stored as btc_ev rather than ev so this cell does not overwrite the
# favorite-longshot portfolio's ev, which the final comparison cell prints.
btc_ev = p_no * profit_if_no - p_yes * loss_if_yes

print("\nTrade: SHORT YES (equivalently, BUY NO)")
print("=" * 50)
print(f"Position size: ${stake}")
print(f"\nOutcomes:")
print(f"  BTC hits $1M:    Lose ${loss_if_yes:.2f} (P = {p_yes:.6%})")
print(f"  BTC doesn't:     Win ${profit_if_no:.2f} (P = {p_no:.4%})")
print(f"\nExpected value: ${btc_ev:.2f}")
print(f"Expected return: {btc_ev/stake*100:.1f}%")

In [None]:
# What if Black-Scholes is wrong?
# BTC might not follow lognormal distribution
# Let's see what true probability makes this a fair bet

print("Sensitivity: What True Probability Makes This Fair?")
print("=" * 50)

# Breakeven: EV = 0
# p_no * profit - p_yes * loss = 0
# (1 - p_yes) * profit - p_yes * loss = 0
# profit = (profit + loss) * p_yes
# p_yes = profit / (profit + loss)
# With the $100 position above: 100 / (100 + 106.19) = 48.5%

breakeven_p = profit_if_no / (profit_if_no + loss_if_yes)
print(f"Breakeven P(BTC hits $1M): {breakeven_p:.1%}")
print(f"\nFor this trade to be -EV, you need to believe:")
# Report the actual required multiple instead of a hard-coded "10x", so the
# message agrees with the "Required move" line and follows SPOT / STRIKE.
print(f"  P(BTC {STRIKE/SPOT:.1f}x in {EXPIRY} years) > {breakeven_p:.1%}")
print(f"\nBlack-Scholes says: {fair_value:.6%}")
print(f"Market says: {MARKET_PRICE:.1%}")

# What vol would justify market price?
def implied_vol_for_prob(target_prob, S, K, T, r):
    """Find the volatility at which black_scholes_digital equals `target_prob`.

    Searches sigma in [0.01, 10.0] with Brent's method.

    Returns:
        The volatility as a float, or None when the solver cannot find one
        (the bracket endpoints do not straddle the target, or it fails to
        converge).
    """
    from scipy.optimize import brentq
    try:
        return brentq(lambda v: black_scholes_digital(S, K, T, r, v) - target_prob, 0.01, 10.0)
    except (ValueError, RuntimeError):
        # Only the solver's own failure modes are treated as "no solution";
        # anything else (e.g. KeyboardInterrupt) propagates.
        return None

implied_vol = implied_vol_for_prob(MARKET_PRICE, SPOT, STRIKE, EXPIRY, RATE)
# Compare against None explicitly so a valid result is never skipped for
# being falsy.
if implied_vol is not None:
    print(f"\nImplied vol to justify market: {implied_vol:.0%}")
    print(f"(Current BTC vol: ~{VOL:.0%})")
else:
    # Previously silent: say so when no volatility reproduces the market price.
    print("\nNo volatility between 1% and 1000% reproduces the market price under Black-Scholes")

In [None]:
# Visualize
# Left panel: model probability of finishing above a range of strikes.
# Right panel: model probability for the $1M strike as volatility varies.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# P vs Strike
ax1 = axes[0]
strikes = np.array([100, 150, 200, 300, 500, 750, 1000]) * 1000
probs = [black_scholes_digital(SPOT, k, EXPIRY, RATE, VOL) * 100 for k in strikes]
ax1.bar([f'${k//1000}k' for k in strikes], probs, color='steelblue', edgecolor='black')
ax1.axhline(MARKET_PRICE * 100, color='red', linestyle='--', label=f'Market: {MARKET_PRICE:.1%}')
ax1.set_xlabel('Strike Price')
ax1.set_ylabel('Probability (%)')
ax1.set_title('P(BTC > Strike) by Black-Scholes', fontweight='bold')
ax1.legend()
# Scale the axis to whichever is taller, the highest bar or the market line;
# sizing to the bars alone can leave the market reference line off the chart.
ax1.set_ylim(0, max(max(probs), MARKET_PRICE * 100) * 1.2)

# P vs Vol for $1M strike
ax2 = axes[1]
vols = np.linspace(0.3, 2.0, 100)
probs_vol = [black_scholes_digital(SPOT, STRIKE, EXPIRY, RATE, v) * 100 for v in vols]
ax2.plot(vols * 100, probs_vol, 'b-', linewidth=2)
ax2.axhline(MARKET_PRICE * 100, color='red', linestyle='--', label=f'Market: {MARKET_PRICE:.1%}')
ax2.axvline(VOL * 100, color='green', linestyle=':', label=f'Current vol: {VOL:.0%}')
ax2.set_xlabel('Volatility (%)')
ax2.set_ylabel('P(BTC > $1M) %')
ax2.set_title('$1M Probability vs Volatility', fontweight='bold')
ax2.legend()
# Fixed 0-60% range keeps the market line visible above the model curve.
ax2.set_ylim(0, 60)

plt.tight_layout()
plt.show()

print(f"\nNote: Even at 200% vol, P(BTC > $1M) is only ~{black_scholes_digital(SPOT, STRIKE, EXPIRY, RATE, 2.0)*100:.1f}%")

---
# Summary

In [None]:
print("=" * 70)
print("STRATEGY COMPARISON")
print("=" * 70)

# Recompute the favorite-longshot figures from the exact outcome distribution.
# The generic names p_profit / p_loss / ev may have been reassigned by cells
# run in between (Monte Carlo sweep, BTC trade), which would make this section
# report another strategy's numbers.
fl_p_profit = dist.loc[dist['pnl'] > 0, 'prob'].sum()
fl_p_loss = dist.loc[dist['pnl'] < 0, 'prob'].sum()
fl_ev = (dist['prob'] * dist['pnl']).sum()

print("\n1. FAVORITE-LONGSHOT BIAS")
print(f"   Capital: ${total_capital}")
print(f"   Positions: {len(portfolio)}")
print(f"   P(profit): {fl_p_profit:.1%}")
print(f"   P(loss): {fl_p_loss:.1%}")
print(f"   E[P&L]: ${fl_ev:.2f} ({fl_ev/total_capital*100:.1f}%)")
print(f"   Worst case: ${worst:.2f} ({worst/total_capital*100:.0f}%)")
print(f"   Median: ${p50:.2f}")

# The BTC expected value is recomputed here from its own inputs.
btc_ev = p_no * profit_if_no - p_yes * loss_if_yes
print("\n2. BTC $1M SHORT")
print(f"   Capital at risk: ${loss_if_yes:.2f}")
print(f"   P(profit): {p_no:.4%}")
print(f"   P(loss): {p_yes:.6%}")
print(f"   E[P&L]: ${btc_ev:.2f}")
print(f"   Win: +${profit_if_no:.2f}")
print(f"   Lose: -${loss_if_yes:.2f}")

print("\n" + "=" * 70)
print("CAVEATS")
print("=" * 70)
print("- F-L edge estimates from horse racing/Kalshi, not Polymarket-specific")
print("- BTC may not follow lognormal dynamics")
print("- Correlation between F-L positions increases tail risk")
print("- Settlement risk on both strategies")