# DraftKings Showdown EV — Colab Notebook
Paste your RAW GitHub CSV URLs in the next cell and run top-to-bottom.

In [None]:

# === 1) Paste your RAW GitHub URLs here ===
# These three strings are handed to the CSV loaders in the final cell.
PROJECTIONS_URL = "https://raw.githubusercontent.com/<user>/<repo>/<branch>/projections.csv"
LEVERAGE_URL    = "https://raw.githubusercontent.com/<user>/<repo>/<branch>/leverage.csv"
PERCENTILES_URL = "https://raw.githubusercontent.com/<user>/<repo>/<branch>/percentiles.csv"

# SimConfig overrides (optional)
# The dict is expanded into SimConfig(**CFG_KW) in the final cell, so every key
# must be a SimConfig field name; fields not listed keep their defaults.
CFG_KW = dict(
    n_sims=5000,             # increase for more accuracy if runtime allows
    candidate_pool_size=4000,
    max_overlap=3,
    leave_salary_max=3500,
    rng_seed=123,
)
print("URLs set. If you need to use a blob URL, convert it to RAW (raw.githubusercontent.com).")


In [None]:
import os, textwrap
base = '/content/showdown_ev/showdown'
os.makedirs(base, exist_ok=True)
open(os.path.join(base, '__init__.py'), 'w', encoding='utf-8').write(r'''from .config import SimConfig
from .data_io import load_projections, load_leverage, load_percentiles, merge_inputs
from .percentiles import QuantileSampler
from .correlation import build_correlation
from .lineup import Lineup, is_valid_lineup, apply_cpt_salary, validate_player_pool
from .generator import CandidateGenerator
from .opponent import OpponentField
from .ev import EVSimulator
from .portfolio import PortfolioOptimizer
''')
open(os.path.join(base, 'config.py'), 'w', encoding='utf-8').write(r'''from dataclasses import dataclass

@dataclass
class SimConfig:
    """Tunable settings shared by the generator, opponent model, simulator and optimizer.

    Every field has a default, so ``SimConfig()`` is usable as-is and individual
    values can be overridden by keyword.
    """
    # Simulation controls
    n_sims: int = 5000                 # Monte Carlo slates
    field_size: int = 475              # total entries in contest
    our_entries: int = 10              # how many lineups we submit
    prize_first: float = 100.0         # total prize to split among 1st-place ties
    mode: str = "quantile"             # "quantile" or "stats" (stats placeholder)
    dup_penalty: float = 0.15          # penalize dup-prone lineups (0..1)
    max_salary: int = 50000            # DK cap
    leave_salary_max: int = 3500       # max salary left on table for our candidates
    max_overlap: int = 3               # max shared players among our 10 lineups
    enforce_unique_cpt: bool = False   # can toggle; portfolio optimizer will balance naturally
    rng_seed: int = 42                 # reproducibility

    # Candidate generation
    candidate_pool_size: int = 4000    # number of candidate lineups for us
    cpt_top_k: int = 15                # limit captain choices to top-K by p50 points
    flex_top_k: int = 35               # limit flex pool by p50 points (per team combined)

    # Correlation knobs for Gaussian copula (used in quantile mode)
    base_same_team: float = 0.20
    qb_receiver_boost: float = 0.25
    dst_vs_opp_offense: float = -0.30
    dsts_mutual: float = -0.20
    k_vs_offense: float = 0.05
    cross_team_baseline: float = 0.05

    # Opponent field modeling
    field_model: str = "A"             # "A" sampling by ownership, "B" optimizer-like
    field_portfolio_size: int = 8000   # bank of distinct field lineups to sample from
    field_noise_sd: float = 2.0        # points of noise in optimizer-like mode
''')
open(os.path.join(base, 'correlation.py'), 'w', encoding='utf-8').write(r'''import numpy as np
import pandas as pd

def _pos_group(pos: str) -> str:
    """Return the upper-cased position code, or 'OTH' if it is not a recognised position."""
    code = pos.upper()
    known = ('QB', 'RB', 'WR', 'TE', 'K', 'DST')
    return code if code in known else 'OTH'

def build_correlation(players_df: pd.DataFrame, cfg) -> np.ndarray:
    """Build a heuristic player-by-player correlation matrix.

    Parameters
    ----------
    players_df : DataFrame with 'Team' and 'Pos' columns, one row per player.
        Positions are compared verbatim against 'QB', 'WR', 'TE', 'RB', 'K'
        and 'DST'.
    cfg : object exposing ``cross_team_baseline``, ``base_same_team``,
        ``qb_receiver_boost``, ``k_vs_offense``, ``dst_vs_opp_offense`` and
        ``dsts_mutual``.

    Returns
    -------
    Symmetric (n, n) array with a unit diagonal, in row order of
    ``players_df``. If the heuristic matrix has an eigenvalue below 1e-6 it is
    repaired by eigenvalue clipping and rescaled back to a unit diagonal.
    """
    n = len(players_df)
    C = np.eye(n)
    if n == 0:
        # eigh/min below cannot handle a 0x0 matrix; nothing to correlate anyway.
        return C

    team = players_df['Team'].to_numpy()
    pos = players_df['Pos'].to_numpy()
    offense = ('QB', 'WR', 'TE', 'RB')
    catchers = ('WR', 'TE', 'RB')

    def pairs_with(a, b, anchor, partners):
        # True when one of the two positions is `anchor` and the other is in `partners`.
        return (a == anchor and b in partners) or (b == anchor and a in partners)

    for i in range(n):
        for j in range(i + 1, n):
            pi, pj = pos[i], pos[j]
            if team[i] == team[j]:
                corr = cfg.base_same_team
                # boost QB with pass catchers
                if pairs_with(pi, pj, 'QB', catchers):
                    corr += cfg.qb_receiver_boost
                # mild positive kicker with offense
                if pairs_with(pi, pj, 'K', offense):
                    corr += cfg.k_vs_offense
                # DST with its own offense/kicker: small positive
                if pairs_with(pi, pj, 'DST', offense + ('K',)):
                    corr += 0.05
            else:
                corr = cfg.cross_team_baseline
                # DST against the opposing offense
                if pairs_with(pi, pj, 'DST', offense):
                    corr = cfg.dst_vs_opp_offense
                # the two opposing DSTs
                if pi == 'DST' and pj == 'DST':
                    corr = cfg.dsts_mutual
                # very slight negative kicker vs opposing DST
                if pairs_with(pi, pj, 'K', ('DST',)):
                    corr = min(corr, -0.05)

            C[i, j] = C[j, i] = np.clip(corr, -0.95, 0.95)

    # Nearest-PSD repair via eigenvalue clipping, only when actually needed.
    eigvals, eigvecs = np.linalg.eigh(C)
    if eigvals.min() < 1e-6:
        eigvals = np.maximum(eigvals, 1e-6)
        C = (eigvecs * eigvals) @ eigvecs.T
        # Rescale to a correlation matrix; diag(C) >= 1e-6 so the division is safe.
        d = np.sqrt(np.diag(C))
        C = C / np.outer(d, d)
        # Remove floating-point asymmetry and pin the diagonal to exactly 1.
        C = 0.5 * (C + C.T)
        np.fill_diagonal(C, 1.0)
    return C
''')
open(os.path.join(base, 'data_io.py'), 'w', encoding='utf-8').write(r'''import pandas as pd

# Expected headers:
# Projections CSV:
# 'RTS ID','player','position','team','salary','rushAtts','rushYds','rushTDs',
# 'recvTgts','recvRec','recYds','recTDs','passAtts','passComp','passYds','passTDs','ints',
# 'Proj Own','projected points'
#
# Leverage CSV:
# 'Player','Pos','Team','Salary','FLEX Own','CPT Own','Total Own','FLEX Rate','CPT Rate',
# 'Total Rate','CPT Lev','Total Lev'
#
# Percentiles CSV:
# 'player','position','team','p000','p005',...,'p100'

def load_projections(path: str) -> pd.DataFrame:
    """Read the projections CSV and rename its columns to the package's canonical names.

    Header whitespace is stripped, then 'player'/'position'/'team'/'salary'/
    'Proj Own'/'projected points' become 'Player'/'Pos'/'Team'/'Salary'/
    'Total Own'/'ProjPts'. A missing 'Total Own' column is created as 0.0;
    'Salary' is cast to int and 'ProjPts' (when present) to float.
    """
    canonical = {
        'player': 'Player',
        'position': 'Pos',
        'team': 'Team',
        'salary': 'Salary',
        'Proj Own': 'Total Own',
        'projected points': 'ProjPts',
    }
    frame = pd.read_csv(path)
    # strip stray header whitespace first so the canonical mapping can match
    frame = frame.rename(columns={col: col.strip() for col in frame.columns})
    frame = frame.rename(columns=canonical)

    if 'Total Own' not in frame.columns:
        frame['Total Own'] = 0.0

    frame['Salary'] = frame['Salary'].astype(int)
    if 'ProjPts' in frame.columns:
        frame['ProjPts'] = frame['ProjPts'].astype(float)
    return frame

def load_leverage(path: str) -> pd.DataFrame:
    """Read the leverage CSV, strip header whitespace and coerce its numeric columns.

    Ownership/rate/leverage columns that are present are converted with
    ``pd.to_numeric``; unparseable cells become 0.0. 'Salary' is cast to int.
    """
    numeric_cols = (
        'FLEX Own', 'CPT Own', 'Total Own',
        'FLEX Rate', 'CPT Rate', 'Total Rate',
        'CPT Lev', 'Total Lev',
    )
    frame = pd.read_csv(path)
    frame = frame.rename(columns={col: col.strip() for col in frame.columns})
    for col in numeric_cols:
        if col not in frame.columns:
            continue
        frame[col] = pd.to_numeric(frame[col], errors='coerce').fillna(0.0)
    frame['Salary'] = frame['Salary'].astype(int)
    return frame

def load_percentiles(path: str) -> pd.DataFrame:
    """Read the percentiles CSV and return it with canonical key columns.

    Header whitespace is stripped and 'player'/'position'/'team' are renamed to
    'Player'/'Pos'/'Team'. Only genuine percentile columns, i.e. a 'p' followed
    by digits such as 'p000' or 'p095', are coerced to numeric (unparseable
    cells become NaN). Other columns that merely start with 'p' are left as
    read, so text columns are not wiped to NaN.
    """
    df = pd.read_csv(path)
    df.rename(columns={c: c.strip() for c in df.columns}, inplace=True)
    df.rename(columns={
        'player': 'Player',
        'position': 'Pos',
        'team': 'Team'
    }, inplace=True)
    # Ensure numeric percentiles
    for c in df.columns:
        if c.startswith('p') and c[1:].isdigit():
            df[c] = pd.to_numeric(df[c], errors='coerce')
    return df

def merge_inputs(proj_df: pd.DataFrame, lev_df: pd.DataFrame, pct_df: pd.DataFrame) -> pd.DataFrame:
    """Left-join leverage and percentile data onto the projections table.

    Parameters
    ----------
    proj_df, lev_df, pct_df : frames as returned by the three CSV loaders.
        They are not modified; key columns are normalised on private copies.

    Returns
    -------
    One row per projections row (more if the right-hand frames contain
    duplicate keys), guaranteed to have 'CPT Rate', 'FLEX Rate', 'Total Rate'
    and 'ProjPts' columns.
    """
    key_cols = ['Player', 'Pos', 'Team']

    def _normalised(frame: pd.DataFrame) -> pd.DataFrame:
        # Strip whitespace from the join keys on a copy so callers' frames stay untouched.
        frame = frame.copy()
        for k in key_cols:
            if k in frame.columns:
                frame[k] = frame[k].astype(str).str.strip()
        return frame

    proj, lev, pct = (_normalised(f) for f in (proj_df, lev_df, pct_df))

    df = proj.merge(lev, on=key_cols + ['Salary'], how='left', suffixes=('', '_lev'))
    df = df.merge(pct, on=key_cols, how='left', suffixes=('', '_pct'))

    # Fallback rates if leverage missing
    if 'CPT Rate' not in df.columns:
        df['CPT Rate'] = 0.0
    if 'FLEX Rate' not in df.columns:
        df['FLEX Rate'] = 0.0
    if 'Total Rate' not in df.columns:
        df['Total Rate'] = df.get('Total Own', 0.0)

    # If no CPT/FLEX rate carries any weight, derive a naive split from Total Own.
    rates = df[['CPT Rate', 'FLEX Rate']].fillna(0.0)
    if (rates.sum().sum() == 0) and ('Total Own' in df.columns):
        df['CPT Rate'] = df['Total Own'] * 0.15
        df['FLEX Rate'] = df['Total Own'] * 0.85

    # Projection fallback: use the median percentile wherever ProjPts is missing.
    # This is applied per row, so a partially filled ProjPts column is repaired too.
    if 'ProjPts' not in df.columns:
        df['ProjPts'] = df['p050'] if 'p050' in df.columns else 0.0
    elif 'p050' in df.columns:
        df['ProjPts'] = df['ProjPts'].fillna(df['p050'])
    elif df['ProjPts'].isna().all():
        df['ProjPts'] = 0.0

    return df
''')
open(os.path.join(base, 'ev.py'), 'w', encoding='utf-8').write(r'''import numpy as np
import pandas as pd
from scipy.stats import norm

from .percentiles import QuantileSampler

def _mvnorm_to_uniform(corr: np.ndarray, n_draws: int, rng) -> np.ndarray:
    """Draw ``n_draws`` zero-mean normals with covariance ``corr`` and map them through the normal CDF."""
    dim = corr.shape[0]
    normals = rng.multivariate_normal(mean=np.zeros(dim), cov=corr, size=n_draws)
    # Phi(z) = 0.5 * (1 + erf(z / sqrt(2)))
    return 0.5*(1+erf(normals / np.sqrt(2)))

def erf(x):
    """Element-wise error function for scalars or arrays.

    Uses ``scipy.special.erf``, a compiled ufunc, since scipy is already a
    dependency of this module. If that import is unavailable it falls back to
    applying ``math.erf`` element by element.

    Returns a NumPy float scalar or array shaped like ``x``; empty arrays are
    supported.
    """
    try:
        from scipy.special import erf as _sp_erf
    except ImportError:
        from math import erf as _m_erf
        # otypes lets the vectorised fallback accept zero-size inputs as well.
        return np.vectorize(_m_erf, otypes=[float])(x)
    return _sp_erf(x)

class EVSimulator:
    """Monte Carlo estimator of first-place prize EV for showdown lineups.

    Player scores are drawn with a Gaussian copula: correlated standard
    normals are mapped to uniforms and pushed through each player's
    percentile-based inverse CDF.
    """

    _FLEX_SLOTS = ['FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5']

    def __init__(self, players_df: pd.DataFrame, corr: np.ndarray, cfg):
        """Store inputs and build one quantile sampler per player row.

        ``corr`` is expected to be ordered like the rows of ``players_df``;
        ``cfg`` must expose ``rng_seed``, ``n_sims`` and ``prize_first``.
        """
        self.df = players_df.reset_index(drop=True)
        self.corr = corr
        self.cfg = cfg
        self.rng = np.random.default_rng(cfg.rng_seed+7)

        # Only real percentile columns ('p' + digits) feed the samplers; other
        # columns that happen to start with 'p' (e.g. passing stats) are skipped.
        pct_cols = [c for c in self.df.columns
                    if isinstance(c, str) and c.startswith('p') and c[1:].isdigit()]
        self.samplers = []
        for _, row in self.df.iterrows():
            self.samplers.append(QuantileSampler({c: row[c] for c in pct_cols}))

        # player name -> column position in the simulated points matrix
        self.idx_by_name = {name: i for i, name in enumerate(self.df['Player'])}

    def simulate_points(self, n_sims: int):
        """Return an (n_sims, n_players) array of simulated fantasy points."""
        U = self._correlated_uniforms(n_sims)
        pts = np.zeros((n_sims, len(self.df)), dtype=float)
        for j, sampler in enumerate(self.samplers):
            pts[:, j] = sampler.sample(U[:, j])
        return pts

    def _correlated_uniforms(self, n_sims: int):
        """Return (n_sims, n_players) uniforms whose normal scores follow ``self.corr``."""
        # Eigen-decomposition (rather than Cholesky) tolerates a nearly singular matrix.
        vals, vecs = np.linalg.eigh(self.corr)
        vals = np.clip(vals, 1e-8, None)
        A = vecs * np.sqrt(vals)  # same as vecs @ diag(sqrt(vals))
        Z = self.rng.standard_normal(size=(n_sims, self.corr.shape[0]))
        X = Z @ A.T
        # standard normal CDF
        return 0.5 * (1.0 + erf(X / np.sqrt(2)))

    def lineup_scores(self, pts_mat: np.ndarray, lineup_df: pd.DataFrame) -> np.ndarray:
        """Score every lineup in every simulation (captain counts 1.5x).

        ``lineup_df`` needs a 'CPT' column plus either 'FLEX1'..'FLEX5' columns
        or a single 'FLEX' column holding a sequence of flex names per row (the
        layout produced by the opponent-field bank). Columns of the result
        follow the row order of ``lineup_df`` regardless of its index labels.

        Raises KeyError for an unknown player name or a missing flex layout.
        """
        n = len(lineup_df)
        if n == 0:
            return np.zeros((pts_mat.shape[0], 0), dtype=float)

        if all(c in lineup_df.columns for c in self._FLEX_SLOTS):
            flex_rows = lineup_df[self._FLEX_SLOTS].to_numpy().tolist()
        elif 'FLEX' in lineup_df.columns:
            flex_rows = [list(f) for f in lineup_df['FLEX']]
        else:
            raise KeyError("lineup_df needs FLEX1..FLEX5 columns or a FLEX column")

        idx = self.idx_by_name
        cpt_idx = np.array([idx[nm] for nm in lineup_df['CPT']], dtype=int)
        flex_idx = np.array([[idx[nm] for nm in row] for row in flex_rows], dtype=int).reshape(n, -1)

        # One fancy-index per roster slot instead of one Python iteration per lineup.
        scores = 1.5 * pts_mat[:, cpt_idx]
        for j in range(flex_idx.shape[1]):
            scores = scores + pts_mat[:, flex_idx[:, j]]
        return scores

    def expected_value(self, our_lineups: pd.DataFrame, field_lineups: pd.DataFrame) -> pd.Series:
        """Return the average first-place payout per lineup in ``our_lineups``.

        In each simulation the top score is taken over all of ``our_lineups``
        and ``field_lineups`` together, and ``cfg.prize_first`` is split evenly
        among every lineup tied for it. Note that the lineups in
        ``our_lineups`` therefore also compete against each other. The result
        is a Series named 'EV' sharing ``our_lineups``' index.
        """
        n_sims = self.cfg.n_sims
        ev = np.zeros(our_lineups.shape[0], dtype=float)
        if ev.size == 0:
            return pd.Series(ev, index=our_lineups.index, name='EV')

        pts = self.simulate_points(n_sims)
        our_scores = self.lineup_scores(pts, our_lineups)
        field_scores = self.lineup_scores(pts, field_lineups)

        for t in range(n_sims):
            all_scores = np.concatenate([our_scores[t], field_scores[t]])
            top = all_scores.max()
            # which of our lineups tie the top score (with a float tolerance)?
            ours_tie = np.where(our_scores[t] >= top - 1e-9)[0]
            if len(ours_tie) > 0:
                total_tie = (all_scores >= top - 1e-9).sum()
                ev[ours_tie] += self.cfg.prize_first / total_tie

        # average across sims
        return pd.Series(ev / n_sims, index=our_lineups.index, name='EV')
''')
open(os.path.join(base, 'generator.py'), 'w', encoding='utf-8').write(r'''import numpy as np
import pandas as pd
from itertools import combinations
from .lineup import is_valid_lineup, apply_cpt_salary

class CandidateGenerator:
    """Randomised generator of candidate showdown lineups (1 CPT + 5 FLEX)."""

    _COLUMNS = ['CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5', 'Salary']

    def __init__(self, df: pd.DataFrame, cfg):
        """Copy the player pool and rank it by median projection.

        ``df`` needs 'Player', 'Salary', 'Team' and 'ProjPts' columns; 'p050'
        is used as the median when present, with ProjPts filling any gaps so a
        player lacking percentiles still gets a usable sampling weight.
        """
        self.df = df.reset_index(drop=True)
        self.cfg = cfg
        self.rng = np.random.default_rng(cfg.rng_seed)

        # Precompute p50 and ranks
        proj = self.df['ProjPts']
        if 'p050' in self.df.columns:
            self.df['p50'] = self.df['p050'].fillna(proj)
        else:
            self.df['p50'] = proj
        self.df.sort_values('p50', ascending=False, inplace=True)
        self.df.reset_index(drop=True, inplace=True)

    def _filtered_pool(self):
        """Return (captain candidates, flex pool) trimmed to keep sampling feasible.

        Captains are the top ``cfg.cpt_top_k`` by p50. The flex pool is the top
        ``cfg.flex_top_k`` plus up to 10 lower-ranked players sampled with
        ``cfg.rng_seed``.
        """
        top_k = self.cfg.flex_top_k
        cpt_candidates = self.df.head(self.cfg.cpt_top_k).copy()

        flex_top = self.df.head(top_k).copy()
        rest = self.df.iloc[top_k:]
        if len(rest) > 0:
            longshots = rest.sample(n=min(10, len(rest)), random_state=self.cfg.rng_seed)
        else:
            longshots = rest
        flex_pool = pd.concat([flex_top, longshots], ignore_index=True).drop_duplicates(subset=['Player'])

        return cpt_candidates, flex_pool

    def generate(self, max_salary: int=None, leave_salary_max: int=None, n_samples: int=None):
        """Sample valid lineups until ``cfg.candidate_pool_size`` distinct ones are found.

        Parameters
        ----------
        max_salary : salary cap; defaults to ``cfg.max_salary``.
        leave_salary_max : most salary a lineup may leave unused; defaults to
            ``cfg.leave_salary_max``.
        n_samples : number of flex draws to attempt; defaults to five times the
            candidate pool size.

        Returns
        -------
        DataFrame with columns CPT, FLEX1..FLEX5 (flex names sorted) and
        Salary, in discovery order so a fixed seed reproduces the same frame.
        It is empty, but still has those columns, when nothing qualifies.
        """
        max_salary = max_salary or self.cfg.max_salary
        leave_salary_max = leave_salary_max if leave_salary_max is not None else self.cfg.leave_salary_max
        target_min_salary = max_salary - leave_salary_max
        pool_size = self.cfg.candidate_pool_size

        cpt_candidates, flex_pool = self._filtered_pool()
        cpt_names = cpt_candidates['Player'].tolist()
        names = flex_pool['Player'].tolist()
        if len(names) < 5:
            # not enough players to fill the five flex slots
            return pd.DataFrame(columns=self._COLUMNS)

        # Sampling weights biased toward higher p50; computed once, not per trial.
        weights = np.nan_to_num(flex_pool['p50'].to_numpy(dtype=float), nan=0.0)
        weights = np.clip(weights, 0.0, None)
        if np.count_nonzero(weights) < 5:
            # drawing 5 without replacement needs at least 5 positive weights
            weights = np.ones(len(names))
        probs = weights / weights.sum()

        # dict (insertion-ordered) instead of set: output order must not depend on hashing
        candidates = {}
        trials = n_samples or pool_size * 5

        for _ in range(trials):
            picks = self.rng.choice(len(names), size=5, replace=False, p=probs)
            flex_names = [names[i] for i in picks]

            for cpt in cpt_names:
                ok, sal, _teams = is_valid_lineup(flex_names, cpt, self.df, max_salary)
                if not ok or sal < target_min_salary:
                    continue
                candidates.setdefault((cpt,) + tuple(sorted(flex_names)), sal)
                if len(candidates) >= pool_size:
                    break
            if len(candidates) >= pool_size:
                break

        rows = [
            {'CPT': key[0], 'FLEX1': key[1], 'FLEX2': key[2], 'FLEX3': key[3],
             'FLEX4': key[4], 'FLEX5': key[5], 'Salary': sal}
            for key, sal in candidates.items()
        ]
        return pd.DataFrame(rows, columns=self._COLUMNS)
''')
open(os.path.join(base, 'lineup.py'), 'w', encoding='utf-8').write(r'''from dataclasses import dataclass
from typing import List, Dict, Tuple
import numpy as np
import pandas as pd

ALLOWED_POS = {'QB','WR','RB','TE','K','DST'}

@dataclass(frozen=True)
class Lineup:
    """Immutable record describing one showdown lineup (one CPT plus five FLEX)."""
    cpt: str              # player name @ CPT
    flex: Tuple[str,...]  # 5 player names
    salary: int
    teams: Tuple[str,...] # teams present
    players: Tuple[str,...] # all 6 names (CPT duplicates prevented upstream)

def apply_cpt_salary(base_salary: int) -> int:
    """Return the captain-slot salary: 1.5x the base salary, rounded to an int.

    Rounding uses the built-in ``round``, so exact halves go to the even
    neighbour.
    """
    captain_cost = round(base_salary * 1.5)
    return int(captain_cost)

def is_valid_lineup(names: List[str], cpt_name: str, df: pd.DataFrame, max_salary: int) -> Tuple[bool,int,Tuple[str,...]]:
    """Check a CPT + flex combination against the roster rules.

    Returns ``(ok, salary, teams)``. A lineup is rejected when the captain also
    appears in ``names``, when the total salary (captain at 1.5x) exceeds
    ``max_salary``, or when all players come from a single team. ``teams`` is
    only populated for valid lineups, and ``salary`` is 0 for the
    duplicate-captain rejection.
    """
    if cpt_name in names:
        return False, 0, ()

    roster = [cpt_name] + names
    rows = df.set_index('Player').loc[roster]

    cpt_cost = apply_cpt_salary(int(rows.loc[cpt_name,'Salary']))
    flex_cost = int(rows.loc[names, 'Salary'].sum())
    salary = cpt_cost + flex_cost
    if salary > max_salary:
        return False, salary, ()

    teams = set(rows['Team'].tolist())
    if len(teams) >= 2:
        return True, salary, tuple(teams)
    return False, salary, ()

def validate_player_pool(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` limited to allowed positions with all core fields present."""
    required = ['Player','Pos','Team','Salary','ProjPts']
    allowed_rows = df['Pos'].isin(ALLOWED_POS)
    pool = df[allowed_rows].copy()
    # drop any player missing a field the rest of the pipeline relies on
    return pool.dropna(subset=required)
''')
open(os.path.join(base, 'opponent.py'), 'w', encoding='utf-8').write(r'''import numpy as np
import pandas as pd
from .lineup import is_valid_lineup, apply_cpt_salary

class OpponentField:
    """Ownership-driven model of the opposing contest entries."""

    _FLEX_SLOTS = ['FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5']
    # 'FLEX' keeps the sorted tuple; FLEX1..FLEX5 repeat it in the column
    # layout used for candidate lineups elsewhere in the package.
    _BANK_COLUMNS = ['CPT', 'FLEX', 'Salary'] + _FLEX_SLOTS

    def __init__(self, players_df: pd.DataFrame, cfg):
        """Prepare normalised CPT/FLEX sampling probabilities.

        ``players_df`` needs 'Player', 'CPT Rate' and 'FLEX Rate' columns, and
        'Total Own' when the CPT rates carry no weight. If a rate vector sums
        to zero, uniform probabilities are used instead of failing later inside
        the sampler.
        """
        self.df = players_df.reset_index(drop=True)
        self.cfg = cfg
        self.rng = np.random.default_rng(cfg.rng_seed)

        # Rates used for sampling
        self.cpt_rates = self._non_negative(self.df['CPT Rate'])
        self.flex_rates = self._non_negative(self.df['FLEX Rate'])
        if self.cpt_rates.sum() == 0:
            # derive naive split: small CPT share
            tot = self._non_negative(self.df['Total Own'])
            self.cpt_rates = 0.15 * tot
            self.flex_rates = 0.85 * tot

        self.cpt_rates = self._normalise(self.cpt_rates)
        self.flex_rates = self._normalise(self.flex_rates)

    @staticmethod
    def _non_negative(series) -> np.ndarray:
        # NaN -> 0 and negative values -> 0
        return np.clip(series.fillna(0.0).to_numpy(), 0, None)

    @staticmethod
    def _normalise(weights: np.ndarray) -> np.ndarray:
        # Scale to a probability vector; fall back to uniform when there is no mass.
        weights = np.asarray(weights, dtype=float)
        total = weights.sum()
        if total > 0:
            return weights / total
        if len(weights) == 0:
            return weights
        return np.full(len(weights), 1.0 / len(weights))

    def _sample_lineup_by_rates(self):
        """Draw one lineup; return ``(cpt, sorted flex tuple, salary)`` or None if invalid."""
        names = self.df['Player'].tolist()
        n = len(names)
        if n < 6:
            # cannot fill a captain plus five distinct flex slots
            return None

        # pick CPT by CPT rates
        cpt = names[self.rng.choice(n, p=self.cpt_rates)]

        # Draw up to 8 flex candidates without replacement so that dropping the
        # captain still leaves 5; the draw cannot exceed the number of players
        # with a non-zero flex rate.
        n_draw = min(8, n, int(np.count_nonzero(self.flex_rates)))
        idxs = self.rng.choice(n, size=n_draw, replace=False, p=self.flex_rates) if n_draw > 0 else []
        flex = []
        for i in idxs:
            if names[i] == cpt:
                continue
            flex.append(names[i])
            if len(flex) == 5:
                break
        if len(flex) < 5:
            # fallback: fill randomly
            remaining = [nm for nm in names if nm not in flex and nm != cpt]
            self.rng.shuffle(remaining)
            flex += remaining[:5-len(flex)]

        # check validity
        ok, sal, teams = is_valid_lineup(flex, cpt, self.df, self.cfg.max_salary)
        if not ok:
            return None
        return (cpt, tuple(sorted(flex)), sal)

    def bank_field_lineups(self, bank_size=None):
        """Collect up to ``bank_size`` distinct valid lineups (default ``cfg.field_portfolio_size``).

        Gives up after ``bank_size * 20`` draws. The frame always has the
        columns CPT, FLEX, Salary and FLEX1..FLEX5, even when empty.
        """
        bank_size = bank_size or self.cfg.field_portfolio_size
        seen = set()
        bank = []
        tries = 0
        while len(bank) < bank_size and tries < bank_size*20:
            line = self._sample_lineup_by_rates()
            tries += 1
            if line is None:
                continue
            key = (line[0], line[1])
            if key in seen:
                continue
            seen.add(key)
            row = {'CPT': line[0], 'FLEX': line[1], 'Salary': line[2]}
            row.update(zip(self._FLEX_SLOTS, line[1]))
            bank.append(row)
        return pd.DataFrame(bank, columns=self._BANK_COLUMNS)

    def sample_field_entries(self, bank: pd.DataFrame, n_entries: int):
        """Sample ``n_entries`` lineups from ``bank`` with replacement.

        Each lineup is weighted by the summed 'Total Rate' of its six players;
        if every weight is zero the draw is uniform. The draw uses its own
        generator seeded with ``cfg.rng_seed + 1``, so repeated calls return
        the same sample.
        """
        if len(bank) == 0:
            return pd.DataFrame(columns=self._BANK_COLUMNS)
        own_map = self.df.set_index('Player')['Total Rate'].fillna(0.0).to_dict()
        weights = [
            sum(own_map.get(p, 0.0) for p in ([cpt] + list(flex)))
            for cpt, flex in zip(bank['CPT'], bank['FLEX'])
        ]
        w = self._normalise(np.clip(np.array(weights, dtype=float), 0, None))
        rng = np.random.default_rng(self.cfg.rng_seed+1)
        idx = rng.choice(len(bank), size=max(0, n_entries), replace=True, p=w)
        return bank.iloc[idx].reset_index(drop=True)
''')
open(os.path.join(base, 'percentiles.py'), 'w', encoding='utf-8').write(r'''import numpy as np

class QuantileSampler:
    """Piecewise-linear inverse CDF built from provided percentiles p000..p100."""

    def __init__(self, percentiles: dict):
        """Collect (quantile, value) knots from a column-name -> value mapping.

        Only keys of the form 'p' + integer (e.g. 'p005' -> 0.05) whose
        quantile lies in [0, 1] and whose value is a finite-or-infinite number
        are used. Other keys, non-numeric values, None and NaN are ignored.
        With fewer than two usable knots the sampler degenerates to a constant
        0.
        """
        knots = []
        for k, v in percentiles.items():
            if not (isinstance(k, str) and k.startswith('p')):
                continue
            try:
                q = int(k[1:]) / 100.0  # e.g., 'p005' -> 5 -> 0.05
                x = float(v)
            except (TypeError, ValueError):
                # not a percentile key (e.g. 'passYds') or not a numeric value
                continue
            if np.isnan(x) or not 0.0 <= q <= 1.0:
                continue
            knots.append((q, x))
        if len(knots) < 2:
            # fallback to degenerate distribution at 0
            knots = [(0.0, 0.0), (1.0, 0.0)]
        knots.sort(key=lambda kv: kv[0])
        self.q = np.array([q for q, _ in knots])
        self.x = np.array([x for _, x in knots])

        # Enforce monotonicity in xs to avoid tiny inversions
        self.x = np.maximum.accumulate(self.x)

        # cache bounds
        self.x_min = self.x[0]
        self.x_max = self.x[-1]

    def sample(self, u: np.ndarray) -> np.ndarray:
        """Map uniforms u in [0,1] to samples via piecewise-linear inverse CDF."""
        u = np.clip(u, 0.0, 1.0)
        return np.interp(u, self.q, self.x)

    def mean(self) -> float:
        """Approximate the mean as the trapezoidal integral of the inverse CDF.

        The integral over the covered quantile range is divided by the width of
        that range, so partial coverage (e.g. p005..p095) still yields an
        average.
        """
        # Explicit trapezoid rule: avoids np.trapz, which NumPy 2 deprecates.
        area = float(np.sum(0.5 * (self.x[1:] + self.x[:-1]) * np.diff(self.q)))
        span = self.q[-1] - self.q[0]
        return area / span if span > 0 else area
''')
open(os.path.join(base, 'portfolio.py'), 'w', encoding='utf-8').write(r'''import numpy as np
import pandas as pd

def _overlap(a_row, b_row):
    """Count the distinct players shared by two lineup rows (CPT and FLEX slots alike)."""
    slots = ('CPT', 'FLEX1', 'FLEX2', 'FLEX3', 'FLEX4', 'FLEX5')
    first = {a_row[slot] for slot in slots}
    second = {b_row[slot] for slot in slots}
    return len(first.intersection(second))

class PortfolioOptimizer:
    """Greedy picker that builds our final set of lineups from EV-ranked candidates."""

    def __init__(self, cfg):
        self.cfg = cfg

    def select(self, candidates: pd.DataFrame, ev: pd.Series):
        """Return up to ``cfg.our_entries`` lineups, highest EV first.

        Candidates are joined with ``ev`` (a Series named 'EV') and scanned in
        descending EV order. A lineup is skipped when it shares more than
        ``cfg.max_overlap`` players with any lineup already picked, or, if
        ``cfg.enforce_unique_cpt`` is set, when its captain is already in use.
        """
        ranked = candidates.copy().join(ev)
        ranked = ranked.sort_values('EV', ascending=False).reset_index(drop=True)

        picked = []
        used_captains = set()

        for pos, lineup in ranked.iterrows():
            if len(picked) >= self.cfg.our_entries:
                break

            # overlap constraint against everything picked so far
            too_similar = any(
                _overlap(lineup, ranked.loc[prev]) > self.cfg.max_overlap
                for prev in picked
            )
            if too_similar:
                continue

            # optional captain uniqueness
            if self.cfg.enforce_unique_cpt and lineup['CPT'] in used_captains:
                continue

            picked.append(pos)
            used_captains.add(lineup['CPT'])

        return ranked.loc[picked].reset_index(drop=True)
''')
open(os.path.join(base, 'run_example.py'), 'w', encoding='utf-8').write(r'''
# Example runner for Colab using direct GitHub RAW links.
import pandas as pd
from .config import SimConfig
from .data_io import load_projections, load_leverage, load_percentiles, merge_inputs
from .lineup import validate_player_pool
from .generator import CandidateGenerator
from .correlation import build_correlation
from .opponent import OpponentField
from .ev import EVSimulator
from .portfolio import PortfolioOptimizer

def run(proj_csv, lev_csv, pct_csv):
    """Run the whole pipeline with default settings.

    Each argument is passed straight to the matching CSV loader (local path or
    https raw link). Returns ``(chosen, top20)``: the selected portfolio and
    the 20 highest candidate EVs in descending order.
    """
    settings = SimConfig()

    # accepts local path or https raw link
    projections = load_projections(proj_csv)
    leverage = load_leverage(lev_csv)
    percentiles = load_percentiles(pct_csv)
    pool = validate_player_pool(merge_inputs(projections, leverage, percentiles))

    # our candidate lineups
    candidates = CandidateGenerator(pool, settings).generate()

    # opposing entries: fill the contest minus our own slots
    corr_matrix = build_correlation(pool, settings)
    opponents = OpponentField(pool, settings)
    lineup_bank = opponents.bank_field_lineups()
    n_field = settings.field_size - settings.our_entries
    field_entries = opponents.sample_field_entries(lineup_bank, n_entries=n_field)

    # simulate and pick
    simulator = EVSimulator(pool, corr_matrix, settings)
    ev_by_lineup = simulator.expected_value(candidates, field_entries)
    selection = PortfolioOptimizer(settings).select(candidates, ev_by_lineup)

    top_ev = ev_by_lineup.sort_values(ascending=False).head(20)
    return selection, top_ev

# Script entry point. This module uses package-relative imports (from .config ...),
# so it has to be executed as part of the package rather than as a loose file.
if __name__ == "__main__":
    # Paste your RAW GitHub URLs here (must start with https://raw.githubusercontent.com/...)
    projections_url  = "https://raw.githubusercontent.com/<user>/<repo>/<branch>/projections.csv"
    leverage_url     = "https://raw.githubusercontent.com/<user>/<repo>/<branch>/leverage.csv"
    percentiles_url  = "https://raw.githubusercontent.com/<user>/<repo>/<branch>/percentiles.csv"

    # chosen: selected portfolio; top: the 20 best candidate EVs
    chosen, top = run(projections_url, leverage_url, percentiles_url)
    print(chosen)
    print(top)
''')

In [None]:

# === 3) Import the package we just wrote ===
import sys, os
sys.path.append('/content/showdown_ev')
from showdown.config import SimConfig
from showdown.data_io import load_projections, load_leverage, load_percentiles, merge_inputs
from showdown.lineup import validate_player_pool
from showdown.generator import CandidateGenerator
from showdown.correlation import build_correlation
from showdown.opponent import OpponentField
from showdown.ev import EVSimulator
from showdown.portfolio import PortfolioOptimizer

print("Package imported.")


In [None]:

# === 4) Load data, simulate, and select portfolio ===
# Relies on names defined by the earlier cells: the *_URL constants and CFG_KW
# from the first cell, and the showdown imports from the previous cell.
import pandas as pd

# Defaults from SimConfig, overridden by whatever CFG_KW specifies.
cfg = SimConfig(**CFG_KW)

# Load the three CSVs and combine them into a single player table.
proj = load_projections(PROJECTIONS_URL)
lev  = load_leverage(LEVERAGE_URL)
pct  = load_percentiles(PERCENTILES_URL)
players = merge_inputs(proj, lev, pct)
players = validate_player_pool(players)

# Candidate lineups we may enter.
gen = CandidateGenerator(players, cfg)
cands = gen.generate()

# Opposing field: the contest size minus our own entries.
C = build_correlation(players, cfg)
opp = OpponentField(players, cfg)
bank = opp.bank_field_lineups()
field = opp.sample_field_entries(bank, n_entries=cfg.field_size - cfg.our_entries)

# EV of every candidate against the sampled field.
sim = EVSimulator(players, C, cfg)
ev = sim.expected_value(cands, field)

# Pick the final portfolio from the candidates.
port = PortfolioOptimizer(cfg)
chosen = port.select(cands, ev)

# Show and save outputs
display_cols = ['CPT','FLEX1','FLEX2','FLEX3','FLEX4','FLEX5','Salary']
out_chosen = chosen[display_cols + ['EV']].copy()
out_top = cands.join(ev).sort_values('EV', ascending=False).head(20)[display_cols + ['EV']].reset_index(drop=True)

# NOTE: display() is not imported in this cell; it is expected to be provided
# by the notebook environment.
print("=== Selected 10 lineups ===")
display(out_chosen.reset_index(drop=True))
print("=== Top-20 EV candidates ===")
display(out_top)

out_chosen.to_csv('/content/chosen_lineups.csv', index=False)
out_top.to_csv('/content/top20_ev_candidates.csv', index=False)
print("Saved /content/chosen_lineups.csv and /content/top20_ev_candidates.csv")
