In [1]:
# pip install selenium
# you may need to install a driver for selenium, see here https://selenium-python.readthedocs.io/installation.html

# pip install beautifulsoup4
# pip install scipy
# pip install numpy
# pip install pulp

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import numpy as np
from scipy.stats import norm
import itertools
from collections import defaultdict

In [3]:
def scrape_fantasy(url, wait_for_element, wait_time=10):
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    
    driver.get(url)
    wait = WebDriverWait(driver, wait_time)

    try:
        element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, wait_for_element)))  
        page_source = driver.page_source

    except Exception as e:
        print("Error waiting for the element:", e)

    finally:
        driver.quit()
    
    return page_source

In [4]:
# scrape data from espn
# you can change the year by modifying the season_ID

LEAGUE_ID = 756983
SEASON_ID = 2024

season_html = scrape_fantasy(
    url=f"https://fantasy.espn.com/football/league/schedule?leagueId={LEAGUE_ID}&seasonId={SEASON_ID}",
    wait_for_element="table.Table",
)

bracket_html= scrape_fantasy(
    url=f"https://fantasy.espn.com/football/league/playoffs?leagueId={LEAGUE_ID}",
    wait_for_element="div.ScoreCell__Team",
)


In [5]:
# extract the scores for each game in the season

soup = BeautifulSoup(season_html, "html.parser")

tables = soup.find_all("table", {'class', 'Table'})

table_titles = list(map(lambda x: x.text.strip(), soup.find_all("div", {'class': 'table-caption'})))
table_titles = list(filter(lambda x: 'Playoff' not in x, table_titles)) # filter out playoffs

all_week_data = dict()
for title, table in zip(table_titles, tables):
    headers = []
    for th in table.find("thead").find_all("th"):
        headers.append(th.get_text(strip=True))

    data = []
    for row in table.find("tbody").find_all("tr"):
        cells = row.find_all("td")
        row_data = [cell.get_text(strip=True) for cell in cells]
        data.append(row_data)

    simplified_data = [(item[1], item[2]) for item in data]+[(item[4], item[3]) for item in data]
    simplified_data = sorted(simplified_data, key=lambda x: x[0])
    all_week_data[title] = simplified_data

name_mappings = dict()
for item in data:
    name_mappings[item[0].split('(')[0].strip('@').strip()] = item[1]
    name_mappings[item[5].split('(')[0].strip('@').strip()] = item[4]

all_week_data

{'NFL Week 1': [('Cooper Garren', '85.52'),
  ('Daniel Bauman', '106.54'),
  ('Garrett Presko, Garrett Presko', '93.34'),
  ('Jacob Kaufman', '103.98'),
  ('James Young', '111.5'),
  ('Matthew Clauss', '81.24'),
  ('Owen Wright', '128.8'),
  ('Vishal Beeki', '93.18'),
  ('William Baker', '110.06'),
  ('Zach Jonas', '87.14')],
 'NFL Week 2': [('Cooper Garren', '94.22'),
  ('Daniel Bauman', '117.68'),
  ('Garrett Presko, Garrett Presko', '96.7'),
  ('Jacob Kaufman', '138.26'),
  ('James Young', '116.12'),
  ('Matthew Clauss', '98.9'),
  ('Owen Wright', '76.18'),
  ('Vishal Beeki', '83.8'),
  ('William Baker', '105.92'),
  ('Zach Jonas', '94.76')],
 'NFL Week 3': [('Cooper Garren', '43.26'),
  ('Daniel Bauman', '98.76'),
  ('Garrett Presko, Garrett Presko', '51.96'),
  ('Jacob Kaufman', '85.38'),
  ('James Young', '81.12'),
  ('Matthew Clauss', '134.28'),
  ('Owen Wright', '121.86'),
  ('Vishal Beeki', '65.82'),
  ('William Baker', '108.64'),
  ('Zach Jonas', '77.26')],
 'NFL Week 4': [('

In [6]:
# extract the seeds and matchups for the bracket

soup = BeautifulSoup(bracket_html, "html.parser")

matchups_html = soup.find_all('div', {'class': 'matchup-teams-score-container'})[:4]

name_to_seeds, matchups = dict(), []
for matchup_html in matchups_html:
    teams_html = matchup_html.find_all('div', {'class': 'ScoreCell__Team'})
    matchup = []
    for team_html in teams_html:
        name = team_html.find("div", {'class': 'ScoreCell__TeamName'}).text
        if name == 'BYE':
            continue
        rank = team_html.find("div", {'class': 'ScoreCell__Rank'}).text
        name_to_seeds[name_mappings[name]] = int(rank)
        matchup.append(name_mappings[name])
    matchups.append(tuple(matchup))

matchup_seeds = []
for matchup in matchups:
    matchup_seeds.append(tuple([int(name_to_seeds[name]) for name in matchup]))

print('matchups:', matchups)
print('seeds:', matchup_seeds)

matchups: [('Matthew Clauss',), ('William Baker', 'Owen Wright'), ('Daniel Bauman', 'Jacob Kaufman'), ('James Young',)]
seeds: [(1,), (5, 4), (6, 3), (2,)]


In [7]:
names_list = list(zip(*list(all_week_data.values())[0]))[0]
name_to_idx = {name: i for i, name in enumerate(names_list)}

full_season_table = np.asarray([[float(score) for _, score in all_week_data[k]] for k in all_week_data.keys()], dtype=np.float32)
X = full_season_table # matrix of shape: [total_weeks, total_teams]

X.shape

(14, 10)

In [8]:
def denoise_X(X, k):
    # reconstruct X using the top k singular values, to remove noise from lower singular values
    # removing low singular values can help remove the effect of players being traded or injuries, resorting to a mean instead
    x_mean = X.mean(axis=0)
    x_centered = X - x_mean
    cov = (1/(X.shape[0]-1))*x_centered.T@x_centered
    eigenvalues, eigenvectors = np.linalg.eig(cov)
    idx = np.argsort(eigenvalues)[::-1]
    eigenvalues = (eigenvalues[idx])[:k]
    eigenvectors = (eigenvectors[:, idx])[:, :k]
    x_denoised_centered = (x_centered@eigenvectors)@(eigenvectors.T)
    x_denoised = x_denoised_centered + x_mean
    return x_denoised

def denoise_means_covs(X, k):
    # compute mean and covariance matrix for the denoised X
    # k uses the top k singular values for X
    denoised_x = denoise_X(X, k)
    return denoised_x.mean(axis=0), np.cov(denoised_x, rowvar=False, bias=False)

def win_probability(
    means,
    cov,
    name1,
    name2,
    name_to_idx,
):
    # means and cov computed from denoise_means_covs
    # name1, name2 are the two users to compare
    # name_to_idx maps the names to the index in X, it is computed above
    # returns the probability of name1 beating name2 according to gaussians for each team's score fit to season data
    idx1, idx2 = name_to_idx[name1], name_to_idx[name2]
    mean = means[idx1] - means[idx2]
    var = cov[idx1, idx1] + cov[idx2, idx2] - 2 * cov[idx1, idx2]
    win_prob = 1-norm.cdf((0 - mean)/np.sqrt(var), loc=0, scale=1)
    return win_prob

def next_round(
    matchups,
    seeds,
    results,
):
    # matchups: list of tuples of names which are playing against each other.
    #          such as [('Matthew Clauss',), ('William Baker', 'Owen Wright'), ('Daniel Bauman', 'Jacob Kaufman'), ('James Young',)]
    # seeds: list of tuples of the seed of each person in the matchup.
    #        such as [(1,), (5, 4), (6, 3), (2,)]
    # results: list of 0 or 1 indicies indicating who won each matchup.
    #          such as [0, 1, 0, 0]
    # returns the next round matchups and seeds lists in the bracket
    names, seeds = list(zip(*sorted([(matchup[result], seed[result]) for matchup, seed, result in zip(matchups, seeds, results)], key=lambda x: x[1])))
    new_matchups = list(zip(names[:len(names)//2], names[len(names)//2:][::-1]))
    new_seeds = list(zip(names[:len(names)//2], names[len(names)//2:][::-1]))
    return new_matchups, new_seeds

def get_all_possible_matchup_sequences(
    matchups,
    seeds,
):
    # matchups: list of tuples of names which are playing against each other.
    #          such as [('Matthew Clauss',), ('William Baker', 'Owen Wright'), ('Daniel Bauman', 'Jacob Kaufman'), ('James Young',)]
    # seeds: list of tuples of the seed of each person in the matchup.
    #        such as [(1,), (5, 4), (6, 3), (2,)]
    # returns a list of dicts with all possible matchup/result sequences that the bracket could have
    if len(matchups) == 0:
        return []
    total_matchups = []
    for results in itertools.product(*list(map(lambda x: tuple(range(len(x))), matchups))): 
        all_sequences = get_all_possible_matchup_sequences(*next_round(matchups, seeds, results))
        if len(all_sequences) > 0:
            for sequence in all_sequences:
                total_matchups.append({
                    'matchups':matchups+sequence['matchups'],
                    'results': results+sequence['results'],
                })
        else:
            total_matchups.append({
                'matchups': matchups,
                'results': results,
            })
    return total_matchups

def get_matchup_sequence_p(
    means,
    cov,
    matchups,
    results,
    name_to_idx,
):
    # means and cov computed from denoise_means_covs
    # matchups: list of tuples of names which are playing against each other.
    #          such as [('Matthew Clauss',), ('William Baker', 'Owen Wright'), ('Daniel Bauman', 'Jacob Kaufman'), ('James Young',)]
    # results: list of 0 or 1 indicies indicating who won each matchup.
    #          such as [0, 1, 0, 0]
    # name_to_idx maps the names to the index in X, it is computed above
    # returns the probability of the bracket resolving according to results
    prob = 1.0
    for matchup, result in zip(matchups, results):
        if len(matchup) == 1:
            prob *= 1.0
        else:
            prob *= win_probability(means, cov, matchup[result], matchup[1-result], name_to_idx)
    return prob

def get_win_probs(
    means,
    cov,
    matchups,
    seeds,
    name_to_idx,
):
    # means and cov computed from denoise_means_covs
    # matchups: list of tuples of names which are playing against each other.
    #          such as [('Matthew Clauss',), ('William Baker', 'Owen Wright'), ('Daniel Bauman', 'Jacob Kaufman'), ('James Young',)]
    # seeds: list of tuples of the seed of each person in the matchup.
    #        such as [(1,), (5, 4), (6, 3), (2,)]
    # name_to_idx maps the names to the index in X, it is computed above
    # returns the bracket win probabilities for each person in the bracket
    all_sequences = get_all_possible_matchup_sequences(
        matchups=matchups,
        seeds=seeds,
    )
    full_probs = defaultdict(float)
    for sequence in all_sequences:
        assert len(sequence['matchups']) == 7
        p = get_matchup_sequence_p(means, cov, sequence['matchups'], sequence['results'], name_to_idx)
        winner = sequence['matchups'][-1][sequence['results'][-1]]
        full_probs[winner] += p

    return full_probs

In [9]:
# sweep over all values for k
for k in range(1, 11):
    means, cov = denoise_means_covs(X, k)
    win_probs = get_win_probs(
        means=means,
        cov=cov,
        matchups=matchups,
        seeds=matchup_seeds,
        name_to_idx=name_to_idx,
    )
    print(f'k={k}')
    for name, prob in win_probs.items():
        print(name, prob)
    print('='*25)


k=1
James Young 0.13086529813173045
Matthew Clauss 0.7260892347686632
William Baker 0.0534016172826053
Daniel Bauman 0.06015127672379922
Jacob Kaufman 0.0294925730932019
Owen Wright 0.0
k=2
James Young 0.1479694121036678
Matthew Clauss 0.518643206035914
William Baker 0.09945424699488264
Daniel Bauman 0.054111040238866794
Jacob Kaufman 0.06556672769919275
Owen Wright 0.11425536692747607
k=3
James Young 0.1651811930836119
Matthew Clauss 0.4085734808636555
William Baker 0.13713882023830493
Daniel Bauman 0.09402525649150109
Jacob Kaufman 0.07478992239804001
Owen Wright 0.12029132692488649
k=4
James Young 0.17991625125783686
Matthew Clauss 0.3931104609977043
William Baker 0.12982188557952792
Daniel Bauman 0.09408145871566702
Jacob Kaufman 0.08015218500429733
Owen Wright 0.12291775844496652
k=5
James Young 0.18215152987803696
Matthew Clauss 0.38232425481464777
William Baker 0.13155184985071577
Daniel Bauman 0.09569397221868842
Jacob Kaufman 0.08176324780678153
Owen Wright 0.12651514543112946

In [10]:
# lets go with k=5
k = 5

means, cov = denoise_means_covs(X, k)
win_probs = get_win_probs(
    means=means,
    cov=cov,
    matchups=matchups,
    seeds=matchup_seeds,
    name_to_idx=name_to_idx,
)

print(f'k={k}')
for name, prob in win_probs.items():
    print(name, prob)


k=5
James Young 0.18215152987803696
Matthew Clauss 0.38232425481464777
William Baker 0.13155184985071577
Daniel Bauman 0.09569397221868842
Jacob Kaufman 0.08176324780678153
Owen Wright 0.12651514543112946


In [12]:
# get most likely brackets
all_sequences = get_all_possible_matchup_sequences(
    matchups=matchups,
    seeds=matchup_seeds,
)
full_probs = defaultdict(float)
sequence_ps = []
for sequence in all_sequences:
    p = get_matchup_sequence_p(means, cov, sequence['matchups'], sequence['results'], name_to_idx)
    sequence_ps.append((sequence, p))
sorted(sequence_ps, key=lambda x: x[1])[::-1]

[({'matchups': [('Matthew Clauss',),
    ('William Baker', 'Owen Wright'),
    ('Daniel Bauman', 'Jacob Kaufman'),
    ('James Young',),
    ('Matthew Clauss', 'Daniel Bauman'),
    ('James Young', 'William Baker'),
    ('Matthew Clauss', 'William Baker')],
   'results': (0, 0, 0, 0, 0, 1, 0)},
  0.056149126319161184),
 ({'matchups': [('Matthew Clauss',),
    ('William Baker', 'Owen Wright'),
    ('Daniel Bauman', 'Jacob Kaufman'),
    ('James Young',),
    ('Matthew Clauss', 'Daniel Bauman'),
    ('James Young', 'Owen Wright'),
    ('Matthew Clauss', 'Owen Wright')],
   'results': (0, 1, 0, 0, 0, 1, 0)},
  0.055104732994349176),
 ({'matchups': [('Matthew Clauss',),
    ('William Baker', 'Owen Wright'),
    ('Daniel Bauman', 'Jacob Kaufman'),
    ('James Young',),
    ('Matthew Clauss', 'Daniel Bauman'),
    ('James Young', 'William Baker'),
    ('James Young', 'Matthew Clauss')],
   'results': (0, 0, 0, 0, 0, 0, 1)},
  0.04851818749624438),
 ({'matchups': [('Matthew Clauss',),
    ('W

In [13]:
# now for the gambling optimization part
from pulp import LpProblem, LpMaximize, LpVariable, lpSum

In [14]:

def optimize_bets(
    betting_odds_limits,
    win_probabilities,
    total_budget,
):
    # creates a linear program to find the bet placements that get positive ev
    # betting_odds_limits: a dict of name to tuple of (odds, bet_limit)
    #                      such as {
    #                           "Matthew Clauss": (+190, 105), # (odds, bet_limit)
    #                           "James Young":   (+325, 60),
    #                           "Jacob Kaufman":     (+535, 35),
    #                           "William Baker":   (+545, 35),
    #                           "Owen Wright":    (+580, 35),
    #                           "Daniel Bauman":    (+825, 25),
    #                      }
    # win_probabilities: the odds listed above
    # total_budget: the total budget you are willing to bet with
    # returns (a dict with the amounts to bet on each person, the ev of these bets)
    problem = LpProblem("Bet_Allocation", LpMaximize)
    bets = {p: LpVariable(f"Bet_{p}", lowBound=0) for p in betting_odds_limits.keys()}
    problem += lpSum([bets[p] for p in betting_odds_limits.keys()]) <= total_budget, "Total_Budget"
    for p, (_, max_b) in betting_odds_limits.items():
        problem += bets[p] <= max_b, f"Max_Bet_{p}"
    
    decimal_odds = {p: 1 + (odds/100.0) for p, (odds, _) in betting_odds_limits.items()}
    problem += lpSum([bets[p]*(decimal_odds[p]*win_probabilities[p] - 1) for p in betting_odds_limits.keys()])
    
    problem.solve()

    to_bet = dict()
    for p in betting_odds_limits.keys():
        to_bet[p] = bets[p].varValue
    ev = sum([bets[p].varValue*(decimal_odds[p]*win_probabilities[p]-1) for p in betting_odds_limits.keys()])
    return to_bet, ev

In [15]:

betting_odds_limits = {
    "Matthew Clauss": (+190, 105), # (odds, bet_limit)
    "James Young":   (+325, 60),
    "Jacob Kaufman":     (+535, 35),
    "William Baker":   (+545, 35),
    "Owen Wright":    (+580, 35),
    "Daniel Bauman":    (+825, 25),
}

to_bet, ev = optimize_bets(
    betting_odds_limits=betting_odds_limits,
    win_probabilities=win_probs,
    total_budget=200,
)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /opt/homebrew/Caskroom/miniforge/base/lib/python3.10/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/hh/wxw64bn57dn34694gw8k3b2w0000gp/T/15b3d509deed4db4a47cbdb20272d987-pulp.mps max timeMode elapsed branch printingOptions all solution /var/folders/hh/wxw64bn57dn34694gw8k3b2w0000gp/T/15b3d509deed4db4a47cbdb20272d987-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 12 COLUMNS
At line 31 RHS
At line 39 BOUNDS
At line 40 ENDATA
Problem MODEL has 7 rows, 6 columns and 12 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Presolve 0 (-7) rows, 0 (-6) columns and 0 (-12) elements
Empty problem - 0 rows, 0 columns and 0 elements
Optimal - objective value 11.417736
After Postsolve, objective 11.417736, infeasibilities - dual 0 (0), primal 0 (0)
Optimal objective 11.41773559 - 0 iterations time 0.002, Presolve 0.00
Option f

In [16]:
for name, amount in to_bet.items():
    print(f"{name}: ${amount}")
print()
print(f"EV: {ev}")

Matthew Clauss: $105.0
James Young: $0.0
Jacob Kaufman: $0.0
William Baker: $0.0
Owen Wright: $0.0
Daniel Bauman: $0.0

EV: 11.417735591060252


In [None]:
# parlay EV = 0.8956582635
# total EV = 12.3133938546