In [1]:
%load_ext autoreload
%autoreload 2

In [30]:
import torch
import pandas as pd
from model import PitcherTransformerModel
from dataset import MLB_Pitcher_Dataset, MLB_Batter_Dataset
from preprocess_data import preprocess
from config import pitcher_features, batter_features, model_config
from torch.utils.data import DataLoader

# silence all warnings
import warnings

warnings.filterwarnings("ignore")

In [31]:
df = pd.read_excel('live_data/03-19-2025-mlb-season-player-feed.xlsx', skiprows=[0])

In [32]:
pitcher_model = PitcherTransformerModel(
    pitcher_input_dim=model_config['pitcher_input_dim'],
    opp_batter_input_dim=model_config['opp_batter_input_dim'],
    model_dim=model_config['model_dim'], 
    n_heads=model_config['n_heads'], 
    num_layers=model_config['num_layers'], 
    sequence_length=model_config['sequence_length'],
)

checkpoint = torch.load("live_model/best_model.pth", map_location=torch.device('cuda'))
# Load the saved model state
pitcher_model.load_state_dict(checkpoint['model_state_dict'])
pitcher_model.eval()

PitcherTransformerModel(
  (transformer): CrossAttentionTransformer(
    (primary_embedding): Linear(in_features=10, out_features=64, bias=True)
    (opponent_embedding): Linear(in_features=9, out_features=64, bias=True)
    (primary_encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-1): 2 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
          )
          (linear1): Linear(in_features=64, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=64, bias=True)
          (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (opponent_encoder): TransformerEnc

In [33]:
batter_scaler = torch.load("batter_scaler.pt")
pitcher_scaler = torch.load("pitcher_scaler.pt")

In [34]:
pitcher_df, batter_df = preprocess(df, pitcher_scaler=pitcher_scaler, batter_scaler=batter_scaler, inference=True)

pitcher_dataset = MLB_Pitcher_Dataset(
    pitcher_df, 
    batter_df, 
    sequence_length=5, 
    pitcher_features=pitcher_features, 
    batter_features=batter_features
)

batter_dataset = MLB_Batter_Dataset(
    batter_df, 
    pitcher_df, 
    sequence_length=5, 
    pitcher_features=pitcher_features, 
    batter_features=batter_features
)

pitcher_loader = DataLoader(pitcher_dataset, batch_size=1, shuffle=False)
batter_loader = DataLoader(batter_dataset, batch_size=1, shuffle=False)

In [None]:
for batch in pitcher_dataset:
    primary_seq, opponent_seq, targets = batch
    outputs = pitcher_model(primary_seq.to('cuda'), opponent_seq.to('cuda'))
    print(outputs)

In [2]:
import statsapi

In [41]:
from datetime import datetime

# Get today's date in the required format (MM/DD/YYYY)
today = datetime.today().strftime("%m/%d/%Y")

# Retrieve today's schedule.
# This returns a list of dictionaries—one per game.
games = statsapi.schedule(start_date=today, end_date=today)

for game in games:
    game_pk = game.get('game_id')
    away_lineup = statsapi.get("game", {"gamePk": game_pk})['liveData']['boxscore']['teams']['away']['batters']
    away_lineup = [next(iter(statsapi.lookup_player(id, gameType="R", season=None, sportId=1)), {}).get('fullName', 'TBD') for id in away_lineup]
    home_lineup = statsapi.get("game", {"gamePk": game_pk})['liveData']['boxscore']['teams']['home']['batters']
    home_lineup = [next(iter(statsapi.lookup_player(id, gameType="R", season=None, sportId=1)), {}).get('fullName', 'TBD') for id in home_lineup]
    # Extract matchup details directly from the schedule
    matchup = {
        'home': game.get('home_name'),
        'away': game.get('away_name')
    }
    print(f"Matchup: {matchup['away']} @ {matchup['home']}")
    print("Away:", away_lineup)
    print("Home:", home_lineup)
    
    # Retrieve the probable starting pitcher.
    # The schedule may include a key for the probable pitcher.
    probable_pitcher_away = statsapi.get("schedule", {"sportId": 1, "hydrate": "probablePitcher(note)", "gamePk": game_pk})['dates'][0]['games'][0]['teams']['away'].get('probablePitcher', {}).get('fullName', 'TBD')
    probable_pitcher_home = statsapi.get("schedule", {"sportId": 1, "hydrate": "probablePitcher(note)", "gamePk": game_pk})['dates'][0]['games'][0]['teams']['home'].get('probablePitcher', {}).get('fullName', 'TBD')
    print(f"Probable Starting Pitchers: {probable_pitcher_away} vs. {probable_pitcher_home}")
    
    print("-" * 50)


Matchup: Philadelphia Phillies @ Atlanta Braves
Away: ['Johan Rojas', 'TBD', 'Alec Bohm', 'Kody Clemens', 'TBD', 'Edmundo Sosa', 'TBD', 'Rafael Marchán', 'TBD', 'Buddy Kennedy', 'TBD', 'Cal Stevenson', 'TBD', 'TBD', 'TBD', 'TBD', 'TBD', 'TBD', 'Kyle Tyler', 'TBD', 'TBD', 'TBD', 'TBD']
Home: ['Jurickson Profar', 'TBD', 'Austin Riley', 'Nick Allen', 'Matt Olson', 'TBD', 'Marcell Ozuna', 'TBD', 'Ozzie Albies', 'Bryan De La Cruz', 'Michael Harris II', 'Eli White', 'Chadwick Tromp', 'TBD', 'Jarred Kelenic', 'Luke Williams', 'Orlando Arcia', 'TBD', 'TBD', 'Daysbel Hernández', 'Aaron Bummer', 'Raisel Iglesias', 'TBD']
Probable Starting Pitchers: Nabil Crismatt vs. Héctor Neris
--------------------------------------------------
Matchup: Tampa Bay Rays @ Toronto Blue Jays
Away: ['Jonathan Aranda', 'Christopher Morel', 'Richie Palacios', 'Curtis Mead', 'Ben Rortvedt', 'TBD', 'José Caballero', 'TBD', 'TBD', 'TBD', 'TBD', 'TBD', 'TBD', 'Taj Bradley', 'TBD', 'TBD', 'Hunter Bigge']
Home: ['Will Wagn

In [61]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_strikeout_odds(url):
    # Initialize the Selenium driver (adjust options as needed)
    options = webdriver.ChromeOptions()
    #options.add_argument("--headless") 
    # Uncomment the following line to run headless:
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    
    # Open the URL
    driver.get(url)
    
    # Wait for the list items to be present on the page
    wait = WebDriverWait(driver, 15)
    li_elements = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "li.border")))
    
    results = []
    
    for li in li_elements:
        # Extract the player's name from the data-name attribute.
        name = li.get_attribute("data-name")
        
        # Try to extract strikeout odds from the element with class "table-list-best-odds".
        # try:
        # print(f'getting odds for {name}')
        # best_odds_elem = li.find_element(By.CSS_SELECTOR, ".game_odds")
        # print(best_odds_elem.text)
        # best_odds = best_odds_elem.text.strip()
        # except:
        # best_odds = None
        
        # Sometimes, additional details (like strikeout line and odds) are hidden in an expandable section.
        # Check for a button with data-role="openable" and click it if present.
        print('opening drawer')
        openable_text = None
        # try:
        open_button = li.find_element(By.CSS_SELECTOR, "button[data-role='openable']")
            # Click the button to expand the hidden odds details.
        open_button.click()
            # Wait a short while for the content to expand
        time.sleep(3)
            # Find the openable drawer; it usually has a class like "drawer"
        print('looking for odds')
        # print html of li
        #print(li.get_attribute('innerHTML'))
        odds_elements = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "table.odds-table")))
        print(odds_elements)
        openable_div = li.find_element(By.CSS_SELECTOR, "div.table-list-rowsss")
        print(openable_div.get_attribute('innerHTML'))
        openable_text = openable_div.text.strip()
        # except Exception as e:
            # If no expandable content exists or an error occurs, ignore.
        # openable_text = None
        
        # Sometimes the odds might be in additional columns:
        odds_cols = li.find_elements(By.CSS_SELECTOR, "div.table-list-colll")
        odds_texts = [col.text.strip() for col in odds_cols if col.text.strip()]
        
        results.append({
            "name": name,
            "best_odds": 0,
            "odds_texts": odds_texts,
            "openable_text": openable_text
        })
    
    driver.quit()
    return results

# Example usage:
url = "https://www.scoresandodds.com/mlb/props?date=2025-03-20"
odds_data = scrape_strikeout_odds(url)
for item in odds_data:
    print(item)


opening drawer
looking for odds


TimeoutException: Message: 


In [70]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def scrape_strikeout_odds(url):
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    
    driver.get(url)
    wait = WebDriverWait(driver, 15)
    li_elements = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "li.border")))
    
    results = []
    
    for li in li_elements:
        # Extract the player's name from the data-name attribute.
        name = li.get_attribute("data-name")
        
        # Try to extract strikeout odds from the element with class "table-list-best-odds".
        # try:
        # print(f'getting odds for {name}')
        # best_odds_elem = li.find_element(By.CSS_SELECTOR, ".game_odds")
        # print(best_odds_elem.text)
        # best_odds = best_odds_elem.text.strip()
        # except:
        # best_odds = None
        
        # Sometimes, additional details (like strikeout line and odds) are hidden in an expandable section.
        # Check for a button with data-role="openable" and click it if present.
        print('opening drawer')
        openable_text = None
        # try:
        open_button = li.find_element(By.CSS_SELECTOR, "button[data-role='openable']")
        drawer_selector = open_button.get_attribute("data-content")
        # Click the button to open the drawer.
        open_button.click()
        
        # Wait for the drawer to be visible.
        try:
            drawer = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'div.table-list-row.drawer.scroll.highlight.active')))
        except Exception as e:
            print(f"Error waiting for drawer for {name}: {e}")
            continue

        print('here...')
        time.sleep(3)
        print(drawer.get_attribute('innerHTML'))
        # Once open, find the table body with class "active".
        print('after')
        try:
            
            tbody = drawer.find_element(By.CSS_SELECTOR, "table.odds-table")
            rows = tbody.find_elements(By.CSS_SELECTOR, "tr")
        except Exception as e:
            rows = []
            print(f"Error finding table rows for {name}: {e}")
        
        odds_list = []
        for row in rows:
            cells = row.find_elements(By.TAG_NAME, "td")
            if len(cells) >= 2:
                # Get player info from first cell (even if hidden, use JS to retrieve innerText).
                cell_player = cells[0]
                row_player = driver.execute_script("return arguments[0].innerText;", cell_player).strip()
                
                # Get odds info from the second cell.
                odds_cell = cells[1]
                try:
                    data_value = odds_cell.find_element(By.CSS_SELECTOR, "span.data-value").text.strip()
                except:
                    data_value = None
                try:
                    data_odds = odds_cell.find_element(By.CSS_SELECTOR, "small.data-odds").text.strip()
                except:
                    data_odds = None
                
                odds_list.append({
                    "row_player": row_player,
                    "data_value": data_value,
                    "data_odds": data_odds
                })
        
        results.append({
            "player": player_name,
            "odds_data": odds_list
        })
        
        # Optionally, click the button again to close the drawer.
        button.click()
        time.sleep(0.5)  # Give a short delay for the drawer to collapse.
    
    driver.quit()
    return results

# Example usage:
url = "https://www.scoresandodds.com/mlb/props?date=2025-03-20" 
strikeout_odds = scrape_strikeout_odds(url)
for entry in strikeout_odds:
    print(entry)


opening drawer
here...

after
Error finding table rows for taj bradley: Message: no such element: Unable to locate element: {"method":"css selector","selector":"table.odds-table"}
  (Session info: chrome=134.0.6998.90); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF7BCCBFE65+26629]
	(No symbol) [0x00007FF7BCC26030]
	(No symbol) [0x00007FF7BCAB931A]
	(No symbol) [0x00007FF7BCB0F8E7]
	(No symbol) [0x00007FF7BCB0FB1C]
	(No symbol) [0x00007FF7BCB0228C]
	(No symbol) [0x00007FF7BCB37AEF]
	(No symbol) [0x00007FF7BCB02156]
	(No symbol) [0x00007FF7BCB37CC0]
	(No symbol) [0x00007FF7BCB60169]
	(No symbol) [0x00007FF7BCB37883]
	(No symbol) [0x00007FF7BCB00550]
	(No symbol) [0x00007FF7BCB01803]
	GetHandleVerifier [0x00007FF7BD0172DD+3529853]
	GetHandleVerifier [0x00007FF7BD02DA42+3621858]
	GetHandleVerifier [0x00007FF7BD0224F3+3575443]
	GetHandleVerifier [0x000

NameError: name 'player_name' is not defined

In [None]:
odds_url = 'https://api.the-odds-api.com'

sport = 'baseball_mlb'
eventId = 'pitcher_strikeouts'
apiKey = '58c2c231e22f3e89a15754d7e88cb4bd'
regions = 'us'
markets = 'pitcher_strikeouts'
dateFormat = 'iso'
oddsFormat = 'american'

endpoint = f'/v4/sports/{sport}/events/{eventId}/odds?apiKey={apiKey}&regions={regions}&markets={markets}&dateFormat={dateFormat}&oddsFormat={oddsFormat}'
url = odds_url + endpoint

import requests
import json

response = requests.get(odds_url)
data = response.json()
print(json.dumps(data, indent=2))


{
  "message": "Markets not supported by this endpoint: pitcher_strikeouts",
  "error_code": "INVALID_MARKET",
  "details_url": "https://the-odds-api.com/liveapi/guides/v4/api-error-codes.html#invalid-market"
}
