In [None]:
import math
import os
from io import StringIO

import numpy as np
import pandas as pd
import requests

In [None]:

# data fetching

def fetch_live_predictions(api_key, timeout=30):
    """
    Fetch pre-tournament predictions from DataGolf.

    Parameters
    ----------
    api_key : str
        DataGolf API key, sent as the ``key`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The records stored under the ``'baseline'`` key of the JSON response
        (predictions from the 'baseline' model).

    Raises
    ------
    RuntimeError
        If the server answers with any status other than 200.
    """
    url = "https://feeds.datagolf.com/preds/pre-tournament"
    # ``params`` lets requests URL-encode the key; the timeout keeps a stalled
    # connection from hanging the notebook indefinitely.
    response = requests.get(url, params={"tour": "pga", "key": api_key}, timeout=timeout)
    if response.status_code != 200:
        # Report the status code but never the URL, which carries the API key.
        raise RuntimeError(f"Failed to fetch live predictions (HTTP {response.status_code})")
    return pd.DataFrame(response.json()['baseline'])

def fetch_live_odds(api_key, timeout=30):
    """
    Fetch live betting odds (for the 'win' market) from DataGolf.

    Parameters
    ----------
    api_key : str
        DataGolf API key, sent as the ``key`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The records stored under the ``'odds'`` key of the JSON response.

    Raises
    ------
    RuntimeError
        If the server answers with any status other than 200.
    """
    url = "https://feeds.datagolf.com/betting-tools/outrights"
    # ``params`` lets requests URL-encode the key; the timeout keeps a stalled
    # connection from hanging the notebook indefinitely.
    response = requests.get(url, params={"market": "win", "key": api_key}, timeout=timeout)
    if response.status_code != 200:
        # Report the status code but never the URL, which carries the API key.
        raise RuntimeError(f"Failed to fetch live odds (HTTP {response.status_code})")
    return pd.DataFrame(response.json()['odds'])

In [None]:
# funcs
def compute_implied_prob(decimal_odds):
    """Return the probability implied by decimal odds, i.e. their reciprocal.

    The bookmaker's margin is not removed here.
    """
    implied_prob = 1 / decimal_odds
    return implied_prob

def decimal_to_american(decimal_odds):
    """Convert decimal odds to an American (moneyline) odds string.

    Odds of 2.0 or longer become a "+" line (profit on a 100 stake); shorter
    odds become a "-" line (stake required to win 100). The line is rounded
    to the nearest whole number rather than truncated, so floating-point
    noise such as ``(3.3 - 1) * 100 == 229.99999999999997`` yields "+230".

    Raises
    ------
    ValueError
        If ``decimal_odds`` is not greater than 1 (this includes NaN). Such
        odds have no American equivalent: 1.0 would divide by zero and
        anything lower would produce a malformed string.
    """
    # ``not x > 1`` (rather than ``x <= 1``) also rejects NaN.
    if not decimal_odds > 1:
        raise ValueError(f"decimal odds must be greater than 1, got {decimal_odds!r}")

    profit_per_unit = decimal_odds - 1
    if decimal_odds >= 2.0:
        return f"+{int(round(profit_per_unit * 100))}"
    return f"-{int(round(100 / profit_per_unit))}"

def strip_vig(odds_series):
    """
    Turn a Series of decimal odds into vig-free probabilities.

    Each price is inverted to its raw implied probability, then every value is
    divided by the sum of those raw probabilities so the result adds up to 1.
    The returned Series keeps the index of ``odds_series``.
    """
    def _inverse(price):
        return 1 / price

    inverse_prices = odds_series.apply(_inverse)
    return inverse_prices / inverse_prices.sum()


In [None]:

# optimization

def custom_decay_weighting(data, decay_rate):
    """
    Exponentially weighted average of a sequence of historical values.

    The last element gets weight 1 and each step towards the front multiplies
    the weight by ``exp(-decay_rate)``, so with a positive ``decay_rate`` the
    later entries dominate (recent rounds, if ``data`` is ordered oldest first).
    """
    values = np.array(data)
    count = len(data)
    # "age" is the distance from the end of the sequence: count-1, ..., 1, 0.
    decay_weights = np.array(
        [math.exp(-decay_rate * age) for age in range(count - 1, -1, -1)]
    )
    return (values * decay_weights).sum() / decay_weights.sum()

def build_custom_skill_estimate(historical_sg_series, decay_rate=0.1):
    """
    Build a skill estimate from a player's historical strokes-gained values.

    Thin wrapper that forwards ``historical_sg_series`` and ``decay_rate``
    (default 0.1) to ``custom_decay_weighting`` and returns its weighted average.
    """
    skill_estimate = custom_decay_weighting(historical_sg_series, decay_rate)
    return skill_estimate

def adjust_for_weather(sg_prediction, wind_speed, tee_time,
                       wind_penalty_per_mph=0.01, tee_time_penalty=0.05):
    """
    Placeholder adjustment of a strokes-gained prediction for weather and tee time.

    Both terms are subtracted from the prediction:

    - wind: ``wind_speed * wind_penalty_per_mph`` (0.01 stroke per mph by default).
      The earlier version added this term, which rewarded wind and contradicted
      the documented penalty.
    - tee time: ``(tee_time / 12.0) * tee_time_penalty``, a linear adjustment that
      grows with ``tee_time``.

    Parameters
    ----------
    sg_prediction : float
        Baseline strokes-gained prediction.
    wind_speed : float
        Wind speed in mph.
    tee_time : float
        Tee time on the scale divided by 12 above (presumably hour of day;
        TODO confirm against the caller).
    wind_penalty_per_mph : float, optional
        Strokes subtracted per mph of wind.
    tee_time_penalty : float, optional
        Strokes subtracted per 12 units of ``tee_time``.

    Returns
    -------
    float
        The adjusted strokes-gained prediction.
    """
    wind_penalty = wind_speed * wind_penalty_per_mph
    tee_time_adjustment = (tee_time / 12.0) * tee_time_penalty
    return sg_prediction - wind_penalty - tee_time_adjustment


In [None]:
# simulate

def simulate_tournament(skill_estimates, num_simulations=10000, seed=None):
    """
    Estimate win probabilities with a Monte Carlo simulation.

    Each simulation draws one normally distributed value per player and the
    player with the LOWEST draw wins (ties go to the player listed first).
    Means must therefore be on a lower-is-better scale such as stroke totals;
    negate a higher-is-better skill measure before passing it in.

    Parameters
    ----------
    skill_estimates : dict
        ``{player_name: (mean, variance)}``.
    num_simulations : int, optional
        Number of simulated tournaments (default 10000). All draws are made in
        one ``num_simulations x n_players`` array.
    seed : int, optional
        When given, draws come from ``np.random.default_rng(seed)`` so the run
        is reproducible. When omitted, NumPy's global random state is used, so
        a prior ``np.random.seed(...)`` call is still honoured.

    Returns
    -------
    dict
        ``{player_name: share of simulations won}``; empty if no players.

    Raises
    ------
    ValueError
        If ``num_simulations`` is not positive or any variance is negative.
    """
    if num_simulations <= 0:
        raise ValueError("num_simulations must be a positive integer")

    players = list(skill_estimates)
    if not players:
        # Nothing to simulate; avoids taking the minimum of an empty field.
        return {}

    means = np.array([skill_estimates[p][0] for p in players], dtype=float)
    variances = np.array([skill_estimates[p][1] for p in players], dtype=float)
    if (variances < 0).any():
        raise ValueError("variances must be non-negative")
    std_devs = np.sqrt(variances)

    sampler = np.random if seed is None else np.random.default_rng(seed)
    # One row per simulated tournament, one column per player.
    scores = sampler.normal(means, std_devs, size=(num_simulations, len(players)))
    winner_idx = scores.argmin(axis=1)
    win_counts = np.bincount(winner_idx, minlength=len(players))

    return {player: int(count) / num_simulations
            for player, count in zip(players, win_counts)}


In [19]:
# main func

def main(api_key=None):
    """
    Compare DataGolf's baseline win probabilities with FanDuel's win odds.

    Steps: fetch predictions and odds, join them on ``player_name``, print the
    ten best and ten worst players by (model probability - implied
    probability), run a dummy Monte Carlo simulation, and print the ten best
    expected values per $1 bet.

    Parameters
    ----------
    api_key : str, optional
        DataGolf API key. When omitted it is read from the
        ``DATAGOLF_API_KEY`` environment variable, so the key never has to be
        typed into (and saved with) the notebook.

    Raises
    ------
    RuntimeError
        If no API key is supplied either way.
    """
    if api_key is None:
        api_key = os.environ.get("DATAGOLF_API_KEY")
    if not api_key:
        raise RuntimeError(
            "No DataGolf API key: pass api_key or set the DATAGOLF_API_KEY environment variable"
        )
    print("Fetching live predictions and odds...")

    pre_tourney_df = fetch_live_predictions(api_key)
    odds_df = fetch_live_odds(api_key)

    # Merge predictions with FanDuel odds (players without a FanDuel price are dropped).
    fanduel_df = odds_df[['player_name', 'fanduel']].dropna(subset=['fanduel'])
    merged_df = pd.merge(
        pre_tourney_df[['player_name', 'win']], fanduel_df, on='player_name', how='inner'
    ).rename(columns={'win': 'model_win_prob', 'fanduel': 'fanduel_odds'})

    # Calculate implied probabilities and value differences.
    merged_df['implied_prob'] = merged_df['fanduel_odds'].apply(compute_implied_prob)
    merged_df['value_diff'] = merged_df['model_win_prob'] - merged_df['implied_prob']

    # Convert values for display.
    merged_df['model_win_prob_pct'] = (merged_df['model_win_prob'] * 100).round(2)
    merged_df['implied_prob_pct'] = (merged_df['implied_prob'] * 100).round(2)
    merged_df['value_diff_pct'] = (merged_df['value_diff'] * 100).round(2)
    merged_df['american_odds'] = merged_df['fanduel_odds'].apply(decimal_to_american)

    value_columns = ['player_name', 'model_win_prob_pct', 'fanduel_odds',
                     'american_odds', 'implied_prob_pct', 'value_diff_pct']

    # Display value analysis.
    print("\n🏆 Top 10 Value Players (Model vs FanDuel Odds):")
    print(merged_df.sort_values(by='value_diff', ascending=False)[value_columns].head(10))

    print("\n📉 Bottom 10 Value Players (Overvalued by FanDuel):")
    print(merged_df.sort_values(by='value_diff')[value_columns].head(10))

    # Build skill estimates for the simulation, assuming a constant variance
    # (1.0) for all players for simplicity. simulate_tournament awards the win
    # to the LOWEST sampled value, so the win probability is negated; passing
    # it as-is would favour the least likely winners.
    skill_estimates = {
        name: (-win_prob, 1.0)
        for name, win_prob in zip(merged_df['player_name'], merged_df['model_win_prob'])
    }

    # Monte Carlo simulation to estimate tournament outcomes.
    sim_results = simulate_tournament(skill_estimates, num_simulations=1000)

    print("\nSimulated Tournament Win Probabilities (dummy simulation):")
    # Sort and display the top 10 simulated win probabilities.
    sim_results_sorted = sorted(sim_results.items(), key=lambda x: x[1], reverse=True)
    for player, win_prob in sim_results_sorted[:10]:
        print(f"{player}: {win_prob*100:.2f}%")

    # EV per $1 staked: win (odds - 1) with probability p, lose 1 otherwise.
    merged_df['payout'] = merged_df['fanduel_odds'] - 1
    merged_df['ev'] = (merged_df['payout'] * merged_df['model_win_prob']) - (1 - merged_df['model_win_prob'])
    merged_df['ev_pct'] = (merged_df['ev'] * 100).round(2)

    print("\nDetailed EV Analysis (per $1 bet):")
    print(merged_df.sort_values(by='ev', ascending=False)[
          ['player_name', 'model_win_prob_pct', 'fanduel_odds', 'american_odds', 'implied_prob_pct', 'ev_pct']].head(10))


# Entry point: run the full analysis when this cell/script is executed directly
# (and not when the code is imported as a module).
if __name__ == "__main__":
    main()

Fetching live predictions and odds...

🏆 Top 10 Value Players (Model vs FanDuel Odds):
            player_name  model_win_prob_pct  fanduel_odds american_odds  \
61       Kanaya, Takumi                0.46         301.0        +30000   
25        McGreevy, Max                1.02         111.0        +11000   
17        Novak, Andrew                1.33          81.0         +8000   
52        Sigg, Greyson                0.52         221.0        +22000   
37       Hisatsune, Ryo                0.71         151.0        +15000   
76          Kohles, Ben                0.30         351.0        +35000   
77   Phillips, Chandler                0.30         351.0        +35000   
31           Ghim, Doug                0.83         121.0        +12000   
89        Goodwin, Noah                0.24         401.0        +40000   
120           Pak, John                0.08        1001.0       +100000   

     implied_prob_pct  value_diff_pct  
61               0.33            0.12  
25     