In [25]:
from draft_kings import Client
from draft_kings.data import Sport
import pprint
import pandas as pd
import requests
from typing import List
import time
import duckdb as dd
import numpy as np

In [26]:
client = Client()

# Get current NFL contests
contests_details = client.contests(sport=Sport.NFL)

In [27]:
contest_ids = [contest.contest_id for contest in contests_details.contests 
               if contest.is_guaranteed 
               and contest.draft_group_id == 134308
               and contest.is_double_up == False
               and contest.is_fifty_fifty == False]

In [39]:
def process_contest_detail(contest_detail):
    """Process a single contest detail and return a dictionary with the extracted data"""
    
    # Extract basic contest information
    contest_key = contest_detail['contestKey']
    entry_fee = contest_detail['entryFee']
    total_payouts = contest_detail['totalPayouts']
    maximum_entries = contest_detail['maximumEntries']
    maximum_entries_per_user = contest_detail['maximumEntriesPerUser']
    name = contest_detail['name']

    # Extract payout information
    payout_summary = contest_detail['payoutSummary']

    # Find the lowest minPosition (should be 1 for first place)
    lowest_min_position = min(payout['minPosition'] for payout in payout_summary)
    highest_payout_value = None

    for payout in payout_summary:
        if payout['minPosition'] == lowest_min_position:
            highest_payout_value = payout['payoutDescriptions'][0]['value']
            break

    # Find the highest maxPosition (last place that pays out)
    highest_max_position = max(payout['maxPosition'] for payout in payout_summary)
    lowest_payout_value = None

    for payout in payout_summary:
        if payout['maxPosition'] == highest_max_position:
            lowest_payout_value = payout['payoutDescriptions'][0]['value']
            break

    # Store the place values
    highest_place_paid = lowest_min_position  # Best place (1st)
    lowest_place_paid = highest_max_position  # Worst place that still pays

    return {
        'contestKey': contest_key,
        'entryFee': entry_fee,
        'totalPayouts': total_payouts,
        'maximumEntries': maximum_entries,
        'maximumEntriesPerUser': maximum_entries_per_user,
        'name': name,
        'highest_place_paid': highest_place_paid,
        'lowest_place_paid': lowest_place_paid,
        'highest_payout_value': highest_payout_value,
        'lowest_payout_value': lowest_payout_value,
        'rake_pct' : (1 - round((total_payouts/(maximum_entries*entry_fee)),3))*100
    }

def fetch_contest_data_from_api(contest_ids: List, delay: float = 0.1):
    """
    Fetch contest data from DraftKings API for multiple contest IDs
    
    Args:
        contest_ids: List of contest IDs to fetch
        delay: Delay between requests in seconds (to be respectful to the API)
    
    Returns:
        pandas.DataFrame with contest data
    """
    contests_data = []
    failed_requests = []
    
    print(f"Fetching data for {len(contest_ids)} contests...")
    
    for i, contest_id in enumerate(contest_ids):
        url = f'https://api.draftkings.com/contests/v1/contests/{contest_id}?format=json'
        
        try:
            # print(f"Fetching contest {i+1}/{len(contest_ids)}: {contest_id}")
            
            # Make the API request
            response = requests.get(url)
            response.raise_for_status()  # Raises an exception for bad status codes
            
            # Parse JSON response
            data = response.json()
            
            # Check if contestDetail exists in the response
            if 'contestDetail' in data:
                contest_detail = data['contestDetail']
                contests_data.append(process_contest_detail(contest_detail))
            else:
                print(f"Warning: No contestDetail found for contest {contest_id}")
                failed_requests.append(contest_id)
            
            # Be respectful to the API - add delay between requests
            time.sleep(delay)
            
        except requests.exceptions.RequestException as e:
            print(f"Error fetching contest {contest_id}: {e}")
            failed_requests.append(contest_id)
            continue
        except KeyError as e:
            print(f"Error parsing data for contest {contest_id}: Missing key {e}")
            failed_requests.append(contest_id)
            continue
        except Exception as e:
            print(f"Unexpected error for contest {contest_id}: {e}")
            failed_requests.append(contest_id)
            continue
    
    print(f"\nSuccessfully fetched {len(contests_data)} contests")
    if failed_requests:
        print(f"Failed to fetch {len(failed_requests)} contests: {failed_requests}")
    
    return pd.DataFrame(contests_data)

In [29]:
def create_payout_breakdown_df(contest_ids: List, delay: float = 0.1):
    """
    Create a DataFrame with individual records for each position and payout amount
    
    Args:
        contest_ids: List of contest IDs to fetch
        delay: Delay between requests in seconds
    
    Returns:
        pandas.DataFrame with columns: contestKey, position, payout_value
    """
    payout_records = []
    failed_requests = []
    
    print(f"Fetching payout data for {len(contest_ids)} contests...")
    
    for i, contest_id in enumerate(contest_ids):
        url = f'https://api.draftkings.com/contests/v1/contests/{contest_id}?format=json'
        
        try:
            # print(f"Processing contest {i+1}/{len(contest_ids)}: {contest_id}")
            
            # Make the API request
            response = requests.get(url)
            response.raise_for_status()
            
            # Parse JSON response
            data = response.json()
            
            if 'contestDetail' in data:
                contest_detail = data['contestDetail']
                contest_key = contest_detail['contestKey']
                payout_summary = contest_detail['payoutSummary']
                
                # Process each payout tier
                for payout_tier in payout_summary:
                    min_pos = payout_tier['minPosition']
                    max_pos = payout_tier['maxPosition']
                    payout_value = payout_tier['payoutDescriptions'][0]['value']
                    
                    # Create a record for each individual position
                    for position in range(min_pos, max_pos + 1):
                        payout_records.append({
                            'contestKey': contest_key,
                            'position': position,
                            'payout_value': payout_value
                        })
                
                # print(f"  -> Added {sum(payout['maxPosition'] - payout['minPosition'] + 1 for payout in payout_summary)} position records")
            
            else:
                print(f"Warning: No contestDetail found for contest {contest_id}")
                failed_requests.append(contest_id)
            
            # Be respectful to the API
            time.sleep(delay)
            
        except requests.exceptions.RequestException as e:
            print(f"Error fetching contest {contest_id}: {e}")
            failed_requests.append(contest_id)
            continue
        except Exception as e:
            print(f"Unexpected error for contest {contest_id}: {e}")
            failed_requests.append(contest_id)
            continue
    
    print(f"\nSuccessfully processed {len(set(record['contestKey'] for record in payout_records))} contests")
    print(f"Total payout position records: {len(payout_records)}")
    
    if failed_requests:
        print(f"Failed to fetch {len(failed_requests)} contests: {failed_requests}")
    
    return pd.DataFrame(payout_records)

def create_payout_breakdown_from_existing_data(data):
    """
    Create payout breakdown from already fetched JSON data (single contest)
    
    Args:
        data: JSON data from API response
    
    Returns:
        pandas.DataFrame with position-level payout data
    """
    payout_records = []
    
    if 'contestDetail' in data:
        contest_detail = data['contestDetail']
        contest_key = contest_detail['contestKey']
        payout_summary = contest_detail['payoutSummary']
        
        # Process each payout tier
        for payout_tier in payout_summary:
            min_pos = payout_tier['minPosition']
            max_pos = payout_tier['maxPosition']
            payout_value = payout_tier['payoutDescriptions'][0]['value']
            
            # Create a record for each individual position
            for position in range(min_pos, max_pos + 1):
                payout_records.append({
                    'contestKey': contest_key,
                    'position': position,
                    'payout_value': payout_value
                })
    
    return pd.DataFrame(payout_records)

In [30]:
def calculate_top_percentile_payouts_vectorized(df):
    """
    More efficient version using pandas groupby operations
    """
    
    def contest_analysis(group):
        # Sort by position
        group_sorted = group.sort_values('position')
        total_prize_pool = group_sorted['payout_value'].sum()
        total_positions = len(group_sorted)
        
        # Calculate thresholds
        top_001_pct_threshold = max(1, int(np.ceil(total_positions * 0.0001)))
        top_1_pct_threshold = max(1, int(np.ceil(total_positions * 0.01)))
        
        # Calculate payouts
        top_001_pct_payout = group_sorted.head(top_001_pct_threshold)['payout_value'].sum()
        top_1_pct_payout = group_sorted.head(top_1_pct_threshold)['payout_value'].sum()
        
        return pd.Series({
            'total_positions': total_positions,
            'total_prize_pool': total_prize_pool,
            'top_001_pct_positions': top_001_pct_threshold,
            'top_001_pct_payout': top_001_pct_payout,
            'top_001_pct_percentage': (top_001_pct_payout / total_prize_pool) * 100,
            'top_1_pct_positions': top_1_pct_threshold,
            'top_1_pct_payout': top_1_pct_payout,
            'top_1_pct_percentage': (top_1_pct_payout / total_prize_pool) * 100,
            'winner_payout': group_sorted.iloc[0]['payout_value'],
            'winner_percentage': (group_sorted.iloc[0]['payout_value'] / total_prize_pool) * 100
        })
    
    return df.groupby('contestKey').apply(contest_analysis).reset_index()

In [40]:
df = fetch_contest_data_from_api(contest_ids)

Fetching data for 689 contests...
Unexpected error for contest 182467934: float division by zero
Error parsing data for contest 182626878: Missing key 'payoutDescriptions'
Error parsing data for contest 182626877: Missing key 'payoutDescriptions'
Error parsing data for contest 182594772: Missing key 'payoutDescriptions'
Error parsing data for contest 182541369: Missing key 'payoutDescriptions'
Error parsing data for contest 182626879: Missing key 'payoutDescriptions'

Successfully fetched 683 contests
Failed to fetch 6 contests: [182467934, 182626878, 182626877, 182594772, 182541369, 182626879]


In [32]:
payout_df = create_payout_breakdown_df(contest_ids)

Fetching payout data for 689 contests...
Unexpected error for contest 182626878: 'payoutDescriptions'
Unexpected error for contest 182626877: 'payoutDescriptions'
Unexpected error for contest 182594772: 'payoutDescriptions'
Unexpected error for contest 182541369: 'payoutDescriptions'
Unexpected error for contest 182626879: 'payoutDescriptions'

Successfully processed 684 contests
Total payout position records: 389722
Failed to fetch 5 contests: [182626878, 182626877, 182594772, 182541369, 182626879]


In [33]:
payout_cols = calculate_top_percentile_payouts_vectorized(payout_df)

  return df.groupby('contestKey').apply(contest_analysis).reset_index()


In [41]:
df

Unnamed: 0,contestKey,entryFee,totalPayouts,maximumEntries,maximumEntriesPerUser,name,highest_place_paid,lowest_place_paid,highest_payout_value,lowest_payout_value,rake_pct
0,182453822,20.0,3000000.0,176470,150,NFL $3M Fantasy Football Millionaire [$1M to 1st],1,38825,1000000.0,30.0,15.0
1,182453824,4444.0,2536000.0,634,19,$2.5M MEGA Millionaire [$1M to 1st],1,119,1000000.0,7000.0,10.0
2,182453820,333.0,1250000.0,4170,125,NFL $1.25M Wildcat [$250K to 1st],1,1000,250000.0,500.0,10.0
3,182453821,3.0,800000.0,317082,20,NFL $800K Play-Action [20 Entry Max],1,80725,75000.0,5.0,15.9
4,182471908,111.0,100000.0,1001,11,"NFL $100K 11 Personnel Special [$20K to 1st, 1...",1,232,20000.0,175.0,10.0
...,...,...,...,...,...,...,...,...,...,...,...
678,182472800,1.0,27.0,31,1,NFL $1 Triple Up [Top 9 Win $3],1,9,3.0,3.0,12.9
679,182472799,1.0,27.0,31,1,NFL $1 Triple Up [Top 9 Win $3],1,9,3.0,3.0,12.9
680,182472798,1.0,27.0,31,1,NFL $1 Triple Up [Top 9 Win $3],1,9,3.0,3.0,12.9
681,182472797,1.0,27.0,31,1,NFL $1 Triple Up [Top 9 Win $3],1,9,3.0,3.0,12.9


In [100]:
dd.query(
    """
    select 
    con.name
    ,con.maximumEntries
    ,con.maximumEntriesPerUser
    ,con.entryfee
    ,py.*
    from df con
    left join payout_cols py
        on con.contestkey = py.contestkey
    where 1=1
 --   and maximumEntries between 5000 and 100000
    and name not like '%Triple%'
    and name not like '%Quint%'
        and name not like '%Boost%'
            and name not like '%Satellite%'
            and name not like '%Take All%'
            and entryFee = 5
     --       and maximumEntriesPerUser = 1
            and maximumEntriesPerUser = 1
    order by winner_percentage
    """
).df()

Unnamed: 0,name,maximumEntries,maximumEntriesPerUser,entryFee,contestKey,total_positions,total_prize_pool,top_001_pct_positions,top_001_pct_payout,top_001_pct_percentage,top_1_pct_positions,top_1_pct_payout,top_1_pct_percentage,winner_payout,winner_percentage
0,NFL $8K Huddle [Single Entry],1902,1,5.0,182357248,432.0,8000.0,1.0,750.0,9.375,5.0,1525.0,19.0625,750.0,9.375
1,NFL $100K Huddle [Single Entry],23781,1,5.0,182153738,5805.0,100000.0,1.0,15000.0,15.0,59.0,40350.0,40.35,15000.0,15.0
2,NFL $5 Hundo [Single Entry],100,1,5.0,182152728,22.0,450.0,1.0,70.0,15.555556,1.0,70.0,15.555556,70.0,15.555556
3,NFL $5 Hundo [Single Entry],100,1,5.0,182152729,22.0,450.0,1.0,70.0,15.555556,1.0,70.0,15.555556,70.0,15.555556
4,NFL $5 Hundo [Single Entry],100,1,5.0,182332253,22.0,450.0,1.0,70.0,15.555556,1.0,70.0,15.555556,70.0,15.555556
5,NFL $5 Hundo [Single Entry],100,1,5.0,182351584,22.0,450.0,1.0,70.0,15.555556,1.0,70.0,15.555556,70.0,15.555556
6,NFL $5 Hundo [Single Entry],100,1,5.0,182367124,22.0,450.0,1.0,70.0,15.555556,1.0,70.0,15.555556,70.0,15.555556


In [99]:
dd.query(
    """
    select 
    con.name
    ,con.maximumEntries
    ,con.maximumEntriesPerUser
    ,con.entryfee
    ,py.*
    from df con
    left join payout_cols py
        on con.contestkey = py.contestkey
    where 1=1
      and name like '%Screen%'
      and name like '%75%'
    order by winner_percentage
    """
).df()

Unnamed: 0,name,maximumEntries,maximumEntriesPerUser,entryFee,contestKey,total_positions,total_prize_pool,top_001_pct_positions,top_001_pct_payout,top_001_pct_percentage,top_1_pct_positions,top_1_pct_payout,top_1_pct_percentage,winner_payout,winner_percentage
0,NFL $75K Screen Pass [3 Entry Max],5882,3,15.0,182153770,1283.0,75000.0,1.0,10000.0,13.333333,13.0,32100.0,42.8,10000.0,13.333333


In [90]:
df.columns

Index(['contestKey', 'entryFee', 'totalPayouts', 'maximumEntries',
       'maximumEntriesPerUser', 'name', 'highest_place_paid',
       'lowest_place_paid', 'highest_payout_value', 'lowest_payout_value',
       'pct_1st'],
      dtype='object')

In [24]:
df

Unnamed: 0,contestKey,entryFee,totalPayouts,maximumEntries,maximumEntriesPerUser,name,highest_place_paid,lowest_place_paid,highest_payout_value,lowest_payout_value
0,182453822,20.0,3000000.0,176470,150,NFL $3M Fantasy Football Millionaire [$1M to 1st],1,38825,1000000.0,30.0
1,182453824,4444.0,2536000.0,634,19,$2.5M MEGA Millionaire [$1M to 1st],1,119,1000000.0,7000.0
2,182453820,333.0,1250000.0,4170,125,NFL $1.25M Wildcat [$250K to 1st],1,1000,250000.0,500.0
3,182453821,3.0,800000.0,317082,20,NFL $800K Play-Action [20 Entry Max],1,80725,75000.0,5.0
4,182453825,150.0,500000.0,3703,3,"NFL $500K Power Sweep [$100K to 1st, 3 Entry Max]",1,835,100000.0,225.0
...,...,...,...,...,...,...,...,...,...,...
679,182472800,1.0,27.0,31,1,NFL $1 Triple Up [Top 9 Win $3],1,9,3.0,3.0
680,182472799,1.0,27.0,31,1,NFL $1 Triple Up [Top 9 Win $3],1,9,3.0,3.0
681,182472798,1.0,27.0,31,1,NFL $1 Triple Up [Top 9 Win $3],1,9,3.0,3.0
682,182472797,1.0,27.0,31,1,NFL $1 Triple Up [Top 9 Win $3],1,9,3.0,3.0
