In [None]:
#Author - Dustin Eagar
#January, 2024

## Fantasy Disc Golf Auction Valuation

The objective of this project is to use past results to create a draft auction valuation model for 2024 DGPT Fantasy Disc Golf.

## EDA of Past Results

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import pymc as pm 
import plotly.express as px

plt.style.use('ggplot')

pd.set_option('display.max_rows', None)

In [None]:
players_24 = pd.read_csv('./data/2024_pdga_tourcards_mpo.csv')
players_24

## Scraping Data

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm.notebook import tqdm
import time

In [None]:


url = 'https://www.pdga.com/tour/event/65208'#Music City Open
table_id = 'tournament-stats-0'

def scrape_pdga_table(url, table_id, event=False, timeout=30):
    """Scrape one HTML table from a PDGA page into a DataFrame of strings.

    Parameters
    ----------
    url : str
        Page to fetch.
    table_id : str
        ``id`` attribute of the ``<table>`` element to extract.
    event : bool, default False
        When True, header cells with no text are named ``rating_1``,
        ``rating_2``, ... in order of appearance.
    timeout : float, default 30
        Seconds to wait for the server; without a timeout a stalled
        connection would block a long crawl indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr>`` after the first; every cell is stripped text.

    Raises
    ------
    requests.RequestException
        On connection errors, timeouts, or an HTTP error status.
    ValueError
        If the page has no table with ``table_id`` or the table has no rows.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find('table', id=table_id)
    if table is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(f"No table with id '{table_id}' found at {url}")

    rows = table.find_all('tr')
    if not rows:
        raise ValueError(f"Table '{table_id}' at {url} has no rows")

    # Extracting the header
    headers = []
    counter = 1  # Counter for naming round rating columns
    for header in rows[0].find_all('th'):
        header_text = header.text.strip()
        if event and not header_text:  # If the header is empty
            header_text = f'rating_{counter}'  # Assign a custom name
            counter += 1
        headers.append(header_text)

    # Extracting the data
    data = [[cell.text.strip() for cell in row.find_all('td')] for row in rows[1:]]

    # Creating the DataFrame
    return pd.DataFrame(data, columns=headers)

# Displaying the DataFrame
#df = scrape_pdga_table(url, table_id, event=True)
#df.head()


In [None]:
def ratings_date_parse(s):
    """Return the end date from a date-range string.

    ``'24-Mar to 26-Mar-2023'`` -> ``'26-Mar-2023'``; a string with no
    range separator is returned stripped of surrounding whitespace.

    The separator is the word "to" surrounded by spaces. Splitting on the
    bare substring ``'to'`` would also cut inside words such as "October".
    """
    parsed = s.rsplit(' to ', 1)[-1].strip()

    return parsed

def scrape_player_stats(pdga_number, years_list):
    """Scrape a player's yearly results and round-rating history from pdga.com.

    Parameters
    ----------
    pdga_number : str or int
        PDGA number used to build the player URLs.
    years_list : iterable
        Years whose ``/stats/<year>`` pages are scraped.

    Returns
    -------
    (stats, ratings) : tuple of pandas.DataFrame
        ``stats`` holds ``Place, Tier, Date, Tournament`` for the tiers kept
        below; ``ratings`` holds ``Rating, Date, Tournament, Tier, Round``.
        Either frame is empty when nothing could be scraped. Scrape errors
        are printed rather than raised so a crawl can continue.

    Notes
    -----
    Sleeps 1.5 seconds after every yearly request.
    """
    table_id_stats = "player-results-mpo"
    table_id_ratings = "player-results-details"
    kept_tiers = ['ES', 'M', 'A', 'B', 'XM']

    # Collect the yearly frames and concatenate once, rather than growing a
    # DataFrame with concat on every iteration.
    yearly_frames = []
    for year in years_list:
        try:
            url_stats = f'https://www.pdga.com/player/{str(pdga_number)}/stats/{year}'
            yearly_frames.append(scrape_pdga_table(url=url_stats, table_id=table_id_stats))
        except Exception as e:
            print(e)
        time.sleep(1.5)

    stats = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

    if stats.shape[0] > 0:
        # .copy() so the column assignment below writes to an independent
        # frame and not to a slice of the concatenated one.
        stats = stats[stats['Tier'].isin(kept_tiers)].copy()
        stats['Date'] = pd.to_datetime(stats['Dates'].apply(ratings_date_parse))

        stats = stats[['Place', 'Tier', 'Date', 'Tournament']]

    url_ratings = f'https://www.pdga.com/player/{str(pdga_number)}/details'

    try:
        ratings = scrape_pdga_table(url=url_ratings, table_id=table_id_ratings)
        ratings = ratings[ratings['Tier'].isin(kept_tiers)].copy()
        ratings['Date'] = pd.to_datetime(ratings['Date'].apply(ratings_date_parse))
        ratings = ratings[['Rating', 'Date', 'Tournament', 'Tier', 'Round']]
    except Exception as e:
        ratings = pd.DataFrame()
        print(f'{e}, {pdga_number}')

    return stats, ratings

stats, ratings = scrape_player_stats('76669', ['2020','2021','2022','2023'])
stats

In [None]:
ratings

In [None]:
#css selectors
# Each key appears once; a repeated key in a dict literal is silently
# collapsed to a single entry.
css_selectors = {'career_events':'.career-events',
                 'join_date':'.join-date',
                 'rating_current':'.current-rating',
                 'career_wins':'.career-wins',
                 'career_earnings':'.career-earnings',
                 'world_rank':'.world-rank'}

#Get current rating and other stats
url = 'https://www.pdga.com/player/81739/details' #Casey White
response = requests.get(url, timeout=30)
collection_dict = {}
# Parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')
for key, val in css_selectors.items():
    # Find element using CSS selector
    elements = soup.select(val)
    if elements:
        extracted_text = ' '.join([elem.get_text(strip=True) for elem in elements])
    else:
        extracted_text = 'Element not found'

    collection_dict[key]=extracted_text
    # Print the text unmodified. str.strip(chars) removes any of the given
    # *characters* from both ends (not a prefix/suffix), which truncated
    # 'Element not found' to 'Element not fou'.
    print(extracted_text)

collection_dict


In [None]:
def get_player_career_stats (player_pdga, timeout=30):
    """Scrape the career summary fields from a player's PDGA details page.

    Parameters
    ----------
    player_pdga : str or int
        PDGA number used to build the details-page URL.
    timeout : float, default 30
        Seconds to wait for the server; without it a stalled connection
        would block the whole scrape loop indefinitely.

    Returns
    -------
    dict
        ``'pdga_number'`` (as passed in) plus one ``*_raw`` entry per CSS
        selector, holding the matched elements' text joined by spaces, or
        ``'Element not found'`` when the selector matches nothing.
    """
    # Raw-field name -> CSS selector on the details page (each key once).
    css_selectors = {'career_events_raw':'.career-events',
                     'join_date_raw':'.join-date',
                     'rating_current_raw':'.current-rating',
                     'career_wins_raw':'.career-wins',
                     'career_earnings_raw':'.career-earnings',
                     'world_rank_raw':'.world-rank'}

    #Get current rating and other stats
    url = f'https://www.pdga.com/player/{str(player_pdga)}/details'
    response = requests.get(url, timeout=timeout)

    # Parse the HTML content
    collection_dict = {'pdga_number':player_pdga}
    soup = BeautifulSoup(response.content, 'html.parser')
    for key, val in css_selectors.items():
        # Find element using CSS selector
        elements = soup.select(val)
        if elements:
            extracted_text = ' '.join([elem.get_text(strip=True) for elem in elements])
        else:
            extracted_text = 'Element not found'

        collection_dict[key]=extracted_text

    return collection_dict
    


get_player_career_stats('81739')       

Career Events:206
Member Since:2016
Current Rating:1031(as of 12-Nov-2024)
Career Wins:23
Career Earnings:$90,371.00
Element not fou


{'pdga_number': '81739',
 'career_events_raw': 'Career Events:206',
 'join_date_raw': 'Member Since:2016',
 'rating_current_raw': 'Current Rating:1031(as of 12-Nov-2024)',
 'career_wins_raw': 'Career Wins:23',
 'career_earnings_raw': 'Career Earnings:$90,371.00',
 'world_rank_raw': 'Element not found'}

In [None]:
def execute_player_stats_scrape():
    """Fill ``players_24`` in place with each player's scraped career fields.

    For every row, the dict returned by ``get_player_career_stats`` is
    written back cell by cell, creating columns as new keys appear.
    """
    n_players = players_24.shape[0]
    for idx, player in tqdm(players_24.iterrows(), total=n_players):

        career = get_player_career_stats(player_pdga=player['pdga_number'])

        for field, text in career.items():
            players_24.at[idx, field] = text

#players_24.to_csv('./data/players_basic_stats.csv')
players_24 = pd.read_csv('./data/players_basic_stats.csv', index_col=0)

In [None]:
# Drop the last row of the table.
# NOTE(review): the reason is not recorded here — confirm what that row is.
# This cell is not idempotent: every re-run removes one more row.
players_24 = players_24[:-1]

In [None]:
import re
def extract_numbers(s):
    """Extract the number that follows the last ':', '$' or '#' in a string.

    Examples: ``'Career Events:206'`` -> ``'206'``,
    ``'Career Earnings:$90,371.00'`` -> ``'90,371.00'``.

    Parameters
    ----------
    s : str
        Scraped label/value text. Non-string values (e.g. NaN from a
        missing CSV cell) are accepted and yield ``None``.

    Returns
    -------
    str or None
        The matched number as text (thousands separators and decimals
        kept), or ``None`` when no number follows a marker character.
    """
    # Missing cells arrive as float NaN; treat them like "no match"
    # instead of letting the regex raise TypeError.
    if not isinstance(s, str):
        return None

    # Leading digits of any length, optional ",ddd" groups, optional decimals.
    # (A capped "\d{1,5}" first alternative would silently truncate
    # unformatted numbers with six or more digits.)
    pattern = r'.*[:$#]\s?(\d+(?:,\d{3})*(?:\.\d+)?)'

    match = re.search(pattern, s)
    if match:
        return match.group(1)

    #Return none if no matches are found
    return None

In [None]:
# Parse the numeric part out of every scraped "<label>:<value>" column.
# Mapping is parsed column -> raw source column; insertion order fixes the
# order in which the new columns are appended.
parsed_from_raw = {
    'career_events': 'career_events_raw',
    'join_date': 'join_date_raw',
    'rating_current': 'rating_current_raw',
    'career_wins': 'career_wins_raw',
    'career_earnings': 'career_earnings_raw',
    'world_rank': 'world_rank_raw',
}
for parsed_col, raw_col in parsed_from_raw.items():
    players_24[parsed_col] = players_24[raw_col].apply(extract_numbers)

# Remove the six columns at positions 2..7
# (presumably the *_raw columns — confirm against the CSV layout).
columns_to_drop = players_24.columns[2:8]
players_24 = players_24.drop(columns=columns_to_drop)

In [None]:
#players_24.to_csv('./data/players_basic_stats_processed.csv')

In [None]:
import time
def execute_player_stats_crawl():
    """Crawl 2022-2023 results and rating history for every player in ``players_24``.

    Each player's two scraped frames are converted to column-oriented dicts,
    wrapped in a one-element list, and stored in the ``stats_data`` and
    ``ratings_data`` cells of that player's row. Pauses 1.5 seconds between
    players.
    """
    crawl_years = ['2022', '2023']
    n_players = players_24.shape[0]
    for idx, player in tqdm(players_24.iterrows(), total=n_players):

        results, round_ratings = scrape_player_stats(pdga_number=player['pdga_number'], years_list=crawl_years)

        players_24.at[idx, 'stats_data'] = [results.to_dict(orient='list')]
        players_24.at[idx, 'ratings_data'] = [round_ratings.to_dict(orient='list')]
        time.sleep(1.5)

execute_player_stats_crawl()


In [None]:
players_24.to_csv('./players_crawled.csv')

In [None]:
players_24

In [None]:
pd.DataFrame(players_24.iloc[0]['ratings_data'][0])

In [None]:
def extract_ratings_vec(input_data, tiers:list=None, cutoff_date=None):
    '''
    Extract a filtered vector of round ratings from
    a player's rating history object. Include round ratings
    in specified tiers and after cutoff date

    input_data - dictionary object of ratings scraped from
    ratings details (column name -> list of values), read into a
    dataframe. A one-element list wrapping that dictionary is also
    accepted, so the function can be applied directly to a column whose
    cells hold ``[ratings_dict]``.
    tiers - list of strings; falsy means no tier filter
    cutoff_date - string date, 'yyyy-mm-dd', e.g. '2022-01-31';
    only rounds strictly after this date are kept

    Returns a numpy array of the 'Rating' values (left as stored, not
    converted to numbers); empty when there is no rating data.
    '''
    # Unwrap the single-element list wrapper if present.
    if (isinstance(input_data, (list, tuple))
            and len(input_data) == 1
            and isinstance(input_data[0], dict)):
        input_data = input_data[0]

    df = pd.DataFrame(input_data)

    # No ratings at all (e.g. an empty dict): nothing to filter.
    if df.empty:
        return df.get('Rating', pd.Series(dtype=object)).values

    df['Date'] = pd.to_datetime(df['Date'])

    if cutoff_date:
        cutoff = pd.Timestamp(cutoff_date)
        df = df[df['Date'] > cutoff]

    if tiers:
        df = df[df['Tier'].isin(tiers)]

    return df['Rating'].values

#Test
extract_ratings_vec(players_24.iloc[0]['ratings_data'][0],
                    tiers=['ES', 'M', 'XM', 'A'],
                    cutoff_date='2022-01-01')

In [None]:
#Need to fix this
players_24['ratings_data'].apply(extract_ratings_vec)

In [None]:
pd.DataFrame(players_24['stats_data'].values[0][0]).dtypes

In [None]:
# Fantasy points awarded per event by finishing place (keys 1 through 29).
# Places with no entry here have no mapped value.
# NOTE(review): places 17 and 18 both map to 24 while every other step
# decreases — confirm against the official scoring table (17 may be a typo).
points_map = {
    1:300,
    2:245,
    3:195,
    4:155,
    5:135,
    6:110,
    7:97,
    8:85,
    9:75,
    10:64,
    11:56,
    12:48,
    13:42,
    14:36,
    15:33,
    16:30,
    17:24,
    18:24,
    19:21,
    20:18,
    21:15,
    22:12,
    23:10,
    24:8,
    25:6,
    26:4,
    27:3,
    28:2,
    29:1,   
}

def calculate_points(input_obj, points_map, year):
    """Total a player's fantasy points for one season.

    Parameters
    ----------
    input_obj : sequence
        One-element sequence whose first item is the player's results as a
        column-oriented dict with 'Place', 'Tier' and 'Date' columns.
    points_map : dict
        Finishing place -> points. Places without an entry score nothing.
    year : int
        Calendar year of the events to count.

    Returns
    -------
    float
        Sum of event points over 'M', 'ES' and 'XM' events in ``year``;
        'M' and 'XM' events count 1.5x. ``0.0`` when there are no
        qualifying results.
    """
    df = pd.DataFrame(input_obj[0])
    if df.empty:
        # Player with no scraped results (e.g. an empty dict).
        return 0.0

    is_scoring_tier = df['Tier'].isin(['M', 'ES', 'XM'])
    # to_datetime leaves datetime columns unchanged and also accepts
    # date strings.
    in_year = pd.to_datetime(df['Date']).dt.year == year
    df = df[is_scoring_tier & in_year]
    if df.empty:
        return 0.0

    # Non-numeric places (anything that is not a finishing position) become
    # NaN and therefore earn no points, instead of raising.
    place = pd.to_numeric(df['Place'], errors='coerce')
    event_points = place.map(points_map).astype(float)

    # Apply the 1.5x multiplier as a float factor rather than an in-place
    # update of an integer column.
    multiplier = df['Tier'].isin(['M', 'XM']).map({True: 1.5, False: 1.0})

    total_points = float((event_points * multiplier).sum())

    return total_points


In [None]:
# Season fantasy-point totals, most recent season first so the new columns
# are appended in the order fantasy_points_23, fantasy_points_22.
season_columns = {'fantasy_points_23': 2023, 'fantasy_points_22': 2022}
for points_col, season in season_columns.items():
    players_24[points_col] = players_24['stats_data'].apply(
        lambda x, season=season: calculate_points(x, points_map=points_map, year=season))

In [None]:
players_24['rating_current'] = players_24['rating_current'].astype(float)

In [None]:
px.scatter(players_24, x='rating_current', y='fantasy_points_23', hover_name='Player')

In [None]:
players_24['rating_current'].corr(players_24['fantasy_points_23'])

In [None]:
sns.histplot(players_24['fantasy_points_23'], bins=50)

In [None]:
px.scatter(players_24, x='fantasy_points_22', y='fantasy_points_23', hover_name='Player')

In [None]:
#weighted average of 22 and 23 fantasy points
from scipy import stats
import numpy as np

# Blend the two seasons into one score: 65% weight on 2023, 35% on 2022.
players_24['composite_fp'] = .65*players_24['fantasy_points_23'] + .35*players_24['fantasy_points_22']
# Percentile of each player's composite score within the whole field, rounded to one decimal.
# `stats` here is scipy.stats, imported at the top of this cell.
players_24['composite_percentile'] = players_24['composite_fp'].apply(lambda x : np.round(stats.percentileofscore(players_24['composite_fp'], x), 1))
# NOTE(review): 3464 is an unexplained constant — presumably one player's composite_fp
# used as the reference (the column name suggests "Calvin"); confirm, and consider deriving it from the data.
players_24['frac_calvin'] = players_24['composite_fp']/3464

In [None]:
sns.histplot(players_24['composite_fp'], bins=50)

In [None]:
# Draft board: drop the scraped payload and identifier columns, then order players by composite score, best first.
draft = players_24.drop(columns=['stats_data', 'ratings_data', 'career_earnings', 'world_rank', 'pdga_number']).sort_values(by='composite_fp', ascending=False)
# Display only: reset_index returns a new frame and the result is not assigned, so `draft` keeps its original index.
draft.reset_index(drop=True)