In [None]:
#Author - Dustin Eagar
#January, 2024

## Fantasy Disc Golf Auction Valuation

The objective of this project is to use past results to create a draft auction valuation model for 2024 DGPT Fantasy Disc Golf.

## EDA of Past Results

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import pymc as pm 

plt.style.use('ggplot')

In [6]:
# Load the 2024 tour-card list (columns: Player, pdga_number) that the
# later scraping cells iterate over.
players_24 = pd.read_csv('./data/2024_pdga_tourcards_mpo.csv')
players_24

Unnamed: 0,Player,pdga_number
0,Aaron Gossage,35449
1,Adam Hammes,57365
2,Aidan Scott,99246
3,AJ Carey,61770
4,Albert Tamm,76669
...,...,...
88,Tuomas Hyytiäinen,65715
89,Ty Love,89959
90,Väinö Mäkelä,59635
91,Zach Arlinghaus,65266


## Scraping Data

In [49]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm.notebook import tqdm

In [20]:


# Example inputs for scrape_pdga_table: an event results page and the id of
# the <table> element to pull from it.
url = 'https://www.pdga.com/tour/event/65208'#Music City Open
table_id = 'tournament-stats-0'

def scrape_pdga_table(url, table_id, event=False, timeout=30):
    """Download a page and return one of its HTML tables as a DataFrame.

    Parameters
    ----------
    url : str
        Page to fetch.
    table_id : str
        ``id`` attribute of the ``<table>`` element to extract.
    event : bool, default False
        When True, header cells with no text are named ``rating_1``,
        ``rating_2``, ... in order of appearance (used for the unlabelled
        round-rating columns of an event results table). When False, empty
        header cells are kept as empty strings.
    timeout : float, default 30
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr>`` after the header row. Every cell is the stripped
        text of the corresponding ``<td>`` (strings, not numbers).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the page has no table with the requested id, or the table is empty.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find('table', id=table_id)
    if table is None:
        raise ValueError(f"No table with id '{table_id}' found at {url}")

    rows = table.find_all('tr')
    if not rows:
        raise ValueError(f"Table '{table_id}' at {url} contains no rows")

    # Header row: optionally give blank header cells a generated name.
    headers = []
    rating_idx = 1
    for th in rows[0].find_all('th'):
        header_text = th.text.strip()
        if event and not header_text:
            header_text = f'rating_{rating_idx}'
            rating_idx += 1
        headers.append(header_text)

    # Body rows: stripped text of every <td>.
    data = [[td.text.strip() for td in row.find_all('td')] for row in rows[1:]]

    return pd.DataFrame(data, columns=headers)

# Scrape the event results table and preview the first rows
df = scrape_pdga_table(url=url, table_id=table_id, event=True)
df.head()


Unnamed: 0,Place,Points,Name,PDGA#,Rating,Par,Rd1,rating_1,Rd2,rating_2,Rd3,rating_3,Total,Prize
0,1,1160.0,Simon Lizotte,8332,1037,-23,59,1050,59,1046,54,1068,172,"$8,000"
1,2,1150.0,Anthony Barela,44382,1042,-22,59,1050,57,1059,57,1047,173,"$5,000"
2,3,1140.0,Calvin Heimburg,45971,1042,-21,61,1037,57,1059,56,1054,174,"$2,352"
3,3,1140.0,Gannon Buhr,75412,1041,-21,59,1050,58,1052,57,1047,174,"$2,352"
4,3,1140.0,Chris Dickerson,62467,1042,-21,59,1050,55,1072,60,1025,174,"$2,352"


In [134]:
def ratings_date_parse(s):
    """Return the last date from a date or date-range string.

    For a range such as ``'24-Feb to 26-Feb-2023'`` the part after the word
    "to" is returned (``'26-Feb-2023'``). A string without a standalone "to"
    is returned unchanged apart from surrounding whitespace.

    The split is on the whole word "to" only, so text that merely contains
    those letters (e.g. "October") is not cut in half.

    Parameters
    ----------
    s : str
        Date text as scraped from the page.

    Returns
    -------
    str
        The trailing date portion, stripped of surrounding whitespace.
    """
    # Local import: this cell runs before the notebook's later `import re`.
    import re

    return re.split(r'\bto\b', s)[-1].strip()

def scrape_player_stats(pdga_number, year, tiers=('ES', 'M', 'A', 'B', 'XM')):
    """Scrape a player's season results and round ratings from pdga.com.

    Parameters
    ----------
    pdga_number : str or int
        Player's PDGA number, interpolated into the page URLs.
    year : str or int
        Season used for the results page URL. The ratings come from the
        player's ``/details`` page, whose URL does not include the year, so
        ``ratings`` is not restricted to this season.
    tiers : iterable of str, default ('ES', 'M', 'A', 'B', 'XM')
        Values of the ``Tier`` column to keep; all other rows are dropped
        from both tables.

    Returns
    -------
    (stats, ratings) : tuple of pandas.DataFrame
        ``stats`` has columns Place, Tier, Date, Tournament.
        ``ratings`` has columns Rating, Date, Tournament, Tier, Round.
        In both, ``Date`` is a datetime parsed from the end of the scraped
        date range; the other columns are the scraped strings.
    """
    tiers = list(tiers)

    # Season results table
    url_stats = f'https://www.pdga.com/player/{pdga_number}/stats/{year}'
    table_id_stats = "player-results-mpo"

    stats = scrape_pdga_table(url=url_stats, table_id=table_id_stats)
    # .copy() makes the filtered frame independent, so adding 'Date' below is
    # not an assignment onto a slice (avoids SettingWithCopyWarning).
    stats = stats[stats['Tier'].isin(tiers)].copy()
    stats['Date'] = pd.to_datetime(stats['Dates'].apply(ratings_date_parse))
    stats = stats[['Place', 'Tier', 'Date', 'Tournament']]

    # Round ratings table
    url_ratings = f'https://www.pdga.com/player/{pdga_number}/details'
    table_id_ratings = "player-results-details"

    ratings = scrape_pdga_table(url=url_ratings, table_id=table_id_ratings)
    ratings = ratings[ratings['Tier'].isin(tiers)].copy()
    ratings['Date'] = pd.to_datetime(ratings['Date'].apply(ratings_date_parse))
    ratings = ratings[['Rating', 'Date', 'Tournament', 'Tier', 'Round']]

    return stats, ratings

# Example: results and round ratings for PDGA #45971, 2023 season
stats, ratings = scrape_player_stats(pdga_number='45971', year='2023')
stats

Unnamed: 0,Place,Tier,Date,Tournament
0,1,ES,2023-02-26,DGPT - Las Vegas Challenge presented by Innova
1,3,ES,2023-03-12,DGPT - Prodigy presents WACO
2,3,ES,2023-03-19,DGPT - The Open at Austin presented by Lone St...
3,1,A,2023-03-26,DGPT Silver - Innova Open at The 28th Annual T...
4,3,ES,2023-04-09,DGPT Elite - Music City Open presented by Lone...
5,4,A,2023-04-16,DGPT Silver - Innova Blue Ridge Championship a...
6,16,M,2023-04-23,PDGA Champions Cup Presented by Bushnell
7,1,ES,2023-04-30,DGPT - Play It Again Sports Jonesboro Open pre...
8,6,ES,2023-05-14,DGPT - OTB Open presented by MVP Disc Sports
9,2,A,2023-05-21,DGPT Silver - Beaver State Fling Presented by ...


{'Rating': {0: '1038',
  1: '1082',
  2: '1061',
  3: '1023',
  4: '1041',
  5: '1040',
  6: '1061',
  7: '1072',
  8: '1065',
  9: '1050',
  10: '1046',
  11: '1048',
  12: '1049',
  13: '1049',
  14: '1043',
  15: '1056',
  16: '1081',
  17: '1049',
  18: '1032',
  19: '1088',
  20: '1029',
  21: '1059',
  22: '1081',
  23: '1021',
  24: '1068',
  25: '1068',
  26: '1039',
  27: '1045',
  28: '1066',
  29: '1038',
  30: '1067',
  31: '1041',
  32: '1061',
  33: '1054',
  34: '1047',
  35: '1041',
  36: '1069',
  37: '999',
  38: '1053',
  39: '1057',
  40: '1043',
  41: '1057',
  42: '1026',
  43: '1059',
  44: '1026',
  45: '1071',
  46: '1071',
  47: '1037',
  48: '1033',
  49: '1033',
  50: '1053',
  51: '1019',
  52: '1044',
  53: '1065',
  54: '1051',
  55: '1037',
  56: '1059',
  57: '1054',
  58: '1070',
  59: '1086',
  60: '1064',
  61: '1051',
  62: '1053',
  63: '1045',
  64: '1062',
  65: '1051',
  66: '1053',
  67: '1029',
  68: '1063',
  69: '1062',
  70: '1047',
  71: '

Timestamp('2011-01-16 00:00:00')

In [48]:
# CSS selectors for the summary fields on a player's details page.
# (Each key appears once; a repeated key in a dict literal is silently
# overwritten.)
css_selectors = {'career_events':'.career-events',
                 'join_date':'.join-date',
                 'rating_current':'.current-rating',
                 'career_wins':'.career-wins',
                 'career_earnings':'.career-earnings',
                 'world_rank':'.world-rank'}

# Get current rating and other stats
url = 'https://www.pdga.com/player/81739/details' #Casey White
response = requests.get(url, timeout=30)
response.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
collection_dict = {}
# Parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')
for key, val in css_selectors.items():
    # Find element(s) using the CSS selector
    elements = soup.select(val)
    if elements:
        extracted_text = ' '.join(elem.get_text(strip=True) for elem in elements)
    else:
        extracted_text = 'Element not found'

    collection_dict[key] = extracted_text
    # Print the text as scraped. str.strip(chars) removes any of the given
    # *characters* from both ends (not a prefix), so it must not be used here.
    print(extracted_text)

collection_dict


Career Events:179
Member Since:2016
Current Rating:1018(as of 14-Nov-2023)
Career Wins:22
Career Earnings:$65,331.00
United States Tour Rank:#67


{'career_events': 'Career Events:179',
 'join_date': 'Member Since:2016',
 'rating_current': 'Current Rating:1018(as of 14-Nov-2023)',
 'career_wins': 'Career Wins:22',
 'career_earnings': 'Career Earnings:$65,331.00',
 'world_rank': 'United States Tour Rank:#67'}

In [50]:
def get_player_career_stats(player_pdga, verbose=True, timeout=30):
    """Scrape the career summary fields from a player's PDGA details page.

    Parameters
    ----------
    player_pdga : str or int
        Player's PDGA number, interpolated into the page URL.
    verbose : bool, default True
        Print each extracted text as it is found. Pass False when calling in
        a loop to avoid flooding the output.
    timeout : float, default 30
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict
        ``{'pdga_number': player_pdga}`` plus one ``*_raw`` entry per selector
        (career_events_raw, join_date_raw, rating_current_raw,
        career_wins_raw, career_earnings_raw, world_rank_raw). Each value is
        the text of all matching elements joined by a space, e.g.
        ``'Career Events:179'``, or ``'Element not found'`` when the selector
        matches nothing.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # CSS selectors for the summary fields (each key listed once)
    css_selectors = {'career_events_raw':'.career-events',
                     'join_date_raw':'.join-date',
                     'rating_current_raw':'.current-rating',
                     'career_wins_raw':'.career-wins',
                     'career_earnings_raw':'.career-earnings',
                     'world_rank_raw':'.world-rank'}

    # Get current rating and other stats
    url = f'https://www.pdga.com/player/{player_pdga}/details'
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Parse the HTML content
    collection_dict = {'pdga_number': player_pdga}
    soup = BeautifulSoup(response.content, 'html.parser')
    for key, val in css_selectors.items():
        # Find element(s) using the CSS selector
        elements = soup.select(val)
        if elements:
            extracted_text = ' '.join(elem.get_text(strip=True) for elem in elements)
        else:
            extracted_text = 'Element not found'

        if verbose:
            # Print the text unmodified: str.strip(chars) strips a set of
            # characters from both ends and could eat real content.
            print(extracted_text)
        collection_dict[key] = extracted_text

    return collection_dict
    


get_player_career_stats(player_pdga='81739')

Career Events:179
Member Since:2016
Current Rating:1018(as of 14-Nov-2023)
Career Wins:22
Career Earnings:$65,331.00
United States Tour Rank:#67


{'pdga_number': '81739',
 'career_events_raw': 'Career Events:179',
 'join_date_raw': 'Member Since:2016',
 'rating_current_raw': 'Current Rating:1018(as of 14-Nov-2023)',
 'career_wins_raw': 'Career Wins:22',
 'career_earnings_raw': 'Career Earnings:$65,331.00',
 'world_rank_raw': 'United States Tour Rank:#67'}

In [112]:
def execute_player_stats_scrape():
    """Scrape career summary stats for every row of ``players_24``.

    Calls ``get_player_career_stats`` once per player (one HTTP request each)
    and writes every key of the returned dict into the same-named column of
    the module-level ``players_24`` frame, modifying it in place.

    Returns
    -------
    None
    """
    # `total` belongs to tqdm: iterrows() is a generator with no length, so
    # the progress bar needs the row count. Passing it to
    # get_player_career_stats raised a TypeError.
    for index, row in tqdm(players_24.iterrows(), total=players_24.shape[0]):

        stats = get_player_career_stats(player_pdga=row['pdga_number'])

        for key, value in stats.items():
            players_24.at[index, key] = value

# Reload previously scraped player stats from disk instead of re-scraping.
# The commented-out line below is what writes that file; presumably it was
# run once after execute_player_stats_scrape() -- TODO confirm.
#players_24.to_csv('./data/players_basic_stats.csv')
players_24 = pd.read_csv('./data/players_basic_stats.csv', index_col=0)

In [113]:
# Drop the final row of the frame.
# NOTE(review): the reason is not recorded here, and this cell is not
# idempotent -- every re-run removes one more row.
players_24 = players_24.iloc[:-1]

In [121]:
import re
def extract_numbers(s):
    """Extract the number that follows the last ':', '$' or '#' in a string.

    Examples: ``'Career Events:179'`` -> ``'179'``,
    ``'Career Earnings:$65,331.00'`` -> ``'65,331.00'``,
    ``'United States Tour Rank:#67'`` -> ``'67'``.

    Parameters
    ----------
    s : str
        Scraped "<label>:<value>" text. Non-string values (e.g. NaN from a
        CSV round-trip) are treated as "no number".

    Returns
    -------
    str or None
        The matched number as text, with thousands separators and decimals
        kept exactly as written, or None when nothing matches.
    """
    if not isinstance(s, str):
        return None

    # Greedy '.*' anchors on the LAST marker that is followed by a digit.
    # '\d+' (rather than '\d{1,5}') keeps long numbers written without
    # thousands separators intact instead of truncating them to five digits.
    pattern = r'.*[:$#]\s?(\d+(?:,\d{3})*(?:\.\d+)?)'

    match = re.search(pattern, s)
    if match is None:
        # Return None if no number is found
        return None
    return match.group(1)

In [115]:
# Pull the numeric text out of each scraped "<label>:<value>" column.
# Values stay strings (e.g. '65,331.00'); convert later if numbers are needed.
stat_columns = ['career_events', 'join_date', 'rating_current',
                'career_wins', 'career_earnings', 'world_rank']

for stat_col in stat_columns:
    players_24[stat_col] = players_24[f'{stat_col}_raw'].apply(extract_numbers)

# Drop the raw text columns by name rather than by position (columns[2:8]),
# so a change in column order cannot silently remove the wrong columns.
players_24 = players_24.drop(columns=[f'{stat_col}_raw' for stat_col in stat_columns])

In [120]:
#players_24.to_csv('./data/players_basic_stats_processed.csv')