In [None]:
#Author - Dustin Eagar
#January, 2024

## Fantasy Disc Golf Auction Valuation

The objective of this project is to use past tournament results to build an auction-draft valuation model for 2024 DGPT (Disc Golf Pro Tour) Fantasy Disc Golf.

## EDA of Past Results

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import pymc as pm 

# Apply matplotlib's 'ggplot' style sheet to the figures drawn in this notebook.
plt.style.use('ggplot')

In [6]:
# Load the 2024 MPO tour-card list from the local CSV
# (the displayed output shows `Player` and `pdga_number` columns).
players_24 = pd.read_csv('./data/2024_pdga_tourcards_mpo.csv')
players_24

Unnamed: 0,Player,pdga_number
0,Aaron Gossage,35449
1,Adam Hammes,57365
2,Aidan Scott,99246
3,AJ Carey,61770
4,Albert Tamm,76669
...,...,...
88,Tuomas Hyytiäinen,65715
89,Ty Love,89959
90,Väinö Mäkelä,59635
91,Zach Arlinghaus,65266


## Scraping Data

In [20]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Event page for the Music City Open (event id 65208 in the URL).
url = 'https://www.pdga.com/tour/event/65208'
# HTML id of the <table> element to pull from that page.
table_id = 'tournament-stats-0'

def scrape_pdga_table(url, table_id, event=False, timeout=30):
    """Download a page and return one of its HTML tables as a DataFrame.

    Parameters
    ----------
    url : str
        Address of the page to download.
    table_id : str
        ``id`` attribute of the ``<table>`` element to extract.
    event : bool, default False
        When True, header cells with no text are named ``rating_1``,
        ``rating_2``, ... in order of appearance. When False, blank headers
        are kept as empty strings.
    timeout : float, default 30
        Seconds to wait for the server; without a timeout ``requests`` can
        block indefinitely on an unresponsive host.

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr>`` after the first; every value is the stripped
        cell text (strings, no type conversion). An empty DataFrame is
        returned when the table contains no rows.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    ValueError
        If the page has no ``<table>`` with the requested id.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on error pages instead of trying to parse them.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find('table', id=table_id)
    if table is None:
        raise ValueError(f"No table with id '{table_id}' found at {url}")

    rows = table.find_all('tr')
    if not rows:
        return pd.DataFrame()

    # Header row: the first <tr> supplies the column names.
    headers = []
    rating_counter = 1  # numbers the blank (rating) columns when event=True
    for header in rows[0].find_all('th'):
        header_text = header.text.strip()
        if event and not header_text:
            header_text = f'rating_{rating_counter}'
            rating_counter += 1
        headers.append(header_text)

    # Body rows: stripped text of every <td> cell.
    data = [
        [cell.text.strip() for cell in row.find_all('td')]
        for row in rows[1:]
    ]

    return pd.DataFrame(data, columns=headers)

# Scrape the event table (blank headers become rating_N) and preview the first rows.
df = scrape_pdga_table(url=url, table_id=table_id, event=True)
df.head()


Unnamed: 0,Place,Points,Name,PDGA#,Rating,Par,Rd1,rating_1,Rd2,rating_2,Rd3,rating_3,Total,Prize
0,1,1160.0,Simon Lizotte,8332,1037,-23,59,1050,59,1046,54,1068,172,"$8,000"
1,2,1150.0,Anthony Barela,44382,1042,-22,59,1050,57,1059,57,1047,173,"$5,000"
2,3,1140.0,Calvin Heimburg,45971,1042,-21,61,1037,57,1059,56,1054,174,"$2,352"
3,3,1140.0,Gannon Buhr,75412,1041,-21,59,1050,58,1052,57,1047,174,"$2,352"
4,3,1140.0,Chris Dickerson,62467,1042,-21,59,1050,55,1072,60,1025,174,"$2,352"


In [22]:
def scrape_player_stats(pdga_number, year):
    """Scrape two tables for one player from pdga.com.

    Parameters
    ----------
    pdga_number : str or int
        PDGA number, interpolated into the player URLs.
    year : str or int
        Season, interpolated into the stats URL.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(stats, ratings)``: the ``player-results-mpo`` table from the
        player's ``/stats/{year}`` page, followed by the
        ``player-results-details`` table from the player's ``/details`` page.
    """
    # Results for the requested season.
    season_results = scrape_pdga_table(
        url=f'https://www.pdga.com/player/{pdga_number}/stats/{year}',
        table_id="player-results-mpo",
    )

    # Table from the player's details page.
    rating_details = scrape_pdga_table(
        url=f'https://www.pdga.com/player/{pdga_number}/details',
        table_id="player-results-details",
    )

    return season_results, rating_details

# Example: 2023 results for PDGA #45971 (Calvin Heimburg in the event table above).
stats, ratings = scrape_player_stats(pdga_number='45971', year='2023')
stats

Unnamed: 0,Place,Points,Tournament,Tier,Dates,Prize
0,1,1190.0,DGPT - Las Vegas Challenge presented by Innova,ES,23-Feb to 26-Feb-2023,"$7,500"
1,3,1140.0,DGPT - Prodigy presents WACO,ES,10-Mar to 12-Mar-2023,"$2,500"
2,3,1140.0,DGPT - The Open at Austin presented by Lone St...,ES,17-Mar to 19-Mar-2023,"$2,325"
3,1,1240.0,DGPT Silver - Innova Open at The 28th Annual T...,A,24-Mar to 26-Mar-2023,"$4,000"
4,3,1140.0,DGPT Elite - Music City Open presented by Lone...,ES,07-Apr to 09-Apr-2023,"$2,352"
5,4,1430.0,DGPT Silver - Innova Blue Ridge Championship a...,A,14-Apr to 16-Apr-2023,"$1,450"
6,16,1395.0,PDGA Champions Cup Presented by Bushnell,M,20-Apr to 23-Apr-2023,"$1,738"
7,1,1120.0,DGPT - Play It Again Sports Jonesboro Open pre...,ES,28-Apr to 30-Apr-2023,"$8,000"
8,6,1070.0,DGPT - OTB Open presented by MVP Disc Sports,ES,12-May to 14-May-2023,"$2,112"
9,2,1110.0,DGPT Silver - Beaver State Fling Presented by ...,A,19-May to 21-May-2023,"$1,750"


In [25]:
# Get the career-events count from a player's details page.
url = 'https://www.pdga.com/player/81739/details'  # Casey White
response = requests.get(url, timeout=30)
# Fail on error pages instead of parsing them.
response.raise_for_status()

# Parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')

# Find element(s) using a CSS selector
elements = soup.select('.career-events')
if elements:
    extracted_text = ' '.join(elem.get_text(strip=True) for elem in elements)
else:
    extracted_text = 'Element not found'

# Drop the leading label. str.strip('Career Events:') would instead remove any
# of those *characters* from both ends (turning 'Element not found' into
# 'lement not found'), so the prefix is removed explicitly.
label = 'Career Events:'
if extracted_text.startswith(label):
    extracted_text = extracted_text[len(label):].strip()

print(extracted_text)


# CSS selectors for player-page fields, keyed by a short field name.
# (The original literal listed 'career_events' twice; a dict keeps a single
# entry per key, so the redundant line is removed without changing the result.)
css_selectors = {
    'career_events': '.career-events',
    'join_date': '.join-date',
    'rating_current': '.current-rating',
    'career_wins': '.career-wins',
    'career_earnings': '.career-earnings',
    'world_rank': '.world-rank',
}


179
